Skip to contents

Introduction

{anndataR} allows users to work with .h5ad files, access various slots in the datasets and convert these files to SingleCellExperiment objects or Seurat objects, and vice versa. This enables users to move data easily between the different programming languages and analysis ecosystems needed to perform single-cell data analysis. This package differs from zellkonverter because it reads and writes these .h5ad files natively in R, and allows conversion to and from Seurat objects as well.

Check out ?anndataR for a full list of the functions provided by this package.

Installation

Install using either BiocManager or from GitHub using pak:

if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("anndataR")
if (!require("pak", quietly = TRUE)) {
  install.packages("pak")
}
pak::pak("scverse/anndataR")

Usage

Here’s a quick example of how to use {anndataR}.

First, we fetch an example .h5ad file included in the package:

library(anndataR)

h5ad_path <- system.file("extdata", "example.h5ad", package = "anndataR")

Read an h5ad file in memory:

adata <- read_h5ad(h5ad_path)

Read an h5ad file on disk:

adata <- read_h5ad(h5ad_path, as = "HDF5AnnData")

View structure:

adata
#> AnnData object with n_obs × n_vars = 50 × 100
#>     obs: 'Float', 'FloatNA', 'Int', 'IntNA', 'Bool', 'BoolNA', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'leiden'
#>     var: 'String', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
#>     uns: 'Bool', 'BoolNA', 'Category', 'DataFrameEmpty', 'Int', 'IntNA', 'IntScalar', 'Sparse1D', 'String', 'String2D', 'StringScalar', 'hvg', 'leiden', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'umap'
#>     obsm: 'X_pca', 'X_umap'
#>     varm: 'PCs'
#>     layers: 'counts', 'csc_counts', 'dense_X', 'dense_counts'
#>     obsp: 'connectivities', 'distances'

Access AnnData slots:

dim(adata$X)
#> [1]  50 100
adata$obs[1:5, 1:6]
#>         Float FloatNA Int IntNA  Bool BoolNA
#> Cell000 42.42     NaN   0    NA FALSE  FALSE
#> Cell001 42.42   42.42   1    42  TRUE     NA
#> Cell002 42.42   42.42   2    42  TRUE   TRUE
#> Cell003 42.42   42.42   3    42  TRUE   TRUE
#> Cell004 42.42   42.42   4    42  TRUE   TRUE
adata$var[1:5, 1:6]
#>          String n_cells_by_counts mean_counts log1p_mean_counts
#> Gene000 String0                44        1.94          1.078410
#> Gene001 String1                42        2.04          1.111858
#> Gene002 String2                43        2.12          1.137833
#> Gene003 String3                41        1.72          1.000632
#> Gene004 String4                42        2.06          1.118415
#>         pct_dropout_by_counts total_counts
#> Gene000                    12           97
#> Gene001                    16          102
#> Gene002                    14          106
#> Gene003                    18           86
#> Gene004                    16          103

Interoperability

Convert the AnnData object to a SingleCellExperiment object:

sce <- adata$as_SingleCellExperiment()
sce
#> class: SingleCellExperiment 
#> dim: 100 50 
#> metadata(18): Bool BoolNA ... rank_genes_groups umap
#> assays(5): counts csc_counts dense_X dense_counts X
#> rownames(100): Gene000 Gene001 ... Gene098 Gene099
#> rowData names(11): String n_cells_by_counts ... dispersions
#>   dispersions_norm
#> colnames(50): Cell000 Cell001 ... Cell048 Cell049
#> colData names(11): Float FloatNA ... log1p_total_counts leiden
#> reducedDimNames(2): X_pca X_umap
#> mainExpName: NULL
#> altExpNames(0):

Convert the AnnData object to a Seurat object:

obj <- adata$as_Seurat()
obj
#> An object of class Seurat 
#> 100 features across 50 samples within 1 assay 
#> Active assay: RNA (100 features, 0 variable features)
#>  5 layers present: counts, csc_counts, dense_X, dense_counts, X
#>  2 dimensional reductions calculated: X_pca, X_umap

Convert a SingleCellExperiment object to an AnnData object:

adata <- as_AnnData(sce)
adata
#> AnnData object with n_obs × n_vars = 50 × 100
#>     obs: 'Float', 'FloatNA', 'Int', 'IntNA', 'Bool', 'BoolNA', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'leiden'
#>     var: 'String', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
#>     uns: 'Bool', 'BoolNA', 'Category', 'DataFrameEmpty', 'Int', 'IntNA', 'IntScalar', 'Sparse1D', 'String', 'String2D', 'StringScalar', 'hvg', 'leiden', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'umap'
#>     obsm: 'X_pca', 'X_umap'
#>     varm: 'X_pca'
#>     layers: 'counts', 'csc_counts', 'dense_X', 'dense_counts', 'X'
#>     obsp: 'connectivities', 'distances'

Convert a Seurat object to an AnnData object:

adata <- as_AnnData(obj)
adata
#> AnnData object with n_obs × n_vars = 50 × 100
#>     obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'Float', 'FloatNA', 'Int', 'IntNA', 'Bool', 'BoolNA', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'leiden'
#>     var: 'String', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
#>     uns: 'Bool', 'BoolNA', 'Category', 'DataFrameEmpty', 'Int', 'IntNA', 'IntScalar', 'Sparse1D', 'String', 'String2D', 'StringScalar', 'hvg', 'leiden', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'umap'
#>     obsm: 'X_pca', 'X_umap'
#>     varm: 'X_pca'
#>     layers: 'counts', 'csc_counts', 'dense_X', 'dense_counts', 'X'
#>     obsp: 'connectivities', 'distances'

Manually create an object

adata <- AnnData(
  X = matrix(rnorm(100), nrow = 10),
  obs = data.frame(
    cell_type = factor(rep(c("A", "B"), each = 5))
  ),
  var = data.frame(
    gene_name = paste0("gene_", 1:10)
  )
)

adata
#> AnnData object with n_obs × n_vars = 10 × 10
#>     obs: 'cell_type'
#>     var: 'gene_name'

Write to disk:

Write an AnnData object to disk:

tmpfile <- tempfile(fileext = ".h5ad")
write_h5ad(adata, tmpfile)

Write an SCE object to disk:

tmpfile <- tempfile(fileext = ".h5ad")
write_h5ad(sce, tmpfile)

Write a Seurat object to disk:

tmpfile <- tempfile(fileext = ".h5ad")
write_h5ad(obj, tmpfile)

Session info

sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] anndataR_0.1.0.9009         SingleCellExperiment_1.30.1
#>  [3] SummarizedExperiment_1.38.1 Biobase_2.68.0             
#>  [5] GenomicRanges_1.60.0        GenomeInfoDb_1.44.0        
#>  [7] IRanges_2.42.0              S4Vectors_0.46.0           
#>  [9] BiocGenerics_0.54.0         generics_0.1.4             
#> [11] MatrixGenerics_1.20.0       matrixStats_1.5.0          
#> [13] SeuratObject_5.1.0          sp_2.2-0                   
#> [15] BiocStyle_2.36.0           
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3      jsonlite_2.0.0          magrittr_2.0.3         
#>   [4] spatstat.utils_3.1-4    farver_2.1.2            rmarkdown_2.29         
#>   [7] fs_1.6.6                ragg_1.4.0              vctrs_0.6.5            
#>  [10] ROCR_1.0-11             spatstat.explore_3.4-3  htmltools_0.5.8.1      
#>  [13] S4Arrays_1.8.1          SparseArray_1.8.0       sass_0.4.10            
#>  [16] sctransform_0.4.2       parallelly_1.45.0       KernSmooth_2.23-26     
#>  [19] bslib_0.9.0             htmlwidgets_1.6.4       desc_1.4.3             
#>  [22] ica_1.0-3               plyr_1.8.9              plotly_4.11.0          
#>  [25] zoo_1.8-14              cachem_1.1.0            igraph_2.1.4           
#>  [28] mime_0.13               lifecycle_1.0.4         pkgconfig_2.0.3        
#>  [31] Matrix_1.7-3            R6_2.6.1                fastmap_1.2.0          
#>  [34] GenomeInfoDbData_1.2.14 fitdistrplus_1.2-4      future_1.58.0          
#>  [37] shiny_1.11.1            digest_0.6.37           patchwork_1.3.1        
#>  [40] tensor_1.5.1            Seurat_5.3.0            RSpectra_0.16-2        
#>  [43] irlba_2.3.5.1           textshaping_1.0.1       progressr_0.15.1       
#>  [46] spatstat.sparse_3.1-0   polyclip_1.10-7         httr_1.4.7             
#>  [49] abind_1.4-8             compiler_4.5.1          bit64_4.6.0-1          
#>  [52] fastDummies_1.7.5       MASS_7.3-65             DelayedArray_0.34.1    
#>  [55] tools_4.5.1             lmtest_0.9-40           httpuv_1.6.16          
#>  [58] future.apply_1.20.0     goftest_1.2-3           glue_1.8.0             
#>  [61] nlme_3.1-168            promises_1.3.3          grid_4.5.1             
#>  [64] Rtsne_0.17              cluster_2.1.8.1         reshape2_1.4.4         
#>  [67] hdf5r_1.3.12            spatstat.data_3.1-6     gtable_0.3.6           
#>  [70] tidyr_1.3.1             data.table_1.17.8       XVector_0.48.0         
#>  [73] spatstat.geom_3.4-1     RcppAnnoy_0.0.22        ggrepel_0.9.6          
#>  [76] RANN_2.6.2              pillar_1.11.0           stringr_1.5.1          
#>  [79] spam_2.11-1             RcppHNSW_0.6.0          later_1.4.2            
#>  [82] splines_4.5.1           dplyr_1.1.4             lattice_0.22-7         
#>  [85] deldir_2.0-4            survival_3.8-3          bit_4.6.0              
#>  [88] tidyselect_1.2.1        miniUI_0.1.2            pbapply_1.7-2          
#>  [91] knitr_1.50              gridExtra_2.3           bookdown_0.43          
#>  [94] scattermore_1.2         xfun_0.52               stringi_1.8.7          
#>  [97] UCSC.utils_1.4.0        lazyeval_0.2.2          yaml_2.3.10            
#> [100] evaluate_1.0.4          codetools_0.2-20        tibble_3.3.0           
#> [103] BiocManager_1.30.26     cli_3.6.5               uwot_0.2.3             
#> [106] xtable_1.8-4            reticulate_1.42.0       systemfonts_1.2.3      
#> [109] jquerylib_0.1.4         Rcpp_1.1.0              spatstat.random_3.4-1  
#> [112] globals_0.18.0          png_0.1-8               spatstat.univar_3.1-4  
#> [115] parallel_4.5.1          pkgdown_2.1.3           ggplot2_3.5.2          
#> [118] dotCall64_1.2           listenv_0.9.1           viridisLite_0.4.2      
#> [121] scales_1.4.0            ggridges_0.5.6          purrr_1.1.0            
#> [124] crayon_1.5.3            rlang_1.1.6             cowplot_1.2.0