Skip to contents

Generate a mock synthetic dataset with different types of columns and layers. This is primarily designed for use in tests, examples, vignettes and other documentation but is also provided to users for creating reproducible examples.

Use get_generator_types() to get the available types for each slot.

Usage

generate_dataset(
  n_obs = 10L,
  n_vars = 20L,
  x_type = "numeric_matrix",
  layer_types = get_generator_types(slot = "layers"),
  obs_types = get_generator_types(slot = "obs"),
  var_types = get_generator_types(slot = "var"),
  obsm_types = get_generator_types(slot = "obsm"),
  varm_types = get_generator_types(slot = "varm"),
  obsp_types = get_generator_types(slot = "obsp"),
  varp_types = get_generator_types(slot = "varp"),
  uns_types = get_generator_types(slot = "uns"),
  example = FALSE,
  format = c("list", "AnnData", "SingleCellExperiment", "Seurat")
)

get_generator_types(example = FALSE, slot = NULL)

Arguments

n_obs

Number of observations to generate

n_vars

Number of variables to generate

x_type

Type of matrix to generate for X

layer_types

Types of matrices to generate for layers

obs_types

Types of vectors to generate for obs

var_types

Types of vectors to generate for var

obsm_types

Types of matrices to generate for obsm

varm_types

Types of matrices to generate for varm

obsp_types

Types of matrices to generate for obsp

varp_types

Types of matrices to generate for varp

uns_types

Types of objects to generate for uns

example

If TRUE, the types will be overridden with a small subset of types. This is useful for documentation.

format

Object type to output, one of "list", "AnnData", "SingleCellExperiment", or "Seurat".

slot

Which slot to return types for. If NULL, a named list of the types for all slots is returned.

Value

For generate_dataset(), an object of the type specified by format, containing the generated dataset

For get_generator_types(), a named list of character vectors (one per slot), or a single character vector if slot is not NULL

Details

To generate no data for a given slot, set the matching type argument to NULL or an empty vector, e.g. obs_types = c() will generate an empty obs data frame.

When generating SingleCellExperiment or Seurat objects, only some of the generated slots will be included in the output object. To generate a more complete object, use format = "AnnData" followed by adata$as_SingleCellExperiment() or adata$as_Seurat().

Use get_generator_types() to get a list of the available types for each slot, or for a specific slot by setting slot. If example = TRUE, only the example types are returned.

Examples

# Generate all types as a list
dummy <- generate_dataset()

# Generate the example types
dummy_example <- generate_dataset(example = TRUE)

# Generate an AnnData
dummy_anndata <- generate_dataset(format = "AnnData", example = TRUE)

# Generate a SingleCellExperiment
if (rlang::is_installed("SingleCellExperiment")) {
  dummy_sce <- generate_dataset(format = "SingleCellExperiment", example = TRUE)
}

# Generate a Seurat object
if (rlang::is_installed("SeuratObject")) {
  dummy_seurat <- generate_dataset(format = "Seurat", example = TRUE)
}
#> Warning: Data is of class matrix. Coercing to dgCMatrix.

# Get all available generator types
get_generator_types()
#> $X
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> 
#> $layers
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> 
#> $obs
#>  [1] "character"               "integer"                
#>  [3] "factor"                  "factor_ordered"         
#>  [5] "logical"                 "numeric"                
#>  [7] "character_with_nas"      "integer_with_nas"       
#>  [9] "factor_with_nas"         "factor_ordered_with_nas"
#> [11] "logical_with_nas"        "numeric_with_nas"       
#> 
#> $var
#>  [1] "character"               "integer"                
#>  [3] "factor"                  "factor_ordered"         
#>  [5] "logical"                 "numeric"                
#>  [7] "character_with_nas"      "integer_with_nas"       
#>  [9] "factor_with_nas"         "factor_ordered_with_nas"
#> [11] "logical_with_nas"        "numeric_with_nas"       
#> 
#> $obsm
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> [15] "character"                "integer"                 
#> [17] "factor"                   "factor_ordered"          
#> [19] "logical"                  "numeric"                 
#> [21] "character_with_nas"       "integer_with_nas"        
#> [23] "factor_with_nas"          "factor_ordered_with_nas" 
#> [25] "logical_with_nas"         "numeric_with_nas"        
#> 
#> $varm
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> [15] "character"                "integer"                 
#> [17] "factor"                   "factor_ordered"          
#> [19] "logical"                  "numeric"                 
#> [21] "character_with_nas"       "integer_with_nas"        
#> [23] "factor_with_nas"          "factor_ordered_with_nas" 
#> [25] "logical_with_nas"         "numeric_with_nas"        
#> 
#> $obsp
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> 
#> $varp
#>  [1] "numeric_matrix"           "numeric_dense"           
#>  [3] "numeric_csparse"          "numeric_rsparse"         
#>  [5] "numeric_matrix_with_nas"  "numeric_dense_with_nas"  
#>  [7] "numeric_csparse_with_nas" "numeric_rsparse_with_nas"
#>  [9] "integer_matrix"           "integer_csparse"         
#> [11] "integer_rsparse"          "integer_matrix_with_nas" 
#> [13] "integer_csparse_with_nas" "integer_rsparse_with_nas"
#> 
#> $uns
#>  [1] "scalar_character"               "scalar_integer"                
#>  [3] "scalar_factor"                  "scalar_factor_ordered"         
#>  [5] "scalar_logical"                 "scalar_numeric"                
#>  [7] "scalar_character_with_nas"      "scalar_integer_with_nas"       
#>  [9] "scalar_factor_with_nas"         "scalar_factor_ordered_with_nas"
#> [11] "scalar_logical_with_nas"        "scalar_numeric_with_nas"       
#> [13] "vec_character"                  "vec_integer"                   
#> [15] "vec_factor"                     "vec_factor_ordered"            
#> [17] "vec_logical"                    "vec_numeric"                   
#> [19] "vec_character_with_nas"         "vec_integer_with_nas"          
#> [21] "vec_factor_with_nas"            "vec_factor_ordered_with_nas"   
#> [23] "vec_logical_with_nas"           "vec_numeric_with_nas"          
#> [25] "df_character"                   "df_integer"                    
#> [27] "df_factor"                      "df_factor_ordered"             
#> [29] "df_logical"                     "df_numeric"                    
#> [31] "df_character_with_nas"          "df_integer_with_nas"           
#> [33] "df_factor_with_nas"             "df_factor_ordered_with_nas"    
#> [35] "df_logical_with_nas"            "df_numeric_with_nas"           
#> [37] "mat_numeric_matrix"             "mat_numeric_dense"             
#> [39] "mat_numeric_csparse"            "mat_numeric_rsparse"           
#> [41] "mat_numeric_matrix_with_nas"    "mat_numeric_dense_with_nas"    
#> [43] "mat_numeric_csparse_with_nas"   "mat_numeric_rsparse_with_nas"  
#> [45] "mat_integer_matrix"             "mat_integer_csparse"           
#> [47] "mat_integer_rsparse"            "mat_integer_matrix_with_nas"   
#> [49] "mat_integer_csparse_with_nas"   "mat_integer_rsparse_with_nas"  
#> [51] "list"                          
#> 

# Get generator types for a specific slot
get_generator_types(slot = "obs")
#>  [1] "character"               "integer"                
#>  [3] "factor"                  "factor_ordered"         
#>  [5] "logical"                 "numeric"                
#>  [7] "character_with_nas"      "integer_with_nas"       
#>  [9] "factor_with_nas"         "factor_ordered_with_nas"
#> [11] "logical_with_nas"        "numeric_with_nas"       

# Get generator types used when example = TRUE
get_generator_types(example = TRUE)
#> $X
#> [1] "numeric_matrix"
#> 
#> $layers
#> [1] "numeric_matrix"  "numeric_dense"   "numeric_csparse"
#> 
#> $obs
#> [1] "character" "integer"   "factor"   
#> 
#> $var
#> [1] "character" "integer"   "factor"   
#> 
#> $obsm
#> [1] "numeric_matrix"  "numeric_dense"   "numeric_csparse"
#> 
#> $varm
#> [1] "numeric_matrix"  "numeric_dense"   "numeric_csparse"
#> 
#> $obsp
#> [1] "numeric_matrix"  "numeric_dense"   "numeric_csparse"
#> 
#> $varp
#> [1] "numeric_matrix"  "numeric_dense"   "numeric_csparse"
#> 
#> $uns
#> [1] "scalar_character"   "vec_integer"        "df_logical"        
#> [4] "mat_numeric_matrix"
#>