I have the below code to convert a .csv/txt file into a .mtx file, a txt file containing row names, and a txt file containing column names. However, when I run the code on a csv file, the output folder is empty. Where can the error be? Much appreciated.
#########################################################################################
# IMPORT DATA
## Gene names are stored in rows
## Sample names are stored in columns
#########################################################################################
# IMPORT LIBRARIES
library(data.table)
library(DropletUtils)
library(Matrix)
#########################################################################################
# DEFINE FUNCTIONS
clean_dt_colnames <- function(dt, clean_barcodes) {setnames(dt, base::colnames(dt), clean_barcodes)}
make_sample_barcode_tab <- function(dt, sample_regex = NA) {samp_bc <- colnames(dt)
if (!is.na(sample_regex)) {
sample_names <- gsub(sample_regex, "\\1", samp_bc)
barcodes <- gsub(sample_regex, "\\2", samp_bc)
clean_dt_colnames(dt, barcodes)
} else {
barcodes <- samp_bc
sample_names <- rep_len("single_sample", length(barcodes))}
# first var in dt is the gene_names var (data.tables don't have rownames)
data.table(
sample = sample_names[-1],
barcode = barcodes[-1])}
list_barcodes_in_sample <- function(sample_barcode_tab) {
# nest each barcode group to separate data.table
nested_sample_dt <- sample_barcode_tab[, .(bc_list = list(.SD)), by = sample]
# convert nested data table to list
lapply(nested_sample_dt[["bc_list"]], unlist)}
sub_dt <- function(columns, dt) {
# subset a data table by character vector, to ease lapply
columns <- c("V1", columns)
dt[, ..columns]}
export_demultiplexed_data <- function(sample_dt, sparse_matrix_list, data_dir) {
nested_sample_dt <- sample_dt[, .(bc_list = list(.SD)), by = sample]
for (row in 1:nrow(nested_sample_dt)) {
fname <- file.path(data_dir, "out", nested_sample_dt[row][["sample"]])
# unnest barcodes in sample
expected_barcodes_in_sample <- nested_sample_dt[row, bc_list[[1]]][["barcode"]]
if (!identical(expected_barcodes_in_sample, colnames(sparse_matrix_list[[row]]))) {
stop("not the same barcodes")
}
DropletUtils::write10xCounts(fname,
sparse_matrix_list[[row]],
version = "3")}}
#########################################################################################
# DEFINE FILES & FOLDERS
data_dir <- "./"
setwd(data_dir)
csv_files <- list.files(data_dir, pattern = "*[ct]sv$")
print(csv_files)
output_dir <- file.path(data_dir, "out")
dir.create(output_dir)
#########################################################################################
# INSPECT DATA
csv_example <- count_data
# Look at the general structure of the matrix.
str(csv_example)
# print the column names, usually the barcodes
colnames(csv_example)
# print the first 20 rows of the first column (usually gene names)
head(csv_example[, 1], 20)
sample_regex <- NA
#########################################################################################
# PROCESS FILES
for (file in csv_files) {
csv_table <- fread(file)
setnames(csv_table, old = 1, new = "V1")
sample_tab <- make_sample_barcode_tab(csv_table, sample_regex)
gc()
# subset the original count data.table, separating by samples if present
dt_subset <- lapply(list_barcodes_in_sample(sample_tab), sub_dt, csv_table)
rm(csv_table)
gc()
# convert each subsetted count data.table to count matrix
counts <- lapply(dt_subset, as.matrix, rownames = "V1")
rm(dt_subset)
gc()
# convert each count matrix to sparse matrices
sparse_counts <- lapply(counts, Matrix, sparse = T)
rm(counts)
gc()
# export the data to one folder per sample
export_demultiplexed_data(sample_tab, sparse_counts, data_dir)}
(This code is adapted from the code in this link: https://www.biomage.net/blog/converting-csv-to-upload-to-cellenics)