library(dplyr)
library(Seurat)
options(bitmapType='cairo')
options(future.globals.maxSize = 8000 * 1024^2)

################### We converted all the raw STARsolo outputs into a tab-delimited text matrix (genes in rows, cells in columns) and merged all these matrices to form a single matrix. The barcodes for each tumor were prefixed with "TumorID_" #########

data <- read.table("All_SeqWell_220818_Raw_Expression.txt", sep="\t", head=TRUE, row.names=1)

Tumors.combined <- CreateSeuratObject(counts = data, project = "SeqWell", min.cells = 3, min.features = 200)

Tumors.combined[["percent.mt"]] <- PercentageFeatureSet(Tumors.combined, pattern = "^MT.")

all.genes <- rownames(Tumors.combined)


######## Filtering Low Quality Cells ##############

Tumors.combined <- subset(Tumors.combined, subset = nFeature_RNA > 500 & nFeature_RNA < 6000 & nCount_RNA > 1000 & percent.mt < 25)

pdf("SeqWell_WT_Mutant_Tumors_QC_AF.pdf", height = 6, width = 20)
VlnPlot(Tumors.combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
dev.off()

############### Normalization ##################

Tumors.combined <- NormalizeData(Tumors.combined)
all.genes <- rownames(Tumors.combined)
Tumors.combined <- ScaleData(Tumors.combined, features = all.genes)


######## Calculating Variable Scores for each gene in the matrix and outputting the results ##############

Tumors.combined <- FindVariableFeatures(Tumors.combined, selection.method="vst", nfeatures = 2000)

Var <- HVFInfo(object = Tumors.combined, selection.method="vst", assay = "RNA")

write.table(Var, file="SeqWell_Full_Gene_List_Variable_Score.txt", sep="\t", quote=FALSE, col.names=NA)

########## Identified top 4000 variable genes and saved them in a text file as a list "Variable_Genes_SeqWell.txt" (one gene per row) #################

Variable_Genes <- scan("Variable_Genes_SeqWell.txt", what="")

Matrix <- as.matrix(GetAssayData(Tumors.combined, slot = "counts"))


SeqWell3 <- Matrix[rownames(Matrix) %in% Variable_Genes,]

write.table(t(SeqWell3), file="SeqWell_Matrix_Filtered_For_NMF.txt", sep="\t", quote=FALSE, col.names=NA)