Pre-analysis for three datasets

Table of Contents

1 Mammary gland epithelial cells GSE103272

estrus (2729 cells) and diestrus (2439 cells)

https://www.nature.com/articles/s41467-017-01560-x

1.1 P7D diestrus

# 1.1 P7D (diestrus): load the 10X matrix, compute QC metrics, cluster the
# cells and export per-cluster markers.
# Plots are persisted with savePlot(), which copies the *current* graphics
# device, so every savePlot() call must directly follow the plot it captures.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

p7d.data <- Read10X(data.dir = "GSE103272/P7D/filtered_gene_bc_matrices")

p7d <- CreateSeuratObject(raw.data = p7d.data, min.cells = 3, min.genes = 200)

# Fraction of counts per cell that come from mitochondrial genes.
# The pattern was previously "^Mt": case-sensitive and without the hyphen, it
# selects nuclear genes whose symbols merely start with "Mt" (e.g. Mt1, Mt2,
# Mtor) instead of mitochondrial genes, which carry the "mt-" prefix in mouse
# annotations. NOTE(review): confirm the prefix against genes.tsv for this
# dataset.
mito.genes <- grep(
  pattern = "^mt-",
  x = rownames(x = p7d@data),
  ignore.case = TRUE,
  value = TRUE
)
# drop = FALSE keeps a matrix even when only one gene matches, so colSums()
# still works.
percent.mito <- Matrix::colSums(p7d@raw.data[mito.genes, , drop = FALSE]) /
  Matrix::colSums(p7d@raw.data)
p7d <- AddMetaData(object = p7d, metadata = percent.mito, col.name = "percent.mito")

# QC violin plots
VlnPlot(object = p7d, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
savePlot("p7d.qa.png")

# Normalisation and variable-gene selection
p7d <- NormalizeData(p7d)
p7d <- FindVariableGenes(p7d, x.low.cutoff = 0.05, x.high.cutoff = 8, y.cutoff = 0.5)
length(x = p7d@var.genes) # 1406 in the original run
savePlot("p7d.var.gene.png")

# Scale the variable genes while regressing out nUMI and percent.mito, then PCA
p7d <- ScaleData(
  p7d,
  genes.use = p7d@var.genes,
  vars.to.regress = c("nUMI", "percent.mito")
)
p7d <- RunPCA(p7d, pcs.compute = 20, weight.by.var = FALSE, do.print = FALSE)
PCElbowPlot(object = p7d)
savePlot("p7d.pce.png")

## FindClusters (first 8 PCs), then t-SNE on the same PCs
p7d <- FindClusters(
  p7d,
  reduction.type = "pca",
  dims.use = 1:8,
  resolution = 0.8,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE
)
p7d <- RunTSNE(p7d, dims.use = 1:8, do.fast = TRUE)
TSNEPlot(object = p7d, do.label = TRUE, label.size = 6, plot.title = "P7D")
savePlot("p7d.tsne.png")
saveRDS(p7d, file = "p7d.RDS")

## FindAllMarkers (positive markers only)
p7d.markers <- FindAllMarkers(object = p7d, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
write.csv(p7d.markers, file = "p7d.markers.csv")

QA/QC

p7d.qa.png p7d.var.gene.png

PCA

p7d.pce.png

Clusters and markers

p7d.markers.csv

Seurat object p7d.RDS

p7d.tsne.png

1.2 P7E estrus

# 1.2 P7E (estrus): load the 10X matrix, compute QC metrics, cluster the
# cells and export per-cluster markers.
# Plots are persisted with savePlot(), which copies the *current* graphics
# device, so every savePlot() call must directly follow the plot it captures.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

p7e.data <- Read10X(data.dir = "GSE103272/P7E/filtered_gene_bc_matrices")

p7e <- CreateSeuratObject(raw.data = p7e.data, min.cells = 3, min.genes = 200)

# Fraction of counts per cell that come from mitochondrial genes.
# The pattern was previously "^Mt": case-sensitive and without the hyphen, it
# selects nuclear genes whose symbols merely start with "Mt" (e.g. Mt1, Mt2,
# Mtor) instead of mitochondrial genes, which carry the "mt-" prefix in mouse
# annotations. NOTE(review): confirm the prefix against genes.tsv for this
# dataset.
mito.genes <- grep(
  pattern = "^mt-",
  x = rownames(x = p7e@data),
  ignore.case = TRUE,
  value = TRUE
)
# drop = FALSE keeps a matrix even when only one gene matches, so colSums()
# still works.
percent.mito <- Matrix::colSums(p7e@raw.data[mito.genes, , drop = FALSE]) /
  Matrix::colSums(p7e@raw.data)
p7e <- AddMetaData(object = p7e, metadata = percent.mito, col.name = "percent.mito")

# QC violin plots
VlnPlot(object = p7e, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
savePlot("p7e.qa.png")

# Normalisation and variable-gene selection
p7e <- NormalizeData(p7e)
p7e <- FindVariableGenes(p7e, x.low.cutoff = 0.05, x.high.cutoff = 8, y.cutoff = 0.5)
length(x = p7e@var.genes) # 1440 in the original run
savePlot("p7e.var.gene.png")

# Scale the variable genes while regressing out nUMI and percent.mito, then PCA
p7e <- ScaleData(
  p7e,
  genes.use = p7e@var.genes,
  vars.to.regress = c("nUMI", "percent.mito")
)
dim(p7e@data) # 2728 cells in the original run
p7e <- RunPCA(p7e, pcs.compute = 20, weight.by.var = FALSE, do.print = FALSE)
PCElbowPlot(object = p7e)
savePlot("p7e.pce.png")

## FindClusters (first 6 PCs), then t-SNE on the same PCs
p7e <- FindClusters(
  p7e,
  reduction.type = "pca",
  dims.use = 1:6,
  resolution = 0.8,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE
)
p7e <- RunTSNE(p7e, dims.use = 1:6, do.fast = TRUE)
TSNEPlot(object = p7e, do.label = TRUE, label.size = 6, plot.title = "P7E")
savePlot("p7e.tsne.png")
saveRDS(p7e, file = "p7e.RDS")

## FindAllMarkers (positive markers only)
p7e.markers <- FindAllMarkers(object = p7e, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
write.csv(p7e.markers, file = "p7e.markers.csv")

QA/QC

p7e.qa.png p7e.var.gene.png

PCA

p7e.pce.png

Clusters and markers

p7e.markers.csv

Seurat object p7e.RDS

p7e.tsne.png

1.3 P7 combined

# 1.3 P7 combined: merge the P7D and P7E count matrices into one Seurat
# object, compute QC metrics and cluster the pooled cells.
# Plots are persisted with savePlot(), which copies the *current* graphics
# device, so every savePlot() call must directly follow the plot it captures.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

p7d.data <- Read10X(data.dir = "GSE103272/P7D/filtered_gene_bc_matrices")
p7e.data <- Read10X(data.dir = "GSE103272/P7E/filtered_gene_bc_matrices")

# Prefix the barcodes with the sample name so the column names of the two
# matrices differ after cbind().
colnames(p7d.data) <- paste0("p7d-", colnames(p7d.data))
colnames(p7e.data) <- paste0("p7e-", colnames(p7e.data))

# NOTE(review): cbind() assumes both matrices list the same genes in the same
# row order -- confirm both samples were processed against the same reference.
p7.data <- cbind(p7d.data, p7e.data)

p7 <- CreateSeuratObject(raw.data = p7.data, min.cells = 3, min.genes = 200)

# Fraction of counts per cell that come from mitochondrial genes.
# The pattern was previously "^Mt": case-sensitive and without the hyphen, it
# selects nuclear genes whose symbols merely start with "Mt" (e.g. Mt1, Mt2,
# Mtor) instead of mitochondrial genes, which carry the "mt-" prefix in mouse
# annotations. NOTE(review): confirm the prefix against genes.tsv for this
# dataset.
mito.genes <- grep(
  pattern = "^mt-",
  x = rownames(x = p7@data),
  ignore.case = TRUE,
  value = TRUE
)
# drop = FALSE keeps a matrix even when only one gene matches, so colSums()
# still works.
percent.mito <- Matrix::colSums(p7@raw.data[mito.genes, , drop = FALSE]) /
  Matrix::colSums(p7@raw.data)
p7 <- AddMetaData(object = p7, metadata = percent.mito, col.name = "percent.mito")

# QC violin plots
VlnPlot(object = p7, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)
savePlot("p7.qa.png")

# Normalisation and variable-gene selection
p7 <- NormalizeData(p7)
p7 <- FindVariableGenes(p7, x.low.cutoff = 0.05, x.high.cutoff = 8, y.cutoff = 0.5)
length(x = p7@var.genes) # 1218 in the original run
savePlot("p7.var.gene.png")

# Scale the variable genes while regressing out nUMI and percent.mito, then PCA
p7 <- ScaleData(
  p7,
  genes.use = p7@var.genes,
  vars.to.regress = c("nUMI", "percent.mito")
)
p7 <- RunPCA(p7, pcs.compute = 20, weight.by.var = FALSE, do.print = FALSE)
PCElbowPlot(object = p7)
savePlot("p7.pce.png")

## FindClusters (first 8 PCs), then t-SNE on the same PCs
p7 <- FindClusters(
  p7,
  reduction.type = "pca",
  dims.use = 1:8,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE
)
p7 <- RunTSNE(p7, dims.use = 1:8, do.fast = TRUE)
TSNEPlot(object = p7, do.label = TRUE, label.size = 6, plot.title = "P7 Combined")
savePlot("p7.tsne.png")
saveRDS(p7, file = "p7.RDS")

QA/QC

p7.qa.png p7.var.gene.png

PCA

p7.pce.png

Clusters

Seurat object p7.RDS

p7.tsne.png

2 IFN-beta-stimulated PBMCs GSE96583

2.1 Single sample

# 2.1 IFN-beta PBMCs: control and stimulated samples analysed separately.
# Statement order is kept as-is because each savePlot() call copies whatever
# plot is currently on the graphics device.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

ctrl.data <- read.table(
  "GSE96583/immune_control_expression_matrix.txt",
  sep = "\t"
)
stim.data <- read.table(
  "GSE96583/immune_stimulated_expression_matrix.txt",
  sep = "\t"
)

# ---- Control object: create, tag, filter on nGene, normalise, scale ----
ctrl <- CreateSeuratObject(
  raw.data = ctrl.data,
  project = "IMMUNE_CTRL",
  min.cells = 5
)
ctrl@meta.data$stim <- "CTRL"
ctrl <- FilterCells(
  ctrl,
  subset.names = "nGene",
  low.thresholds = 500,
  high.thresholds = Inf
)
ctrl <- NormalizeData(ctrl)
ctrl <- ScaleData(ctrl, display.progress = FALSE)

# ---- Stimulated object: same steps as the control ----
stim <- CreateSeuratObject(
  raw.data = stim.data,
  project = "IMMUNE_STIM",
  min.cells = 5
)
stim@meta.data$stim <- "STIM"
stim <- FilterCells(
  stim,
  subset.names = "nGene",
  low.thresholds = 500,
  high.thresholds = Inf
)
stim <- NormalizeData(stim)
stim <- ScaleData(stim, display.progress = FALSE)

# ---- Variable-gene selection (no plot) ----
ctrl <- FindVariableGenes(ctrl, do.plot = FALSE)
stim <- FindVariableGenes(stim, do.plot = FALSE)

# ---- Control: PCA, clustering and t-SNE on PCs 1-15 ----
ctrl <- RunPCA(ctrl, do.print = FALSE)
PCElbowPlot(object = ctrl)
savePlot("ctrl.pce.png")
ctrl <- FindClusters(
  ctrl,
  reduction.type = "pca",
  dims.use = 1:15,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE,
  print.output = FALSE
)
ctrl <- RunTSNE(ctrl, dims.use = 1:15, do.fast = TRUE)
TSNEPlot(object = ctrl, do.label = TRUE, label.size = 6, plot.title = "Control")
savePlot("ctrl.tsne.png")
saveRDS(ctrl, file = "ctrl.RDS")

# ---- Stimulated: PCA, clustering and t-SNE on PCs 1-15 ----
stim <- RunPCA(stim, do.print = FALSE)
PCElbowPlot(object = stim)
savePlot("stim.pce.png")
stim <- FindClusters(
  stim,
  reduction.type = "pca",
  dims.use = 1:15,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE,
  print.output = FALSE
)
stim <- RunTSNE(stim, dims.use = 1:15, do.fast = TRUE)
TSNEPlot(object = stim, do.label = TRUE, label.size = 6, plot.title = "Stimulated")
savePlot("stim.tsne.png")
saveRDS(stim, file = "stim.RDS")

# ---- Positive cluster markers for each condition ----
ctrl.markers <- FindAllMarkers(
  object = ctrl,
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
write.csv(ctrl.markers, file = "ctrl.markers.csv")

stim.markers <- FindAllMarkers(
  object = stim,
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
write.csv(stim.markers, file = "stim.markers.csv")

PCA

ctrl.pce.png stim.pce.png

Clustering and Markers

ctrl.tsne.png stim.tsne.png

ctrl.markers.csv stim.markers.csv

Seurat objects ctrl.RDS and stim.RDS

2.2 Combined sample

# 2.2 IFN-beta PBMCs: control and stimulated cells pooled into one object.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

ctrl.data <- read.table(
  "GSE96583/immune_control_expression_matrix.txt",
  sep = "\t"
)
stim.data <- read.table(
  "GSE96583/immune_stimulated_expression_matrix.txt",
  sep = "\t"
)

## Combined
# Prefix every cell name with its condition before joining the two tables
# column-wise (stimulated first, then control).
colnames(ctrl.data) <- paste("ctrl", colnames(ctrl.data), sep = "-")
colnames(stim.data) <- paste("stim", colnames(stim.data), sep = "-")

immune.data <- cbind(stim.data, ctrl.data)

immune <- CreateSeuratObject(
  raw.data = immune.data,
  project = "IMMUNE",
  min.cells = 5
)
immune <- FilterCells(
  immune,
  subset.names = "nGene",
  low.thresholds = 500,
  high.thresholds = Inf
)
immune <- NormalizeData(immune)
immune <- ScaleData(immune, display.progress = FALSE)
immune <- FindVariableGenes(immune, do.plot = FALSE)
immune <- RunPCA(immune, do.print = FALSE)

# Clustering and t-SNE both use PCs 1-15.
immune <- FindClusters(
  immune,
  reduction.type = "pca",
  dims.use = 1:15,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE,
  print.output = FALSE
)
immune <- RunTSNE(immune, dims.use = 1:15, do.fast = TRUE)
TSNEPlot(
  object = immune,
  do.label = TRUE,
  label.size = 6,
  plot.title = "Immune Combined"
)
savePlot("immune.tsne.png")
saveRDS(immune, file = "immune.RDS")

immune.tsne.png

Seurat object immune.RDS

3 PBMCs

3.1 PBMC 4K

# 3.1 PBMC 4K: variable genes, PCA, clustering, t-SNE and cluster markers.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

pbmc4k.data <- Read10X(
  data.dir = "PBMCs/pbmc4k/filtered_gene_bc_matrices/GRCh38"
)

pbmc4k <- CreateSeuratObject(
  raw.data = pbmc4k.data,
  min.cells = 3,
  min.genes = 200
)
pbmc4k <- NormalizeData(pbmc4k)
pbmc4k <- FindVariableGenes(
  pbmc4k,
  x.low.cutoff = 0.05,
  x.high.cutoff = 8,
  y.cutoff = 0.5
)
length(pbmc4k@var.genes) # 1106 in the original run

# Only the variable genes are scaled; nUMI is regressed out.
pbmc4k <- ScaleData(
  pbmc4k,
  genes.use = pbmc4k@var.genes,
  vars.to.regress = c("nUMI")
)
pbmc4k <- RunPCA(
  pbmc4k,
  pcs.compute = 20,
  weight.by.var = FALSE,
  do.print = FALSE
)
PCElbowPlot(object = pbmc4k)
savePlot("pbmc4k.pce.png")

## FindClusters
# Clustering and t-SNE both use PCs 1-9.
pbmc4k <- FindClusters(
  pbmc4k,
  reduction.type = "pca",
  dims.use = 1:9,
  resolution = 0.8,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE,
  k.param = 15,
  k.scale = 15
)
pbmc4k <- RunTSNE(pbmc4k, dims.use = 1:9, do.fast = TRUE)
TSNEPlot(
  object = pbmc4k,
  do.label = TRUE,
  label.size = 6,
  plot.title = "PBMC 4K"
)
savePlot("pbmc4k.tsne.png")
saveRDS(pbmc4k, file = "pbmc4k.RDS")

## FindAllMarkers
pbmc4k.markers <- FindAllMarkers(
  object = pbmc4k,
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
write.csv(pbmc4k.markers, file = "pbmc4k.markers.csv")

Clustering and Markers

pbmc4k.tsne.png

pbmc4k.markers.csv

Seurat object pbmc4k.RDS

3.2 PBMC 8K

# 3.2 PBMC 8K: variable genes, PCA, clustering, t-SNE and cluster markers.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

pbmc8k.data <- Read10X(
  data.dir = "PBMCs/pbmc8k/filtered_gene_bc_matrices/GRCh38"
)

pbmc8k <- CreateSeuratObject(
  raw.data = pbmc8k.data,
  min.cells = 3,
  min.genes = 200
)
pbmc8k <- NormalizeData(pbmc8k)
pbmc8k <- FindVariableGenes(
  pbmc8k,
  x.low.cutoff = 0.01,
  x.high.cutoff = 8,
  y.cutoff = 0.5
)
length(x = pbmc8k@var.genes) # 1497 in the original run

# Only the variable genes are scaled; nUMI is regressed out.
pbmc8k <- ScaleData(
  pbmc8k,
  genes.use = pbmc8k@var.genes,
  vars.to.regress = c("nUMI")
)
pbmc8k <- RunPCA(
  pbmc8k,
  pcs.compute = 20,
  weight.by.var = FALSE,
  do.print = FALSE
)
PCElbowPlot(object = pbmc8k)
savePlot("pbmc8k.pce.png")

## FindClusters
# Clustering and t-SNE both use PCs 1-9.
pbmc8k <- FindClusters(
  pbmc8k,
  reduction.type = "pca",
  dims.use = 1:9,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE
)
pbmc8k <- RunTSNE(pbmc8k, dims.use = 1:9, do.fast = TRUE)
TSNEPlot(object = pbmc8k, do.label = TRUE, label.size = 6)
savePlot("pbmc8k.tsne.png")
saveRDS(pbmc8k, file = "pbmc8k.RDS")

## FindAllMarkers
pbmc8k.markers <- FindAllMarkers(
  object = pbmc8k,
  only.pos = TRUE,
  min.pct = 0.25,
  thresh.use = 0.25
)
write.csv(pbmc8k.markers, file = "pbmc8k.markers.csv")

Clustering and Markers

pbmc8k.tsne.png

pbmc8k.markers.csv

Seurat object pbmc8k.RDS

3.3 PBMC Combined

# 3.3 PBMC combined: pool the 4K and 8K matrices into one Seurat object, then
# run variable-gene selection, PCA, clustering and t-SNE.
# The working directory previously lacked the "data/" component used by every
# other section, although the relative input paths below are the same ones the
# 4K and 8K sections read from ".../data/package_v2".
# NOTE(review): confirm that ".../cbio.xig.103/package_v2" was not intended.
setwd("/n/core/Bioinformatics/analysis/CompBio/cbio.xig.103/data/package_v2")

library(Seurat)

pbmc4k.data <- Read10X(data.dir = "PBMCs/pbmc4k/filtered_gene_bc_matrices/GRCh38")
pbmc8k.data <- Read10X(data.dir = "PBMCs/pbmc8k/filtered_gene_bc_matrices/GRCh38")

## Combined
# Prefix the barcodes with the sample name so the column names of the two
# matrices differ after cbind().
colnames(pbmc4k.data) <- paste("pbmc4k", colnames(pbmc4k.data), sep = "-")
colnames(pbmc8k.data) <- paste("pbmc8k", colnames(pbmc8k.data), sep = "-")

pbmc.data <- cbind(pbmc4k.data, pbmc8k.data)

pbmc <- CreateSeuratObject(raw.data = pbmc.data, min.cells = 3, min.genes = 200)
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableGenes(pbmc, x.low.cutoff = 0.01, x.high.cutoff = 8, y.cutoff = 0.5)
length(x = pbmc@var.genes) # 1434 in the original run

# Only the variable genes are scaled; nUMI is regressed out.
pbmc <- ScaleData(pbmc, genes.use = pbmc@var.genes, vars.to.regress = c("nUMI"))
pbmc <- RunPCA(pbmc, pcs.compute = 20, weight.by.var = FALSE, do.print = FALSE)
PCElbowPlot(object = pbmc)
# savePlot() copies the current graphics device, so it must directly follow
# the plot it captures.
savePlot("pbmc.pce.png")

## FindClusters (first 9 PCs), then t-SNE on the same PCs
pbmc <- FindClusters(
  pbmc,
  reduction.type = "pca",
  dims.use = 1:9,
  resolution = 0.6,
  save.SNN = TRUE,
  temp.file.location = ".",
  force.recalc = TRUE
)
pbmc <- RunTSNE(pbmc, dims.use = 1:9, do.fast = TRUE)
TSNEPlot(object = pbmc, do.label = TRUE, label.size = 6)
savePlot("pbmc.tsne.png")
saveRDS(pbmc, file = "pbmc.RDS")

pbmc.tsne.png

Seurat object pbmc.RDS

R version 3.4.3 (2017-11-30)

Seurat_2.2.1

Author: Xin Gao

Created: 2018-05-04 Fri 19:54

Emacs 24.5.4 (Org mode 8.2.10)

Validate