This is a simplified version of the PBMC single-cell data analysis with Seurat.
# Load Seurat R package
library(Seurat)
# Set the seed of the random number generator:
set.seed(1234)
# Load data
pbmc.data <- Read10X(data.dir = "filtered_gene_bc_matrices/hg19/")
# Build the Seurat object
pbmc <- CreateSeuratObject(
  counts = pbmc.data,
  project = "PBMC"
)
# Filter out low-quality cells
# First store, per cell, the percentage of counts coming from features whose
# names match "^MT-" in a metadata column called "percent.mt".
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
# Then keep only cells with 200 < nFeature_RNA < 2500 and percent.mt < 5.
pbmc <- subset(
  pbmc,
  subset = nFeature_RNA > 200 &
    nFeature_RNA < 2500 &
    percent.mt < 5
)
# Perform data normalization
pbmc <- NormalizeData(pbmc)
# Find highly variable genes
pbmc <- FindVariableFeatures(pbmc, nfeatures = 2000)
# Perform data scaling (on all features, not only the variable ones)
pbmc <- ScaleData(pbmc, features = rownames(pbmc))
# Perform PCA
pbmc <- RunPCA(pbmc)
# Perform clustering
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.5)
# Perform UMAP dimensionality reduction
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 30,
  min.dist = 0.3
)
# Visualize UMAP plot
DimPlot(pbmc, reduction = "umap")
The following questions will guide you through the (visual) assessment of the impact of changes in some parameter settings. In particular, we will change the clustering `resolution` parameter, and the `n.neighbors` and `min.dist` parameters in UMAP dimensionality reduction.
Question: What happens if we increase the clustering `resolution` parameter to 1.5?
# Clustering
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 1.5) # Original value: 0.5
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 30,
  min.dist = 0.3
)
DimPlot(pbmc, reduction = "umap")
Question: What happens if we decrease the clustering `resolution` parameter to 0.1?
# Clustering
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.1) # Original value: 0.5
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 30,
  min.dist = 0.3
)
DimPlot(pbmc, reduction = "umap")
Question: What happens if we decrease the UMAP `n.neighbors` parameter to 5?
# Clustering
# NOTE(review): resolution stays at 0.1 here rather than the baseline 0.5, so
# cluster colours differ from the first plot -- confirm this is intended.
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.1)
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 5, # Original value: 30
  min.dist = 0.3
)
DimPlot(pbmc, reduction = "umap")
Question: What happens if we increase the UMAP `n.neighbors` parameter to 1,000?
# Clustering
# NOTE(review): resolution stays at 0.1 here rather than the baseline 0.5, so
# cluster colours differ from the first plot -- confirm this is intended.
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.1)
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 1000, # Original value: 30
  min.dist = 0.3
)
DimPlot(pbmc, reduction = "umap")
Question: What happens if we increase the UMAP `min.dist` parameter to 1, keeping `n.neighbors = 30`?
# Clustering
# NOTE(review): resolution stays at 0.1 here rather than the baseline 0.5, so
# cluster colours differ from the first plot -- confirm this is intended.
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.1)
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 30, # Unchanged (original value: 30)
  min.dist = 1 # Original value: 0.3
)
DimPlot(pbmc, reduction = "umap")
Question: What happens if we decrease the UMAP `min.dist` parameter to 0.1, keeping `n.neighbors = 30`?
# Clustering
# NOTE(review): resolution stays at 0.1 here rather than the baseline 0.5, so
# cluster colours differ from the first plot -- confirm this is intended.
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.1)
# UMAP
pbmc <- RunUMAP(
  pbmc,
  dims = 1:10,
  n.neighbors = 30, # Unchanged (original value: 30)
  min.dist = 0.1 # Original value: 0.3
)
DimPlot(pbmc, reduction = "umap")