Tutorial 3: Application on SeqFISH mouse embryo dataset.

In this vignette, We performed PROST on the processed mouse embryo ST data from (Lohoff, T. et al. 2022) generated by SeqFISH to evaluate its general applicability.
The original data can be downloaded from google drive.

Identify SVGs

1.Load PROST and its dependent packages

import pandas as pd 
import numpy as np 
import scanpy as sc 
import os 
import warnings 
warnings.filterwarnings("ignore") 
import matplotlib as mpl 
import matplotlib.pyplot as plt 
import PROST 
PROST.__version__ 


>>> ' 1.1.2 '

2.Set up the working environment and import data

# the location of R (used for the mclust clustering)
ENVpath = "your path of PROST_ENV"            # refer to 'How to use PROST' section  
os.environ['R_HOME'] = f'{ENVpath}/lib/R'
os.environ['R_USER'] = f'{ENVpath}/lib/python3.7/site-packages/rpy2'

# init
SEED = 818
PROST.setup_seed(SEED)

# Set directory (If you want to use additional data, please change the file path)
rootdir = 'datasets/SeqFISH/'

input_dir = os.path.join(rootdir)
output_dir = os.path.join(rootdir,'results/')
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Read counts and metadata
counts = pd.read_csv(input_dir + "counts.txt", sep = "\t")
metadata = pd.read_csv(input_dir + "metadata.txt", sep = "\t")
gene_name = counts.index

# Create anndata for embryo1 (embryo2 or embryo3)
embryo = 1
# Read data
metadata = metadata[metadata["embryo"]==f"embryo{embryo}"]
counts = counts.loc[:,metadata["uniqueID"]]
spatial = metadata[["x_global","y_global"]]
spatial.index = metadata["uniqueID"]

# Create anndata
adata = sc.AnnData(counts.T)
adata.var_names_make_unique()
# read spatial
adata.obsm["spatial"] = spatial.to_numpy()
# read annotation
annotation = metadata["celltype_mapped_refined"]
annotation.index = metadata["uniqueID"]
adata.obs["annotation"] = annotation
# save data
adata.write_h5ad(output_dir+f"/used_data_embryo{embryo}.h5")

3.Calculate and save PI

adata = sc.read(output_dir+f"/used_data_embryo{embryo}.h5")
adata = PROST.prepare_for_PI(adata, percentage = 0.01, platform="SeqFISH")
adata = PROST.cal_PI(adata, kernel_size=8, platform="SeqFISH",del_rate=0.05)

# Hypothesis test
'''
PROST.spatial_autocorrelation(adata, k = 10, permutations = None)
'''

# Save PI results
adata.write_h5ad(output_dir+f"/PI_result_embryo{embryo}.h5")


>>> Filtering genes ...
>>> Trying to set attribute `.var` of view, copying.
>>> Normalization to each gene:
>>> 100%|██████████| 351/351 [00:00<00:00, 5237.45it/s]
>>> Gaussian filtering for each gene:
>>> 100%|██████████| 351/351 [00:40<00:00,  8.67it/s]
>>> Binary segmentation for each gene:
>>> 100%|██████████| 351/351 [00:00<00:00, 18470.16it/s]
>>> Spliting subregions for each gene:
>>> 100%|██████████| 351/351 [00:00<00:00, 8355.38it/s]
>>> Computing PROST Index for each gene:
>>> 100%|██████████| 351/351 [00:39<00:00,  8.99it/s]
>>> PROST Index calculation completed !!

4.Draw SVGs detected by PI

PROST.plot_gene(adata, platform="SeqFISH", size = 0.3, top_n = 25, ncols_each_sheet = 5, nrows_each_sheet = 5,save_path = output_dir)


>>> Drawing pictures:
>>> 100%|██████████| 1/1 [00:15<00:00, 15.74s/it]
>>> Drawing completed !!

SeqFish_mouse_embryo_svgs

Clustering

# Set the number of clusters
n_clusters = 24

1.Read PI result and Expression data preprocessing

PROST.setup_seed(SEED)
# Read PI result
adata = sc.read(output_dir+f"/PI_result_embryo{embryo}.h5")

sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

2.Run PROST clustering

PROST.run_PNN(adata, 
        platform="SeqFISH", 
        min_distance = 3,
        init="mclust",
        n_clusters = n_clusters, 
        SEED=SEED,
        post_processing = False,
        cuda = False)


>>> Calculating adjacency matrix ...
>>> Running PCA ...
>>> Laplacian Smoothing ...
>>> Initializing cluster centers with mclust, n_clusters known
>>> Epoch: : 501it [3:17:42, 23.68s/it, loss=0.28359604]                         
>>> Clustering completed !!

3.Save result

adata.write_h5ad(output_dir + f"/PNN_result_embryo{embryo}.h5")
clustering = adata.obs["clustering"]
clustering.to_csv(output_dir + f"/clusters_embryo{embryo}.csv",header = False)
embedding = adata.obsm["PROST"]
np.savetxt(output_dir + f"/embedding_embryo{embryo}.txt",embedding)

4.Plot annotation

plt.rcParams["figure.figsize"] = (5,5)
ax = sc.pl.embedding(adata, basis="spatial", color="annotation",size = 7,s=6, show=False, title='annotation')
ax.invert_yaxis()
plt.axis('off')
plt.savefig(output_dir+f"/annotation_embryo{embryo}.png", dpi=600, bbox_inches='tight')

annotation results

5.Plot clustering result

plt.rcParams["figure.figsize"] = (5,5)
ax = sc.pl.embedding(adata, basis="spatial", color="clustering",size = 7,s=6, show=False, title='clustering')
ax.invert_yaxis()
plt.axis('off')
plt.savefig(output_dir+"/clustering.png", dpi=600, bbox_inches='tight')

SeqFish_mouse_embryo_clusterresult