%load_ext autoreload
%autoreload 2

import os
import sys

import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

import celloracle as co
co.__version__

'0.18.0'

# Notebook-wide plotting defaults.
# Uncomment the next line to render figure text with the Arial font family.
#plt.rcParams["font.family"] = "arial"
# Default size for newly created figures.
plt.rcParams["figure.figsize"] = [6,6]
# IPython magic: set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Resolution (dpi) applied when figures are written with savefig.
plt.rcParams["savefig.dpi"] = 600

# IPython magic: draw matplotlib figures inline in the notebook.
%matplotlib inline

# Make folder to save plots
# exist_ok=True keeps a re-run of this cell from failing when the folder already exists.
save_folder = "figures"
os.makedirs(save_folder, exist_ok=True)

# Load the tutorial oracle object.
# Per the recorded output below, the data is fetched from GitHub when it is not
# found in the local folder and is then saved locally.
oracle = co.data.load_tutorial_oracle_object()
# Bare expression: shows the oracle summary as the cell output.
oracle

# Attention! When you analyse your own data, load your saved oracle object instead:
# oracle = co.load_hdf5("ORACLE OBJECT PATH")

Data not found in the local folder. Loading data from github. Data will be saved at /data/homezvol2/ddlin/celloracle_data/tutorial_data

  0%|          | 0.00/77.7M [00:00<?, ?B/s]

Oracle object

Meta data
    celloracle version used for instantiation: 0.10.0
    n_cells: 2671
    n_genes: 1999
    cluster_name: louvain_annot
    dimensional_reduction_name: X_draw_graph_fa
    n_target_genes_in_TFdict: 21259 genes
    n_regulatory_in_TFdict: 1093 genes
    n_regulatory_in_both_TFdict_and_scRNA-seq: 90 genes
    n_target_genes_both_TFdict_and_scRNA-seq: 1850 genes
    k_for_knn_imputation: 66
Status
    Gene expression matrix: Ready
    BaseGRN: Ready
    PCA calculation: Done
    Knn imputation: Done
    GRN calculation for simulation: Not finished

# Here, we load the demo links object for training purposes.
# Per the recorded output below, the data is fetched from GitHub when it is not
# found in the local folder and is then saved locally.
links = co.data.load_tutorial_links_object()

# Attention! When you analyse your own data, load your saved links object instead:
# links = co.load_hdf5("YOUR LINK OBJECT PATH")

Data not found in the local folder. Loading data from github. Data will be saved at /data/homezvol2/ddlin/celloracle_data/tutorial_data

  0%|          | 0.00/59.6M [00:00<?, ?B/s]

# Filter the inferred links using the method's default settings.
links.filter_links()

# Hand the filtered links to the oracle object as cluster-specific TF dictionaries.
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)

# Run the GRN calculation for simulation (reported as "Not finished" in the
# oracle summary until this step is run), using the cluster-specific TF dicts.
oracle.fit_GRN_for_simulation(
    alpha=10,
    use_cluster_specific_TFdict=True,
)

  0%|          | 0/24 [00:00<?, ?it/s]

# Gene of interest; the same name is used for the perturbation further down.
goi = "Gata1"

# Embedding coloured by the gene's imputed expression and by cluster label.
sc.pl.draw_graph(
    oracle.adata,
    color=[goi, oracle.cluster_column_name],
    layer="imputed_count",
    use_raw=False,
    cmap="viridis",
)

# Histogram of the gene's imputed expression values.
goi_expression = sc.get.obs_df(oracle.adata, keys=[goi], layer="imputed_count")
goi_expression.hist()
plt.show()

# Simulate signal propagation after the perturbation.
# The perturbation condition sets the gene of interest to 0.0.
oracle.simulate_shift(
    perturb_condition={goi: 0.0},
    n_propagation=3,
)

# Estimate the cell-to-cell transition probability.
oracle.estimate_transition_prob(
    n_neighbors=200,
    knn_random=True,
    sampled_fraction=1,
)

# Compute the shift vectors in the embedding.
oracle.calculate_embedding_shift(sigma_corr=0.05)

# Side-by-side quiver plots: simulation result (left) vs. randomized control (right).
fig, ax = plt.subplots(1, 2, figsize=[13, 6])

# The same `scale` is passed to both panels so they can be compared directly.
# Finding a suitable value for `scale` is important for this plot.
scale = 25

# Show quiver plot
oracle.plot_quiver(scale=scale, ax=ax[0])
ax[0].set_title(f"Simulated cell identity shift vector: {goi} KO")

# Show quiver plot that was calculated with randomized graph.
oracle.plot_quiver_random(scale=scale, ax=ax[1])
# Plain string: the title has no placeholders, so no f-prefix is needed.
ax[1].set_title("Randomized simulation vector")

plt.show()

# Grid parameters. n_grid = 40 is a good starting value; min_mass is chosen
# with the help of the threshold suggestions produced below.
n_grid = 40
min_mass = 0.01

oracle.calculate_p_mass(
    smooth=0.8,
    n_grid=n_grid,
    n_neighbors=200,
)

# Search for the best min_mass.
oracle.suggest_mass_thresholds(n_suggestion=12)

# Apply the chosen threshold and plot the result.
oracle.calculate_mass_filter(min_mass=min_mass, plot=True)

# Side-by-side vector fields on the grid: simulation result (left) vs.
# randomized control (right).
fig, ax = plt.subplots(1, 2, figsize=[13, 6])

# The same scale is passed to both panels so they can be compared directly.
scale_simulation = 0.5

# Show the simulated flow on the grid
oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])
ax[0].set_title(f"Simulated cell identity shift vector: {goi} KO")

# Show the flow that was calculated with randomized graph.
oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])
# Plain string: the title has no placeholders, so no f-prefix is needed.
ax[1].set_title("Randomized simulation vector")

plt.show()

# Overlay the simulated vector field on the cell clusters in a single axes.
fig, ax = plt.subplots(figsize=[8, 8])

# Draw the clusters first, then the flow without its own background.
oracle.plot_cluster_whole(s=10, ax=ax)
oracle.plot_simulation_flow_on_grid(
    ax=ax,
    scale=scale_simulation,
    show_background=False,
)

# Colour the oracle's embedding by the "Pseudotime" values.
fig, ax = plt.subplots(figsize=[6, 6])

sc.pl.embedding(
    adata=oracle.adata,
    basis=oracle.embedding_name,
    color=["Pseudotime"],
    cmap="rainbow",
    ax=ax,
)

from celloracle.applications import Gradient_calculator

# Build the gradient calculator from the oracle object and its pseudotime key.
gradient = Gradient_calculator(
    oracle_object=oracle,
    pseudotime_key="Pseudotime",
)

# Reuse the grid settings (n_grid, min_mass) chosen for the simulation plots.
gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)
gradient.calculate_mass_filter(min_mass=min_mass, plot=True)

# Transfer the pseudotime data onto the grid with the "polynomial" method.
gradient.transfer_data_into_grid(
    args={"method": "polynomial", "n_poly": 3},
    plot=True,
)

# Calculate gradient
gradient.calculate_gradient()

# Show results; scale_dev is reused by later development-flow plots.
scale_dev = 40
gradient.visualize_results(scale=scale_dev, s=5)

# Development flow on the grid, drawn on its own axes.
fig, ax = plt.subplots(figsize=[6, 6])
gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)

# Optional: save the gradient object.
#gradient.to_hdf5("Paul_etal.celloracle.gradient")

from celloracle.applications import Oracle_development_module

# Oracle_development_module compares two vector fields:
# the development flow and the perturbation simulation.
dev = Oracle_development_module()

# Reference data: development flow from the gradient object.
dev.load_differentiation_reference_data(gradient_object=gradient)

# Query data: simulation result from the oracle object.
dev.load_perturb_simulation_data(oracle_object=oracle)

# Calculate inner product scores, then digitize them into 10 bins.
dev.calculate_inner_product()
dev.calculate_digitized_ip(n_bins=10)

# Show perturbation scores (PS): simulation result (left) vs. randomized
# control (right).
# `vm` is defined once and passed to BOTH panels (presumably the colour-scale
# limit - confirm against the celloracle API) so that changing it here keeps
# the two plots on the same range.
vm = 0.02

fig, ax = plt.subplots(1, 2, figsize=[12, 6])
dev.plot_inner_product_on_grid(vm=vm, s=50, ax=ax[0])
ax[0].set_title("PS")

dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])
ax[1].set_title("PS calculated with Randomized simulation vector")
plt.show()

# Perturbation scores with the simulated vector field drawn on top.
fig, ax = plt.subplots(figsize=[6, 6])
dev.plot_inner_product_on_grid(ax=ax, vm=vm, s=50)
dev.plot_simulation_flow_on_grid(ax=ax, scale=scale_simulation, show_background=False)

# Summary layout of the results, reusing the scales chosen earlier.
dev.visualize_development_module_layout_0(
    s=5,
    s_grid=50,
    scale_for_simulation=scale_simulation,
    scale_for_pseudotime=scale_dev,
    vm=vm,
)

2025-08-20 07:07:35,940 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-08-20 07:07:35,944 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.

# Clusters of interest: Ery_0 ... Ery_9, plus MEP_0 and Mk_0.
clusters = [f"Ery_{i}" for i in range(10)] + ["MEP_0", "Mk_0"]

# Positional indices of the cells whose "louvain_annot" label is in `clusters`.
in_selected_clusters = oracle.adata.obs["louvain_annot"].isin(clusters)
cell_idx = np.where(in_selected_clusters)[0]

# Check index
print(cell_idx)

[   0    2    4 ... 2666 2668 2670]

# Fresh development module, this time restricted to the selected cells.
dev = Oracle_development_module()

# Reference data: development flow from the gradient object.
dev.load_differentiation_reference_data(gradient_object=gradient)

# Query data: simulation result for the chosen cells only.
# `cell_idx_use` takes the cell index list; `name` labels this cell group
# (any name can be entered).
dev.load_perturb_simulation_data(
    oracle_object=oracle,
    cell_idx_use=cell_idx,
    name="Lineage_MEP",
)

# Inner product scores, digitized into 10 bins.
dev.calculate_inner_product()
dev.calculate_digitized_ip(n_bins=10)

# Summary layout of the results for this cell group.
dev.visualize_development_module_layout_0(
    s=5,
    s_grid=50,
    scale_for_simulation=scale_simulation,
    scale_for_pseudotime=scale_dev,
    vm=vm,
)

2025-08-20 07:07:39,467 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-08-20 07:07:39,472 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.

from celloracle.visualizations.config import CONFIG
# Set the "cmap_ps" entry of celloracle's visualization config to "coolwarm",
# then draw the summary layout again.
CONFIG["cmap_ps"] = "coolwarm"

dev.visualize_development_module_layout_0(
    s=5,
    s_grid=50,
    scale_for_simulation=scale_simulation,
    scale_for_pseudotime=scale_dev,
    vm=vm,
)

2025-08-20 07:07:42,331 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-08-20 07:07:42,335 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.

0. Import libraries¶

0.1. Import public libraries¶

0.2. Make a folder to save graphs¶

1. Load data¶

1.1. Load processed oracle object¶

1.2. Load inferred GRNs¶

2. Make predictive models for simulation¶

3. In silico TF perturbation analysis¶

3.1. Check gene expression pattern.¶

3.2. Calculate future gene expression after perturbation.¶

3.3. Calculate transition probability between cells¶

4. Visualization¶

4.1. Quiver plot: Show the direction of cell transition at single cell resolution¶

Caution: It is very important to find the optimal `scale` parameter.¶

4.2. Vector field graph¶

4.2.1 Find parameters for n_grid and min_mass¶

4.2.2 Plot vector fields¶

5. [This step is optional] Compare simulation vector with development vectors¶

5.0. Method overview¶

5.1 Prepare pseudotime data¶

5.2. Check data¶

5.3. Make Gradient_calculator object¶

5.4 Transfer pseudotime values to the grid points.¶

5.5. Calculate Gradient vectors¶

5.6. Calculate Inner product between two vectors¶

5.7. Show results¶

6. Focus on a single development lineage to interpret the results in detail¶

0. Import libraries¶

0.1. Import public libraries¶

0.2. Make a folder to save graphs¶

1. Load data¶

1.1. Load processed oracle object¶

1.2. Load inferred GRNs¶

2. Make predictive models for simulation¶

3. In silico TF perturbation analysis¶

3.1. Check gene expression pattern.¶

3.2. Calculate future gene expression after perturbation.¶

3.3. Calculate transition probability between cells¶

4. Visualization¶

4.1. Quiver plot: Show the direction of cell transition at single cell resolution¶

Caution: It is very important to find the optimal scale parameter.¶

4.2. Vector field graph¶

4.2.1 Find parameters for n_grid and min_mass¶

4.2.2 Plot vector fields¶

5. [This step is optional] Compare simulation vector with development vectors¶

5.0. Method overview¶

5.1 Prepare pseudotime data¶

5.2. Check data¶

5.3. Make Gradient_calculator object¶

5.4 Transfer pseudotime values to the grid points.¶

5.5. Calculate Gradient vectors¶

5.6. Calculate Inner product between two vectors¶

5.7. Show results¶

6. Focus on a single development lineage to interpret the results in detail¶

Caution: It is very important to find the optimal `scale` parameter.¶