# Source code for sfplot.plotting.plotting

# Cell-GPS plotting.py

import os
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.neighbors import NearestNeighbors

from ..preprocessing.data_processing import load_xenium_data



# [docs]
def generate_cluster_distance_heatmap_from_path(
    base_path: str,
    sample: str,
    figsize: tuple = (8, 8),
    output_dir: Optional[str] = None,
    show_dendrogram: bool = True
):
    """
    Load one sample and save its Cell-GPS cluster-distance clustermap as a PDF.

    For every cluster, a 1-nearest-neighbour model is fitted on the cells of
    that cluster and queried with all cells, which yields each cell's distance
    to the closest cell of every cluster (0 for a cell's own cluster, because
    the query set contains the fitted cells). These distances are averaged per
    source cluster and the cluster-by-cluster matrix is drawn with
    ``seaborn.clustermap`` (rows labelled "Searcher", columns "Findee").

    Parameters:
    ----------
    base_path : str
        Base path where data is stored.
    sample : str
        Sample name; ``os.path.join(base_path, sample)`` is passed to
        ``load_xenium_data``. Also used in the figure title and file name.
    figsize : tuple
        Figure size forwarded to ``seaborn.clustermap``. Defaults to (8, 8).
    output_dir : Optional[str]
        Output directory for the PDF file. Defaults to current working directory.
        It is created if it does not exist.
    show_dendrogram : bool
        If True (default), the dendrograms are drawn, aligned with the heatmap,
        and the colour legend is moved into the top-left corner. If False, the
        dendrogram axes are hidden (row/column ordering is still clustered).

    Returns:
    -------
    None
        The figure is written to ``<output_dir>/Cell-GPS_of_<sample>.pdf``.
        If the averaged matrix is empty, a warning is printed and nothing is saved.
    """
    # Set output directory
    if output_dir is None:
        output_dir = os.getcwd()
    os.makedirs(output_dir, exist_ok=True)

    # Construct path and read data; the loaded object must provide
    # obsm["spatial"], obs["Cluster"] and obs["cell_id"].
    adata = load_xenium_data(os.path.join(base_path, sample))

    # 1. Coordinates and cluster labels
    coords = np.asarray(adata.obsm["spatial"])
    clusters = adata.obs["Cluster"].astype("category")
    unique_clusters = clusters.cat.categories
    cell_ids = adata.obs["cell_id"]

    # 2. Per-cell table (rows: cell_id, columns: cluster), initialised to NaN
    df_nearest_cluster_dist = pd.DataFrame(
        index=cell_ids,
        columns=unique_clusters,
        dtype=float
    )

    # 3. Distance from every cell to the nearest cell of each cluster
    for c in unique_clusters:
        # Plain boolean array so the selection is positional, not label-based
        in_cluster = (clusters == c).to_numpy()
        if not in_cluster.any():
            # Category without cells: its column simply stays NaN
            continue

        nbrs_c = NearestNeighbors(n_neighbors=1, algorithm="auto")
        nbrs_c.fit(coords[in_cluster])
        dist_c, _ = nbrs_c.kneighbors(coords)
        df_nearest_cluster_dist[c] = dist_c[:, 0]

    # 4. Keep the per-cell table on the loaded object
    adata.uns["nearest_cluster_dist"] = df_nearest_cluster_dist

    # 5. Average per source cluster. The grouping Series shares the table's
    #    index; observed=True keeps categories without cells from producing
    #    all-NaN rows that hierarchical clustering cannot handle.
    clusters_by_id = pd.Series(
        data=clusters.values,
        index=cell_ids,
        name="Cluster"
    )
    df_group_mean = df_nearest_cluster_dist.groupby(clusters_by_id, observed=True).mean()

    # 6. Drop columns that are entirely NaN (clusters without any cell)
    df_group_mean_clean = df_group_mean.dropna(axis=1, how="all")

    if df_group_mean_clean.empty:
        print(f"Warning: df_group_mean_clean is empty for sample {sample}.")
        print("Check if there are clusters that exist in the data.")
        return

    # 7. Draw the clustermap. clustermap creates its own figure, so no
    #    separate plt.figure() call is made (it would be left open).
    #    Linkage needs at least two observations along the clustered axis.
    n_rows, n_cols = df_group_mean_clean.shape
    g = sns.clustermap(
        df_group_mean_clean,
        cmap="RdBu",
        figsize=figsize,
        row_cluster=n_rows > 1,
        col_cluster=n_cols > 1,
        linewidths=0.5,
        annot=False
    )

    # Square heatmap cells
    g.ax_heatmap.set_aspect("equal")

    if show_dendrogram:
        heatmap_pos = g.ax_heatmap.get_position()

        # Row dendrogram: same vertical extent as the heatmap
        row_dendro_pos = g.ax_row_dendrogram.get_position()
        g.ax_row_dendrogram.set_position([
            row_dendro_pos.x0,
            heatmap_pos.y0,
            row_dendro_pos.width,
            heatmap_pos.height
        ])

        # Column dendrogram: same horizontal extent as the heatmap
        col_dendro_pos = g.ax_col_dendrogram.get_position()
        g.ax_col_dendrogram.set_position([
            heatmap_pos.x0,
            col_dendro_pos.y0,
            heatmap_pos.width,
            col_dendro_pos.height
        ])

        # Blank top-left corner: horizontally from the row dendrogram's left
        # edge to the heatmap's left edge, vertically between the heatmap's
        # top edge and the column dendrogram's top edge. min/abs keep the
        # extent positive whichever of the two edges is higher.
        corner_left = row_dendro_pos.x0
        corner_width = heatmap_pos.x0 - corner_left
        heatmap_top = heatmap_pos.y0 + heatmap_pos.height
        dendro_top = col_dendro_pos.y0 + col_dendro_pos.height
        corner_bottom = min(heatmap_top, dendro_top)
        corner_height = abs(dendro_top - heatmap_top)

        # Legend takes 30% of the corner's width and 70% of its height, centred
        cbar_width = corner_width * 0.3
        cbar_height = corner_height * 0.7
        g.cax.set_position([
            corner_left + (corner_width - cbar_width) / 2,
            corner_bottom + (corner_height - cbar_height) / 2,
            cbar_width,
            cbar_height
        ])
    else:
        # Hide the tree axes only; the clustered ordering is kept
        g.ax_row_dendrogram.set_visible(False)
        g.ax_col_dendrogram.set_visible(False)

    # Axis labels and tick orientation
    g.ax_heatmap.set_xlabel("Findee", fontsize=12)
    g.ax_heatmap.set_ylabel("Searcher", fontsize=12)
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)

    # Overall figure title (not the heatmap title)
    g.fig.suptitle(f"Cell-GPS of {sample}", fontsize=12, y=1)

    # 8. Save this figure explicitly and always release it afterwards
    output_file = os.path.join(output_dir, f"Cell-GPS_of_{sample}.pdf")
    try:
        g.fig.savefig(output_file, format="pdf", bbox_inches="tight")
    finally:
        plt.close(g.fig)

    print(f"Sample {sample} done. PDF saved to {output_file}")




# [docs]
def generate_cluster_distance_heatmap_from_adata(
    adata: 'anndata.AnnData',
    cluster_col: str = "Cluster",
    output_dir: Optional[str] = None,
    output_filename: Optional[str] = None,
    figsize: tuple = (8, 8),
    cmap: str = "RdBu",
    max_scale: float = 10,
    show_dendrogram: bool = True
):
    """
    Save the Cell-GPS cluster-distance clustermap of an AnnData object as a PDF.

    For every cluster, a 1-nearest-neighbour model is fitted on the cells of
    that cluster and queried with all cells, which yields each cell's distance
    to the closest cell of every cluster (0 for a cell's own cluster, because
    the query set contains the fitted cells). These distances are averaged per
    source cluster and the cluster-by-cluster matrix is drawn with
    ``seaborn.clustermap`` (rows labelled "Searcher", columns "Findee").

    Side effect: the per-cell distance table (rows: ``cell_id``, columns:
    clusters) is stored in ``adata.uns["nearest_cluster_dist"]``.

    Parameters:
    ----------
    adata : anndata.AnnData
        Object providing ``obs[cluster_col]``, ``obs["cell_id"]``,
        ``obsm["spatial"]`` and ``uns``.
    cluster_col : str, optional
        Column name in `adata.obs` containing cluster information. Defaults to "Cluster".
    output_dir : Optional[str]
        Output directory for the PDF file. Defaults to current working directory.
        It is created if it does not exist.
    output_filename : Optional[str]
        Output file name. If not specified, uses "Cell-GPS_of_{sample}.pdf",
        where ``sample`` is ``adata.uns.get("sample", "Sample")``.
    figsize : tuple, optional
        Figure size forwarded to ``seaborn.clustermap``. Defaults to (8, 8).
    cmap : str, optional
        Colormap for the heatmap. Defaults to "RdBu".
    max_scale : float, optional
        Not used by this function; kept so existing callers passing it keep working.
    show_dendrogram : bool, optional
        If True (default), the dendrograms are drawn, aligned with the heatmap,
        and the colour legend is moved into the top-left corner. If False, the
        dendrogram axes are hidden (row/column ordering is still clustered).

    Returns:
    -------
    None
        If the averaged matrix is empty, a warning is printed and nothing is saved.

    Raises:
    ------
    ValueError
        If `cluster_col` or "cell_id" is missing from `adata.obs`, or "spatial"
        is missing from `adata.obsm`.
    """
    # Set output directory
    if output_dir is None:
        output_dir = os.getcwd()
    os.makedirs(output_dir, exist_ok=True)

    # Validate all required inputs before doing any work
    if cluster_col not in adata.obs.columns:
        raise ValueError(f"'{cluster_col}' does not exist in adata.obs. Please check the column name.")
    if "spatial" not in adata.obsm:
        raise ValueError("'spatial' coordinate info does not exist in adata.obsm. Please ensure spatial coordinates are present.")
    if "cell_id" not in adata.obs.columns:
        raise ValueError("'cell_id' column does not exist in adata.obs. Please ensure the data contains 'cell_id'.")

    # 1. Coordinates and cluster labels
    coords = np.asarray(adata.obsm["spatial"])
    clusters = adata.obs[cluster_col].astype("category")
    unique_clusters = clusters.cat.categories
    cell_ids = adata.obs["cell_id"]

    # 2. Per-cell table (rows: cell_id, columns: cluster), initialised to NaN
    df_nearest_cluster_dist = pd.DataFrame(
        index=cell_ids,
        columns=unique_clusters,
        dtype=float
    )

    # 3. Distance from every cell to the nearest cell of each cluster
    for c in unique_clusters:
        # Plain boolean array so the selection is positional, not label-based
        in_cluster = (clusters == c).to_numpy()
        if not in_cluster.any():
            # Category without cells: its column simply stays NaN
            continue

        nbrs_c = NearestNeighbors(n_neighbors=1, algorithm="auto")
        nbrs_c.fit(coords[in_cluster])
        dist_c, _ = nbrs_c.kneighbors(coords)
        df_nearest_cluster_dist[c] = dist_c[:, 0]

    # 4. Expose the per-cell table to the caller
    adata.uns["nearest_cluster_dist"] = df_nearest_cluster_dist

    # 5. Average per source cluster. The grouping Series shares the table's
    #    index; observed=True keeps categories without cells from producing
    #    all-NaN rows that hierarchical clustering cannot handle.
    clusters_by_id = pd.Series(
        data=clusters.values,
        index=cell_ids,
        name=cluster_col
    )
    df_group_mean = df_nearest_cluster_dist.groupby(clusters_by_id, observed=True).mean()

    # 6. Drop columns that are entirely NaN (clusters without any cell)
    df_group_mean_clean = df_group_mean.dropna(axis=1, how="all")

    if df_group_mean_clean.empty:
        print("Warning: df_group_mean_clean is empty. Please check that clusters exist in the data.")
        return

    # 7. Draw the clustermap. clustermap creates its own figure, so no
    #    separate plt.figure() call is made (it would be left open).
    #    Linkage needs at least two observations along the clustered axis.
    n_rows, n_cols = df_group_mean_clean.shape
    g = sns.clustermap(
        df_group_mean_clean,
        cmap=cmap,
        figsize=figsize,
        row_cluster=n_rows > 1,
        col_cluster=n_cols > 1,
        linewidths=0.5,
        annot=False
    )

    # Square heatmap cells
    g.ax_heatmap.set_aspect("equal")

    if show_dendrogram:
        heatmap_pos = g.ax_heatmap.get_position()

        # Row dendrogram: same vertical extent as the heatmap
        row_dendro_pos = g.ax_row_dendrogram.get_position()
        g.ax_row_dendrogram.set_position([
            row_dendro_pos.x0,
            heatmap_pos.y0,
            row_dendro_pos.width,
            heatmap_pos.height
        ])

        # Column dendrogram: same horizontal extent as the heatmap
        col_dendro_pos = g.ax_col_dendrogram.get_position()
        g.ax_col_dendrogram.set_position([
            heatmap_pos.x0,
            col_dendro_pos.y0,
            heatmap_pos.width,
            col_dendro_pos.height
        ])

        # Blank top-left corner: horizontally from the row dendrogram's left
        # edge to the heatmap's left edge, vertically between the heatmap's
        # top edge and the column dendrogram's top edge. min/abs keep the
        # extent positive whichever of the two edges is higher.
        corner_left = row_dendro_pos.x0
        corner_width = heatmap_pos.x0 - corner_left
        heatmap_top = heatmap_pos.y0 + heatmap_pos.height
        dendro_top = col_dendro_pos.y0 + col_dendro_pos.height
        corner_bottom = min(heatmap_top, dendro_top)
        corner_height = abs(dendro_top - heatmap_top)

        # Legend takes 30% of the corner's width and 70% of its height, centred
        cbar_width = corner_width * 0.3
        cbar_height = corner_height * 0.7
        g.cax.set_position([
            corner_left + (corner_width - cbar_width) / 2,
            corner_bottom + (corner_height - cbar_height) / 2,
            cbar_width,
            cbar_height
        ])
    else:
        # Hide the tree axes only; the clustered ordering is kept
        g.ax_row_dendrogram.set_visible(False)
        g.ax_col_dendrogram.set_visible(False)

    # Axis labels and tick orientation
    g.ax_heatmap.set_xlabel("Findee", fontsize=12)
    g.ax_heatmap.set_ylabel("Searcher", fontsize=12)
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)

    # Overall figure title; the sample name is read from adata.uns when present
    sample = adata.uns.get("sample", "Sample")
    g.fig.suptitle(f"Cell-GPS of {sample}", fontsize=12, y=1)

    # 8. Save this figure explicitly and always release it afterwards
    if output_filename is None:
        output_filename = f"Cell-GPS_of_{sample}.pdf"
    output_file = os.path.join(output_dir, output_filename)
    try:
        g.fig.savefig(output_file, format="pdf", bbox_inches="tight")
    finally:
        plt.close(g.fig)

    print(f"Cluster distance heatmap saved to {output_file}")




# [docs]
def generate_cluster_distance_heatmap_from_df(
    df: pd.DataFrame,
    x_col: str = 'x',
    y_col: str = 'y',
    celltype_col: str = 'celltype',
    sample: str = 'Sample',
    output_dir: Optional[str] = None,
    output_filename: Optional[str] = None,
    figsize: tuple = (8, 8),
    cmap: str = "RdBu",
    show_dendrogram: bool = True
):
    """
    Save the Cell-GPS cluster-distance clustermap of a cell table as a PDF.

    For every cell type, a 1-nearest-neighbour model is fitted on the cells of
    that type and queried with all cells, which yields each cell's distance to
    the closest cell of every type (0 for a cell's own type, because the query
    set contains the fitted cells). These distances are averaged per source
    type and the type-by-type matrix is drawn with ``seaborn.clustermap``
    (rows labelled "Searcher", columns "Findee").

    Parameters:
    ----------
    df : pd.DataFrame
        DataFrame containing cell data, one row per cell.
    x_col : str, optional
        Column name for x coordinates. Defaults to 'x'.
    y_col : str, optional
        Column name for y coordinates. Defaults to 'y'.
    celltype_col : str, optional
        Column name for cell type. Defaults to 'celltype'.
    sample : str, optional
        Sample name shown in the figure title. Defaults to 'Sample'.
    output_dir : Optional[str]
        Output directory for the PDF file. Defaults to current working directory.
        It is created if it does not exist.
    output_filename : Optional[str]
        Output file name. If not specified, uses "clustermap_output.pdf".
    figsize : tuple, optional
        Figure size forwarded to ``seaborn.clustermap``. Defaults to (8, 8).
    cmap : str, optional
        Colormap for the heatmap. Defaults to "RdBu".
    show_dendrogram : bool, optional
        If True (default), the dendrograms are drawn, aligned with the heatmap,
        and the colour legend is moved into the top-left corner. If False, the
        dendrogram axes are hidden (row/column ordering is still clustered).

    Returns:
    -------
    None
        If the averaged matrix is empty, a warning is printed and nothing is saved.

    Raises:
    ------
    ValueError
        If any of `x_col`, `y_col` or `celltype_col` is missing from `df`.
    """
    # Set output directory
    if output_dir is None:
        output_dir = os.getcwd()
    os.makedirs(output_dir, exist_ok=True)

    # Check whether required columns exist in the DataFrame
    required_columns = {x_col, y_col, celltype_col}
    if not required_columns.issubset(df.columns):
        raise ValueError(f"DataFrame must contain the following columns: {required_columns}")

    # Cluster labels and (n_cells, 2) coordinates
    clusters = df[celltype_col].astype('category')
    unique_clusters = clusters.cat.categories
    coords = df[[x_col, y_col]].to_numpy()

    # Per-cell table (rows: df.index, columns: cell type), initialised to NaN
    df_nearest_cluster_dist = pd.DataFrame(
        index=df.index,
        columns=unique_clusters,
        dtype=float
    )

    # Distance from every cell to the nearest cell of each type
    for c in unique_clusters:
        # Plain boolean array so the selection is positional, not label-based
        in_cluster = (clusters == c).to_numpy()
        if not in_cluster.any():
            # Category without cells: its column simply stays NaN
            continue

        nbrs_c = NearestNeighbors(n_neighbors=1, algorithm='auto')
        nbrs_c.fit(coords[in_cluster])
        dist_c, _ = nbrs_c.kneighbors(coords)
        df_nearest_cluster_dist[c] = dist_c[:, 0]

    # Average per source type. `clusters` shares df's index with the table;
    # observed=True keeps categories without cells from producing all-NaN
    # rows that hierarchical clustering cannot handle.
    df_group_mean = df_nearest_cluster_dist.groupby(clusters, observed=True).mean()

    # Drop columns that are entirely NaN (types without any cell)
    df_group_mean_clean = df_group_mean.dropna(axis=1, how='all')

    if df_group_mean_clean.empty:
        print("Warning: df_group_mean_clean is empty. Please check that clusters exist in the data.")
        return

    # Draw the clustermap (it creates its own figure). Linkage needs at
    # least two observations along the clustered axis.
    n_rows, n_cols = df_group_mean_clean.shape
    g = sns.clustermap(
        df_group_mean_clean,
        cmap=cmap,
        figsize=figsize,
        row_cluster=n_rows > 1,
        col_cluster=n_cols > 1,
        linewidths=0.5,
        annot=False
    )

    # Square heatmap cells
    g.ax_heatmap.set_aspect('equal')

    if show_dendrogram:
        heatmap_pos = g.ax_heatmap.get_position()

        # Row dendrogram: same vertical extent as the heatmap
        row_dendro_pos = g.ax_row_dendrogram.get_position()
        g.ax_row_dendrogram.set_position([
            row_dendro_pos.x0,
            heatmap_pos.y0,
            row_dendro_pos.width,
            heatmap_pos.height
        ])

        # Column dendrogram: same horizontal extent as the heatmap
        col_dendro_pos = g.ax_col_dendrogram.get_position()
        g.ax_col_dendrogram.set_position([
            heatmap_pos.x0,
            col_dendro_pos.y0,
            heatmap_pos.width,
            col_dendro_pos.height
        ])

        # Blank top-left corner: horizontally from the row dendrogram's left
        # edge to the heatmap's left edge, vertically between the heatmap's
        # top edge and the column dendrogram's top edge. min/abs keep the
        # extent positive whichever of the two edges is higher.
        corner_left = row_dendro_pos.x0
        corner_width = heatmap_pos.x0 - corner_left
        heatmap_top = heatmap_pos.y0 + heatmap_pos.height
        dendro_top = col_dendro_pos.y0 + col_dendro_pos.height
        corner_bottom = min(heatmap_top, dendro_top)
        corner_height = abs(dendro_top - heatmap_top)

        # Legend takes 30% of the corner's width and 70% of its height, centred
        cbar_width = corner_width * 0.3
        cbar_height = corner_height * 0.7
        g.cax.set_position([
            corner_left + (corner_width - cbar_width) / 2,
            corner_bottom + (corner_height - cbar_height) / 2,
            cbar_width,
            cbar_height
        ])
    else:
        # Hide the tree axes only; the clustered ordering is kept
        g.ax_row_dendrogram.set_visible(False)
        g.ax_col_dendrogram.set_visible(False)

    # Axis labels, tick orientation and overall figure title
    g.ax_heatmap.set_xlabel("Findee", fontsize=12)
    g.ax_heatmap.set_ylabel("Searcher", fontsize=12)
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)
    g.fig.suptitle(f"Cell-GPS of {sample}", fontsize=12, y=1)

    # Save this figure explicitly and always release it afterwards
    if output_filename is None:
        output_filename = "clustermap_output.pdf"
    output_file = os.path.join(output_dir, output_filename)
    try:
        g.fig.savefig(output_file, format="pdf", bbox_inches="tight")
    finally:
        plt.close(g.fig)

    print(f"Cluster distance heatmap saved to {output_file}")
# Source code for sfplot.plotting.plotting
#
# Cell-GPS
#
# Navigation
#
# Related Topics