# Source code for sfplot.preprocessing.visium_preprocessing
from pathlib import Path
import importlib
import os
import shutil
import tempfile
import pandas as pd
def _load_visium_reader():
    """Import ``spatialdata_io`` on demand and return its ``visium`` attribute.

    The import happens at call time rather than at module import time, so
    this module can be imported even when ``spatialdata_io`` is unavailable.

    Returns:
        The ``visium`` attribute of the imported ``spatialdata_io`` module.

    Raises:
        ImportError: If importing ``spatialdata_io`` (or resolving ``visium``
            on it) raises ``ImportError``. The original error is chained as
            the cause.
    """
    try:
        reader = importlib.import_module("spatialdata_io").visium
    except ImportError as exc:
        message = (
            "read_visium_bin requires spatialdata_io and its spatialdata/ome_zarr/zarr "
            "dependency stack. Please install compatible versions before using this helper."
        )
        raise ImportError(message) from exc
    else:
        return reader
# [docs]
# Columns written to ``tissue_positions_list.csv``, in output order.
_POSITION_COLUMNS = (
    "barcode",
    "in_tissue",
    "array_row",
    "array_col",
    "pxl_row_in_fullres",
    "pxl_col_in_fullres",
)

# Alternative column names accepted for each canonical column, in priority
# order: the first alias present in the table is the one that gets renamed.
_POSITION_ALIASES = {
    "array_row": ("row", "array_y", "grid_y", "spot_row"),
    "array_col": ("col", "array_x", "grid_x", "spot_col"),
    "pxl_col_in_fullres": ("pxl_x", "pxl_col", "x", "image_x"),
    "pxl_row_in_fullres": ("pxl_y", "pxl_row", "y", "image_y"),
    "in_tissue": ("inTissue", "intissue", "in_tissue_flag", "is_tissue"),
}


def _normalize_tissue_positions(pos):
    """Return ``pos`` restricted to the canonical position columns, in order.

    Args:
        pos: DataFrame read from ``tissue_positions.parquet``.

    Returns:
        A DataFrame whose columns are exactly ``_POSITION_COLUMNS``. If there
        is no ``barcode`` column, the index is turned into one. Known aliases
        are renamed to their canonical names. Columns that are still missing
        are filled with ``0`` and reported through a ``UserWarning``.
    """
    # Local import keeps this block independent of the file-level import list.
    import warnings

    if "barcode" not in pos.columns:
        pos = pos.rename_axis("barcode").reset_index()

    rename = {}
    for target, aliases in _POSITION_ALIASES.items():
        if target in pos.columns:
            continue
        alias = next((name for name in aliases if name in pos.columns), None)
        if alias is not None:
            rename[alias] = target
    pos = pos.rename(columns=rename)

    missing = [name for name in _POSITION_COLUMNS if name not in pos.columns]
    if missing:
        # Best-effort fallback kept from the original behaviour, but no longer
        # silent: zero-filled coordinates are placeholders, not real positions.
        warnings.warn(
            f"tissue positions table lacks columns {missing}; filling them with 0",
            stacklevel=3,
        )
        for name in missing:
            pos[name] = 0

    return pos[list(_POSITION_COLUMNS)]


def read_visium_bin(base: Path, dataset_id: str, use_filtered: bool = True, keep_tmp: bool = False):
    """
    Adapter for spatialdata-io 0.3.0, reads Visium HD output containing Parquet coordinates.
    Does not write any files to base.

    A temporary "shadow" directory is built containing a header-less
    ``spatial/tissue_positions_list.csv`` generated from
    ``spatial/tissue_positions.parquet``, a copy of
    ``spatial/scalefactors_json.json``, and a symlink to (or, if linking
    fails, a copy of) the counts file. The ``visium`` reader is then run on
    that directory.

    Args:
        base: Directory holding the counts ``.h5`` file and the ``spatial``
            sub-directory. A ``str`` is accepted as well as a ``Path``.
        dataset_id: Forwarded to the reader as ``dataset_id``; also used in
            the temporary directory's name prefix.
        use_filtered: Use ``filtered_feature_bc_matrix.h5`` when true,
            otherwise ``raw_feature_bc_matrix.h5``.
        keep_tmp: When true, the shadow directory is left on disk instead of
            being removed before returning.

    Returns:
        Whatever the ``visium`` reader returns for the shadow directory.

    Raises:
        FileNotFoundError: If ``spatial/tissue_positions.parquet`` does not
            exist under ``base``, or if the scalefactors file is missing.
        ImportError: If ``spatialdata_io`` cannot be imported.
    """
    base = Path(base)
    spatial_dir = base / "spatial"
    pqt = spatial_dir / "tissue_positions.parquet"
    if not pqt.exists():
        raise FileNotFoundError(f"{pqt} does not exist")
    pos = _normalize_tissue_positions(pd.read_parquet(pqt))

    counts_file = "filtered_feature_bc_matrix.h5" if use_filtered else "raw_feature_bc_matrix.h5"
    # Make the link target absolute: a relative symlink target is interpreted
    # relative to the link's own directory (the shadow dir), not the CWD.
    counts_src = (base / counts_file).resolve()

    shadow_dir = Path(tempfile.mkdtemp(prefix=f"visium_shadow_{dataset_id}_"))
    # Everything after mkdtemp runs under the finally below, so a failure
    # while populating the shadow directory does not leak it.
    try:
        shadow_spatial = shadow_dir / "spatial"
        shadow_spatial.mkdir(parents=True, exist_ok=True)

        # Write tissue_positions_list.csv (no header)
        pos.to_csv(shadow_spatial / "tissue_positions_list.csv", index=False, header=False)

        # Copy scalefactors
        shutil.copy2(spatial_dir / "scalefactors_json.json", shadow_spatial / "scalefactors_json.json")

        # Symlink the counts file; fall back to a copy where linking is not
        # permitted or not supported.
        counts_shadow = shadow_dir / counts_file
        try:
            os.symlink(counts_src, counts_shadow)
        except (OSError, NotImplementedError):
            shutil.copy2(counts_src, counts_shadow)

        # Call visium without tissue_positions_file so that it discovers the
        # generated CSV on its own.
        visium = _load_visium_reader()
        sdata = visium(
            path=shadow_dir,
            dataset_id=dataset_id,
            counts_file=counts_file,
            scalefactors_file="spatial/scalefactors_json.json",
        )
    finally:
        if not keep_tmp:
            shutil.rmtree(shadow_dir, ignore_errors=True)
    return sdata