Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@
# If building the documentation fails because of a missing link that is outside your control,
# you can add an exception to this list.
("py:class", "Path"),
("py:class", "pathlib._local.Path"),
("py:class", "AnnData"),
("py:class", "SpatialData"),
("py:func", "imageio.imread"), # maybe this can be fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ dev = [
"pre-commit"
]
doc = [
"sphinx>=4.5",
"sphinx>=4.5,<9",
"sphinx-book-theme>=1.0.0",
"myst-nb",
"sphinxcontrib-bibtex>=1.0.0",
Expand Down
81 changes: 37 additions & 44 deletions src/spatialdata_io/readers/xenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
from dask.dataframe import read_parquet
from dask_image.imread import imread
from geopandas import GeoDataFrame
from joblib import Parallel, delayed
from pyarrow import Table
from shapely import GeometryType, Polygon, from_ragged_array
from spatialdata import SpatialData
from spatialdata._core.query.relational_query import get_element_instances
from spatialdata._logging import logger
from spatialdata.models import (
Image2DModel,
Labels2DModel,
Expand Down Expand Up @@ -61,7 +61,7 @@ def xenium(
*,
cells_boundaries: bool = True,
nucleus_boundaries: bool = True,
cells_as_circles: bool | None = None,
cells_as_circles: bool = False,
cells_labels: bool = True,
nucleus_labels: bool = True,
transcripts: bool = True,
Expand Down Expand Up @@ -136,7 +136,7 @@ def xenium(

Notes
-----
Old versions. Until spatialdata-io v0.1.3post0: previously, `cells_as_circles` was `True` by default; the table was associated to the
Old versions. Until spatialdata-io v0.6.0: `cells_as_circles` was `True` by default; the table was associated to the
circles when `cells_as_circles` was `True`, and the table was associated to the polygons when `cells_as_circles`
was `False`; the radii of the circles were computed form the nuclei instead of the cells.

Expand All @@ -153,14 +153,6 @@ def xenium(
... )
>>> sdata.write("path/to/data.zarr")
"""
if cells_as_circles is None:
cells_as_circles = True
warnings.warn(
"The default value of `cells_as_circles` will change to `False` in the next release. "
"Please pass `True` explicitly to maintain the current behavior.",
DeprecationWarning,
stacklevel=3,
)
image_models_kwargs, labels_models_kwargs = _initialize_raster_models_kwargs(
image_models_kwargs, labels_models_kwargs
)
Expand Down Expand Up @@ -223,18 +215,16 @@ def xenium(
# labels.
if nucleus_labels:
labels["nucleus_labels"], _ = _get_labels_and_indices_mapping(
path,
XeniumKeys.CELLS_ZARR,
specs,
path=path,
specs=specs,
mask_index=0,
labels_name="nucleus_labels",
labels_models_kwargs=labels_models_kwargs,
)
if cells_labels:
labels["cell_labels"], cell_labels_indices_mapping = _get_labels_and_indices_mapping(
path,
XeniumKeys.CELLS_ZARR,
specs,
path=path,
specs=specs,
mask_index=1,
labels_name="cell_labels",
labels_models_kwargs=labels_models_kwargs,
Expand Down Expand Up @@ -360,8 +350,8 @@ def filter(self, record: logging.LogRecord) -> bool:
return False
return True

logger = tifffile.logger()
logger.addFilter(IgnoreSpecificMessage())
tf_logger = tifffile.logger()
tf_logger.addFilter(IgnoreSpecificMessage())
image_models_kwargs = dict(image_models_kwargs)
assert "c_coords" not in image_models_kwargs, (
"The channel names for the morphology focus images are handled internally"
Expand All @@ -374,7 +364,7 @@ def filter(self, record: logging.LogRecord) -> bool:
image_models_kwargs,
)
del image_models_kwargs["c_coords"]
logger.removeFilter(IgnoreSpecificMessage())
tf_logger.removeFilter(IgnoreSpecificMessage())

if table is not None:
tables["table"] = table
Expand Down Expand Up @@ -402,14 +392,16 @@ def filter(self, record: logging.LogRecord) -> bool:
def _decode_cell_id_column(cell_id_column: pd.Series) -> pd.Series:
if isinstance(cell_id_column.iloc[0], bytes):
return cell_id_column.str.decode("utf-8")
if not isinstance(cell_id_column.iloc[0], str):
cell_id_column.index = cell_id_column.index.astype(str)
return cell_id_column


def _get_polygons(
path: Path,
file: str,
specs: dict[str, Any],
idx: ArrayLike | None = None,
idx: pd.Series | None = None,
) -> GeoDataFrame:
# seems to be faster than pd.read_parquet
df = pq.read_table(path / file).to_pandas()
Expand Down Expand Up @@ -448,7 +440,7 @@ def _get_polygons(
if version is not None and version < packaging.version.parse("2.0.0"):
assert idx is not None
assert len(idx) == len(geo_df)
assert index.equals(idx)
assert np.array_equal(index.values, idx.values)
else:
if np.unique(geo_df.index).size != len(geo_df):
warnings.warn(
Expand All @@ -464,7 +456,6 @@ def _get_polygons(

def _get_labels_and_indices_mapping(
path: Path,
file: str,
specs: dict[str, Any],
mask_index: int,
labels_name: str,
Expand Down Expand Up @@ -493,36 +484,38 @@ def _get_labels_and_indices_mapping(
cell_id, dataset_suffix = z["cell_id"][...].T
cell_id_str = cell_id_str_from_prefix_suffix_uint32(cell_id, dataset_suffix)

# this information will probably be available in the `label_id` column for version > 2.0.0 (see public
# release notes mentioned above)
real_label_index = get_element_instances(labels).values

# background removal
if real_label_index[0] == 0:
real_label_index = real_label_index[1:]

if version < packaging.version.parse("2.0.0"):
expected_label_index = z["seg_mask_value"][...]

if not np.array_equal(expected_label_index, real_label_index):
raise ValueError(
"The label indices from the labels differ from the ones from the input data. Please report "
f"this issue. Real label indices: {real_label_index}, expected label indices: "
f"{expected_label_index}."
)
label_index = z["seg_mask_value"][...]
else:
labels_positional_indices = z["polygon_sets"][f"{mask_index}"]["cell_index"][...]
if not np.array_equal(labels_positional_indices, np.arange(len(labels_positional_indices))):
raise ValueError(
"The positional indices of the labels do not match the expected range. Please report this issue."
# For v >= 2.0.0, seg_mask_value is no longer available in the zarr;
# read label_id from the corresponding parquet boundary file instead
boundaries_file = XeniumKeys.NUCLEUS_BOUNDARIES_FILE if mask_index == 0 else XeniumKeys.CELL_BOUNDARIES_FILE
boundary_columns = pq.read_schema(path / boundaries_file).names
if "label_id" in boundary_columns:
boundary_df = pq.read_table(path / boundaries_file, columns=[XeniumKeys.CELL_ID, "label_id"]).to_pandas()
unique_pairs = boundary_df.drop_duplicates(subset=[XeniumKeys.CELL_ID, "label_id"]).copy()
unique_pairs[XeniumKeys.CELL_ID] = _decode_cell_id_column(unique_pairs[XeniumKeys.CELL_ID])
cell_id_to_label_id = unique_pairs.set_index(XeniumKeys.CELL_ID)["label_id"]
label_index = cell_id_to_label_id.loc[cell_id_str].values
else:
# fallback for dev versions around 2.0.0 that lack both seg_mask_value and label_id
logger.warn(
f"Could not find the labels ids from the metadata for version {version}. Using a fallback (slower) implementation."
)
import dask.config

with dask.config.set(num_workers=1):
label_index = get_element_instances(labels).values

if label_index[0] == 0:
label_index = label_index[1:]

# labels_index is an uint32, so let's cast to np.int64 to avoid the risk of overflow on some systems
indices_mapping = pd.DataFrame(
{
"region": labels_name,
"cell_id": cell_id_str,
"label_index": real_label_index.astype(np.int64),
"label_index": label_index.astype(np.int64),
}
)
# because AnnData converts the indices to str
Expand Down
Loading