-
Notifications
You must be signed in to change notification settings - Fork 87
refactor filter_table_by_elements
#701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,31 +58,6 @@ def get_element_annotators(sdata: SpatialData, element_name: str) -> set[str]: | |
| return table_names | ||
|
|
||
|
|
||
| def _filter_table_by_element_names(table: AnnData | None, element_names: str | list[str]) -> AnnData | None: | ||
| """ | ||
| Filter an AnnData table to keep only the rows that are in the coordinate system. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| table | ||
| The table to filter; if None, returns None | ||
| element_names | ||
| The element_names to keep in the tables obs.region column | ||
|
|
||
| Returns | ||
| ------- | ||
| The filtered table, or None if the input table was None | ||
| """ | ||
| if table is None or not table.uns.get(TableModel.ATTRS_KEY): | ||
| return None | ||
| table_mapping_metadata = table.uns[TableModel.ATTRS_KEY] | ||
| region_key = table_mapping_metadata[TableModel.REGION_KEY_KEY] | ||
| table.obs = pd.DataFrame(table.obs) | ||
| table = table[table.obs[region_key].isin(element_names)].copy() | ||
| table.uns[TableModel.ATTRS_KEY][TableModel.REGION_KEY] = table.obs[region_key].unique().tolist() | ||
| return table | ||
|
|
||
|
|
||
| @singledispatch | ||
| def get_element_instances( | ||
| element: SpatialElement, | ||
|
|
@@ -110,8 +85,10 @@ def get_element_instances( | |
| def _( | ||
| element: DataArray | DataTree, | ||
| return_background: bool = False, | ||
| ) -> pd.Index: | ||
| ) -> pd.Index | None: | ||
| model = get_model(element) | ||
| if model in [Image2DModel, Image3DModel]: | ||
| return None | ||
| assert model in [Labels2DModel, Labels3DModel], "Expected a `Labels` element. Found an `Image` instead." | ||
| if isinstance(element, DataArray): | ||
| # get unique labels value (including 0 if present) | ||
|
|
@@ -145,8 +122,8 @@ def _( | |
|
|
||
| # TODO: replace function use throughout repo by `join_sdata_spatialelement_table` | ||
| def _filter_table_by_elements( | ||
| table: AnnData | None, elements_dict: dict[str, dict[str, Any]], match_rows: bool = False | ||
| ) -> AnnData | None: | ||
| table: AnnData | list[AnnData], elements_dict: dict[str, dict[str, Any]], match_rows: bool = False | ||
|
Comment on lines
124
to
+125
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new PR #1131 replaces the old implementation of `_filter_table_by_elements`. |
||
| ) -> AnnData: | ||
| """ | ||
| Filter an AnnData table to keep only the rows that are in the elements. | ||
|
|
||
|
|
@@ -163,42 +140,38 @@ def _filter_table_by_elements( | |
| ------- | ||
| The filtered table (eventually with reordered rows), or None if the input table was None. | ||
| """ | ||
| assert set(elements_dict.keys()).issubset({"images", "labels", "shapes", "points"}) | ||
| assert len(elements_dict) > 0, "elements_dict must not be empty" | ||
| assert any( | ||
| len(elements) > 0 for elements in elements_dict.values() | ||
| ), "elements_dict must contain at least one dict which contains at least one element" | ||
| if table is None: | ||
| return None | ||
|
|
||
| def _validate_elements_dict(elements_dict: dict[str, dict[str, Any]]) -> None: | ||
| assert set(elements_dict.keys()).issubset({"images", "labels", "shapes", "points"}) | ||
| assert len(elements_dict) > 0, "elements_dict must not be empty" | ||
| assert any( | ||
| len(elements) > 0 for elements in elements_dict.values() | ||
| ), "elements_dict must contain at least one dict which contains at least one element" | ||
|
|
||
| def _get_matching_indices( | ||
| table: AnnData, region_key: str, instance_key: str, name: str, instances: ArrayLike | ||
| ) -> ArrayLike: | ||
| return ((table.obs[region_key] == name) & (table.obs[instance_key].isin(instances))).to_numpy() | ||
|
|
||
| def _filter_table(table: AnnData, to_keep: ArrayLike) -> AnnData: | ||
| table.obs = pd.DataFrame(table.obs) | ||
| return table[to_keep, :] | ||
|
|
||
| _validate_elements_dict(elements_dict) | ||
| to_keep = np.zeros(len(table), dtype=bool) | ||
| region_key = table.uns[TableModel.ATTRS_KEY][TableModel.REGION_KEY_KEY] | ||
| instance_key = table.uns[TableModel.ATTRS_KEY][TableModel.INSTANCE_KEY] | ||
| instances = None | ||
| for _, elements in elements_dict.items(): | ||
| _, region_key, instance_key = get_table_keys(table) | ||
|
|
||
| for elements in elements_dict.values(): | ||
| for name, element in elements.items(): | ||
| if get_model(element) == Labels2DModel or get_model(element) == Labels3DModel: | ||
| if isinstance(element, DataArray): | ||
| # get unique labels value (including 0 if present) | ||
| instances = da.unique(element.data).compute() | ||
| else: | ||
| assert isinstance(element, DataTree) | ||
| v = element["scale0"].values() | ||
| assert len(v) == 1 | ||
| xdata = next(iter(v)) | ||
| # can be slow | ||
| instances = da.unique(xdata.data).compute() | ||
| instances = np.sort(instances) | ||
| elif get_model(element) == ShapesModel: | ||
| instances = element.index.to_numpy() | ||
| elif get_model(element) == PointsModel: | ||
| instances = element.compute().index.to_numpy() | ||
| else: | ||
| continue | ||
| indices = ((table.obs[region_key] == name) & (table.obs[instance_key].isin(instances))).to_numpy() | ||
| to_keep = to_keep | indices | ||
| model = get_model(element) | ||
| instances = get_element_instances(element) | ||
| if instances is not None: | ||
| indices = _get_matching_indices(table, region_key, instance_key, name, instances) | ||
| to_keep |= indices | ||
|
|
||
| original_table = table | ||
| table.obs = pd.DataFrame(table.obs) | ||
| table = table[to_keep, :] | ||
| table = _filter_table(table, to_keep) | ||
|
|
||
| if match_rows: | ||
| assert instances is not None | ||
| assert isinstance(instances, np.ndarray) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -735,10 +735,17 @@ def _filter_tables( | |
| continue | ||
| # each mode here requires paths or elements, using assert here to avoid mypy errors. | ||
| if by == "cs": | ||
| from spatialdata._core.query.relational_query import _filter_table_by_element_names | ||
| from spatialdata._core.query.relational_query import _filter_table_by_elements | ||
|
|
||
| assert element_names is not None | ||
| table = _filter_table_by_element_names(table, element_names) | ||
| elements_dict = {} | ||
| for element_type in ["images", "labels", "shapes", "points"]: | ||
| elements = getattr(self, element_type) | ||
| if elements: # Check if the dictionary is not empty | ||
| elements_dict[element_type] = { | ||
| name: elements[name] for name in element_names if name in elements | ||
| } | ||
| table = _filter_table_by_elements(table, elements_dict=elements_dict) | ||
|
Comment on lines
+738
to
+748
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks good! #1131 implements this change now. |
||
| if len(table) != 0: | ||
| tables[table_name] = table | ||
| elif by == "elements": | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,9 +135,15 @@ def test_filter_by_coordinate_system(full_sdata: SpatialData) -> None: | |
| def test_filter_by_coordinate_system_also_table(full_sdata: SpatialData) -> None: | ||
| from spatialdata.models import TableModel | ||
|
|
||
| rng = np.random.default_rng(seed=0) | ||
| full_sdata["table"].obs["annotated_shapes"] = rng.choice(["circles", "poly"], size=full_sdata["table"].shape[0]) | ||
| adata = full_sdata["table"] | ||
| adata = full_sdata["table"].copy() | ||
|
|
||
| circles_instances = full_sdata["circles"].index.values | ||
| poly_instances = full_sdata["poly"].index.values | ||
|
|
||
| adata = adata[: len(circles_instances) + len(poly_instances), :].copy() | ||
| adata.obs["annotated_shapes"] = ["circles"] * len(circles_instances) + ["poly"] * len(poly_instances) | ||
| adata.obs["instance_id"] = np.concatenate([circles_instances, poly_instances]) | ||
|
|
||
|
Comment on lines
-138
to
+146
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test had quite a big bug. The table that comes natively from conftest annotates labels, but here it was reused to annotate the `poly` and `circles` shapes. Both `poly` and `circles` have only 5 instances, yet the table was being filtered only by coordinate system. This meant that the first five instances in the table mapped to poly/circles, while all the other instances were still present and did not map to anything. This is because the filtering was happening with the now removed `_filter_table_by_element_names`. I will add a test so that the filter_table function always returns correct tables.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Addressed in #1131 |
||
| del adata.uns[TableModel.ATTRS_KEY] | ||
| del full_sdata.tables["table"] | ||
| full_sdata.table = TableModel.parse( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function is buggy, and it doesn't make sense with the new multiple-table design, as it can return wrong tables. I removed it and refactored its logic into the other filter method. I will add a test for this. See the other comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, #1131 removes the function as you suggested.