Check for mismatch of column names with alt expts (#58)

- Check whether the column names of the alternative experiments match the column names of the main experiment. This is equivalent to the ``withDimnames`` parameter in the R implementation. - On **getters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **replaced** with the column names of the main experiment. - On **setters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **checked** against the column names of the main experiment, and an exception is raised if they do not match.
BiocPy · Jan 21, 2025 · bb97d63 · bb97d63
1 parent 6af9fa3
commit bb97d63
Show file tree

Hide file tree

Showing 3 changed files with 116 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## Version 0.5.4
+
+- Check whether the column names of the alternative experiments match the column names of the main experiment. This is equivalent to the ``withDimnames`` parameter in the R implementation.
+  - On **getters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **replaced** with the
+  column names of the main experiment.
+  - On **setters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **checked** against the
+  column names of the main experiment, and an exception is raised if they do not match. If `with_dim_names` is False, a mismatch only produces a warning.
+
 ## Version 0.5.1 - 0.5.3
 
 - Add wrapper class methods to combine experiments by rows or columns.

diff --git a/src/singlecellexperiment/SingleCellExperiment.py b/src/singlecellexperiment/SingleCellExperiment.py
@@ -45,7 +45,7 @@ def _validate_reduced_dims(reduced_dims, shape):
             raise ValueError(f"Reduced dimension: '{rdname}' does not contain embeddings for all cells.")
 
 
-def _validate_alternative_experiments(alternative_experiments, shape):
+def _validate_alternative_experiments(alternative_experiments, shape, column_names, with_dim_names=True):
     if alternative_experiments is None:
         raise ValueError("'alternative_experiments' cannot be `None`, must be assigned to an empty dictionary.")
 
@@ -62,6 +62,15 @@ def _validate_alternative_experiments(alternative_experiments, shape):
         if shape[1] != alternative_experiment.shape[1]:
             raise ValueError(f"Alternative experiment: '{alt_name}' does not contain same number of" " cells.")
 
+        _alt_cnames = alternative_experiment.get_column_names()
+        _alt_cnames = None if _alt_cnames is None else list(_alt_cnames)
+        if _alt_cnames is not None:
+            if list(column_names) != _alt_cnames:
+                if with_dim_names:
+                    raise Exception(f"Column names do not match for alternative_experiment: {alt_name}")
+                else:
+                    warn(f"Column names do not match for alternative_experiment: {alt_name}", UserWarning)
+
 
 def _validate_pairs(pairs):
     if pairs is not None:
@@ -102,6 +111,7 @@ def __init__(
         alternative_experiments: Optional[Dict[str, Any]] = None,
         row_pairs: Optional[Any] = None,
         column_pairs: Optional[Any] = None,
+        alternative_experiment_check_dim_names: bool = True,
         validate: bool = True,
     ) -> None:
         """Initialize a single-cell experiment.
@@ -166,6 +176,13 @@ def __init__(
                 (e.g., sc-atac, crispr) and values as subclasses of
                 :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
+            alternative_experiment_check_dim_names:
+                Whether to check if the column names of the alternative experiment match the column names
+                of the main experiment. This is the equivalent to the ``withDimnames``
+                parameter in the R implementation.
+
+                Defaults to True.
+
             row_pairs:
                 Row pairings/relationships between features.
 
@@ -201,7 +218,12 @@ def __init__(
 
         if validate:
             _validate_reduced_dims(self._reduced_dims, self._shape)
-            _validate_alternative_experiments(self._alternative_experiments, self._shape)
+            _validate_alternative_experiments(
+                self._alternative_experiments,
+                self._shape,
+                self.get_column_names(),
+                with_dim_names=alternative_experiment_check_dim_names,
+            )
             _validate_pairs(self._row_pairs)
             _validate_pairs(self._column_pairs)
 
@@ -605,32 +627,52 @@ def main_experiment_name(self, name: Optional[str]):
     ######>> alternative_experiments <<######
     #########################################
 
-    def get_alternative_experiments(self) -> Dict[str, Any]:
+    def get_alternative_experiments(self, with_dim_names: bool = True) -> Dict[str, Any]:
         """Access alternative experiments.
 
+        Args:
+            with_dim_names:
+                Whether to replace the column names of the alternative experiment with the column names
+                of the main experiment. This is the equivalent to the ``withDimnames``
+                parameter in the R implementation.
+
+                Defaults to True.
+
         Returns:
-            A dictionary with names of
-            the experiments as keys and value the experiment.
+            A dictionary with experiment names as keys and value the alternative experiment.
         """
-        return self._alternative_experiments
+        _out = OrderedDict()
+        for name in self.get_alternative_experiment_names():
+            _out[name] = self.get_alternative_experiment(name, with_dim_names=with_dim_names)
+
+        return _out
 
     def set_alternative_experiments(
-        self, alternative_experiments: Dict[str, Any], in_place: bool = False
+        self, alternative_experiments: Dict[str, Any], with_dim_names: bool = True, in_place: bool = False
     ) -> "SingleCellExperiment":
         """Set new alternative experiments.
 
         Args:
             alternative_experiments:
                 New alternative experiments.
 
+            with_dim_names:
+                Whether to check if the column names of the alternative experiment match the column names
+                of the main experiment. This is the equivalent to the ``withDimnames``
+                parameter in the R implementation.
+
+                Defaults to True.
+
             in_place:
                 Whether to modify the ``SingleCellExperiment`` in place.
 
         Returns:
             A modified ``SingleCellExperiment`` object, either as a copy of the original
             or as a reference to the (in-place-modified) original.
         """
-        _validate_alternative_experiments(alternative_experiments, self.shape)
+        _validate_alternative_experiments(
+            alternative_experiments, self.shape, self.get_column_names(), with_dim_names=with_dim_names
+        )
         output = self._define_output(in_place)
         output._alternative_experiments = alternative_experiments
         return output
@@ -705,13 +747,20 @@ def alternative_experiment_names(self, names: List[str]):
     ######>> alternative_experiment getter <<######
     ###############################################
 
-    def get_alternative_experiment(self, name: Union[str, int]) -> Any:
+    def get_alternative_experiment(self, name: Union[str, int], with_dim_names: bool = True) -> Any:
         """Access alternative experiment by name.
 
         Args:
             name:
                 Name or index position of the alternative experiment.
 
+            with_dim_names:
+                Whether to replace the column names of the alternative experiment with the column names
+                of the main experiment. This is the equivalent to the ``withDimnames``
+                parameter in the R implementation.
+
+                Defaults to True.
+
         Raises:
             AttributeError:
                 If the dimension name does not exist.
@@ -721,28 +770,35 @@ def get_alternative_experiment(self, name: Union[str, int]) -> Any:
         Returns:
             The alternative experiment.
         """
+        _out = None
+
         if isinstance(name, int):
             if name < 0:
                 raise IndexError("Index cannot be negative.")
 
             if name > len(self.alternative_experiment_names):
                 raise IndexError("Index greater than the number of alternative experiments.")
 
-            return self._alternative_experiments[self.alternative_experiment_names[name]]
+            _out = self._alternative_experiments[self.alternative_experiment_names[name]]
         elif isinstance(name, str):
             if name not in self._alternative_experiments:
                 raise AttributeError(f"Alternative experiment: {name} does not exist.")
 
-            return self._alternative_experiments[name]
+            _out = self._alternative_experiments[name]
+        else:
+            raise TypeError(f"'name' must be a string or integer, provided '{type(name)}'.")
 
-        raise TypeError(f"'name' must be a string or integer, provided '{type(name)}'.")
+        if with_dim_names:
+            _out = _out.set_column_names(self.get_column_names())
+
+        return _out
 
     def alternative_experiment(self, name: Union[str, int]) -> Any:
         """Alias for :py:meth:`~get_alternative_experiment`, for back-compatibility."""
         return self.get_alternative_experiment(name=name)
 
     def set_alternative_experiment(
-        self, name: str, alternative_experiment: Any, in_place: bool = False
+        self, name: str, alternative_experiment: Any, with_dim_names: bool = True, in_place: bool = False
     ) -> "SingleCellExperiment":
         """Add or replace :py:attr:`~singlecellexperiment.SingleCellExperiment.alternative_experiment`'s.
 
@@ -755,6 +811,13 @@ def set_alternative_experiment(
                 Is a subclasses of
                 :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
+            with_dim_names:
+                Whether to check if the column names of the alternative experiment match the column names
+                of the main experiment. This is the equivalent to the ``withDimnames``
+                parameter in the R implementation.
+
+                Defaults to True.
+
             in_place:
                 Whether to modify the ``SingleCellExperiment`` in place.
 
@@ -769,7 +832,9 @@ def set_alternative_experiment(
             _tmp_alt_expt = _tmp_alt_expt.copy()
         _tmp_alt_expt[name] = alternative_experiment
 
-        _validate_alternative_experiments(_tmp_alt_expt, self._shape)
+        _validate_alternative_experiments(
+            _tmp_alt_expt, self._shape, self.get_column_names(), with_dim_names=with_dim_names
+        )
         output._alternative_experiments = _tmp_alt_expt
         return output
 

diff --git a/tests/test_sce.py b/tests/test_sce.py
@@ -143,3 +143,32 @@ def test_SCE_creation_modifications():
 
     tse.set_reduced_dimension("something", np.random.rand(tse.shape[1], 4), in_place=True)
     assert nassay_tse.get_reduced_dimension_names() == tse.get_reduced_dimension_names()
+
+def test_SCE_different_alt_names():
+    rse = SummarizedExperiment(
+        assays={"counts": counts}, row_data=row_data, column_data=pd.DataFrame(index = ["ChIP"] * 6 )
+    )
+
+    with pytest.raises(Exception):
+        tse = SingleCellExperiment(
+            assays={"counts": counts},
+            row_data=row_data,
+            column_data=col_data,
+            alternative_experiments={"alt": rse},
+        )
+
+    with pytest.raises(Exception):
+        tse = SingleCellExperiment(
+            assays={"counts": counts},
+            row_data=row_data,
+            column_data=pd.DataFrame(index = ["ChIP", "Input"] * 3),
+            alternative_experiments={"alt": rse},
+        )
+
+    with pytest.raises(Exception):
+        tse = SingleCellExperiment(
+            assays={"counts": counts},
+            row_data=row_data,
+            column_data=pd.DataFrame(index = ["ChIP", "Input", "Input"] * 2),
+            alternative_experiments={"alt": rse},
+        )