Skip to content

Commit

Permalink
Check for mismatch of column names with alt expts (#58)
Browse files Browse the repository at this point in the history
- Check whether the column names of the alternative experiments match the column names of the main experiment. This is equivalent to the ``withDimnames`` parameter in the R implementation.
  - On **getters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **replaced** with the
  column names of the main experiment.
  - On **setters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **checked** against the
  column names of the main experiment and an exception is raised if they do not match; if it is False, only a warning is issued.
  • Loading branch information
jkanche authored Jan 21, 2025
1 parent 6af9fa3 commit bb97d63
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 14 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## Version 0.5.4

- Check whether the column names of the alternative experiments match the column names of the main experiment. This is equivalent to the ``withDimnames`` parameter in the R implementation.
  - On **getters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **replaced** with the
    column names of the main experiment.
  - On **setters** of alternative experiments, if `with_dim_names` is True, the column names of the alternative experiment are **checked** against the
    column names of the main experiment and an exception is raised if they do not match; if it is False, only a warning is issued.

## Version 0.5.1 - 0.5.3

- Add wrapper class methods to combine experiments by rows or columns.
Expand Down
93 changes: 79 additions & 14 deletions src/singlecellexperiment/SingleCellExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _validate_reduced_dims(reduced_dims, shape):
raise ValueError(f"Reduced dimension: '{rdname}' does not contain embeddings for all cells.")


def _validate_alternative_experiments(alternative_experiments, shape):
def _validate_alternative_experiments(alternative_experiments, shape, column_names, with_dim_names=True):
if alternative_experiments is None:
raise ValueError("'alternative_experiments' cannot be `None`, must be assigned to an empty dictionary.")

Expand All @@ -62,6 +62,15 @@ def _validate_alternative_experiments(alternative_experiments, shape):
if shape[1] != alternative_experiment.shape[1]:
raise ValueError(f"Alternative experiment: '{alt_name}' does not contain same number of" " cells.")

_alt_cnames = alternative_experiment.get_column_names()
_alt_cnames = None if _alt_cnames is None else list(_alt_cnames)
if _alt_cnames is not None:
if list(column_names) != _alt_cnames:
if with_dim_names:
raise Exception(f"Column names do not match for alternative_experiment: {alt_name}")
else:
warn(f"Column names do not match for alternative_experiment: {alt_name}", UserWarning)


def _validate_pairs(pairs):
if pairs is not None:
Expand Down Expand Up @@ -102,6 +111,7 @@ def __init__(
alternative_experiments: Optional[Dict[str, Any]] = None,
row_pairs: Optional[Any] = None,
column_pairs: Optional[Any] = None,
alternative_experiment_check_dim_names: bool = True,
validate: bool = True,
) -> None:
"""Initialize a single-cell experiment.
Expand Down Expand Up @@ -166,6 +176,13 @@ def __init__(
(e.g., sc-atac, crispr) and values as subclasses of
:py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
alternative_experiment_check_dim_names:
Whether to check if the column names of the alternative experiment match the column names
of the main experiment. This is the equivalent to the ``withDimnames``
parameter in the R implementation.
Defaults to True.
row_pairs:
Row pairings/relationships between features.
Expand Down Expand Up @@ -201,7 +218,12 @@ def __init__(

if validate:
_validate_reduced_dims(self._reduced_dims, self._shape)
_validate_alternative_experiments(self._alternative_experiments, self._shape)
_validate_alternative_experiments(
self._alternative_experiments,
self._shape,
self.get_column_names(),
with_dim_names=alternative_experiment_check_dim_names,
)
_validate_pairs(self._row_pairs)
_validate_pairs(self._column_pairs)

Expand Down Expand Up @@ -605,32 +627,52 @@ def main_experiment_name(self, name: Optional[str]):
######>> alternative_experiments <<######
#########################################

def get_alternative_experiments(self) -> Dict[str, Any]:
def get_alternative_experiments(self, with_dim_names: bool = True) -> Dict[str, Any]:
"""Access alternative experiments.
Args:
with_dim_names:
Whether to replace the column names of the alternative experiment with the column names
of the main experiment. This is the equivalent to the ``withDimnames``
parameter in the R implementation.
Defaults to True.
Returns:
A dictionary with names of
the experiments as keys and value the experiment.
A dictionary with experiment names as keys and value the alternative experiment.
"""
return self._alternative_experiments
_out = OrderedDict()
for name in self.get_alternative_experiment_names():
_out[name] = self.get_alternative_experiment(name, with_dim_names=with_dim_names)

return _out

def set_alternative_experiments(
self, alternative_experiments: Dict[str, Any], in_place: bool = False
self, alternative_experiments: Dict[str, Any], with_dim_names: bool = True, in_place: bool = False
) -> "SingleCellExperiment":
"""Set new alternative experiments.
Args:
alternative_experiments:
New alternative experiments.
with_dim_names:
Whether to check if the column names of the alternative experiment match the column names
of the main experiment. This is the equivalent to the ``withDimnames``
parameter in the R implementation.
Defaults to True.
in_place:
Whether to modify the ``SingleCellExperiment`` in place.
Returns:
A modified ``SingleCellExperiment`` object, either as a copy of the original
or as a reference to the (in-place-modified) original.
"""
_validate_alternative_experiments(alternative_experiments, self.shape)
_validate_alternative_experiments(
alternative_experiments, self.shape, self.get_column_names(), with_dim_names=with_dim_names
)
output = self._define_output(in_place)
output._alternative_experiments = alternative_experiments
return output
Expand Down Expand Up @@ -705,13 +747,20 @@ def alternative_experiment_names(self, names: List[str]):
######>> alternative_experiment getter <<######
###############################################

def get_alternative_experiment(self, name: Union[str, int]) -> Any:
def get_alternative_experiment(self, name: Union[str, int], with_dim_names: bool = True) -> Any:
"""Access alternative experiment by name.
Args:
name:
Name or index position of the alternative experiment.
with_dim_names:
Whether to replace the column names of the alternative experiment with the column names
of the main experiment. This is the equivalent to the ``withDimnames``
parameter in the R implementation.
Defaults to True.
Raises:
AttributeError:
If the dimension name does not exist.
Expand All @@ -721,28 +770,35 @@ def get_alternative_experiment(self, name: Union[str, int]) -> Any:
Returns:
The alternative experiment.
"""
_out = None

if isinstance(name, int):
if name < 0:
raise IndexError("Index cannot be negative.")

if name > len(self.alternative_experiment_names):
raise IndexError("Index greater than the number of alternative experiments.")

return self._alternative_experiments[self.alternative_experiment_names[name]]
_out = self._alternative_experiments[self.alternative_experiment_names[name]]
elif isinstance(name, str):
if name not in self._alternative_experiments:
raise AttributeError(f"Alternative experiment: {name} does not exist.")

return self._alternative_experiments[name]
_out = self._alternative_experiments[name]
else:
raise TypeError(f"'name' must be a string or integer, provided '{type(name)}'.")

raise TypeError(f"'name' must be a string or integer, provided '{type(name)}'.")
if with_dim_names:
_out = _out.set_column_names(self.get_column_names())

return _out

def alternative_experiment(self, name: Union[str, int], with_dim_names: bool = True) -> Any:
    """Alias for :py:meth:`~get_alternative_experiment`, for back-compatibility.

    Args:
        name:
            Name or index position of the alternative experiment.

        with_dim_names:
            Forwarded unchanged to :py:meth:`~get_alternative_experiment`,
            so the alias exposes the same option as the method it wraps.
            Defaults to True, which is also the getter's default, so
            existing callers that pass only ``name`` behave as before.

    Returns:
        The value returned by :py:meth:`~get_alternative_experiment`.
    """
    return self.get_alternative_experiment(name=name, with_dim_names=with_dim_names)

def set_alternative_experiment(
self, name: str, alternative_experiment: Any, in_place: bool = False
self, name: str, alternative_experiment: Any, with_dim_names: bool = True, in_place: bool = False
) -> "SingleCellExperiment":
"""Add or replace :py:attr:`~singlecellexperiment.SingleCellExperiment.alternative_experiment`'s.
Expand All @@ -755,6 +811,13 @@ def set_alternative_experiment(
Is a subclasses of
:py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
with_dim_names:
Whether to check if the column names of the alternative experiment match the column names
of the main experiment. This is the equivalent to the ``withDimnames``
parameter in the R implementation.
Defaults to True.
in_place:
Whether to modify the ``SingleCellExperiment`` in place.
Expand All @@ -769,7 +832,9 @@ def set_alternative_experiment(
_tmp_alt_expt = _tmp_alt_expt.copy()
_tmp_alt_expt[name] = alternative_experiment

_validate_alternative_experiments(_tmp_alt_expt, self._shape)
_validate_alternative_experiments(
_tmp_alt_expt, self._shape, self.get_column_names(), with_dim_names=with_dim_names
)
output._alternative_experiments = _tmp_alt_expt
return output

Expand Down
29 changes: 29 additions & 0 deletions tests/test_sce.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,32 @@ def test_SCE_creation_modifications():

tse.set_reduced_dimension("something", np.random.rand(tse.shape[1], 4), in_place=True)
assert nassay_tse.get_reduced_dimension_names() == tse.get_reduced_dimension_names()

def test_SCE_different_alt_names():
    """Constructing an SCE must fail for each main ``column_data`` tried below.

    The alternative experiment is built with a ``column_data`` frame whose
    index is six copies of ``"ChIP"``. It is then paired, in turn, with the
    module-level ``col_data`` and with two frames carrying other index
    labels; every construction is expected to raise.
    """
    alt_expt = SummarizedExperiment(
        assays={"counts": counts},
        row_data=row_data,
        column_data=pd.DataFrame(index=["ChIP"] * 6),
    )

    # Main-experiment column annotations to pair with the alternative experiment.
    main_column_data_cases = (
        col_data,
        pd.DataFrame(index=["ChIP", "Input"] * 3),
        pd.DataFrame(index=["ChIP", "Input", "Input"] * 2),
    )

    for main_column_data in main_column_data_cases:
        with pytest.raises(Exception):
            SingleCellExperiment(
                assays={"counts": counts},
                row_data=row_data,
                column_data=main_column_data,
                alternative_experiments={"alt": alt_expt},
            )

0 comments on commit bb97d63

Please sign in to comment.