Skip to content

Commit

Permalink
Coerce IRanges as pandas Dataframe. (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored Nov 17, 2023
1 parent 6adb1af commit 6c23cd2
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ testing =
biocutils
numpy
ncls==0.0.68
pandas

[options.entry_points]
# Add here console scripts like:
Expand Down
64 changes: 64 additions & 0 deletions src/iranges/IRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -1846,6 +1846,70 @@ def distance(self, query: "IRanges") -> List[Optional[int]]:

return all_distances

########################
#### pandas interop ####
########################

def to_pandas(self) -> "pandas.DataFrame":
    """Convert this ``IRanges`` object into a :py:class:`~pandas.DataFrame`.

    The result has one row per range with the columns ``starts``,
    ``widths`` and ``ends``. Metadata columns (``mcols``), when present,
    are appended to the right of these, and the range names, when set,
    become the row index.

    Note that the column names are plural here, whereas ``from_pandas``
    looks for the singular ``start`` and ``width`` columns.

    Returns:
        A :py:class:`~pandas.DataFrame` object.
    """
    import pandas as pd

    output = pd.DataFrame(
        {
            "starts": self._start,
            "widths": self._width,
            # ``get_end`` is a getter method (like ``get_start`` /
            # ``get_width``); it has to be called, otherwise the column
            # would be filled with the bound method object itself.
            "ends": self.get_end(),
        }
    )

    if self._mcols is not None and self._mcols.shape[1] > 0:
        # Metadata describes the same ranges, so it is added as extra
        # columns (axis=1). The default axis=0 would stack it as new rows.
        # The index is reset so rows are matched by position, not label.
        extra = self._mcols.to_pandas().reset_index(drop=True)
        output = pd.concat([output, extra], axis=1)

    if self._names is not None:
        output.index = self._names

    return output

@classmethod
def from_pandas(cls, input: "pandas.DataFrame") -> "IRanges":
    """Build an ``IRanges`` from a :py:class:`~pandas.DataFrame` object.

    Args:
        input:
            Input data. Must contain the columns 'start' and 'width'.
            All remaining columns are carried over as metadata
            (``mcols``), and the index labels, converted to strings,
            are used as the range names.

    Returns:
        A ``IRanges`` object.

    Raises:
        TypeError: If ``input`` is not a pandas ``DataFrame``.
        ValueError: If the 'start' or 'width' column is missing.
    """
    import pandas as pd

    if not isinstance(input, pd.DataFrame):
        raise TypeError("`input` is not a pandas `DataFrame` object.")

    # Validate the required columns up front, 'start' first.
    available = input.columns
    if "start" not in available:
        raise ValueError("'input' must contain column 'start'.")
    if "width" not in available:
        raise ValueError("'input' must contain column 'width'.")

    range_starts = input["start"].tolist()
    range_widths = input["width"].tolist()

    # Whatever is left after removing the coordinates is metadata.
    # With no columns left there is nothing to attach.
    remaining = input.drop(columns=["start", "width"])
    metadata = (
        BiocFrame.from_pandas(remaining) if len(remaining.columns) > 0 else None
    )

    labels = input.index
    range_names = (
        [str(label) for label in labels.to_list()] if labels is not None else None
    )

    return cls(
        start=range_starts, width=range_widths, names=range_names, mcols=metadata
    )


@combine_sequences.register
def _combine_IRanges(*x: IRanges) -> IRanges:
Expand Down
17 changes: 17 additions & 0 deletions tests/test_IRanges_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
import pandas as pd
from iranges import IRanges
from biocframe import BiocFrame

__author__ = "Jayaram Kancherla"
__copyright__ = "jkanche"
__license__ = "MIT"


def test_pandas():
    """Check that ``IRanges.from_pandas`` picks up 'start' and 'width'."""
    frame = pd.DataFrame({"start": [1, 2, 3, 4], "width": [4, 5, 6, 7]})
    ranges = IRanges.from_pandas(frame)

    expected_start = np.array(frame["start"])
    expected_width = np.array(frame["width"])

    assert (ranges.get_start() == expected_start).all()
    assert (ranges.get_width() == expected_width).all()

    # The frame has no extra columns; ``mcols`` must still be a BiocFrame.
    assert isinstance(ranges.mcols, BiocFrame)

0 comments on commit 6c23cd2

Please sign in to comment.