Skip to content

Commit

Permalink
Stronger types for indices (#174)
Browse files Browse the repository at this point in the history
  • Loading branch information
sampsyo authored May 11, 2024
2 parents de556a9 + 5d95497 commit dade07a
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 94 deletions.
23 changes: 10 additions & 13 deletions flatgfa/src/cmds.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::flatgfa;
use crate::flatgfa::{self, Handle, Segment};
use crate::pool::{self, Id, Pool};
use argh::FromArgs;
use bstr::BStr;
Expand Down Expand Up @@ -59,7 +59,7 @@ pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) {
gfa.steps.len()
);
} else if args.self_loops {
let mut counts: HashMap<Id, usize> = HashMap::new();
let mut counts: HashMap<Id<Segment>, usize> = HashMap::new();
let mut total: usize = 0;
for link in gfa.links.iter() {
if link.from.segment() == link.to.segment() {
Expand Down Expand Up @@ -161,12 +161,12 @@ pub fn extract(gfa: &flatgfa::FlatGFA, args: Extract) -> Result<flatgfa::HeapSto
struct SubgraphBuilder<'a> {
old: &'a flatgfa::FlatGFA<'a>,
store: flatgfa::HeapStore,
seg_map: HashMap<Id, Id>,
seg_map: HashMap<Id<Segment>, Id<Segment>>,
}

struct SubpathStart {
step: Id, // The id of the first step in the subpath.
pos: usize, // The bp position at the start of the subpath.
step: Id<Handle>, // The id of the first step in the subpath.
pos: usize, // The bp position at the start of the subpath.
}

impl<'a> SubgraphBuilder<'a> {
Expand All @@ -179,7 +179,7 @@ impl<'a> SubgraphBuilder<'a> {
}

/// Add a segment from the source graph to this subgraph.
fn include_seg(&mut self, seg_id: Id) {
fn include_seg(&mut self, seg_id: Id<Segment>) {
let seg = self.old.segs.get_id(seg_id);
let new_seg_id = self.store.add_seg(
seg.name,
Expand All @@ -199,10 +199,7 @@ impl<'a> SubgraphBuilder<'a> {

/// Add a single subpath from the given path to the subgraph.
fn include_subpath(&mut self, path: &flatgfa::Path, start: &SubpathStart, end_pos: usize) {
let steps = pool::Span {
start: start.step,
end: self.store.steps.next_id(),
};
let steps = pool::Span::new(start.step, self.store.steps.next_id());
let name = format!("{}:{}-{}", self.old.get_path_name(path), start.pos, end_pos);
self.store
.add_path(name.as_bytes(), steps, std::iter::empty());
Expand Down Expand Up @@ -250,7 +247,7 @@ impl<'a> SubgraphBuilder<'a> {
}

/// Check whether a segment from the old graph is in the subgraph.
fn contains(&self, old_seg_id: Id) -> bool {
fn contains(&self, old_seg_id: Id<Segment>) -> bool {
self.seg_map.contains_key(&old_seg_id)
}

Expand All @@ -259,7 +256,7 @@ impl<'a> SubgraphBuilder<'a> {
///
/// Include any links between the segments in the neighborhood and subpaths crossing
/// through the neighborhood.
fn extract(&mut self, origin: Id, dist: usize) {
fn extract(&mut self, origin: Id<Segment>, dist: usize) {
self.include_seg(origin);

// Find the set of all segments that are 1 link away.
Expand Down Expand Up @@ -302,7 +299,7 @@ pub fn depth(gfa: &flatgfa::FlatGFA) {
for path in gfa.paths {
let path_name = gfa.get_path_name(path);
for step in gfa.get_steps(path) {
let seg_id = step.segment() as usize;
let seg_id = step.segment().index();
// Increment depths
depths[seg_id] = depths[seg_id] + 1;
// Update uniq_paths
Expand Down
2 changes: 1 addition & 1 deletion flatgfa/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl Toc {
+ self.links.bytes::<flatgfa::Link>()
+ self.steps.bytes::<flatgfa::Handle>()
+ self.seq_data.bytes::<u8>()
+ self.overlaps.bytes::<Span>()
+ self.overlaps.bytes::<Span<flatgfa::AlignOp>>()
+ self.alignment.bytes::<flatgfa::AlignOp>()
+ self.name_data.bytes::<u8>()
+ self.optional_data.bytes::<u8>()
Expand Down
53 changes: 27 additions & 26 deletions flatgfa/src/flatgfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub struct FlatGFA<'a> {
/// Both paths and links can have overlaps, which are CIGAR sequences. They
/// are all stored together here in a flat pool, elements of which point
/// to chunks of `alignment`.
pub overlaps: &'a [Span],
pub overlaps: &'a [Span<AlignOp>],

/// The CIGAR alignment operations that make up the overlaps. `overlaps`
/// contains ranges of indices in this pool.
Expand Down Expand Up @@ -73,10 +73,10 @@ pub struct Segment {
pub name: usize,

/// The base-pair sequence for the segment. This is a range in the `seq_data` pool.
pub seq: Span,
pub seq: Span<u8>,

/// Segments can have optional fields. This is a range in the `optional_data` pool.
pub optional: Span,
pub optional: Span<u8>,
}

impl Segment {
Expand All @@ -91,14 +91,14 @@ impl Segment {
pub struct Path {
/// The path's name. This can be an arbitrary string. It is a range in the
/// `name_data` pool.
pub name: Span,
pub name: Span<u8>,

/// The sequence of path steps. This is a range in the `steps` pool.
pub steps: Span,
pub steps: Span<Handle>,

/// The CIGAR overlaps for each step on the path. This is a range in the
/// `overlaps` pool.
pub overlaps: Span,
pub overlaps: Span<Span<AlignOp>>,
}

/// An allowed edge between two oriented segments.
Expand All @@ -113,12 +113,12 @@ pub struct Link {

/// The CIGAR overlap between the segments. This is a range in the
/// `alignment` pool.
pub overlap: Span,
pub overlap: Span<AlignOp>,
}

impl Link {
/// Is either end of the link the given segment? If so, return the other end.
pub fn incident_seg(&self, seg_id: Id) -> Option<Id> {
pub fn incident_seg(&self, seg_id: Id<Segment>) -> Option<Id<Segment>> {
if self.from.segment() == seg_id {
Some(self.to.segment())
} else if self.to.segment() == seg_id {
Expand Down Expand Up @@ -162,16 +162,17 @@ pub struct Handle(u32);

impl Handle {
/// Create a new handle referring to a segment ID and an orientation.
pub fn new(segment: Id, orient: Orientation) -> Self {
assert!(segment & (1 << (u32::BITS - 1)) == 0, "index too large");
pub fn new(segment: Id<Segment>, orient: Orientation) -> Self {
let seg_num: u32 = segment.into();
assert!(seg_num & (1 << (u32::BITS - 1)) == 0, "index too large");
let orient_bit: u8 = orient.into();
assert!(orient_bit & !1 == 0, "invalid orientation");
Self(segment << 1 | (orient_bit as u32))
Self(seg_num << 1 | (orient_bit as u32))
}

/// Get the segment ID. This is an index in the `segs` pool.
pub fn segment(&self) -> Id {
self.0 >> 1
pub fn segment(&self) -> Id<Segment> {
(self.0 >> 1).into()
}

/// Get the orientation (+ or -) for the handle.
Expand Down Expand Up @@ -243,21 +244,21 @@ impl<'a> FlatGFA<'a> {
}

/// Look up a segment by its name.
pub fn find_seg(&self, name: usize) -> Option<Id> {
pub fn find_seg(&self, name: usize) -> Option<Id<Segment>> {
// TODO Make this more efficient by maintaining the name index? This would not be
// too hard; we already have the machinery in `parse.rs`...
self.segs
.iter()
.position(|seg| seg.name == name)
.map(|i| i as Id)
.map(|i| Id::new(i))
}

/// Look up a path by its name.
pub fn find_path(&self, name: &BStr) -> Option<Id> {
pub fn find_path(&self, name: &BStr) -> Option<Id<Path>> {
self.paths
.iter()
.position(|path| self.get_path_name(path) == name)
.map(|i| i as Id)
.map(|i| Id::new(i))
}

/// Get all the steps for a path.
Expand All @@ -266,7 +267,7 @@ impl<'a> FlatGFA<'a> {
}

/// Get all the overlaps for a path. This may be empty (`*` in the GFA file).
pub fn get_overlaps(&self, path: &Path) -> &[Span] {
pub fn get_overlaps(&self, path: &Path) -> &[Span<AlignOp>] {
&self.overlaps.get_span(path.overlaps)
}

Expand All @@ -286,7 +287,7 @@ impl<'a> FlatGFA<'a> {
}

/// Look up a CIGAR alignment.
pub fn get_alignment(&self, overlap: Span) -> Alignment {
pub fn get_alignment(&self, overlap: Span<AlignOp>) -> Alignment {
Alignment {
ops: &self.alignment.get_span(overlap),
}
Expand All @@ -307,7 +308,7 @@ pub struct GFAStore<'a, P: PoolFamily<'a>> {
pub links: P::Pool<Link>,
pub steps: P::Pool<Handle>,
pub seq_data: P::Pool<u8>,
pub overlaps: P::Pool<Span>,
pub overlaps: P::Pool<Span<AlignOp>>,
pub alignment: P::Pool<AlignOp>,
pub name_data: P::Pool<u8>,
pub optional_data: P::Pool<u8>,
Expand All @@ -322,7 +323,7 @@ impl<'a, P: PoolFamily<'a>> GFAStore<'a, P> {
}

/// Add a new segment to the GFA file.
pub fn add_seg(&mut self, name: usize, seq: &[u8], optional: &[u8]) -> Id {
pub fn add_seg(&mut self, name: usize, seq: &[u8], optional: &[u8]) -> Id<Segment> {
self.segs.add(Segment {
name,
seq: self.seq_data.add_slice(seq),
Expand All @@ -334,9 +335,9 @@ impl<'a, P: PoolFamily<'a>> GFAStore<'a, P> {
pub fn add_path(
&mut self,
name: &[u8],
steps: Span,
steps: Span<Handle>,
overlaps: impl Iterator<Item = Vec<AlignOp>>,
) -> Id {
) -> Id<Path> {
let overlaps = self.overlaps.add_iter(
overlaps
.into_iter()
Expand All @@ -351,17 +352,17 @@ impl<'a, P: PoolFamily<'a>> GFAStore<'a, P> {
}

/// Add a sequence of steps.
pub fn add_steps(&mut self, steps: impl Iterator<Item = Handle>) -> Span {
pub fn add_steps(&mut self, steps: impl Iterator<Item = Handle>) -> Span<Handle> {
self.steps.add_iter(steps)
}

/// Add a single step.
pub fn add_step(&mut self, step: Handle) -> Id {
pub fn add_step(&mut self, step: Handle) -> Id<Handle> {
self.steps.add(step)
}

/// Add a link between two (oriented) segments.
pub fn add_link(&mut self, from: Handle, to: Handle, overlap: Vec<AlignOp>) -> Id {
pub fn add_link(&mut self, from: Handle, to: Handle, overlap: Vec<AlignOp>) -> Id<Link> {
self.links.add(Link {
from,
to,
Expand Down
8 changes: 4 additions & 4 deletions flatgfa/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ impl<'a, P: flatgfa::PoolFamily<'a>> Parser<'a, P> {

fn add_seg(&mut self, seg: gfaline::Segment) {
let seg_id = self.flat.add_seg(seg.name, seg.seq, seg.data);
self.seg_ids.insert(seg.name, seg_id);
self.seg_ids.insert(seg.name, seg_id.into());
}

fn add_link(&mut self, link: gfaline::Link) {
let from = Handle::new(self.seg_ids.get(link.from_seg), link.from_orient);
let to = Handle::new(self.seg_ids.get(link.to_seg), link.to_orient);
let from = Handle::new(self.seg_ids.get(link.from_seg).into(), link.from_orient);
let to = Handle::new(self.seg_ids.get(link.to_seg).into(), link.to_orient);
self.flat.add_link(from, to, link.overlap);
}

Expand All @@ -151,7 +151,7 @@ impl<'a, P: flatgfa::PoolFamily<'a>> Parser<'a, P> {
let mut step_parser = gfaline::StepsParser::new(rest);
let steps = self.flat.add_steps((&mut step_parser).map(|(name, dir)| {
Handle::new(
self.seg_ids.get(name),
self.seg_ids.get(name).into(),
if dir {
Orientation::Forward
} else {
Expand Down
Loading

0 comments on commit dade07a

Please sign in to comment.