-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 8 changed files with 374 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,6 @@ __pycache__/ | |
**/*.og | ||
**/*.out | ||
**/*.flatgfa | ||
*.json | ||
og_to_gfa.py | ||
compute_maxes.py | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
graphs/ | ||
results/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
%.svg: %.csv bar.vl.json | ||
jq '.data.url |= "$<"' bar.vl.json | npx -p vega -p vega-lite vl2svg > $@ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"data": { | ||
"url": "FILE.csv", | ||
"format": { | ||
"type": "csv", | ||
"parse": {"mean": "number", "stddev": "number"} | ||
} | ||
}, | ||
"layer": [ | ||
{ | ||
"mark": "bar", | ||
"encoding": { | ||
"x": { "field": "graph", "type": "nominal", "title": null }, | ||
"xOffset": { "field": "cmd" }, | ||
"y": { "field": "mean", "type": "quantitative", | ||
"title": "running time (seconds)" }, | ||
"color": { "field": "cmd", "title": null } | ||
} | ||
}, | ||
{ | ||
"mark": {"type": "errorbar", "ticks": {"color": "black"}}, | ||
"encoding": { | ||
"x": { "field": "graph", "type": "nominal" }, | ||
"xOffset": { "field": "cmd" }, | ||
"y": { "field": "mean", "type": "quantitative", | ||
"title": "running time (seconds)" }, | ||
"yError": { "field": "stddev" } | ||
} | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
try: | ||
import tomllib | ||
except ImportError: | ||
import tomli as tomllib | ||
import os | ||
import subprocess | ||
from subprocess import PIPE | ||
from shlex import quote | ||
import json | ||
import tempfile | ||
from dataclasses import dataclass | ||
import csv | ||
import argparse | ||
import datetime | ||
import logging | ||
from contextlib import contextmanager | ||
import time | ||
|
||
# All paths are resolved relative to this file's directory so the script can
# be invoked from anywhere.
BASE = os.path.dirname(__file__)
GRAPHS_TOML = os.path.join(BASE, "graphs.toml")  # catalog of graph-name -> URL
CONFIG_TOML = os.path.join(BASE, "config.toml")  # tool paths and graph sets
GRAPHS_DIR = os.path.join(BASE, "graphs")  # downloaded/converted graph files
RESULTS_DIR = os.path.join(BASE, "results")  # benchmark CSV output
# Every benchmarkable tool implementation; the default for the --tool flag.
ALL_TOOLS = ["slow_odgi", "odgi", "flatgfa"]
# Archive extension -> command that decompresses stdin to stdout.
DECOMPRESS = {
    ".gz": ["gunzip"],
    ".zst": ["zstd", "-d"],
}
|
||
|
||
def check_wait(popen):
    """Wait for `popen` to finish; raise CalledProcessError if it failed."""
    returncode = popen.wait()
    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, popen.args)
|
||
|
||
@contextmanager
def logtime(log):
    """Context manager that logs how long its `with` body took.

    Emits one `info` record of the form "done in X.X seconds" on normal
    exit (matching the original, nothing is logged if the body raises).
    Uses `time.perf_counter()` rather than `time.time()`: the wall clock
    can jump (NTP adjustments, DST), while perf_counter is monotonic and
    is the documented clock for measuring intervals.
    """
    start = time.perf_counter()
    yield
    dur = time.perf_counter() - start
    log.info("done in %.1f seconds", dur)
|
||
|
||
@dataclass(frozen=True)
class HyperfineResult:
    """Timing summary for one benchmarked command, as reported by Hyperfine."""

    command: str
    mean: float
    stddev: float
    median: float
    min: float
    max: float
    count: float

    @classmethod
    def from_json(cls, obj):
        """Build a result from one entry of Hyperfine's JSON `results` list."""
        stats = {
            key: obj[key]
            for key in ("command", "mean", "stddev", "median", "min", "max")
        }
        # Hyperfine reports each individual run in `times`; we keep the count.
        return cls(count=len(obj["times"]), **stats)
|
||
|
||
def hyperfine(cmds):
    """Run Hyperfine to compare the commands.

    Returns a list of HyperfineResult, one per command, parsed from
    Hyperfine's `--export-json` output.

    Fix: the original had `os.unlink(tmp.name)` *after* the `return`
    statement, so it never executed and the temporary file leaked on
    every call. The cleanup now lives in a `finally` block, which also
    removes the file when Hyperfine fails.
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.close()
    try:
        subprocess.run(
            ["hyperfine", "-N", "-w", "1", "--export-json", tmp.name] + cmds,
            check=True,
        )
        with open(tmp.name, "rb") as f:
            data = json.load(f)
        return [HyperfineResult.from_json(r) for r in data["results"]]
    finally:
        os.unlink(tmp.name)
|
||
|
||
def graph_path(name, ext):
    """Return the path under GRAPHS_DIR where `name` with extension `ext` lives."""
    filename = f"{name}.{ext}"
    return os.path.join(GRAPHS_DIR, filename)
|
||
|
||
def fetch_file(dest, url):
    """Download `url` to the path `dest`, decompressing known formats.

    URLs whose extension appears in DECOMPRESS are streamed through the
    matching decompressor while downloading; anything else is saved as-is.
    Raises subprocess.CalledProcessError if either the download or the
    decompression fails.
    """
    os.makedirs(GRAPHS_DIR, exist_ok=True)

    _, ext = os.path.splitext(url)
    if ext in DECOMPRESS:
        # Decompress the file while downloading.
        with open(dest, "wb") as f:
            curl = subprocess.Popen(["curl", "-L", url], stdout=PIPE)
            decomp = subprocess.Popen(DECOMPRESS[ext], stdin=curl.stdout, stdout=f)
            # Drop our handle so the decompressor sees EOF when curl exits.
            curl.stdout.close()
            check_wait(decomp)
            # Fix: curl's exit status was previously ignored, so a failed
            # download could silently leave a truncated file behind.
            check_wait(curl)
    else:
        # Just fetch the raw file.
        subprocess.run(["curl", "-L", "-o", dest, url], check=True)
|
||
|
||
class Runner: | ||
def __init__(self, graphs, config): | ||
self.graphs = graphs | ||
self.config = config | ||
|
||
# Some shorthands for tool paths. | ||
self.odgi = config["tools"]["odgi"] | ||
self.fgfa = config["tools"]["fgfa"] | ||
self.slow_odgi = config["tools"]["slow_odgi"] | ||
|
||
self.log = logging.getLogger("pollen-bench") | ||
self.log.addHandler(logging.StreamHandler()) | ||
self.log.setLevel(logging.DEBUG) | ||
|
||
@classmethod | ||
def default(cls): | ||
with open(GRAPHS_TOML, "rb") as f: | ||
graphs = tomllib.load(f) | ||
with open(CONFIG_TOML, "rb") as f: | ||
config = tomllib.load(f) | ||
return cls(graphs, config) | ||
|
||
def fetch_graph(self, name): | ||
"""Fetch a single graph, given by its <suite>.<graph> name.""" | ||
suite, key = name.split(".") | ||
url = self.graphs[suite][key] | ||
dest = graph_path(name, "gfa") | ||
|
||
# If the file exists, don't re-download. | ||
if os.path.exists(dest): | ||
self.log.info("gfa already fetched for %s", name) | ||
return | ||
|
||
self.log.info("fetching graph %s", name) | ||
fetch_file(dest, url) | ||
|
||
def odgi_convert(self, name): | ||
"""Convert a GFA to odgi's `.og` format.""" | ||
og = graph_path(name, "og") | ||
if os.path.exists(og): | ||
self.log.info("og exists for %s", name) | ||
return | ||
|
||
gfa = graph_path(name, "gfa") | ||
self.log.info("converting %s to og", name) | ||
with logtime(self.log): | ||
subprocess.run([self.odgi, "build", "-g", gfa, "-o", og]) | ||
|
||
def flatgfa_convert(self, name): | ||
"""Convert a GFA to the FlatGFA format.""" | ||
flatgfa = graph_path(name, "flatgfa") | ||
if os.path.exists(flatgfa): | ||
self.log.info("flatgfa exists for %s", name) | ||
return | ||
|
||
gfa = graph_path(name, "gfa") | ||
self.log.info("converting %s to flatgfa", name) | ||
with logtime(self.log): | ||
subprocess.run([self.fgfa, "-I", gfa, "-o", flatgfa]) | ||
|
||
def compare_paths(self, name, tools): | ||
"""Compare odgi and FlatGFA implementations of path-name extraction.""" | ||
commands = { | ||
"odgi": f'{self.odgi} paths -i {quote(graph_path(name, "og"))} -L', | ||
"flatgfa": f'{self.fgfa} -i {quote(graph_path(name, "flatgfa"))} paths', | ||
"slow_odgi": f'{self.slow_odgi} paths {quote(graph_path(name, "gfa"))}', | ||
} | ||
commands = {k: commands[k] for k in tools} | ||
|
||
self.log.info("comparing paths for %s", " ".join(tools)) | ||
with logtime(self.log): | ||
results = hyperfine(list(commands.values())) | ||
for cmd, res in zip(commands.keys(), results): | ||
yield { | ||
"cmd": cmd, | ||
"mean": res.mean, | ||
"stddev": res.stddev, | ||
"graph": name, | ||
"n": res.count, | ||
} | ||
|
||
|
||
def run_bench(graph_set, mode, tools, out_csv):
    """Run the `mode` benchmark over every graph in `graph_set`.

    Fetches and converts all graphs first, then writes one CSV row per
    (graph, tool) pair to `out_csv`.

    Raises ValueError for an unknown mode (the original used `assert`,
    which disappears under `python -O`).
    """
    runner = Runner.default()

    # Path-name extraction is the only benchmark implemented so far.
    if mode != "paths":
        raise ValueError(f"unsupported benchmark mode: {mode}")
    graph_names = runner.config["graph_sets"][graph_set]

    # Fetch all the graphs and convert them to both odgi and FlatGFA.
    for graph in graph_names:
        runner.fetch_graph(graph)
        runner.odgi_convert(graph)
        runner.flatgfa_convert(graph)

    runner.log.debug("writing results to %s", out_csv)
    # Fix: dirname is "" when out_csv is a bare filename, and makedirs("")
    # raises FileNotFoundError — only create a directory if there is one.
    out_dir = os.path.dirname(out_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # newline="" per the csv module docs (avoids blank rows on Windows).
    with open(out_csv, "w", newline="") as f:
        writer = csv.DictWriter(f, ["graph", "cmd", "mean", "stddev", "n"])
        writer.writeheader()
        for graph in graph_names:
            for row in runner.compare_paths(graph, tools):
                writer.writerow(row)
|
||
|
||
def gen_csv_name(graph_set, mode):
    """Build a unique, timestamped CSV path under RESULTS_DIR for this run."""
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S.%f")
    filename = f"{mode}-{graph_set}-{stamp}.csv"
    return os.path.join(RESULTS_DIR, filename)
|
||
|
||
def bench_main():
    """Parse command-line arguments and run the requested benchmark."""
    parser = argparse.ArgumentParser(description="benchmarks for GFA stuff")
    parser.add_argument(
        "--graph-set", "-g", help="name of input graph set", required=True
    )
    parser.add_argument("--mode", "-m", help="thing to benchmark", required=True)
    parser.add_argument("--tool", "-t", help="test this tool", action="append")
    parser.add_argument("--output", "-o", help="output CSV")

    args = parser.parse_args()
    tools = args.tool or ALL_TOOLS
    for tool in tools:
        # Fix: validate with parser.error rather than assert — asserts are
        # stripped under `python -O`, and parser.error prints proper usage
        # and exits with status 2.
        if tool not in ALL_TOOLS:
            parser.error(f"unknown tool name: {tool}")

    run_bench(
        graph_set=args.graph_set,
        mode=args.mode,
        tools=tools,
        out_csv=args.output or gen_csv_name(args.graph_set, args.mode),
    )
|
||
|
||
# Allow running this module directly as a script.
if __name__ == "__main__":
    bench_main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
[tools] | ||
odgi = "odgi" | ||
fgfa = "../flatgfa/target/release/fgfa" | ||
slow_odgi = "../.venv/bin/slow_odgi" | ||
|
||
[graph_sets] | ||
smoke = ["test.k"] | ||
mini = ["test.lpa", "test.chr6c4", "hprc.chrM"] | ||
med = ["hprc.chr20", "hprc.chrX", "1000gont.chr16"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# From: https://github.com/AndreaGuarracino/1000G-ONT-F100-PGGB/blob/master/data/1000G-ONT-F100-PGGB.gfa.urls.tsv | ||
[1000gont] | ||
chr1 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr1.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr2 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr2.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr3 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr3.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr4 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr4.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr5 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr5.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr6 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr6.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr7 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr7.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr8 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr8.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr9 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr9.30kbp.fa.gz.445f03b.e34d4cd.b691e61.smooth.final.gfa.zst" | ||
chr10 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr10.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr11 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr11.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr12 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr12.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr13 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr13.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr14 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr14.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr15 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr15.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr16 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr16.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr17 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr17.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr18 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr18.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr19 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr19.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr20 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr20.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr21 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr21.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chr22 = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chr22.30kbp.fa.gz.a8a102b.eb0f3d3.b691e61.smooth.final.gfa.zst" | ||
chrX = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chrX.30kbp.fa.gz.a8a102b.eb0f3d3.a58faa8.smooth.final.gfa.zst" | ||
chrY = "https://garrisonlab.s3.amazonaws.com/1000G-ONT-F100-PGGB/1000G-ONT.100x2%2B4.chrY.30kbp.fa.gz.a8a102b.eb0f3d3.0713820.smooth.final.gfa.zst" | ||
|
||
# From: https://s3-us-west-2.amazonaws.com/human-pangenomics/index.html?prefix=pangenomes/freeze/freeze1/pggb/chroms/ | ||
[hprc] | ||
chrY = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chrY.hprc-v1.0-pggb.gfa.gz" | ||
chr1 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr1.hprc-v1.0-pggb.gfa.gz" | ||
chr10 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr10.hprc-v1.0-pggb.gfa.gz" | ||
chr11 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr11.hprc-v1.0-pggb.gfa.gz" | ||
chr12 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr12.hprc-v1.0-pggb.gfa.gz" | ||
chr13 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr13.hprc-v1.0-pggb.gfa.gz" | ||
chr14 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr14.hprc-v1.0-pggb.gfa.gz" | ||
chr15 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr15.hprc-v1.0-pggb.gfa.gz" | ||
chr16 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr16.hprc-v1.0-pggb.gfa.gz" | ||
chr17 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr17.hprc-v1.0-pggb.gfa.gz" | ||
chr18 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr18.hprc-v1.0-pggb.gfa.gz" | ||
chr19 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr19.hprc-v1.0-pggb.gfa.gz" | ||
chr2 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr2.hprc-v1.0-pggb.gfa.gz" | ||
chr20 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr20.hprc-v1.0-pggb.gfa.gz" | ||
chr21 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr21.hprc-v1.0-pggb.gfa.gz" | ||
chr22 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr22.hprc-v1.0-pggb.gfa.gz" | ||
chr3 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr3.hprc-v1.0-pggb.gfa.gz" | ||
chr4 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr4.hprc-v1.0-pggb.gfa.gz" | ||
chr5 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr5.hprc-v1.0-pggb.gfa.gz" | ||
chr6 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr6.hprc-v1.0-pggb.gfa.gz" | ||
chr7 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr7.hprc-v1.0-pggb.gfa.gz" | ||
chr8 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr8.hprc-v1.0-pggb.gfa.gz" | ||
chr9 = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chr9.hprc-v1.0-pggb.gfa.gz" | ||
chrM = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chrM.hprc-v1.0-pggb.gfa.gz" | ||
chrX = "https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chrX.hprc-v1.0-pggb.gfa.gz" | ||
|
||
# Small tests from odgi: | ||
# https://github.com/pangenome/odgi/tree/master/test | ||
[test] | ||
k = "https://raw.githubusercontent.com/pangenome/odgi/master/test/k.gfa" | ||
lpa = "https://raw.githubusercontent.com/pangenome/odgi/master/test/LPA.gfa" | ||
chr6c4 = "https://raw.githubusercontent.com/pangenome/odgi/master/test/chr6.C4.gfa" | ||
drb1 = "https://raw.githubusercontent.com/pangenome/odgi/master/test/DRB1-3123.gfa" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import tomllib | ||
import requests | ||
import os | ||
|
||
# Catalog of graph download URLs, kept next to this script.
GRAPHS_TOML = os.path.join(os.path.dirname(__file__), "graphs.toml")
# Decimal (SI) size prefixes, keyed by power of ten.
SIZE_NAMES = {
    0: "",
    3: "k",
    6: "M",
    9: "G",
    12: "T",
}


def fmt_size(count):
    """Format a byte count with a decimal (SI) prefix, e.g. 1500 -> "2kB".

    Fixes over the original: `count > unit` misfiled exact multiples
    (1000 formatted as "1000B" instead of "1kB") and made the function
    return None for counts of 0 or 1; `>=` plus an explicit zero
    fallback covers those cases.
    """
    for scale, name in sorted(SIZE_NAMES.items(), reverse=True):
        unit = 10**scale
        if count >= unit:
            return "{:.0f}{}B".format(count / unit, name)
    return "0B"
|
||
|
||
def show_sizes():
    """Print the download size of every graph listed in graphs.toml."""
    with open(GRAPHS_TOML, "rb") as f:
        catalog = tomllib.load(f)

    for suite, entries in catalog.items():
        for graph_name, graph_url in entries.items():
            # HEAD request: we only need the headers, not the (huge) body.
            response = requests.head(graph_url)
            size = int(response.headers["Content-Length"])
            print(suite, graph_name, fmt_size(size))
|
||
|
||
# Allow running this module directly as a script.
if __name__ == "__main__":
    show_sizes()