From cb0265d1b182ad3a8452f544c197ce54aef6feea Mon Sep 17 00:00:00 2001 From: venaturum Date: Fri, 10 Sep 2021 22:32:49 +1000 Subject: [PATCH] REFACTORED TESTS --- tests/test_dates/test_dates_arithmetic.py | 341 ++++ .../test_dates_arrays.py} | 0 tests/test_dates/test_dates_distribution.py | 451 +++++ tests/test_dates/test_dates_logical.py | 209 +++ tests/test_dates/test_dates_misc.py | 312 ++++ tests/test_dates/test_dates_relational.py | 209 +++ tests/test_dates/test_dates_sample.py | 266 +++ .../test_dates_stats.py} | 709 -------- tests/test_floats/test_floats_arithmetic.py | 264 +++ .../test_floats_arrays.py} | 0 tests/test_floats/test_floats_construction.py | 336 ++++ tests/test_floats/test_floats_distribution.py | 188 ++ tests/test_floats/test_floats_logical.py | 201 +++ tests/test_floats/test_floats_misc.py | 237 +++ tests/test_floats/test_floats_relational.py | 138 ++ tests/test_floats/test_floats_sample.py | 90 + tests/test_floats/test_floats_slicing.py | 310 ++++ tests/test_floats/test_floats_stats.py | 384 ++++ tests/test_stairs.py | 1596 ----------------- 19 files changed, 3936 insertions(+), 2305 deletions(-) create mode 100644 tests/test_dates/test_dates_arithmetic.py rename tests/{test_stairs_arrays_dates.py => test_dates/test_dates_arrays.py} (100%) create mode 100644 tests/test_dates/test_dates_distribution.py create mode 100644 tests/test_dates/test_dates_logical.py create mode 100644 tests/test_dates/test_dates_misc.py create mode 100644 tests/test_dates/test_dates_relational.py create mode 100644 tests/test_dates/test_dates_sample.py rename tests/{test_stairs_dates.py => test_dates/test_dates_stats.py} (57%) create mode 100644 tests/test_floats/test_floats_arithmetic.py rename tests/{test_stairs_arrays.py => test_floats/test_floats_arrays.py} (100%) create mode 100644 tests/test_floats/test_floats_construction.py create mode 100644 tests/test_floats/test_floats_distribution.py create mode 100644 tests/test_floats/test_floats_logical.py create mode 
100644 tests/test_floats/test_floats_misc.py create mode 100644 tests/test_floats/test_floats_relational.py create mode 100644 tests/test_floats/test_floats_sample.py create mode 100644 tests/test_floats/test_floats_slicing.py create mode 100644 tests/test_floats/test_floats_stats.py delete mode 100644 tests/test_stairs.py diff --git a/tests/test_dates/test_dates_arithmetic.py b/tests/test_dates/test_dates_arithmetic.py new file mode 100644 index 0000000..e0c6323 --- /dev/null +++ b/tests/test_dates/test_dates_arithmetic.py @@ -0,0 +1,341 @@ +from datetime import datetime + +import pandas as pd +import pytest +import pytz + +from staircase import Stairs +from staircase.constants import inf + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == "pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + +def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + 
if isinstance(example_type, (pd.Timestamp, datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + 
int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_add_dates(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): -0.5, + timestamp("2020-01-02 00:00:00", date_func=date_func): 4.5, + timestamp("2020-01-02 12:00:00", date_func=date_func): -2.5, + timestamp("2020-01-03 00:00:00", date_func=date_func): 2.5, + timestamp("2020-01-04 00:00:00", date_func=date_func): 2.5, + timestamp("2020-01-05 00:00:00", date_func=date_func): -7.0, + timestamp("2020-01-06 00:00:00", date_func=date_func): -2.5, + timestamp("2020-01-07 00:00:00", date_func=date_func): 2.5, + timestamp("2020-01-08 00:00:00", date_func=date_func): 5, + timestamp("2020-01-10 00:00:00", date_func=date_func): -4.5, + } + ) + result = s1(date_func) + s2(date_func) + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + 
check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) + + +def test_subtract_dates(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): 4.5, + timestamp("2020-01-02 00:00:00", date_func=date_func): -4.5, + timestamp("2020-01-02 12:00:00", date_func=date_func): 2.5, + timestamp("2020-01-03 00:00:00", date_func=date_func): 2.5, + timestamp("2020-01-04 00:00:00", date_func=date_func): -2.5, + timestamp("2020-01-05 00:00:00", date_func=date_func): 2.0, + timestamp("2020-01-06 00:00:00", date_func=date_func): -2.5, + timestamp("2020-01-07 00:00:00", date_func=date_func): -2.5, + timestamp("2020-01-08 00:00:00", date_func=date_func): -5, + timestamp("2020-01-10 00:00:00", date_func=date_func): 5.5, + } + ) + result = s1(date_func) - s2(date_func) + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) + + +def test_multiply_dates(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): -5.0, + timestamp("2020-01-02 00:00:00", date_func=date_func): 9.0, + timestamp("2020-01-02 12:00:00", date_func=date_func): -5.0, + timestamp("2020-01-03 00:00:00", date_func=date_func): -1.25, + timestamp("2020-01-04 00:00:00", date_func=date_func): 11.25, + timestamp("2020-01-05 00:00:00", date_func=date_func): -14.0, + timestamp("2020-01-06 00:00:00", date_func=date_func): 6.25, + timestamp("2020-01-07 00:00:00", date_func=date_func): -1.25, + timestamp("2020-01-08 00:00:00", date_func=date_func): -2.5, + timestamp("2020-01-10 00:00:00", date_func=date_func): 2.5, + } + ) + result = s1(date_func) * s2(date_func) + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) + + +def 
test_multiply_dates_scalar(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): 6.0, + timestamp("2020-01-03 00:00:00", date_func=date_func): 7.5, + timestamp("2020-01-05 00:00:00", date_func=date_func): -7.5, + timestamp("2020-01-06 00:00:00", date_func=date_func): -7.5, + timestamp("2020-01-10 00:00:00", date_func=date_func): 1.5, + } + ) + result = s1(date_func) * 3 + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) + + +def test_divide_dates(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): -1.3333333333333333, + timestamp("2020-01-02 00:00:00", date_func=date_func): 2.0, + timestamp("2020-01-02 12:00:00", date_func=date_func): 3.3333333333333335, + timestamp("2020-01-03 00:00:00", date_func=date_func): 5.0, + timestamp("2020-01-04 00:00:00", date_func=date_func): -7.5, + timestamp("2020-01-05 00:00:00", date_func=date_func): -2.833333333333333, + timestamp("2020-01-06 00:00:00", date_func=date_func): 1.6666666666666665, + timestamp("2020-01-07 00:00:00", date_func=date_func): -0.8333333333333333, + timestamp("2020-01-08 00:00:00", date_func=date_func): 0.4166666666666667, + timestamp("2020-01-10 00:00:00", date_func=date_func): 0.08333333333333333, + } + ) + result = s1(date_func) / (s2(date_func) + 1) + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) + + +def test_divide_dates_scalar(date_func): + expected_step_changes = pd.Series( + { + timestamp("2020-01-01 00:00:00", date_func=date_func): 4.0, + timestamp("2020-01-03 00:00:00", date_func=date_func): 5.0, + timestamp("2020-01-05 00:00:00", date_func=date_func): -5.0, + timestamp("2020-01-06 00:00:00", date_func=date_func): -5.0, + 
timestamp("2020-01-10 00:00:00", date_func=date_func): 1.0, + } + ) + result = s1(date_func) / 0.5 + pd.testing.assert_series_equal( + result.step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + assert_expected_type(result, date_func) diff --git a/tests/test_stairs_arrays_dates.py b/tests/test_dates/test_dates_arrays.py similarity index 100% rename from tests/test_stairs_arrays_dates.py rename to tests/test_dates/test_dates_arrays.py diff --git a/tests/test_dates/test_dates_distribution.py b/tests/test_dates/test_dates_distribution.py new file mode 100644 index 0000000..3cb8184 --- /dev/null +++ b/tests/test_dates/test_dates_distribution.py @@ -0,0 +1,451 @@ +import itertools +from datetime import datetime + +import pandas as pd +import pytest +import pytz + +from staircase import Stairs +from staircase.constants import inf + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == "pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + +def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + 
example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + if isinstance(example_type, (pd.Timestamp, datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + 
timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_percentile_dates_1(date_func): + assert s1(date_func).percentile(20) == -0.5, "Expected 20th percentile to be -0.5" + assert s1(date_func).percentile(40) == -0.5, "Expected 40th percentile to be -0.5" + assert s1(date_func).percentile(60) == 2, "Expected 60th percentile to be 2" + assert s1(date_func).percentile(80) == 4.5, "Expected 80th percentile to be 4.5" + + +def test_percentile_dates_2(date_func): + assert ( + s1(date_func) + .clip(None, timestamp(2020, 1, 6, date_func=date_func)) + .percentile(20) + == 2 + ), "Expected 20th percentile to be 2" + assert ( + s1(date_func) + .clip(None, timestamp(2020, 1, 6, 
date_func=date_func)) + .percentile(40) + == 2 + ), "Expected 40th percentile to be 2" + assert ( + s1(date_func) + .clip(None, timestamp(2020, 1, 6, date_func=date_func)) + .percentile(60) + == 3.25 + ), "Expected 60th percentile to be 3.25" + assert ( + s1(date_func) + .clip(None, timestamp(2020, 1, 6, date_func=date_func)) + .percentile(80) + == 4.5 + ), "Expected 80th percentile to be 4.5" + + +def test_percentile_dates_3(date_func): + assert ( + s1(date_func) + .clip(timestamp(2020, 1, 4, date_func=date_func), None) + .percentile(20) + == -0.5 + ), "Expected 20th percentile to be -0.5" + assert ( + s1(date_func) + .clip(timestamp(2020, 1, 4, date_func=date_func), None) + .percentile(40) + == -0.5 + ), "Expected 40th percentile to be -0.5" + assert ( + s1(date_func) + .clip(timestamp(2020, 1, 4, date_func=date_func), None) + .percentile(60) + == -0.5 + ), "Expected 60th percentile to be -0.5" + assert ( + s1(date_func) + .clip(timestamp(2020, 1, 4, date_func=date_func), None) + .percentile(80) + == 2 + ), "Expected 80th percentile to be 2" + + +def test_percentile_dates_4(date_func): + assert ( + s1(date_func) + .clip( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + ) + .percentile(20) + == -0.5 + ), "Expected 20th percentile to be -0.5" + assert ( + s1(date_func) + .clip( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + ) + .percentile(40) + == -0.5 + ), "Expected 40th percentile to be -0.5" + assert ( + s1(date_func) + .clip( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + ) + .percentile(60) + == 2 + ), "Expected 60th percentile to be 2" + assert ( + s1(date_func) + .clip( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + ) + .percentile(80) + == 4.5 + ), "Expected 80th percentile to be 4.5" + + +def test_get_percentiles_dates_1(date_func): + expected_step_values = pd.Series( + 
[-0.5, 2.0, 4.5, 4.5], index=[0, 44.444444, 77.77777778, 100] + ) + pd.testing.assert_series_equal( + s1(date_func).percentile.step_values, + expected_step_values, + check_names=False, + check_index_type=False, + ) + + +def test_get_percentiles_dates_2(date_func): + expected_step_values = pd.Series([2, 4.5, 4.5], index=[0, 60, 100]) + pd.testing.assert_series_equal( + s1(date_func) + .clip(None, timestamp(2020, 1, 6, date_func=date_func)) + .percentile.step_values, + expected_step_values, + check_names=False, + check_index_type=False, + ) + + +def test_get_percentiles_dates_3(date_func): + expected_step_values = pd.Series( + [-0.5, 2.0, 4.5, 4.5], index=[0, 66.6666666667, 83.333333333, 100] + ) + pd.testing.assert_series_equal( + s1(date_func) + .clip(timestamp(2020, 1, 4, date_func=date_func), None) + .percentile.step_values, + expected_step_values, + check_names=False, + check_index_type=False, + ) + + +def test_get_percentiles_dates_4(date_func): + expected_step_values = pd.Series([-0.5, 2.0, 4.5, 4.5], index=[0, 50, 75, 100]) + pd.testing.assert_series_equal( + s1(date_func) + .clip( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + ) + .percentile.step_values, + expected_step_values, + check_names=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "stairs_func, bounds, cuts", + itertools.product( + [s1, s2, s3, s4], + [ + ((2020, 1, 3), (2020, 1, 4)), + ((2020, 1, 1), (2020, 1, 4)), + ((2020, 1, 2), (2020, 2, 4)), + ], + ["unit", (-2, 0, 0.5, 4, 4.5, 7)], + ), +) +def test_hist_default_bins_left_closed(date_func, stairs_func, bounds, cuts): + stairs_instance = stairs_func(date_func) + bounds = [timestamp(*args, date_func=date_func) for args in bounds] + + def make_expected_result(interval_index, lower, upper): + return pd.Series( + [ + ((stairs_instance >= i.left) * (stairs_instance < i.right)).agg( + "mean", (lower, upper) + ) + for i in interval_index + ], + index=interval_index, + ) + + hist = 
stairs_instance.clip(*bounds).hist(bins=cuts, stat="probability") + expected = make_expected_result(hist.index, *bounds) + pd.testing.assert_series_equal( + hist, + expected, + check_names=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "stairs_func, bounds, cuts", + itertools.product( + [s1, s2, s3, s4], + [ + ((2020, 1, 3), (2020, 1, 4)), + ((2020, 1, 1), (2020, 1, 4)), + ((2020, 1, 2), (2020, 2, 4)), + ], + ["unit", (-2, 0, 0.5, 4, 4.5, 7)], + ), +) +def test_hist_default_bins_right_closed(date_func, stairs_func, bounds, cuts): + stairs_instance = stairs_func(date_func) + bounds = [timestamp(*args, date_func=date_func) for args in bounds] + + def make_expected_result(interval_index, lower, upper): + return pd.Series( + [ + ((stairs_instance > i.left) * (stairs_instance <= i.right)).agg( + "mean", (lower, upper) + ) + for i in interval_index + ], + index=interval_index, + ) + + hist = stairs_instance.clip(*bounds).hist( + bins=cuts, closed="right", stat="probability" + ) + expected = make_expected_result(hist.index, *bounds) + pd.testing.assert_series_equal( + hist, + expected, + check_names=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "stairs_func, bounds, closed", + itertools.product( + [s1, s2, s3, s4], + [ + ((2020, 1, 3), (2020, 1, 4)), + ((2020, 1, 1), (2020, 1, 4)), + ((2020, 1, 2), (2020, 2, 4)), + ], + ["left", "right"], + ), +) +def test_hist_default_bins(date_func, stairs_func, bounds, closed): + # really testing the default binning process here + stairs_instance = stairs_func(date_func) + bounds = [timestamp(*args, date_func=date_func) for args in bounds] + hist = stairs_instance.clip(*bounds).hist(closed=closed, stat="probability") + assert abs(hist.sum() - 1) < 0.000001 diff --git a/tests/test_dates/test_dates_logical.py b/tests/test_dates/test_dates_logical.py new file mode 100644 index 0000000..a4374ac --- /dev/null +++ b/tests/test_dates/test_dates_logical.py @@ -0,0 +1,209 @@ +from datetime import 
datetime + +import pandas as pd +import pytest +import pytz + +from staircase import Stairs + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == "pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + +def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + if isinstance(example_type, (pd.Timestamp, datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def _compare_iterables(it1, it2): + it1 = [i for i in it1 if i is not None] + it2 = [i for i in it2 if i is not None] + for e1, e2 in zip(it1, it2): + if e1 != e2: + return False + return True + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, 
date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + 
timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() diff --git a/tests/test_dates/test_dates_misc.py b/tests/test_dates/test_dates_misc.py new file mode 100644 index 0000000..d341bde --- /dev/null +++ b/tests/test_dates/test_dates_misc.py @@ -0,0 +1,312 @@ +from datetime import datetime + +import pandas as pd +import pytest +import pytz + +import staircase.test_data as test_data +from staircase import Stairs +from staircase.constants import inf + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == "pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + 
+def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + if isinstance(example_type, (pd.Timestamp, datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def _compare_iterables(it1, it2): + it1 = [i for i in it1 if i is not None] + it2 = [i for i in it2 if i is not None] + for e1, e2 in zip(it1, it2): + if e1 != e2: + return False + return True + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # 
boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_plot(date_func): + s1(date_func).plot() + + +def test_step_changes_dates(date_func): + expected_step_changes = pd.Series( + [2, 2.5, -2.5, -2.5, 0.5], + index=[ 
+ timestamp("2020-1-1", date_func=date_func), + timestamp("2020-1-3", date_func=date_func), + timestamp("2020-1-5", date_func=date_func), + timestamp("2020-1-6", date_func=date_func), + timestamp("2020-1-10", date_func=date_func), + ], + ) + pd.testing.assert_series_equal( + s1(date_func).step_changes, + expected_step_changes, + check_names=False, + check_index_type=False, + ) + + +def test_dataframe_dates(date_func): + ans = pd.DataFrame( + { + "start": [ + -inf, + timestamp("2020-01-01", date_func=date_func), + timestamp("2020-01-03", date_func=date_func), + timestamp("2020-01-05", date_func=date_func), + timestamp("2020-01-06", date_func=date_func), + timestamp("2020-01-10", date_func=date_func), + ], + "end": [ + timestamp("2020-01-01", date_func=date_func), + timestamp("2020-01-03", date_func=date_func), + timestamp("2020-01-05", date_func=date_func), + timestamp("2020-01-06", date_func=date_func), + timestamp("2020-01-10", date_func=date_func), + inf, + ], + "value": [0, 2, 4.5, 2, -0.5, 0], + } + ) + pd.testing.assert_frame_equal(s1(date_func).to_frame(), ans) + + +def test_shift(date_func): + ans = Stairs(initial_value=0) + ans.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 11, date_func=date_func), + 2, + ) + ans.layer( + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 6, date_func=date_func), + 2.5, + ) + ans.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 8, date_func=date_func), + -2.5, + ) + ans.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 11, date_func=date_func), + -2.5, + ) + result = s1(date_func).shift(pd.Timedelta(24, unit="H")) + assert bool(result == ans) + assert_expected_type(result, date_func) + + +def test_make_test_data(): + assert isinstance(test_data.make_test_data(dates=True), pd.DataFrame) + + +@pytest.mark.parametrize( + "kwargs", + [ + { + "lower": (2020, 1, 1), + }, + { + "lower": (2020, 1, 1), + "upper": (2020, 1, 8), + }, + { + 
"upper": (2020, 1, 8), + }, + ], +) +def test_clip_expected_type(date_func, kwargs): + kwargs = kwargs.copy() + kwargs = {key: timestamp(*val, date_func=date_func) for key, val in kwargs.items()} + result = s1(date_func).clip(**kwargs) + assert_expected_type(result, date_func) diff --git a/tests/test_dates/test_dates_relational.py b/tests/test_dates/test_dates_relational.py new file mode 100644 index 0000000..1f34852 --- /dev/null +++ b/tests/test_dates/test_dates_relational.py @@ -0,0 +1,209 @@ +from datetime import datetime + +import pandas as pd +import pytest +import pytz + +from staircase import Stairs +from staircase.constants import inf + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == "pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + +def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + if isinstance(example_type, (pd.Timestamp, 
datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, 
date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_eq(): + assert Stairs(initial_value=3) == 3 + + +def test_ne(date_func): + assert s1(date_func) != 3 diff --git a/tests/test_dates/test_dates_sample.py b/tests/test_dates/test_dates_sample.py new file mode 100644 index 0000000..6bc88fb --- /dev/null +++ b/tests/test_dates/test_dates_sample.py @@ -0,0 +1,266 @@ +from datetime import datetime + +import pandas as pd +import pytest +import pytz + +from staircase import Stairs +from staircase.constants import inf + + +def pytest_generate_tests(metafunc): + if "date_func" in metafunc.fixturenames: + metafunc.parametrize( + "date_func", + ["pandas", "pydatetime", "numpy", "pandas_tz", "pydatetime_tz"], + indirect=True, + ) + + +@pytest.fixture +def date_func(request): + # returns a func which takes a pandas timestamp + if request.param == "pandas": + return lambda x: x + elif request.param == 
"pydatetime": + return pd.Timestamp.to_pydatetime + elif request.param == "numpy": + return pd.Timestamp.to_datetime64 + elif request.param == "pandas_tz": + return lambda ts: pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ) + elif request.param == "pydatetime_tz": + return lambda ts: ( + pd.Timestamp.tz_localize( + ts, pytz.timezone("Australia/Sydney") + ).to_pydatetime() + ) + else: + assert False, "should not happen" + + +def timestamp(*args, date_func, **kwargs): + ts = pd.Timestamp(*args, **kwargs) + return date_func(ts) + + +def assert_expected_type(stairs, date_func): + if stairs._data is None: + return + example_type = timestamp(2020, 1, 1, date_func=date_func) + example_type = pd.Timestamp( + example_type + ) # pandas natively converts datetimes to timestamps + assert all( + [type(example_type) == type(x) for x in stairs._data.index] + ), "Unexpected type in step points" + if isinstance(example_type, (pd.Timestamp, datetime)): + assert all( + [example_type.tzinfo == x.tzinfo for x in stairs._data.index] + ), "Unexpected timezone in step points" + + +def _compare_iterables(it1, it2): + it1 = [i for i in it1 if i is not None] + it2 = [i for i in it2 if i is not None] + for e1, e2 in zip(it1, it2): + if e1 != e2: + return False + return True + + +def s1(date_func): + int_seq1 = Stairs(initial_value=0) + int_seq1.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 2, + ) + int_seq1.layer( + timestamp(2020, 1, 3, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 6, date_func=date_func), + timestamp(2020, 1, 7, date_func=date_func), + -2.5, + ) + int_seq1.layer( + timestamp(2020, 1, 7, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + -2.5, + ) + return int_seq1 + + +def s2(date_func): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer( + timestamp(2020, 1, 1, date_func=date_func), + timestamp(2020, 1, 
7, date_func=date_func), + -2.5, + ) + int_seq2.layer( + timestamp(2020, 1, 8, date_func=date_func), + timestamp(2020, 1, 10, date_func=date_func), + 5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, date_func=date_func), + timestamp(2020, 1, 5, date_func=date_func), + 4.5, + ) + int_seq2.layer( + timestamp(2020, 1, 2, 12, date_func=date_func), + timestamp(2020, 1, 4, date_func=date_func), + -2.5, + ) + return int_seq2 + + +def s3(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 10, date_func=date_func), + timestamp(2020, 1, 30, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 15, date_func=date_func), + timestamp(2020, 1, 18, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, 12, date_func=date_func), + timestamp(2020, 1, 21, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 23, date_func=date_func), + timestamp(2020, 1, 23, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + timestamp(2020, 1, 29, 12, date_func=date_func), + -1, + ) + return int_seq + + +def s4(date_func): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer( + timestamp(2020, 1, 9, date_func=date_func), + timestamp(2020, 1, 29, date_func=date_func), + 1, + ) + int_seq.layer( + timestamp(2020, 1, 10, 12, date_func=date_func), + timestamp(2020, 1, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 12, 12, date_func=date_func), + timestamp(2020, 1, 13, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 20, date_func=date_func), + timestamp(2020, 1, 23, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 26, date_func=date_func), + timestamp(2020, 1, 26, 12, date_func=date_func), + -1, + ) + int_seq.layer( + timestamp(2020, 1, 27, date_func=date_func), + 
timestamp(2020, 1, 28, 12, date_func=date_func), + -1, + ) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_sample_dates_1(date_func): + assert s1(date_func).sample(timestamp(2020, 1, 6, date_func=date_func)) == -0.5 + + +def test_limit_dates_1(date_func): + assert ( + s1(date_func).limit(timestamp(2020, 1, 6, date_func=date_func), side="right") + == -0.5 + ) + + +def test_limit_dates_2(date_func): + assert ( + s1(date_func).limit(timestamp(2020, 1, 6, date_func=date_func), side="left") + == 2 + ) + + +def test_sample_dates_4(date_func): + assert _compare_iterables( + s1(date_func).sample( + [ + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 6, date_func=date_func), + ] + ), + [4.5, -0.5], + ) + + +def test_limit_dates_3(date_func): + assert _compare_iterables( + s1(date_func).limit( + [ + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 6, date_func=date_func), + ], + side="right", + ), + [4.5, -0.5], + ) + + +def test_limit_dates_4(date_func): + assert _compare_iterables( + s1(date_func).limit( + [ + timestamp(2020, 1, 4, date_func=date_func), + timestamp(2020, 1, 6, date_func=date_func), + ], + side="left", + ), + [4.5, 2], + ) diff --git a/tests/test_stairs_dates.py b/tests/test_dates/test_dates_stats.py similarity index 57% rename from tests/test_stairs_dates.py rename to tests/test_dates/test_dates_stats.py index 7677fee..7bc811a 100644 --- a/tests/test_stairs_dates.py +++ b/tests/test_dates/test_dates_stats.py @@ -1,4 +1,3 @@ -import itertools from datetime import datetime import numpy as np @@ -6,9 +5,7 @@ import pytest import pytz -import staircase.test_data as test_data from staircase import Stairs -from staircase.constants import inf def pytest_generate_tests(metafunc): @@ -64,15 +61,6 @@ def assert_expected_type(stairs, date_func): ), 
"Unexpected timezone in step points" -def _compare_iterables(it1, it2): - it1 = [i for i in it1 if i is not None] - it2 = [i for i in it2 if i is not None] - for e1, e2 in zip(it1, it2): - if e1 != e2: - return False - return True - - def s1(date_func): int_seq1 = Stairs(initial_value=0) int_seq1.layer( @@ -410,608 +398,6 @@ def test_integral_dates_4(date_func): ), "Expected integral to be 132 hours" -# def test_integral_and_mean_dates_1(date_func): -# integral, mean = s1(date_func)._get_integral_and_mean() -# assert abs(mean - 13 / 9) <= 0.00001, "Expected mean to be 13/9" -# assert integral() / pd.Timedelta("1 H") == 312, "Expected integral to be 312 hours" - - -# def test_integral_and_mean_dates_2(date_func): -# integral, mean = s1(date_func)._get_integral_and_mean( -# (None, timestamp(2020, 1, 6, date_func=date_func)) -# ) -# assert mean == 3, "Expected mean to be 3" -# assert integral() / pd.Timedelta("1 D") == 15, "Expected integral to be 15" - - -# def test_integral_and_mean_3(date_func): -# integral, mean = s1(date_func)._get_integral_and_mean( -# (timestamp(2020, 1, 4, date_func=date_func), None) -# ) -# assert mean == 0.75, "Expected mean to be 0.75" -# assert integral() / pd.Timedelta("1 H") == 108, "Expected integral to be 108 hours" - - -# def test_integral_and_mean_dates_4(date_func): -# integral, mean = s1(date_func)._get_integral_and_mean( -# ( -# timestamp(2020, 1, 4, date_func=date_func), -# timestamp(2020, 1, 8, date_func=date_func), -# ) -# ) -# assert mean == 1.375, "Expected mean to be 1.375" -# assert integral() / pd.Timedelta("1 H") == 132, "Expected integral to be 132 hours" - - -def test_percentile_dates_1(date_func): - assert s1(date_func).percentile(20) == -0.5, "Expected 20th percentile to be -0.5" - assert s1(date_func).percentile(40) == -0.5, "Expected 40th percentile to be -0.5" - assert s1(date_func).percentile(60) == 2, "Expected 60th percentile to be 2" - assert s1(date_func).percentile(80) == 4.5, "Expected 80th percentile to be 
4.5" - - -def test_percentile_dates_2(date_func): - assert ( - s1(date_func) - .clip(None, timestamp(2020, 1, 6, date_func=date_func)) - .percentile(20) - == 2 - ), "Expected 20th percentile to be 2" - assert ( - s1(date_func) - .clip(None, timestamp(2020, 1, 6, date_func=date_func)) - .percentile(40) - == 2 - ), "Expected 40th percentile to be 2" - assert ( - s1(date_func) - .clip(None, timestamp(2020, 1, 6, date_func=date_func)) - .percentile(60) - == 3.25 - ), "Expected 60th percentile to be 3.25" - assert ( - s1(date_func) - .clip(None, timestamp(2020, 1, 6, date_func=date_func)) - .percentile(80) - == 4.5 - ), "Expected 80th percentile to be 4.5" - - -def test_percentile_dates_3(date_func): - assert ( - s1(date_func) - .clip(timestamp(2020, 1, 4, date_func=date_func), None) - .percentile(20) - == -0.5 - ), "Expected 20th percentile to be -0.5" - assert ( - s1(date_func) - .clip(timestamp(2020, 1, 4, date_func=date_func), None) - .percentile(40) - == -0.5 - ), "Expected 40th percentile to be -0.5" - assert ( - s1(date_func) - .clip(timestamp(2020, 1, 4, date_func=date_func), None) - .percentile(60) - == -0.5 - ), "Expected 60th percentile to be -0.5" - assert ( - s1(date_func) - .clip(timestamp(2020, 1, 4, date_func=date_func), None) - .percentile(80) - == 2 - ), "Expected 80th percentile to be 2" - - -def test_percentile_dates_4(date_func): - assert ( - s1(date_func) - .clip( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - ) - .percentile(20) - == -0.5 - ), "Expected 20th percentile to be -0.5" - assert ( - s1(date_func) - .clip( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - ) - .percentile(40) - == -0.5 - ), "Expected 40th percentile to be -0.5" - assert ( - s1(date_func) - .clip( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - ) - .percentile(60) - == 2 - ), "Expected 60th percentile to be 2" - assert ( - s1(date_func) - 
.clip( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - ) - .percentile(80) - == 4.5 - ), "Expected 80th percentile to be 4.5" - - -def test_get_percentiles_dates_1(date_func): - expected_step_values = pd.Series( - [-0.5, 2.0, 4.5, 4.5], index=[0, 44.444444, 77.77777778, 100] - ) - pd.testing.assert_series_equal( - s1(date_func).percentile.step_values, - expected_step_values, - check_names=False, - check_index_type=False, - ) - - -def test_get_percentiles_dates_2(date_func): - expected_step_values = pd.Series([2, 4.5, 4.5], index=[0, 60, 100]) - pd.testing.assert_series_equal( - s1(date_func) - .clip(None, timestamp(2020, 1, 6, date_func=date_func)) - .percentile.step_values, - expected_step_values, - check_names=False, - check_index_type=False, - ) - - -def test_get_percentiles_dates_3(date_func): - expected_step_values = pd.Series( - [-0.5, 2.0, 4.5, 4.5], index=[0, 66.6666666667, 83.333333333, 100] - ) - pd.testing.assert_series_equal( - s1(date_func) - .clip(timestamp(2020, 1, 4, date_func=date_func), None) - .percentile.step_values, - expected_step_values, - check_names=False, - check_index_type=False, - ) - - -def test_get_percentiles_dates_4(date_func): - expected_step_values = pd.Series([-0.5, 2.0, 4.5, 4.5], index=[0, 50, 75, 100]) - pd.testing.assert_series_equal( - s1(date_func) - .clip( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - ) - .percentile.step_values, - expected_step_values, - check_names=False, - check_index_type=False, - ) - - -def test_plot(date_func): - s1(date_func).plot() - - -# def test_resample_dates_1(date_func): -# assert s1(date_func).resample(timestamp(2020, 1, 4)).step_changes == { -# timestamp(2020, 1, 4).tz_localize(s1(date_func)._keys()[0].tz): 4.5 -# } - - -# def test_resample_dates_2(date_func): -# assert s1(date_func).resample(timestamp(2020, 1, 6), how="right").step_changes == { -# timestamp(2020, 1, 
6).tz_localize(s1(date_func)._keys()[0].tz): -0.5 -# } - - -# def test_resample_dates_3(date_func): -# assert s1(date_func).resample(timestamp(2020, 1, 6), how="left").step_changes == { -# timestamp(2020, 1, 6).tz_localize(s1(date_func)._keys()[0].tz): 2 -# } - - -# def test_resample_dates_4(date_func): -# assert s1(date_func).resample( -# [timestamp(2020, 1, 4), timestamp(2020, 1, 6)] -# ).step_changes == { -# timestamp(2020, 1, 4).tz_localize(s1(date_func)._keys()[0].tz): 4.5, -# timestamp(2020, 1, 6).tz_localize(s1(date_func)._keys()[0].tz): -5.0, -# } - - -# def test_resample_dates_5(date_func): -# assert s1(date_func).resample( -# [timestamp(2020, 1, 4), timestamp(2020, 1, 6)], how="right" -# ).step_changes == { -# timestamp(2020, 1, 4).tz_localize(s1(date_func)._keys()[0].tz): 4.5, -# timestamp(2020, 1, 6).tz_localize(s1(date_func)._keys()[0].tz): -5.0, -# } - - -# def test_resample_dates_6(date_func): -# assert s1(date_func).resample( -# [timestamp(2020, 1, 4), timestamp(2020, 1, 6)], how="left" -# ).step_changes == { -# timestamp(2020, 1, 4).tz_localize(s1(date_func)._keys()[0].tz): 4.5, -# timestamp(2020, 1, 6).tz_localize(s1(date_func)._keys()[0].tz): -2.5, -# } - - -def test_sample_dates_1(date_func): - assert s1(date_func).sample(timestamp(2020, 1, 6, date_func=date_func)) == -0.5 - - -def test_limit_dates_1(date_func): - assert ( - s1(date_func).limit(timestamp(2020, 1, 6, date_func=date_func), side="right") - == -0.5 - ) - - -def test_limit_dates_2(date_func): - assert ( - s1(date_func).limit(timestamp(2020, 1, 6, date_func=date_func), side="left") - == 2 - ) - - -def test_sample_dates_4(date_func): - assert _compare_iterables( - s1(date_func).sample( - [ - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 6, date_func=date_func), - ] - ), - [4.5, -0.5], - ) - - -def test_limit_dates_3(date_func): - assert _compare_iterables( - s1(date_func).limit( - [ - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 6, 
date_func=date_func), - ], - side="right", - ), - [4.5, -0.5], - ) - - -def test_limit_dates_4(date_func): - assert _compare_iterables( - s1(date_func).limit( - [ - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 6, date_func=date_func), - ], - side="left", - ), - [4.5, 2], - ) - - -def test_step_changes_dates(date_func): - expected_step_changes = pd.Series( - [2, 2.5, -2.5, -2.5, 0.5], - index=[ - timestamp("2020-1-1", date_func=date_func), - timestamp("2020-1-3", date_func=date_func), - timestamp("2020-1-5", date_func=date_func), - timestamp("2020-1-6", date_func=date_func), - timestamp("2020-1-10", date_func=date_func), - ], - ) - pd.testing.assert_series_equal( - s1(date_func).step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - - -def test_dataframe_dates(date_func): - ans = pd.DataFrame( - { - "start": [ - -inf, - timestamp("2020-01-01", date_func=date_func), - timestamp("2020-01-03", date_func=date_func), - timestamp("2020-01-05", date_func=date_func), - timestamp("2020-01-06", date_func=date_func), - timestamp("2020-01-10", date_func=date_func), - ], - "end": [ - timestamp("2020-01-01", date_func=date_func), - timestamp("2020-01-03", date_func=date_func), - timestamp("2020-01-05", date_func=date_func), - timestamp("2020-01-06", date_func=date_func), - timestamp("2020-01-10", date_func=date_func), - inf, - ], - "value": [0, 2, 4.5, 2, -0.5, 0], - } - ) - pd.testing.assert_frame_equal(s1(date_func).to_frame(), ans) - - -def test_add_dates(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): -0.5, - timestamp("2020-01-02 00:00:00", date_func=date_func): 4.5, - timestamp("2020-01-02 12:00:00", date_func=date_func): -2.5, - timestamp("2020-01-03 00:00:00", date_func=date_func): 2.5, - timestamp("2020-01-04 00:00:00", date_func=date_func): 2.5, - timestamp("2020-01-05 00:00:00", date_func=date_func): -7.0, - timestamp("2020-01-06 00:00:00", 
date_func=date_func): -2.5, - timestamp("2020-01-07 00:00:00", date_func=date_func): 2.5, - timestamp("2020-01-08 00:00:00", date_func=date_func): 5, - timestamp("2020-01-10 00:00:00", date_func=date_func): -4.5, - } - ) - result = s1(date_func) + s2(date_func) - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def test_subtract_dates(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): 4.5, - timestamp("2020-01-02 00:00:00", date_func=date_func): -4.5, - timestamp("2020-01-02 12:00:00", date_func=date_func): 2.5, - timestamp("2020-01-03 00:00:00", date_func=date_func): 2.5, - timestamp("2020-01-04 00:00:00", date_func=date_func): -2.5, - timestamp("2020-01-05 00:00:00", date_func=date_func): 2.0, - timestamp("2020-01-06 00:00:00", date_func=date_func): -2.5, - timestamp("2020-01-07 00:00:00", date_func=date_func): -2.5, - timestamp("2020-01-08 00:00:00", date_func=date_func): -5, - timestamp("2020-01-10 00:00:00", date_func=date_func): 5.5, - } - ) - result = s1(date_func) - s2(date_func) - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def test_multiply_dates(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): -5.0, - timestamp("2020-01-02 00:00:00", date_func=date_func): 9.0, - timestamp("2020-01-02 12:00:00", date_func=date_func): -5.0, - timestamp("2020-01-03 00:00:00", date_func=date_func): -1.25, - timestamp("2020-01-04 00:00:00", date_func=date_func): 11.25, - timestamp("2020-01-05 00:00:00", date_func=date_func): -14.0, - timestamp("2020-01-06 00:00:00", date_func=date_func): 6.25, - timestamp("2020-01-07 00:00:00", date_func=date_func): -1.25, - timestamp("2020-01-08 00:00:00", 
date_func=date_func): -2.5, - timestamp("2020-01-10 00:00:00", date_func=date_func): 2.5, - } - ) - result = s1(date_func) * s2(date_func) - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def test_multiply_dates_scalar(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): 6.0, - timestamp("2020-01-03 00:00:00", date_func=date_func): 7.5, - timestamp("2020-01-05 00:00:00", date_func=date_func): -7.5, - timestamp("2020-01-06 00:00:00", date_func=date_func): -7.5, - timestamp("2020-01-10 00:00:00", date_func=date_func): 1.5, - } - ) - result = s1(date_func) * 3 - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def test_divide_dates(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): -1.3333333333333333, - timestamp("2020-01-02 00:00:00", date_func=date_func): 2.0, - timestamp("2020-01-02 12:00:00", date_func=date_func): 3.3333333333333335, - timestamp("2020-01-03 00:00:00", date_func=date_func): 5.0, - timestamp("2020-01-04 00:00:00", date_func=date_func): -7.5, - timestamp("2020-01-05 00:00:00", date_func=date_func): -2.833333333333333, - timestamp("2020-01-06 00:00:00", date_func=date_func): 1.6666666666666665, - timestamp("2020-01-07 00:00:00", date_func=date_func): -0.8333333333333333, - timestamp("2020-01-08 00:00:00", date_func=date_func): 0.4166666666666667, - timestamp("2020-01-10 00:00:00", date_func=date_func): 0.08333333333333333, - } - ) - result = s1(date_func) / (s2(date_func) + 1) - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def 
test_divide_dates_scalar(date_func): - expected_step_changes = pd.Series( - { - timestamp("2020-01-01 00:00:00", date_func=date_func): 4.0, - timestamp("2020-01-03 00:00:00", date_func=date_func): 5.0, - timestamp("2020-01-05 00:00:00", date_func=date_func): -5.0, - timestamp("2020-01-06 00:00:00", date_func=date_func): -5.0, - timestamp("2020-01-10 00:00:00", date_func=date_func): 1.0, - } - ) - result = s1(date_func) / 0.5 - pd.testing.assert_series_equal( - result.step_changes, - expected_step_changes, - check_names=False, - check_index_type=False, - ) - assert_expected_type(result, date_func) - - -def test_to_frame(date_func): - s1(date_func).to_frame() - - -@pytest.mark.parametrize( - "stairs_func, bounds, cuts", - itertools.product( - [s1, s2, s3, s4], - [ - ((2020, 1, 3), (2020, 1, 4)), - ((2020, 1, 1), (2020, 1, 4)), - ((2020, 1, 2), (2020, 2, 4)), - ], - ["unit", (-2, 0, 0.5, 4, 4.5, 7)], - ), -) -def test_hist_default_bins_left_closed(date_func, stairs_func, bounds, cuts): - stairs_instance = stairs_func(date_func) - bounds = [timestamp(*args, date_func=date_func) for args in bounds] - - def make_expected_result(interval_index, lower, upper): - return pd.Series( - [ - ((stairs_instance >= i.left) * (stairs_instance < i.right)).agg( - "mean", (lower, upper) - ) - for i in interval_index - ], - index=interval_index, - ) - - hist = stairs_instance.clip(*bounds).hist(bins=cuts, stat="probability") - expected = make_expected_result(hist.index, *bounds) - pd.testing.assert_series_equal( - hist, - expected, - check_names=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "stairs_func, bounds, cuts", - itertools.product( - [s1, s2, s3, s4], - [ - ((2020, 1, 3), (2020, 1, 4)), - ((2020, 1, 1), (2020, 1, 4)), - ((2020, 1, 2), (2020, 2, 4)), - ], - ["unit", (-2, 0, 0.5, 4, 4.5, 7)], - ), -) -def test_hist_default_bins_right_closed(date_func, stairs_func, bounds, cuts): - stairs_instance = stairs_func(date_func) - bounds = [timestamp(*args, 
date_func=date_func) for args in bounds] - - def make_expected_result(interval_index, lower, upper): - return pd.Series( - [ - ((stairs_instance > i.left) * (stairs_instance <= i.right)).agg( - "mean", (lower, upper) - ) - for i in interval_index - ], - index=interval_index, - ) - - hist = stairs_instance.clip(*bounds).hist( - bins=cuts, closed="right", stat="probability" - ) - expected = make_expected_result(hist.index, *bounds) - pd.testing.assert_series_equal( - hist, - expected, - check_names=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "stairs_func, bounds, closed", - itertools.product( - [s1, s2, s3, s4], - [ - ((2020, 1, 3), (2020, 1, 4)), - ((2020, 1, 1), (2020, 1, 4)), - ((2020, 1, 2), (2020, 2, 4)), - ], - ["left", "right"], - ), -) -def test_hist_default_bins(date_func, stairs_func, bounds, closed): - # really testing the default binning process here - stairs_instance = stairs_func(date_func) - bounds = [timestamp(*args, date_func=date_func) for args in bounds] - hist = stairs_instance.clip(*bounds).hist(closed=closed, stat="probability") - assert abs(hist.sum() - 1) < 0.000001 - - -def test_shift(date_func): - ans = Stairs(initial_value=0) - ans.layer( - timestamp(2020, 1, 2, date_func=date_func), - timestamp(2020, 1, 11, date_func=date_func), - 2, - ) - ans.layer( - timestamp(2020, 1, 4, date_func=date_func), - timestamp(2020, 1, 6, date_func=date_func), - 2.5, - ) - ans.layer( - timestamp(2020, 1, 7, date_func=date_func), - timestamp(2020, 1, 8, date_func=date_func), - -2.5, - ) - ans.layer( - timestamp(2020, 1, 8, date_func=date_func), - timestamp(2020, 1, 11, date_func=date_func), - -2.5, - ) - result = s1(date_func).shift(pd.Timedelta(24, unit="H")) - assert bool(result == ans) - assert_expected_type(result, date_func) - - # low, high = timestamp(2020,1,1, date_func=date_func), timestamp(2020,1,10, date_func=date_func) # total_secs = int((high-low).total_seconds()) # pts = [low + pd.Timedelta(x, unit='sec') for x in 
np.linspace(0, total_secs, total_secs)] @@ -1537,101 +923,6 @@ def test_crosscov(date_func, kwargs, expected): ) -# @pytest.mark.parametrize( -# "kwargs, expected_index, expected_vals", -# [ -# ( -# {"window": (-pd.Timedelta(1, "d"), pd.Timedelta(1, "d"))}, -# [ -# (2019, 12, 31), -# (2020, 1, 2), -# (2020, 1, 4), -# (2020, 1, 5), -# (2020, 1, 6), -# (2020, 1, 7), -# (2020, 1, 9), -# (2020, 1, 11), -# ], -# [0.0, 2.0, 4.5, 3.25, 0.75, -0.5, -0.5, 0.0], -# ), -# ( -# {"window": (-pd.Timedelta(2, "d"), pd.Timedelta(0, "d"))}, -# [ -# (2020, 1, 1), -# (2020, 1, 3), -# (2020, 1, 5), -# (2020, 1, 6), -# (2020, 1, 7), -# (2020, 1, 8), -# (2020, 1, 10), -# (2020, 1, 12), -# ], -# [0.0, 2.0, 4.5, 3.25, 0.75, -0.5, -0.5, 0.0], -# ), -# ( -# { -# "window": (-pd.Timedelta(1, "d"), pd.Timedelta(1, "d")), -# "lower": (2020, 1, 3), -# "upper": (2020, 1, 8), -# }, -# [ -# (2020, 1, 4), -# (2020, 1, 5), -# (2020, 1, 6), -# (2020, 1, 7), -# ], -# [4.5, 3.25, 0.75, -0.5], -# ), -# ], -# ) -# def test_s1_rolling_mean(date_func, kwargs, expected_index, expected_vals): -# expected_index = [timestamp(*args, date_func=date_func) for args in expected_index] -# new_kwargs = {**kwargs} -# if "lower" in kwargs: -# new_kwargs["lower"] = timestamp(*kwargs["lower"], date_func=date_func) -# if "upper" in kwargs: -# new_kwargs["upper"] = timestamp(*kwargs["upper"], date_func=date_func) -# new_kwargs - -# rm = s1(date_func).rolling_mean(**new_kwargs) -# assert list(rm.values) == expected_vals -# assert list(rm.index) == expected_index - - -def test_eq(): - assert Stairs(initial_value=3) == 3 - - -def test_ne(date_func): - assert s1(date_func) != 3 - - -def test_make_test_data(): - assert isinstance(test_data.make_test_data(dates=True), pd.DataFrame) - - -@pytest.mark.parametrize( - "kwargs", - [ - { - "lower": (2020, 1, 1), - }, - { - "lower": (2020, 1, 1), - "upper": (2020, 1, 8), - }, - { - "upper": (2020, 1, 8), - }, - ], -) -def test_clip_expected_type(date_func, kwargs): - kwargs = 
kwargs.copy() - kwargs = {key: timestamp(*val, date_func=date_func) for key, val in kwargs.items()} - result = s1(date_func).clip(**kwargs) - assert_expected_type(result, date_func) - - def test_integral_overflow(): with pytest.raises(OverflowError): s = ( diff --git a/tests/test_floats/test_floats_arithmetic.py b/tests/test_floats/test_floats_arithmetic.py new file mode 100644 index 0000000..aa62eb3 --- /dev/null +++ b/tests/test_floats/test_floats_arithmetic.py @@ -0,0 +1,264 @@ +import numpy as np +import pandas as pd +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_add_1(s1_fix, s2_fix): + assert pd.Series.equals( + (s1_fix + s2_fix).step_changes, + pd.Series( + { + -4: -1.75, + -2: -1.75, + 1: 1.25, + 2: 4.5, + 2.5: -2.5, + 3: 2.5, + 4: 2.5, + 5: -5.25, + 6: -2.5, + 7: 2.5, + 8: 5, + 10: -4.5, + } + ), + ) + + +def 
test_add_2(s1_fix): + s = s1_fix + 3 + assert s.initial_value == 3 + assert pd.Series.equals( + s.step_changes, + s1_fix.step_changes, + ) + + +def test_add_3(s1_fix): + s = 3 + s1_fix + assert s.initial_value == 3 + assert pd.Series.equals( + s.step_changes, + s1_fix.step_changes, + ) + + +def test_sub_1(s1_fix, s2_fix): + assert pd.Series.equals( + (s1_fix - s2_fix).step_values, + pd.Series( + { + -4.0: -1.75, + -2.0: 0.0, + 1.0: 2.75, + 2.0: -1.75, + 2.5: 0.75, + 3.0: 3.25, + 4.0: 0.75, + 5.0: 4.5, + 6.0: 2.0, + 7.0: -0.5, + 8.0: -5.5, + 10.0: 0.0, + } + ), + ) + + +def test_sub_2(s1_fix): + s = s1_fix - 3 + assert s.initial_value == -3 + assert pd.Series.equals( + s.step_changes, + s1_fix.step_changes, + ) + + +def test_sub_3(s1_fix): + s = 3 - s1_fix + assert s.initial_value == 3 + assert pd.Series.equals( + s.step_changes, + -(s1_fix.step_changes), + ) + + +def test_divide(s1_fix, s2_fix): + assert pd.Series.equals( + (s1_fix / (s2_fix + 1)).step_changes, + pd.Series( + { + -4: -1.75, + -2: 4.083333333333334, + 1: -2.5, + 2: 0.25, + 2.5: 0.4166666666666667, + 3: 5.0, + 4: -4.583333333333333, + 5: -2.25, + 6: 1.6666666666666665, + 7: -0.8333333333333333, + 8: 0.4166666666666667, + 10: 0.08333333333333333, + } + ), + ) + + +def test_divide_scalar(s1_fix): + assert pd.Series.equals( + (s1_fix / 0.5).step_changes, + pd.Series( + { + -4: -3.5, + 1: 4.0, + 3: 5.0, + 5: -1.5, + 6: -5.0, + 10: 1.0, + } + ), + ) + + +def test_scalar_divide(): + s = Stairs().layer([1, 2, 5], [3, 4, 7], [1, -1, 2]) + assert pd.Series.equals( + (2 / s).step_values, + pd.Series( + { + 1: 2.0, + 2: np.nan, + 3: -2.0, + 4: np.nan, + 5: 1.0, + 7: np.nan, + } + ), + ) + + +def test_multiply(s1_fix, s2_fix): + assert pd.Series.equals( + (s1_fix * s2_fix).step_changes, + pd.Series( + { + -2: 3.0625, + 1: -3.6875, + 2: 1.125, + 2.5: -0.625, + 3: -1.25, + 4: 6.875, + 5: -10.5, + 6: 6.25, + 7: -1.25, + 8: -2.5, + 10: 2.5, + } + ), + ) + + +def test_multiply_scalar(s1_fix): + assert 
pd.Series.equals( + (s1_fix * 3).step_changes, + pd.Series( + { + -4: -5.25, + 1: 6.0, + 3: 7.5, + 5: -2.25, + 6: -7.5, + 10: 1.5, + } + ), + ) + + +def test_multiply_scalar_2(s1_fix): + assert pd.Series.equals( + (3 * s1_fix).step_changes, + pd.Series( + { + -4: -5.25, + 1: 6.0, + 3: 7.5, + 5: -2.25, + 6: -7.5, + 10: 1.5, + } + ), + ) + + +def test_negate(s1_fix): + pd.testing.assert_series_equal( + (-s1_fix).step_values, + pd.Series({-4: 1.75, 1: -0.25, 3: -2.75, 5: -2.0, 6: 0.5, 10: 0.0}), + check_names=False, + check_index_type=False, + ) diff --git a/tests/test_stairs_arrays.py b/tests/test_floats/test_floats_arrays.py similarity index 100% rename from tests/test_stairs_arrays.py rename to tests/test_floats/test_floats_arrays.py diff --git a/tests/test_floats/test_floats_construction.py b/tests/test_floats/test_floats_construction.py new file mode 100644 index 0000000..5149f41 --- /dev/null +++ b/tests/test_floats/test_floats_construction.py @@ -0,0 +1,336 @@ +import itertools + +import numpy as np +import pandas as pd +import pytest + +from staircase import Stairs + + +def _expand_interval_definition(start, end=None, value=1): + return start, end, value + + +def _compare_iterables(it1, it2): + it1 = [i for i in it1 if i is not None] + it2 = [i for i in it2 if i is not None] + if len(it2) != len(it1): + return False + for e1, e2 in zip(it1, it2): + if e1 != e2: + return False + return True + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + 
int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_init(): + assert Stairs(initial_value=0).identical(Stairs()) + assert Stairs().identical(Stairs(initial_value=0)) + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_init2(init_value): + int_seq = Stairs(initial_value=init_value) + assert ( + int_seq.number_of_steps == 0 + ), "Initialised Stairs should have exactly one interval" + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_init3(init_value): + int_seq = Stairs(initial_value=init_value) + assert ( + len(int_seq.step_points) == 0 + ), "Initialised Stairs should not have any finite interval endpoints" + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_init4(init_value): + int_seq = Stairs(initial_value=init_value) + assert ( + int_seq(-1) == init_value + ), "Initialised Stairs should have initial value everywhere" + assert ( + int_seq(0) == init_value + ), "Initialised Stairs should have initial value everywhere" + assert ( + int_seq(1) == init_value + ), "Initialised Stairs should have initial value everywhere" + + +@pytest.mark.parametrize( + "init_value, added_interval", + itertools.product( + [0, 1.25, -1.25], + [(-2, 1), (3, 5, 2), (1, 5, -1), (-5, -3, 3), (3,), (2, None, 2)], + ), +) +def test_one_finite_interval(init_value, added_interval): + e = 0.0001 + int_seq = Stairs(initial_value=init_value) + int_seq.layer(*added_interval) + start, end, 
value = _expand_interval_definition(*added_interval) + assert int_seq.number_of_steps == 2 - ( + end is None + ), "One finite interval added to initial infinite interval should result in 3 intervals" + assert _compare_iterables( + int_seq.step_points, (start, end) + ), "Finite endpoints are not what is expected" + assert ( + int_seq(float("-inf")) == init_value + ), "Adding finite interval should not change initial value" + assert int_seq(float("inf")) == init_value + value * ( + end is None + ), "Adding finite interval should not change final value" + assert int_seq(start - e) == init_value + assert int_seq(start) == init_value + value + assert int_seq(start + e) == init_value + value + if end is not None: + assert int_seq(end - e) == init_value + value + assert int_seq(end) == init_value + + +@pytest.mark.parametrize( + "init_value, endpoints, value", + itertools.product( + [0, 1.25, -1.25, 2, -2], + [(-2, 1, 3), (-2, -1, 3), (-3, -2, -1), (1, 2, 3)], + [-1, 2, 3], + ), +) +def test_two_adjacent_finite_interval_same_value(init_value, endpoints, value): + e = 0.0001 + int_seq = Stairs(initial_value=init_value) + point1, point2, point3 = endpoints + int_seq.layer(point1, point2, value) + int_seq.layer(point2, point3, value) + assert int_seq.number_of_steps == 2, "Expected result to be 3 intervals" + assert _compare_iterables( + int_seq.step_points, (point1, point3) + ), "Finite endpoints are not what is expected" + assert ( + int_seq(float("-inf")) == init_value + ), "Adding finite interval should not change initial value" + assert ( + int_seq(float("inf")) == init_value + ), "Adding finite interval should not change final value" + assert int_seq(point1 - e) == init_value + assert int_seq(point1) == init_value + value + assert int_seq(point2) == init_value + value + assert int_seq(point3 - e) == init_value + value + assert int_seq(point3) == init_value + + +@pytest.mark.parametrize( + "init_value, endpoints, value, delta", + itertools.product( + [0, 1.25, -1.25, 2, 
-2], + [(-2, 1, 3), (-2, -1, 3), (-3, -2, -1), (1, 2, 3)], + [-1, 2, 4], + [3, -3, 1.5, -1.5], + ), +) +def test_two_adjacent_finite_interval_different_value( + init_value, endpoints, value, delta +): + e = 0.0001 + int_seq = Stairs(initial_value=init_value) + point1, point2, point3 = endpoints + int_seq.layer(point1, point2, value) + int_seq.layer(point2, point3, value + delta) + assert int_seq.number_of_steps == 3, "Expected result to be 4 intervals" + assert _compare_iterables( + int_seq.step_points, (point1, point2, point3) + ), "Finite endpoints are not what is expected" + assert ( + int_seq(float("-inf")) == init_value + ), "Adding finite interval should not change initial value" + assert ( + int_seq(float("inf")) == init_value + ), "Adding finite interval should not change final value" + assert int_seq(point1 - e) == init_value + assert int_seq(point1) == init_value + value + assert int_seq(point2) == init_value + value + delta + assert int_seq(point3 - e) == init_value + value + delta + assert int_seq(point3) == init_value + + +@pytest.mark.parametrize( + "init_value, endpoints, value, delta", + itertools.product( + [0, 1.25, -1.25, 2, -2], + [(-2, 1, 2, 3), (-3, -2, -1, 3), (-4, -3, -2, -1), (0, 1, 2, 3)], + [-1, 2, 4], + [3, -3, 1.5, -1.5], + ), +) +def test_two_overlapping_finite_interval(init_value, endpoints, value, delta): + e = 0.0001 + int_seq = Stairs(initial_value=init_value) + point1, point2, point3, point4 = endpoints + int_seq.layer(point1, point3, value) + int_seq.layer(point2, point4, value + delta) + assert int_seq.number_of_steps == 4, "Expected result to be 5 intervals" + assert _compare_iterables( + int_seq.step_points, (point1, point2, point3, point4) + ), "Finite endpoints are not what is expected" + assert ( + int_seq(float("-inf")) == init_value + ), "Adding finite interval should not change initial value" + assert ( + int_seq(float("inf")) == init_value + ), "Adding finite interval should not change final value" + assert 
int_seq(point1 - e) == init_value + assert int_seq(point1) == init_value + value + assert int_seq(point2) == init_value + 2 * value + delta + assert int_seq(point3 - e) == init_value + 2 * value + delta + assert int_seq(point3) == init_value + value + delta + assert int_seq(point4 - e) == init_value + value + delta + assert int_seq(point4) == init_value + + +@pytest.mark.parametrize( + "init_value, endpoints, value, delta", + itertools.product( + [0, 1.25, -1.25, 2, -2], + [(-2, 1, 2, 3), (-3, -2, -1, 3), (-4, -3, -2, -1), (0, 1, 2, 3)], + [-1, 2, 4], + [3, -3, 1.5, -1.5], + ), +) +def test_two_finite_interval_one_subinterval(init_value, endpoints, value, delta): + e = 0.0001 + int_seq = Stairs(initial_value=init_value) + point1, point2, point3, point4 = endpoints + int_seq.layer(point1, point4, value) + int_seq.layer(point2, point3, value + delta) + assert int_seq.number_of_steps == 4, "Expected result to be 5 intervals" + assert _compare_iterables( + int_seq.step_points, (point1, point2, point3, point4) + ), "Finite endpoints are not what is expected" + assert ( + int_seq.initial_value == init_value + ), "Adding finite interval should not change initial value" + assert ( + int_seq(float("inf")) == init_value + ), "Adding finite interval should not change final value" + assert int_seq(point1 - e) == init_value + assert int_seq(point1) == init_value + value + assert int_seq(point2) == init_value + 2 * value + delta + assert int_seq(point3 - e) == init_value + 2 * value + delta + assert int_seq(point3) == init_value + value + assert int_seq(point4 - e) == init_value + value + assert int_seq(point4) == init_value + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_layer1(init_value): + intervals_to_add = [(-2, 1), (3, 5), (1, 5), (-5, -3), (None, 0), (0, None)] + int_seq = Stairs(initial_value=init_value) + int_seq2 = Stairs(initial_value=init_value) + for start, end in intervals_to_add: + int_seq.layer(start, end) + starts, ends = 
list(zip(*intervals_to_add)) + starts = [{None: np.nan}.get(x, x) for x in starts] + ends = [{None: np.nan}.get(x, x) for x in ends] + int_seq2.layer(starts, ends) + assert int_seq.identical(int_seq2) + assert int_seq2.identical(int_seq) + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_layer2(init_value): + intervals_to_add = [(-2, 1, 1), (3, 5, 2), (1, 5, -1), (-5, -3, 3)] + int_seq = Stairs(initial_value=init_value) + int_seq2 = Stairs(initial_value=init_value) + for interval in intervals_to_add: + int_seq.layer(*interval) + starts, ends, values = list(zip(*intervals_to_add)) + int_seq2.layer(starts, ends, values) + assert int_seq.identical(int_seq2) + assert int_seq2.identical(int_seq) + + +def test_layering_index(s1_fix): + result = Stairs( + start=pd.Index([1, -4, 3, 6, 7]), + end=pd.Index([10, 5, 5, 7, 10]), + value=pd.Index([2, -1.75, 2.5, -2.5, -2.5]), + ) + assert result.identical(s1_fix) + + +def test_layering_frame(s1_fix): + df = pd.DataFrame( + { + "start": [1, -4, 3, 6, 7], + "end": [10, 5, 5, 7, 10], + "value": [2, -1.75, 2.5, -2.5, -2.5], + } + ) + assert Stairs(df, "start", "end", "value").identical(s1_fix) + + +def test_layering_trivial_1(s1_fix): + assert s1_fix.copy().layer(1, 1).identical(s1_fix) diff --git a/tests/test_floats/test_floats_distribution.py b/tests/test_floats/test_floats_distribution.py new file mode 100644 index 0000000..4ece058 --- /dev/null +++ b/tests/test_floats/test_floats_distribution.py @@ -0,0 +1,188 @@ +import itertools + +import numpy as np +import pandas as pd +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + 
int_seq2.layer(2.5, 4, -2.5)
+    int_seq2.layer(-2, 1, -1.75)
+    return int_seq2
+
+
+def s3():  # boolean
+    int_seq = Stairs(initial_value=0)
+    int_seq.layer(-10, 10, 1)
+    int_seq.layer(-8, -7, -1)
+    int_seq.layer(-5, -2, -1)
+    int_seq.layer(0.5, 1, -1)
+    int_seq.layer(3, 3.5, -1)
+    int_seq.layer(7, 9.5, -1)
+    return int_seq
+
+
+def s4():  # boolean
+    int_seq = Stairs(initial_value=0)
+    int_seq.layer(-11, 9, 1)
+    int_seq.layer(-9.5, -8, -1)
+    int_seq.layer(-7.5, -7, -1)
+    int_seq.layer(0, 3, -1)
+    int_seq.layer(6, 6.5, -1)
+    int_seq.layer(7, 8.5, -1)
+    return int_seq
+
+
+@pytest.fixture
+def s1_fix():
+    return s1()
+
+
+@pytest.fixture
+def s2_fix():
+    return s2()
+
+
+@pytest.fixture
+def s3_fix():
+    return s3()
+
+
+@pytest.fixture
+def s4_fix():
+    return s4()
+
+
+@pytest.mark.parametrize(
+    "stairs_instance, bounds, cuts",
+    itertools.product(
+        [s1(), s2(), s3(), s4()],
+        [(3, 4), (0, 10), (-10, 30), (-5, -1)],
+        ["unit", (0, 2.5, 4, 4.5, 7)],
+    ),
+)
+def test_hist_left_closed(stairs_instance, bounds, cuts):  # hist vs per-bin means
+    def make_expected_result(interval_index, lower, upper):
+        return pd.Series(
+            [
+                ((stairs_instance >= i.left) * (stairs_instance < i.right)).agg(
+                    "mean", (lower, upper)
+                )
+                for i in interval_index
+            ],
+            index=interval_index,
+            dtype="float64",
+        )
+
+    hist = stairs_instance.clip(*bounds).hist(bins=cuts, stat="probability")
+    expected = make_expected_result(hist.index, *bounds)  # compared at 5 d.p.
+    assert (hist.round(5) == expected.round(5)).all(), f"{bounds}, {cuts}"
+
+
+@pytest.mark.parametrize(
+    "stairs_instance, bounds, cuts",
+    itertools.product(
+        [s1(), s2(), s3(), s4()],
+        [(3, 4), (0, 10), (-10, 30), (-5, -1)],
+        ["unit", (0, 2.5, 4, 4.5, 7)],
+    ),
+)
+def test_hist_right_closed(stairs_instance, bounds, cuts):  # hist vs per-bin means
+    def make_expected_result(interval_index, lower, upper):
+        return pd.Series(
+            [
+                ((stairs_instance > i.left) * (stairs_instance <= i.right)).agg(
+                    "mean", (lower, upper)
+                )
+                for i in interval_index
+            ],
+            index=interval_index,
+            dtype="float64",
+        )
+
+    hist = stairs_instance.clip(*bounds).hist(
+        bins=cuts, closed="right", stat="probability"
+    )
+    expected = make_expected_result(hist.index, *bounds)  # compared at 5 d.p.
+    assert (hist.round(5) == expected.round(5)).all(), f"{bounds}, {cuts}"
+
+
+@pytest.mark.parametrize(
+    "stairs_instance, bounds, closed",
+    itertools.product(
+        [s1(), s2(), s3(), s4()],
+        [(3, 4), (0, 10), (-10, 30), (-5, -1)],
+        ["left", "right"],
+    ),
+)
+def test_hist_default_bins(stairs_instance, bounds, closed):
+    # really testing the default binning process here
+    hist = stairs_instance.clip(*bounds).hist(closed=closed, stat="probability")
+    assert abs(hist.sum() - 1) < 0.000001
+
+
+def test_value_sums(s1_fix):
+    pd.testing.assert_series_equal(
+        s1_fix.value_sums(),
+        pd.Series({-1.75: 5, -0.5: 4, 0.25: 2, 2.0: 1, 2.75: 2}),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_hist_frequency(s1_fix):
+    index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left")
+    pd.testing.assert_series_equal(
+        s1_fix.hist(bins=[-2, 0, 2, 3], stat="frequency"),
+        pd.Series([4.5, 1, 3], index=index),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_hist_density(s1_fix):
+    index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left")
+    pd.testing.assert_series_equal(
+        s1_fix.hist(bins=[-2, 0, 2, 3], stat="density"),
+        pd.Series([0.36, 0.08, 0.12], index=index),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_hist_probability(s1_fix):
+    index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left")
+    pd.testing.assert_series_equal(
+        s1_fix.hist(bins=[-2, 0, 2, 3], stat="probability"),
+        pd.Series([0.642857, 0.142857, 0.214286], index=index),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_quantiles(s1_fix):
+    assert (s1_fix.quantiles(4) == np.array([-1.75, -0.5, 0.25])).all()
+
+
+def test_fractile(s1_fix):
+    assert list(map(s1_fix.fractile, (0.25, 0.5, 0.75))) == [
+        -1.75,
+        -0.5,
+        0.25,
+    ]
diff --git
a/tests/test_floats/test_floats_logical.py b/tests/test_floats/test_floats_logical.py new file mode 100644 index 0000000..168ae4b --- /dev/null +++ b/tests/test_floats/test_floats_logical.py @@ -0,0 +1,201 @@ +import itertools + +import numpy as np +import pandas as pd +import pytest + +import staircase.test_data as test_data +from staircase import Stairs + + +def _expand_interval_definition(start, end=None, value=1): + return start, end, value + + +def _compare_iterables(it1, it2): + it1 = [i for i in it1 if i is not None] + it2 = [i for i in it2 if i is not None] + if len(it2) != len(it1): + return False + for e1, e2 in zip(it1, it2): + if e1 != e2: + return False + return True + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_logical_and_scalar_1(s3_fix): + assert (s3_fix & 1).identical(s3_fix) + + +def test_logical_rand_scalar_1(s3_fix): + assert (1 & 
s3_fix).identical(s3_fix) + + +def test_logical_and_scalar_2(s3_fix): + assert (s3_fix & 0).identical(0) + + +def test_logical_rand_scalar_2(s3_fix): + assert (0 & s3_fix).identical(0) + + +def test_logical_and_scalar_3(s3_fix): + assert (s3_fix & np.nan).identical(np.nan) + + +def test_logical_rand_scalar_3(s3_fix): + assert (np.nan & s3_fix).identical(np.nan) + + +def test_logical_or_scalar_1(s3_fix): + assert (s3_fix | 1).identical(1) + + +def test_logical_ror_scalar_1(s3_fix): + assert (1 | s3_fix).identical(1) + + +def test_logical_or_scalar_2(s3_fix): + assert (s3_fix | 0).identical(s3_fix) + + +def test_logical_ror_scalar_2(s3_fix): + assert (0 | s3_fix).identical(s3_fix) + + +def test_logical_or_scalar_3(s3_fix): + assert (s3_fix | np.nan).identical(np.nan) + + +def test_logical_ror_scalar_3(s3_fix): + assert (np.nan | s3_fix).identical(np.nan) + + +def test_logical_xor_scalar_1(s3_fix): + assert (s3_fix ^ 1).identical(~s3_fix) + + +def test_logical_rxor_scalar_1(s3_fix): + assert (1 ^ s3_fix).identical(~s3_fix) + + +def test_logical_xor_scalar_2(s3_fix): + assert (s3_fix ^ 0).identical(s3_fix) + + +def test_logical_rxor_scalar_2(s3_fix): + assert (0 ^ s3_fix).identical(s3_fix) + + +def test_logical_xor_scalar_3(s3_fix): + assert (s3_fix ^ np.nan).identical(np.nan) + + +def test_logical_rxor_scalar_3(s3_fix): + assert (np.nan ^ s3_fix).identical(np.nan) + + +def test_make_boolean(s2_fix): + int_seq = s2_fix + calc = int_seq.make_boolean() + expected = Stairs() + expected.layer(-2, 7, 1) + expected.layer(8, 10, 1) + assert calc.identical(expected), "Boolean calculation not what it should be" + assert expected.identical(calc), "Boolean calculation not what it should be" + + +def test_invert(s2_fix): + int_seq = s2_fix + calc = ~int_seq + expected = Stairs(initial_value=1) + expected.layer(-2, 7, -1) + expected.layer(8, 10, -1) + assert calc.identical(expected), "Invert calculation not what it should be" + assert expected.identical(calc), "Invert calculation 
not what it should be" + + +def test_and(s3_fix, s4_fix): + calc = s3_fix & s4_fix + expected = Stairs(initial_value=0) + expected.layer(-10, -9.5) + expected.layer(-7, -5) + expected.layer(-2, 0) + expected.layer(3.5, 6) + expected.layer(6.5, 7) + assert calc.identical(expected), "AND calculation not what it should be" + assert expected.identical(calc), "AND calculation not what it should be" + + +def test_or(s3_fix, s4_fix): + calc = s3_fix | s4_fix + expected = Stairs(initial_value=0) + expected.layer(-11, -7.5) + expected.layer(-7, 0.5) + expected.layer(1, 7) + expected.layer(8.5, 9) + expected.layer(9.5, 10) + assert calc.identical(expected), "OR calculation not what it should be" + assert expected.identical(calc), "OR calculation not what it should be" diff --git a/tests/test_floats/test_floats_misc.py b/tests/test_floats/test_floats_misc.py new file mode 100644 index 0000000..1ecc99b --- /dev/null +++ b/tests/test_floats/test_floats_misc.py @@ -0,0 +1,237 @@ +import numpy as np +import pandas as pd +import pytest + +import staircase.test_data as test_data +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + 
int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_copy_and_equality(init_value): + int_seq = Stairs(initial_value=init_value) + int_seq_copy = int_seq.copy() + assert int_seq.identical(int_seq_copy) + assert int_seq_copy.identical(int_seq) + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_deepcopy(init_value): + int_seq = Stairs(initial_value=init_value) + int_seq_copy = int_seq.copy() + int_seq_copy.layer(1, 2) + assert not int_seq.identical(int_seq_copy) + assert not int_seq_copy.identical(int_seq) + + +def test_to_dataframe(s1_fix): + s1_fix.to_frame() + + +@pytest.mark.parametrize( + "kwargs, expected_index, expected_vals", + [ + ( + {"window": (-1, 1)}, + [-5, -3, 0, 2, 4, 5, 6, 7, 9, 11], + [0.0, -1.75, -1.75, 0.25, 2.75, 2.375, 0.75, -0.5, -0.5, 0.0], + ), + ( + {"window": (-2, 0)}, + [-4, -2, 1, 3, 5, 6, 7, 8, 10, 12], + [0.0, -1.75, -1.75, 0.25, 2.75, 2.375, 0.75, -0.5, -0.5, 0.0], + ), + ( + {"window": (-1, 1), "where": (0, 8)}, + [1, 2, 4, 5, 6, 7], + [-0.75, 0.25, 2.75, 2.375, 0.75, -0.5], + ), + ], +) +def test_s1_rolling_mean(s1_fix, kwargs, expected_index, expected_vals): + rm = s1_fix.rolling_mean(**kwargs) + assert list(rm.values) == expected_vals + assert list(rm.index) == expected_index + + +@pytest.mark.parametrize( + "kwargs", + [ + {}, + {"arrows": True, "style": "hlines"}, + {"arrows": False, "style": "hlines"}, + {"arrows": True, "style": "step"}, + {"arrows": False, "style": "step"}, + ], +) +def test_plot(s1_fix, kwargs): + s1_fix.plot(**kwargs) + + +def test_plot_trivial_1(): + Stairs().plot() + + +def test_plot_trivial_2(): + Stairs(initial_value=np.nan).plot() + + +def test_plot_ecdf(s1_fix): + s1_fix.plot.ecdf() 
+
+
+def test_plot_bad_backend(s1_fix):
+    with pytest.raises(ValueError):
+        s1_fix.plot(backend="")
+
+
+def test_plot_ecdf_bad_backend(s1_fix):
+    with pytest.raises(ValueError):
+        s1_fix.plot.ecdf(backend="")
+
+
+def test_diff(s1_fix):
+    assert pd.Series.equals(
+        s1_fix.diff(1).step_changes,
+        pd.Series(
+            {
+                -4: -1.75,
+                -3: 1.75,
+                1: 2,
+                2: -2,
+                3: 2.5,
+                4: -2.5,
+                5: -0.75,
+                6: -1.75,
+                7: 2.5,
+                10: 0.5,
+                11: -0.5,
+            }
+        ),
+    )
+
+
+def test_str(s1_fix):
+    assert str(s1_fix) is not None
+    assert str(s1_fix) != ""
+
+
+def test_repr(s1_fix):
+    assert repr(s1_fix) is not None
+    assert repr(s1_fix) != ""
+
+
+def test_make_test_data():
+    assert isinstance(test_data.make_test_data(), pd.DataFrame)
+
+
+def test_pipe(s1_fix):
+    def is_stairs(s):
+        return isinstance(s, Stairs)
+
+    assert s1_fix.pipe(is_stairs)  # pipe returns the callable's result
+
+
+def test_step_changes(s1_fix):
+    pd.testing.assert_series_equal(
+        s1_fix.step_changes,
+        pd.Series({-4: -1.75, 1: 2.0, 3: 2.5, 5: -0.75, 6: -2.5, 10: 0.5}),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_step_values(s1_fix):
+    pd.testing.assert_series_equal(
+        s1_fix.step_values,
+        pd.Series({-4: -1.75, 1: 0.25, 3: 2.75, 5: 2.0, 6: -0.5, 10: 0.0}),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_step_points(s1_fix):
+    assert list(s1_fix.step_points) == [-4, 1, 3, 5, 6, 10]
+
+
+def test_step_changes_stepless():
+    pd.testing.assert_series_equal(
+        Stairs().step_changes,
+        pd.Series([], dtype="float64"),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_step_values_stepless():
+    pd.testing.assert_series_equal(
+        Stairs().step_values,
+        pd.Series([], dtype="float64"),
+        check_names=False,
+        check_index_type=False,
+    )
+
+
+def test_step_points_stepless():
+    assert list(Stairs().step_points) == []
diff --git a/tests/test_floats/test_floats_relational.py b/tests/test_floats/test_floats_relational.py
new file mode 100644
index 0000000..929f1b3
--- /dev/null
+++ b/tests/test_floats/test_floats_relational.py
@@ -0,0 +1,138 @@ +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +def test_lt(s1_fix, s2_fix): + calc = s1_fix < s2_fix + expected = Stairs(initial_value=0) + expected.layer(-4, -2) + expected.layer(2, 2.5) + expected.layer(7, 10) + assert calc.identical(expected), "LT calculation not what it should be" + assert expected.identical(calc), "LT calculation not what it should be" + + +def test_gt(s1_fix, s2_fix): + calc = s1_fix > s2_fix + expected = Stairs(initial_value=0) + expected.layer(1, 2) + expected.layer(2.5, 7) + assert calc.identical(expected), "GT calculation not what it should be" + assert expected.identical(calc), "GT calculation not what it should be" + + +def test_le(s1_fix, s2_fix): + calc = s1_fix <= s2_fix + expected = Stairs(initial_value=1) + expected.layer(1, 2, -1) + expected.layer(2.5, 7, -1) + assert 
calc.identical(expected), "LE calculation not what it should be" + assert expected.identical(calc), "LE calculation not what it should be" + + +def test_ge(s1_fix, s2_fix): + calc = s1_fix >= s2_fix + expected = Stairs(initial_value=1) + expected.layer(-4, -2, -1) + expected.layer(2, 2.5, -1) + expected.layer(7, 10, -1) + assert calc.identical(expected), "GE calculation not what it should be" + assert expected.identical(calc), "GE calculation not what it should be" + + +def test_eq_1(s1_fix, s2_fix): + calc = s1_fix == s2_fix + expected = Stairs(initial_value=1) + expected.layer(-4, -2, -1) + expected.layer(1, 10, -1) + assert calc.identical(expected), "EQ calculation not what it should be" + assert expected.identical(calc), "EQ calculation not what it should be" + + +def test_eq_2(s1_fix, s2_fix): + calc = s1_fix == s2_fix + expected = Stairs(initial_value=1) + expected.layer(-4, -2, -1) + expected.layer(1, 10, -1) + assert calc.identical(expected), "EQ calculation not what it should be" + assert expected.identical(calc), "EQ calculation not what it should be" + + +def test_ne(s1_fix, s2_fix): + calc = s1_fix != s2_fix + expected = Stairs(initial_value=0) + expected.layer(-4, -2, 1) + expected.layer(1, 10, 1) + assert calc.identical(expected), "NOT EQUAL calculation not what it should be" + assert expected.identical(calc), "NOT EQUAL calculation not what it should be" + + +def test_eq_3(): + assert Stairs(initial_value=3) == 3 + + +def test_ne_3(s1_fix): + assert s1_fix != 3 diff --git a/tests/test_floats/test_floats_sample.py b/tests/test_floats/test_floats_sample.py new file mode 100644 index 0000000..153d056 --- /dev/null +++ b/tests/test_floats/test_floats_sample.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 
10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +@pytest.mark.parametrize( + "x, kwargs, expected_val", + [ + ( + [-4, -2, 1, 3], + {"side": "right"}, + np.array([-1.75, -1.75, 0.25, 2.75]), + ), + ( + [-4, -2, 1, 3], + {"side": "right"}, + np.array([-1.75, -1.75, 0.25, 2.75]), + ), + ( + [-4, -2, 1, 3], + {"side": "left"}, + np.array([0.0, -1.75, -1.75, 0.25]), + ), + ], +) +def test_s1_limit(s1_fix, x, kwargs, expected_val): + assert np.array_equal(s1_fix.limit(x, **kwargs), expected_val) diff --git a/tests/test_floats/test_floats_slicing.py b/tests/test_floats/test_floats_slicing.py new file mode 100644 index 0000000..d1b8782 --- /dev/null +++ b/tests/test_floats/test_floats_slicing.py @@ -0,0 +1,310 @@ +import numpy as np +import pandas as pd +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + 
int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +@pytest.mark.parametrize( + "x, kwargs, expected_val", + [ + ( + [-4, -2, 1, 3], + {"aggfunc": "mean", "window": (-0.5, 0.5)}, + np.array([-0.875, -1.75, -0.75, 1.5]), + ), + ( + [-4, -2, 1, 3], + {"aggfunc": "mean", "window": (-1, 0)}, + np.array([0.0, -1.75, -1.75, 0.25]), + ), + ( + [-4, -2, 1, 3], + {"aggfunc": "mean", "window": (0, 1)}, + np.array([-1.75, -1.75, 0.25, 2.75]), + ), + ], +) +def test_s1_agg_mean(s1_fix, x, kwargs, expected_val): + window = kwargs["window"] + x = np.array(x) + ii = pd.IntervalIndex.from_arrays(x + window[0], x + window[1]) + assert np.array_equal(s1_fix.slice(ii).mean().values, expected_val) + + +@pytest.mark.parametrize( + "closed, x, kwargs, expected_val", + [ + ( + "left", + [0, 2, 7], + {"aggfunc": "max", "window": (-1, 1)}, + np.array([-1.75, 0.25, -0.5]), + ), + ( + "right", + [0, 2, 7], + {"aggfunc": "max", "window": (-1, 1), "closed": "left"}, + np.array([-1.75, 0.25, 2.0]), + ), + ( + "left", + [0, 2, 7], + {"aggfunc": "max", "window": (-1, 1), "closed": "right"}, + np.array([0.25, 2.75, -0.5]), + ), + ( + "right", + [0, 2, 7], + {"aggfunc": "max", "window": (-1, 1), "closed": 
"right"}, + np.array([-1.75, 0.25, -0.5]), + ), + ], +) +def test_s1_agg_max(closed, x, kwargs, expected_val): + window = kwargs["window"] + x = np.array(x) + ii = pd.IntervalIndex.from_arrays( + x + window[0], x + window[1], closed=kwargs.get("closed", "left") + ) + assert np.array_equal(s1(closed=closed).slice(ii).max().values, expected_val) + + +def test_slicing_mean(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).mean(), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -0.75, + pd.Interval(2, 4, closed="left"): 1.5, + pd.Interval(4, 6, closed="left"): 2.375, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_max(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).max(), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): 0.25, + pd.Interval(2, 4, closed="left"): 2.75, + pd.Interval(4, 6, closed="left"): 2.75, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_min(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).min(), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -1.75, + pd.Interval(2, 4, closed="left"): 0.25, + pd.Interval(4, 6, closed="left"): 2.0, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_mode(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).mode(), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 
0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -1.75, + pd.Interval(2, 4, closed="left"): 0.25, + pd.Interval(4, 6, closed="left"): 2.0, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_median(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).median(), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -0.75, + pd.Interval(2, 4, closed="left"): 1.5, + pd.Interval(4, 6, closed="left"): 2.375, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_agg_min(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).agg("min")["min"], + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -1.75, + pd.Interval(2, 4, closed="left"): 0.25, + pd.Interval(4, 6, closed="left"): 2.0, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_apply_min(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(-4, 11, 2)).apply(Stairs.min), + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -1.75, + pd.Interval(2, 4, closed="left"): 0.25, + pd.Interval(4, 6, closed="left"): 2.0, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_agg_min_max(s1_fix): + result = s1_fix.slice(range(-4, 11, 2)).agg(["min", "max"]) + pd.testing.assert_series_equal( + result["min"], + pd.Series( + { + pd.Interval(-4, -2, 
closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): -1.75, + pd.Interval(2, 4, closed="left"): 0.25, + pd.Interval(4, 6, closed="left"): 2.0, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + pd.testing.assert_series_equal( + result["max"], + pd.Series( + { + pd.Interval(-4, -2, closed="left"): -1.75, + pd.Interval(-2, 0, closed="left"): -1.75, + pd.Interval(0, 2, closed="left"): 0.25, + pd.Interval(2, 4, closed="left"): 2.75, + pd.Interval(4, 6, closed="left"): 2.75, + pd.Interval(6, 8, closed="left"): -0.5, + pd.Interval(8, 10, closed="left"): -0.5, + } + ), + check_names=False, + check_index_type=False, + ) + + +def test_slicing_resample_mean(s1_fix): + pd.testing.assert_series_equal( + s1_fix.slice(range(0, 7, 2)).resample("mean").step_values, + pd.Series({-4: -1.75, 0: -0.75, 2: 1.5, 4: 2.375, 6: -0.5, 10: 0.0}), + check_names=False, + check_index_type=False, + ) diff --git a/tests/test_floats/test_floats_stats.py b/tests/test_floats/test_floats_stats.py new file mode 100644 index 0000000..540f375 --- /dev/null +++ b/tests/test_floats/test_floats_stats.py @@ -0,0 +1,384 @@ +import numpy as np +import pytest + +from staircase import Stairs + + +def s1(closed="left"): + int_seq1 = Stairs(initial_value=0, closed=closed) + int_seq1.layer(1, 10, 2) + int_seq1.layer(-4, 5, -1.75) + int_seq1.layer(3, 5, 2.5) + int_seq1.layer(6, 7, -2.5) + int_seq1.layer(7, 10, -2.5) + return int_seq1 + + +def s2(): + int_seq2 = Stairs(initial_value=0) + int_seq2.layer(1, 7, -2.5) + int_seq2.layer(8, 10, 5) + int_seq2.layer(2, 5, 4.5) + int_seq2.layer(2.5, 4, -2.5) + int_seq2.layer(-2, 1, -1.75) + return int_seq2 + + +def s3(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-10, 10, 1) + int_seq.layer(-8, -7, -1) + int_seq.layer(-5, -2, -1) + int_seq.layer(0.5, 1, -1) + int_seq.layer(3, 3.5, -1) + int_seq.layer(7, 9.5, -1) + 
return int_seq + + +def s4(): # boolean + int_seq = Stairs(initial_value=0) + int_seq.layer(-11, 9, 1) + int_seq.layer(-9.5, -8, -1) + int_seq.layer(-7.5, -7, -1) + int_seq.layer(0, 3, -1) + int_seq.layer(6, 6.5, -1) + int_seq.layer(7, 8.5, -1) + return int_seq + + +@pytest.fixture +def s1_fix(): + return s1() + + +@pytest.fixture +def s2_fix(): + return s2() + + +@pytest.fixture +def s3_fix(): + return s3() + + +@pytest.fixture +def s4_fix(): + return s4() + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_base_integral_0_2(init_value): + int_seq = Stairs(initial_value=init_value) + assert int_seq.agg("integral", (0, 2)) == 2 * init_value + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_base_integral_neg1_1(init_value): + int_seq = Stairs(initial_value=init_value) + assert int_seq.agg("integral", (-1, 1)) == 2 * init_value + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_base_integral_neg2_0(init_value): + int_seq = Stairs(initial_value=init_value) + assert int_seq.agg("integral", (-2, 0)) == 2 * init_value + + +@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) +def test_base_integral_point5_1(init_value): + int_seq = Stairs(initial_value=init_value) + assert int_seq.agg("integral", (0.5, 1)) == 0.5 * init_value + + +def test_integral1(s1_fix, s2_fix): + assert s1_fix.integral() == -2.75 + assert s2_fix.integral() == -0.5 + + +def test_integral2(s1_fix, s2_fix): + assert s1_fix.agg("integral", (-1, 5.5)) == 3.5 + assert s2_fix.agg("integral", (-1, 5.5)) == -5 + + +def test_mean1(s1_fix, s2_fix): + assert abs(s1_fix.mean() - -0.19642857) < 0.000001 + assert abs(s2_fix.mean() - -0.04166666) < 0.000001 + + +def test_mean2(s1_fix, s2_fix): + assert abs(s1_fix.agg("mean", (2, 8)) - 1.125) < 0.000001 + assert abs(s2_fix.agg("mean", (2, 8)) - -0.45833333) < 0.000001 + + +def test_integral_0(): + assert Stairs(initial_value=0).layer(None, 0).integral() is np.nan + + +def 
test_mean_nan(): + assert Stairs(initial_value=0).layer(None, 0).mean() is np.nan + + +# np.var(st1(np.linspace(-4,10, 10000000))) = 2.501594244387741 +# np.var(st1(np.linspace(-5,10, 10000000))) = 2.3372686165530117 +# np.var(st1(np.linspace(1,12, 10000000))) = 1.5433884747933315 + + +@pytest.mark.parametrize( + "bounds, expected", + [ + ((), 2.501594244387741), + (((-5, 10),), 2.3372686165530117), + (((1, 12),), 1.5433884747933315), + ], +) +def test_s1_var(bounds, expected): + assert np.isclose(s1().agg("var", *bounds), expected, atol=0.0001) + + +# np.var(st2(np.linspace(-2, 10, 10000000))) = 7.024303861110942 +# np.var(st2(np.linspace(-3, 7.5, 10000000))) = 2.2678568437499633 +# np.var(st2(np.linspace(0, 14, 10000000))) = 5.538902194132663 + + +@pytest.mark.parametrize( + "bounds, expected", + [ + ((), 7.024303861110942), + (((-3, 7.5),), 2.2678568437499633), + (((0, 14),), 5.538902194132663), + ], +) +def test_s2_var(bounds, expected): + assert np.isclose(s2().agg("var", *bounds), expected, atol=0.0001) + + +# np.std(st1(np.linspace(-4,10, 10000000))) = 1.5816428940780978 +# np.std(st1(np.linspace(-5,10, 10000000))) = 1.528797568034358 +# np.std(st1(np.linspace(1,12, 10000000))) = 1.242331869829206 + + +@pytest.mark.parametrize( + "bounds, expected", + [ + ((), 1.5816428940780978), + (((-5, 10),), 1.528797568034358), + (((1, 12),), 1.242331869829206), + ], +) +def test_s1_std(bounds, expected): + assert np.isclose(s1().agg("std", *bounds), expected, atol=0.0001) + + +# np.std(st2(np.linspace(-2, 10, 10000000))) = 2.650340329299417 +# np.std(st2(np.linspace(-3, 7.5, 10000000))) = 1.5059405179986238 +# np.std(st2(np.linspace(0, 14, 10000000))) = 2.3534872411238315 + + +@pytest.mark.parametrize( + "bounds, expected", + [ + ((), 2.650340329299417), + (((-3, 7.5),), 1.5059405179986238), + (((0, 14),), 2.3534872411238315), + ], +) +def test_s2_std(bounds, expected): + assert np.isclose(s2().agg("std", *bounds), expected, atol=0.0001) + + +# # 
np.cov(st1(pts[:-100000]), st1(pts[100000:]))[0,1] = 1.9386094481108465 +# # np.cov(st1(np.linspace(-4, 8, 12*100000 + 1)), st1(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = 1.1184896017794723 +# # np.cov(st1(np.linspace(-4, 8, 12*100000 + 1)), st1.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = 1.1184896017794723 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-4, 10), "lag": 1}, 1.9386094481108465), + ({"where": (-4, 10), "lag": 2}, 1.1184896017794723), + ({"where": (-4, 8), "lag": 2, "clip": "post"}, 1.1184896017794723), + ], +) +def test_s1_autocov(kwargs, expected): + assert np.isclose(s1().cov(s1(), **kwargs), expected, atol=0.00001) + + +# # np.cov(st2(np.linspace(-2, 9, 11*100000 + 1)), st2(np.linspace(-1, 10, 11*100000 + 1)))[0,1 = 3.1022721590913256 +# # np.cov(st2(np.linspace(0, 6, 12*100000 + 1)), st2(np.linspace(2, 8, 12*100000 + 1)))[0,1] = -0.7291746267294938 +# # np.cov(st2(np.linspace(0, 6, 12*100000 + 1)), st2.shift(-2)(np.linspace(0, 6, 12*100000 + 1)))[0,1] = -0.7291746267294938 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-2, 10), "lag": 1}, 3.1022721590913256), + ({"where": (0, 8), "lag": 2}, -0.7291746267294938), + ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.7291746267294938), + ], +) +def test_s2_autocov(kwargs, expected): + assert np.isclose(s2().cov(s2(), **kwargs), expected, atol=0.00001) + + +# # np.cov(st1(np.linspace(-2, 9, 11*100000 + 1)), st2(np.linspace(-1, 10, 11*100000 + 1)))[0,1 = -0.08677679611199672 +# # np.cov(st1(np.linspace(0, 6, 12*100000 + 1)), st2(np.linspace(2, 8, 12*100000 + 1)))[0,1] = -1.970493123547197 +# # np.cov(st1(np.linspace(0, 6, 12*100000 + 1)), st2.shift(-2)(np.linspace(0, 6, 12*100000 + 1)))[0,1] = -1.970493123547197 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-2, 10), "lag": 1}, -0.08677679611199672), + ({"where": (0, 8), "lag": 2}, -1.970493123547197), + ({"where": (0, 6), "lag": 2, "clip": "post"}, -1.970493123547197), + 
], +) +def test_crosscov(kwargs, expected): + assert np.isclose(s1().cov(s2(), **kwargs), expected, atol=0.00001) + + +# # np.corrcoef(st1(pts[:-100000]), st1(pts[100000:]))[0,1] = 0.6927353407369307 +# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st1(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.2147502741669856 +# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st1.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.2147502741669856 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-2, 10), "lag": 1}, 0.6927353407369307), + ({"where": (0, 8), "lag": 2}, -0.2147502741669856), + ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.2147502741669856), + ], +) +def test_s1_autocorr(kwargs, expected): + assert np.isclose(s1().corr(s1(), **kwargs), expected, atol=0.00001) + + +# # np.corrcoef(st2(pts[:-100000]), st2(pts[100000:]))[0,1] = 0.5038199912440895 +# # np.corrcoef(st2(np.linspace(-4, 8, 12*100000 + 1)), st2(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.2419504099129966 +# # np.corrcoef(st2(np.linspace(-4, 8, 12*100000 + 1)), st2.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.2419504099129966 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-2, 10), "lag": 1}, 0.5038199912440895), + ({"where": (0, 8), "lag": 2}, -0.2419504099129966), + ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.2419504099129966), + ], +) +def test_s2_autocorr(kwargs, expected): + assert np.isclose(s2().corr(s2(), **kwargs), expected, atol=0.00001) + + +# # np.corrcoef(st1(pts[:-100000]), st2(pts[100000:]))[0,1] = -0.01966642657198049 +# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st2(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.7086484036832666 +# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st2.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.7086484036832666 + + +@pytest.mark.parametrize( + "kwargs, expected", + [ + ({"where": (-2, 10), "lag": 1}, -0.01966642657198049), + ({"where": (0, 8), 
"lag": 2}, -0.7086484036832666), + ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.7086484036832666), + ], +) +def test_crosscorr(kwargs, expected): + assert np.isclose(s1().corr(s2(), **kwargs), expected, atol=0.00001) + + +@pytest.mark.parametrize( + "closed, kwargs, expected_val", + [ + ( + "left", + {}, + -1.75, + ), + ( + "left", + {"where": (1, 6)}, + 0.25, + ), + ( + "right", + {"where": (1, 6), "closed": "left"}, + -1.75, + ), + ( + "left", + {"where": (1, 6), "closed": "right"}, + -0.5, + ), + ], +) +def test_s1_min(closed, kwargs, expected_val): + from staircase.core import stats + + assert stats.min(s1(closed=closed), **kwargs) == expected_val + + +@pytest.mark.parametrize( + "closed, kwargs, expected_val", + [ + ( + "left", + {}, + 2.75, + ), + ( + "left", + {"where": (-4, 1)}, + -1.75, + ), + ( + "right", + {"where": (-4, 1), "closed": "left"}, + 0.0, + ), + ( + "left", + {"where": (-4, 1), "closed": "right"}, + 0.25, + ), + ], +) +def test_s1_max(closed, kwargs, expected_val): + from staircase.core import stats + + assert stats.max(s1(closed=closed), **kwargs) == expected_val + + +@pytest.mark.parametrize( + "closed, kwargs, expected_val", + [ + ( + "left", + {}, + np.array([-1.75, -0.5, 0.0, 0.25, 2.0, 2.75]), + ), + ( + "left", + {"where": (-4, 10)}, + np.array([-1.75, -0.5, 0.25, 2.0, 2.75]), + ), + ( + "left", + {"where": (1, 6)}, + np.array([0.25, 2.0, 2.75]), + ), + ( + "right", + {"where": (1, 6), "closed": "left"}, + np.array([-1.75, 0.25, 2.0, 2.75]), + ), + ( + "left", + {"where": (1, 6), "closed": "right"}, + np.array([-0.5, 0.25, 2.0, 2.75]), + ), + ], +) +def test_s1_values_in_range(closed, kwargs, expected_val): + assert np.array_equal(s1(closed=closed).values_in_range(**kwargs), expected_val) diff --git a/tests/test_stairs.py b/tests/test_stairs.py deleted file mode 100644 index 6367d94..0000000 --- a/tests/test_stairs.py +++ /dev/null @@ -1,1596 +0,0 @@ -import itertools - -import numpy as np -import pandas as pd -import pytest - 
-import staircase.test_data as test_data -from staircase import Stairs - - -def _expand_interval_definition(start, end=None, value=1): - return start, end, value - - -def _compare_iterables(it1, it2): - it1 = [i for i in it1 if i is not None] - it2 = [i for i in it2 if i is not None] - if len(it2) != len(it1): - return False - for e1, e2 in zip(it1, it2): - if e1 != e2: - return False - return True - - -def s1(closed="left"): - int_seq1 = Stairs(initial_value=0, closed=closed) - int_seq1.layer(1, 10, 2) - int_seq1.layer(-4, 5, -1.75) - int_seq1.layer(3, 5, 2.5) - int_seq1.layer(6, 7, -2.5) - int_seq1.layer(7, 10, -2.5) - return int_seq1 - - -def s2(): - int_seq2 = Stairs(initial_value=0) - int_seq2.layer(1, 7, -2.5) - int_seq2.layer(8, 10, 5) - int_seq2.layer(2, 5, 4.5) - int_seq2.layer(2.5, 4, -2.5) - int_seq2.layer(-2, 1, -1.75) - return int_seq2 - - -def s3(): # boolean - int_seq = Stairs(initial_value=0) - int_seq.layer(-10, 10, 1) - int_seq.layer(-8, -7, -1) - int_seq.layer(-5, -2, -1) - int_seq.layer(0.5, 1, -1) - int_seq.layer(3, 3.5, -1) - int_seq.layer(7, 9.5, -1) - return int_seq - - -def s4(): # boolean - int_seq = Stairs(initial_value=0) - int_seq.layer(-11, 9, 1) - int_seq.layer(-9.5, -8, -1) - int_seq.layer(-7.5, -7, -1) - int_seq.layer(0, 3, -1) - int_seq.layer(6, 6.5, -1) - int_seq.layer(7, 8.5, -1) - return int_seq - - -@pytest.fixture -def s1_fix(): - return s1() - - -@pytest.fixture -def s2_fix(): - return s2() - - -@pytest.fixture -def s3_fix(): - return s3() - - -@pytest.fixture -def s4_fix(): - return s4() - - -def test_init(): - assert Stairs(initial_value=0).identical(Stairs()) - assert Stairs().identical(Stairs(initial_value=0)) - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_init2(init_value): - int_seq = Stairs(initial_value=init_value) - assert ( - int_seq.number_of_steps == 0 - ), "Initialised Stairs should have exactly one interval" - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def 
test_init3(init_value): - int_seq = Stairs(initial_value=init_value) - assert ( - len(int_seq.step_points) == 0 - ), "Initialised Stairs should not have any finite interval endpoints" - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_init4(init_value): - int_seq = Stairs(initial_value=init_value) - assert ( - int_seq(-1) == init_value - ), "Initialised Stairs should have initial value everywhere" - assert ( - int_seq(0) == init_value - ), "Initialised Stairs should have initial value everywhere" - assert ( - int_seq(1) == init_value - ), "Initialised Stairs should have initial value everywhere" - - -@pytest.mark.parametrize( - "init_value, added_interval", - itertools.product( - [0, 1.25, -1.25], - [(-2, 1), (3, 5, 2), (1, 5, -1), (-5, -3, 3), (3,), (2, None, 2)], - ), -) -def test_one_finite_interval(init_value, added_interval): - e = 0.0001 - int_seq = Stairs(initial_value=init_value) - int_seq.layer(*added_interval) - start, end, value = _expand_interval_definition(*added_interval) - assert int_seq.number_of_steps == 2 - ( - end is None - ), "One finite interval added to initial infinite interval should result in 3 intervals" - assert _compare_iterables( - int_seq.step_points, (start, end) - ), "Finite endpoints are not what is expected" - assert ( - int_seq(float("-inf")) == init_value - ), "Adding finite interval should not change initial value" - assert int_seq(float("inf")) == init_value + value * ( - end is None - ), "Adding finite interval should not change final value" - assert int_seq(start - e) == init_value - assert int_seq(start) == init_value + value - assert int_seq(start + e) == init_value + value - if end is not None: - assert int_seq(end - e) == init_value + value - assert int_seq(end) == init_value - - -@pytest.mark.parametrize( - "init_value, endpoints, value", - itertools.product( - [0, 1.25, -1.25, 2, -2], - [(-2, 1, 3), (-2, -1, 3), (-3, -2, -1), (1, 2, 3)], - [-1, 2, 3], - ), -) -def 
test_two_adjacent_finite_interval_same_value(init_value, endpoints, value): - e = 0.0001 - int_seq = Stairs(initial_value=init_value) - point1, point2, point3 = endpoints - int_seq.layer(point1, point2, value) - int_seq.layer(point2, point3, value) - assert int_seq.number_of_steps == 2, "Expected result to be 3 intervals" - assert _compare_iterables( - int_seq.step_points, (point1, point3) - ), "Finite endpoints are not what is expected" - assert ( - int_seq(float("-inf")) == init_value - ), "Adding finite interval should not change initial value" - assert ( - int_seq(float("inf")) == init_value - ), "Adding finite interval should not change final value" - assert int_seq(point1 - e) == init_value - assert int_seq(point1) == init_value + value - assert int_seq(point2) == init_value + value - assert int_seq(point3 - e) == init_value + value - assert int_seq(point3) == init_value - - -@pytest.mark.parametrize( - "init_value, endpoints, value, delta", - itertools.product( - [0, 1.25, -1.25, 2, -2], - [(-2, 1, 3), (-2, -1, 3), (-3, -2, -1), (1, 2, 3)], - [-1, 2, 4], - [3, -3, 1.5, -1.5], - ), -) -def test_two_adjacent_finite_interval_different_value( - init_value, endpoints, value, delta -): - e = 0.0001 - int_seq = Stairs(initial_value=init_value) - point1, point2, point3 = endpoints - int_seq.layer(point1, point2, value) - int_seq.layer(point2, point3, value + delta) - assert int_seq.number_of_steps == 3, "Expected result to be 4 intervals" - assert _compare_iterables( - int_seq.step_points, (point1, point2, point3) - ), "Finite endpoints are not what is expected" - assert ( - int_seq(float("-inf")) == init_value - ), "Adding finite interval should not change initial value" - assert ( - int_seq(float("inf")) == init_value - ), "Adding finite interval should not change final value" - assert int_seq(point1 - e) == init_value - assert int_seq(point1) == init_value + value - assert int_seq(point2) == init_value + value + delta - assert int_seq(point3 - e) == init_value + 
value + delta - assert int_seq(point3) == init_value - - -@pytest.mark.parametrize( - "init_value, endpoints, value, delta", - itertools.product( - [0, 1.25, -1.25, 2, -2], - [(-2, 1, 2, 3), (-3, -2, -1, 3), (-4, -3, -2, -1), (0, 1, 2, 3)], - [-1, 2, 4], - [3, -3, 1.5, -1.5], - ), -) -def test_two_overlapping_finite_interval(init_value, endpoints, value, delta): - e = 0.0001 - int_seq = Stairs(initial_value=init_value) - point1, point2, point3, point4 = endpoints - int_seq.layer(point1, point3, value) - int_seq.layer(point2, point4, value + delta) - assert int_seq.number_of_steps == 4, "Expected result to be 5 intervals" - assert _compare_iterables( - int_seq.step_points, (point1, point2, point3, point4) - ), "Finite endpoints are not what is expected" - assert ( - int_seq(float("-inf")) == init_value - ), "Adding finite interval should not change initial value" - assert ( - int_seq(float("inf")) == init_value - ), "Adding finite interval should not change final value" - assert int_seq(point1 - e) == init_value - assert int_seq(point1) == init_value + value - assert int_seq(point2) == init_value + 2 * value + delta - assert int_seq(point3 - e) == init_value + 2 * value + delta - assert int_seq(point3) == init_value + value + delta - assert int_seq(point4 - e) == init_value + value + delta - assert int_seq(point4) == init_value - - -@pytest.mark.parametrize( - "init_value, endpoints, value, delta", - itertools.product( - [0, 1.25, -1.25, 2, -2], - [(-2, 1, 2, 3), (-3, -2, -1, 3), (-4, -3, -2, -1), (0, 1, 2, 3)], - [-1, 2, 4], - [3, -3, 1.5, -1.5], - ), -) -def test_two_finite_interval_one_subinterval(init_value, endpoints, value, delta): - e = 0.0001 - int_seq = Stairs(initial_value=init_value) - point1, point2, point3, point4 = endpoints - int_seq.layer(point1, point4, value) - int_seq.layer(point2, point3, value + delta) - assert int_seq.number_of_steps == 4, "Expected result to be 5 intervals" - assert _compare_iterables( - int_seq.step_points, (point1, point2, 
point3, point4) - ), "Finite endpoints are not what is expected" - assert ( - int_seq.initial_value == init_value - ), "Adding finite interval should not change initial value" - assert ( - int_seq(float("inf")) == init_value - ), "Adding finite interval should not change final value" - assert int_seq(point1 - e) == init_value - assert int_seq(point1) == init_value + value - assert int_seq(point2) == init_value + 2 * value + delta - assert int_seq(point3 - e) == init_value + 2 * value + delta - assert int_seq(point3) == init_value + value - assert int_seq(point4 - e) == init_value + value - assert int_seq(point4) == init_value - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_copy_and_equality(init_value): - int_seq = Stairs(initial_value=init_value) - int_seq_copy = int_seq.copy() - assert int_seq.identical(int_seq_copy) - assert int_seq_copy.identical(int_seq) - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_deepcopy(init_value): - int_seq = Stairs(initial_value=init_value) - int_seq_copy = int_seq.copy() - int_seq_copy.layer(1, 2) - assert not int_seq.identical(int_seq_copy) - assert not int_seq_copy.identical(int_seq) - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_layer1(init_value): - intervals_to_add = [(-2, 1), (3, 5), (1, 5), (-5, -3), (None, 0), (0, None)] - int_seq = Stairs(initial_value=init_value) - int_seq2 = Stairs(initial_value=init_value) - for start, end in intervals_to_add: - int_seq.layer(start, end) - starts, ends = list(zip(*intervals_to_add)) - starts = [{None: np.nan}.get(x, x) for x in starts] - ends = [{None: np.nan}.get(x, x) for x in ends] - int_seq2.layer(starts, ends) - assert int_seq.identical(int_seq2) - assert int_seq2.identical(int_seq) - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_layer2(init_value): - intervals_to_add = [(-2, 1, 1), (3, 5, 2), (1, 5, -1), (-5, -3, 3)] - int_seq = 
Stairs(initial_value=init_value) - int_seq2 = Stairs(initial_value=init_value) - for interval in intervals_to_add: - int_seq.layer(*interval) - starts, ends, values = list(zip(*intervals_to_add)) - int_seq2.layer(starts, ends, values) - assert int_seq.identical(int_seq2) - assert int_seq2.identical(int_seq) - - -def test_make_boolean(s2_fix): - int_seq = s2_fix - calc = int_seq.make_boolean() - expected = Stairs() - expected.layer(-2, 7, 1) - expected.layer(8, 10, 1) - assert calc.identical(expected), "Boolean calculation not what it should be" - assert expected.identical(calc), "Boolean calculation not what it should be" - - -def test_invert(s2_fix): - int_seq = s2_fix - calc = ~int_seq - expected = Stairs(initial_value=1) - expected.layer(-2, 7, -1) - expected.layer(8, 10, -1) - assert calc.identical(expected), "Invert calculation not what it should be" - assert expected.identical(calc), "Invert calculation not what it should be" - - -def test_and(s3_fix, s4_fix): - calc = s3_fix & s4_fix - expected = Stairs(initial_value=0) - expected.layer(-10, -9.5) - expected.layer(-7, -5) - expected.layer(-2, 0) - expected.layer(3.5, 6) - expected.layer(6.5, 7) - assert calc.identical(expected), "AND calculation not what it should be" - assert expected.identical(calc), "AND calculation not what it should be" - - -def test_or(s3_fix, s4_fix): - calc = s3_fix | s4_fix - expected = Stairs(initial_value=0) - expected.layer(-11, -7.5) - expected.layer(-7, 0.5) - expected.layer(1, 7) - expected.layer(8.5, 9) - expected.layer(9.5, 10) - assert calc.identical(expected), "OR calculation not what it should be" - assert expected.identical(calc), "OR calculation not what it should be" - - -def test_lt(s1_fix, s2_fix): - calc = s1_fix < s2_fix - expected = Stairs(initial_value=0) - expected.layer(-4, -2) - expected.layer(2, 2.5) - expected.layer(7, 10) - assert calc.identical(expected), "LT calculation not what it should be" - assert expected.identical(calc), "LT calculation not what it 
should be" - - -def test_gt(s1_fix, s2_fix): - calc = s1_fix > s2_fix - expected = Stairs(initial_value=0) - expected.layer(1, 2) - expected.layer(2.5, 7) - assert calc.identical(expected), "GT calculation not what it should be" - assert expected.identical(calc), "GT calculation not what it should be" - - -def test_le(s1_fix, s2_fix): - calc = s1_fix <= s2_fix - expected = Stairs(initial_value=1) - expected.layer(1, 2, -1) - expected.layer(2.5, 7, -1) - assert calc.identical(expected), "LE calculation not what it should be" - assert expected.identical(calc), "LE calculation not what it should be" - - -def test_ge(s1_fix, s2_fix): - calc = s1_fix >= s2_fix - expected = Stairs(initial_value=1) - expected.layer(-4, -2, -1) - expected.layer(2, 2.5, -1) - expected.layer(7, 10, -1) - assert calc.identical(expected), "GE calculation not what it should be" - assert expected.identical(calc), "GE calculation not what it should be" - - -def test_eq_1(s1_fix, s2_fix): - calc = s1_fix == s2_fix - expected = Stairs(initial_value=1) - expected.layer(-4, -2, -1) - expected.layer(1, 10, -1) - assert calc.identical(expected), "EQ calculation not what it should be" - assert expected.identical(calc), "EQ calculation not what it should be" - - -def test_eq_2(s1_fix, s2_fix): - calc = s1_fix == s2_fix - expected = Stairs(initial_value=1) - expected.layer(-4, -2, -1) - expected.layer(1, 10, -1) - assert calc.identical(expected), "EQ calculation not what it should be" - assert expected.identical(calc), "EQ calculation not what it should be" - - -def test_ne(s1_fix, s2_fix): - calc = s1_fix != s2_fix - expected = Stairs(initial_value=0) - expected.layer(-4, -2, 1) - expected.layer(1, 10, 1) - assert calc.identical(expected), "NOT EQUAL calculation not what it should be" - assert expected.identical(calc), "NOT EQUAL calculation not what it should be" - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_base_integral_0_2(init_value): - int_seq = 
Stairs(initial_value=init_value) - assert int_seq.agg("integral", (0, 2)) == 2 * init_value - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_base_integral_neg1_1(init_value): - int_seq = Stairs(initial_value=init_value) - assert int_seq.agg("integral", (-1, 1)) == 2 * init_value - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_base_integral_neg2_0(init_value): - int_seq = Stairs(initial_value=init_value) - assert int_seq.agg("integral", (-2, 0)) == 2 * init_value - - -@pytest.mark.parametrize("init_value", [0, 1.25, -1.25, 2, -2]) -def test_base_integral_point5_1(init_value): - int_seq = Stairs(initial_value=init_value) - assert int_seq.agg("integral", (0.5, 1)) == 0.5 * init_value - - -def test_integral1(s1_fix, s2_fix): - assert s1_fix.integral() == -2.75 - assert s2_fix.integral() == -0.5 - - -def test_integral2(s1_fix, s2_fix): - assert s1_fix.agg("integral", (-1, 5.5)) == 3.5 - assert s2_fix.agg("integral", (-1, 5.5)) == -5 - - -def test_mean1(s1_fix, s2_fix): - assert abs(s1_fix.mean() - -0.19642857) < 0.000001 - assert abs(s2_fix.mean() - -0.04166666) < 0.000001 - - -def test_mean2(s1_fix, s2_fix): - assert abs(s1_fix.agg("mean", (2, 8)) - 1.125) < 0.000001 - assert abs(s2_fix.agg("mean", (2, 8)) - -0.45833333) < 0.000001 - - -def test_integral_0(): - assert Stairs(initial_value=0).layer(None, 0).integral() is np.nan - - -def test_mean_nan(): - assert Stairs(initial_value=0).layer(None, 0).mean() is np.nan - - -def test_to_dataframe(s1_fix): - s1_fix.to_frame() - - -@pytest.mark.parametrize( - "stairs_instance, bounds, cuts", - itertools.product( - [s1(), s2(), s3(), s4()], - [(3, 4), (0, 10), (-10, 30), (-5, -1)], - ["unit", (0, 2.5, 4, 4.5, 7)], - ), -) -def test_hist_left_closed(stairs_instance, bounds, cuts): - def make_expected_result(interval_index, lower, upper): - return pd.Series( - [ - ((stairs_instance >= i.left) * (stairs_instance < i.right)).agg( - "mean", (lower, upper) - ) - for i in 
interval_index - ], - index=interval_index, - dtype="float64", - ) - - hist = stairs_instance.clip(*bounds).hist(bins=cuts, stat="probability") - expected = make_expected_result(hist.index, *bounds) - assert (hist.apply(round, 5) == expected.apply(round, 5)).all(), f"{bounds}, {cuts}" - - -@pytest.mark.parametrize( - "stairs_instance, bounds, cuts", - itertools.product( - [s1(), s2(), s3(), s4()], - [(3, 4), (0, 10), (-10, 30), (-5, -1)], - ["unit", (0, 2.5, 4, 4.5, 7)], - ), -) -def test_hist_right_closed(stairs_instance, bounds, cuts): - def make_expected_result(interval_index, lower, upper): - return pd.Series( - [ - ((stairs_instance > i.left) * (stairs_instance <= i.right)).agg( - "mean", (lower, upper) - ) - for i in interval_index - ], - index=interval_index, - dtype="float64", - ) - - hist = stairs_instance.clip(*bounds).hist( - bins=cuts, closed="right", stat="probability" - ) - expected = make_expected_result(hist.index, *bounds) - assert (hist.apply(round, 5) == expected.apply(round, 5)).all(), f"{bounds}, {cuts}" - - -@pytest.mark.parametrize( - "stairs_instance, bounds, closed", - itertools.product( - [s1(), s2(), s3(), s4()], - [(3, 4), (0, 10), (-10, 30), (-5, -1)], - ["left", "right"], - ), -) -def test_hist_default_bins(stairs_instance, bounds, closed): - # really testing the default binning process here - hist = stairs_instance.clip(*bounds).hist(closed=closed, stat="probability") - assert abs(hist.sum() - 1) < 0.000001 - - -# np.var(st1(np.linspace(-4,10, 10000000))) = 2.501594244387741 -# np.var(st1(np.linspace(-5,10, 10000000))) = 2.3372686165530117 -# np.var(st1(np.linspace(1,12, 10000000))) = 1.5433884747933315 - - -@pytest.mark.parametrize( - "bounds, expected", - [ - ((), 2.501594244387741), - (((-5, 10),), 2.3372686165530117), - (((1, 12),), 1.5433884747933315), - ], -) -def test_s1_var(bounds, expected): - assert np.isclose(s1().agg("var", *bounds), expected, atol=0.0001) - - -# np.var(st2(np.linspace(-2, 10, 10000000))) = 
7.024303861110942 -# np.var(st2(np.linspace(-3, 7.5, 10000000))) = 2.2678568437499633 -# np.var(st2(np.linspace(0, 14, 10000000))) = 5.538902194132663 - - -@pytest.mark.parametrize( - "bounds, expected", - [ - ((), 7.024303861110942), - (((-3, 7.5),), 2.2678568437499633), - (((0, 14),), 5.538902194132663), - ], -) -def test_s2_var(bounds, expected): - assert np.isclose(s2().agg("var", *bounds), expected, atol=0.0001) - - -# np.std(st1(np.linspace(-4,10, 10000000))) = 1.5816428940780978 -# np.std(st1(np.linspace(-5,10, 10000000))) = 1.528797568034358 -# np.std(st1(np.linspace(1,12, 10000000))) = 1.242331869829206 - - -@pytest.mark.parametrize( - "bounds, expected", - [ - ((), 1.5816428940780978), - (((-5, 10),), 1.528797568034358), - (((1, 12),), 1.242331869829206), - ], -) -def test_s1_std(bounds, expected): - assert np.isclose(s1().agg("std", *bounds), expected, atol=0.0001) - - -# np.std(st2(np.linspace(-2, 10, 10000000))) = 2.650340329299417 -# np.std(st2(np.linspace(-3, 7.5, 10000000))) = 1.5059405179986238 -# np.std(st2(np.linspace(0, 14, 10000000))) = 2.3534872411238315 - - -@pytest.mark.parametrize( - "bounds, expected", - [ - ((), 2.650340329299417), - (((-3, 7.5),), 1.5059405179986238), - (((0, 14),), 2.3534872411238315), - ], -) -def test_s2_std(bounds, expected): - assert np.isclose(s2().agg("std", *bounds), expected, atol=0.0001) - - -# # np.cov(st1(pts[:-100000]), st1(pts[100000:]))[0,1] = 1.9386094481108465 -# # np.cov(st1(np.linspace(-4, 8, 12*100000 + 1)), st1(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = 1.1184896017794723 -# # np.cov(st1(np.linspace(-4, 8, 12*100000 + 1)), st1.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = 1.1184896017794723 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-4, 10), "lag": 1}, 1.9386094481108465), - ({"where": (-4, 10), "lag": 2}, 1.1184896017794723), - ({"where": (-4, 8), "lag": 2, "clip": "post"}, 1.1184896017794723), - ], -) -def test_s1_autocov(kwargs, expected): - assert 
np.isclose(s1().cov(s1(), **kwargs), expected, atol=0.00001) - - -# # np.cov(st2(np.linspace(-2, 9, 11*100000 + 1)), st2(np.linspace(-1, 10, 11*100000 + 1)))[0,1 = 3.1022721590913256 -# # np.cov(st2(np.linspace(0, 6, 12*100000 + 1)), st2(np.linspace(2, 8, 12*100000 + 1)))[0,1] = -0.7291746267294938 -# # np.cov(st2(np.linspace(0, 6, 12*100000 + 1)), st2.shift(-2)(np.linspace(0, 6, 12*100000 + 1)))[0,1] = -0.7291746267294938 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-2, 10), "lag": 1}, 3.1022721590913256), - ({"where": (0, 8), "lag": 2}, -0.7291746267294938), - ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.7291746267294938), - ], -) -def test_s2_autocov(kwargs, expected): - assert np.isclose(s2().cov(s2(), **kwargs), expected, atol=0.00001) - - -# # np.cov(st1(np.linspace(-2, 9, 11*100000 + 1)), st2(np.linspace(-1, 10, 11*100000 + 1)))[0,1 = -0.08677679611199672 -# # np.cov(st1(np.linspace(0, 6, 12*100000 + 1)), st2(np.linspace(2, 8, 12*100000 + 1)))[0,1] = -1.970493123547197 -# # np.cov(st1(np.linspace(0, 6, 12*100000 + 1)), st2.shift(-2)(np.linspace(0, 6, 12*100000 + 1)))[0,1] = -1.970493123547197 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-2, 10), "lag": 1}, -0.08677679611199672), - ({"where": (0, 8), "lag": 2}, -1.970493123547197), - ({"where": (0, 6), "lag": 2, "clip": "post"}, -1.970493123547197), - ], -) -def test_crosscov(kwargs, expected): - assert np.isclose(s1().cov(s2(), **kwargs), expected, atol=0.00001) - - -# # np.corrcoef(st1(pts[:-100000]), st1(pts[100000:]))[0,1] = 0.6927353407369307 -# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st1(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.2147502741669856 -# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st1.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.2147502741669856 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-2, 10), "lag": 1}, 0.6927353407369307), - ({"where": (0, 8), "lag": 2}, 
-0.2147502741669856), - ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.2147502741669856), - ], -) -def test_s1_autocorr(kwargs, expected): - assert np.isclose(s1().corr(s1(), **kwargs), expected, atol=0.00001) - - -# # np.corrcoef(st2(pts[:-100000]), st2(pts[100000:]))[0,1] = 0.5038199912440895 -# # np.corrcoef(st2(np.linspace(-4, 8, 12*100000 + 1)), st2(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.2419504099129966 -# # np.corrcoef(st2(np.linspace(-4, 8, 12*100000 + 1)), st2.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.2419504099129966 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-2, 10), "lag": 1}, 0.5038199912440895), - ({"where": (0, 8), "lag": 2}, -0.2419504099129966), - ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.2419504099129966), - ], -) -def test_s2_autocorr(kwargs, expected): - assert np.isclose(s2().corr(s2(), **kwargs), expected, atol=0.00001) - - -# # np.corrcoef(st1(pts[:-100000]), st2(pts[100000:]))[0,1] = -0.01966642657198049 -# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st2(np.linspace(-2, 10, 12*100000 + 1)))[0,1] = -0.7086484036832666 -# # np.corrcoef(st1(np.linspace(-4, 8, 12*100000 + 1)), st2.shift(-2)(np.linspace(-4, 8, 12*100000 + 1)))[0,1] = -0.7086484036832666 - - -@pytest.mark.parametrize( - "kwargs, expected", - [ - ({"where": (-2, 10), "lag": 1}, -0.01966642657198049), - ({"where": (0, 8), "lag": 2}, -0.7086484036832666), - ({"where": (0, 6), "lag": 2, "clip": "post"}, -0.7086484036832666), - ], -) -def test_crosscorr(kwargs, expected): - assert np.isclose(s1().corr(s2(), **kwargs), expected, atol=0.00001) - - -@pytest.mark.parametrize( - "kwargs, expected_index, expected_vals", - [ - ( - {"window": (-1, 1)}, - [-5, -3, 0, 2, 4, 5, 6, 7, 9, 11], - [0.0, -1.75, -1.75, 0.25, 2.75, 2.375, 0.75, -0.5, -0.5, 0.0], - ), - ( - {"window": (-2, 0)}, - [-4, -2, 1, 3, 5, 6, 7, 8, 10, 12], - [0.0, -1.75, -1.75, 0.25, 2.75, 2.375, 0.75, -0.5, -0.5, 0.0], - ), - ( - {"window": (-1, 1), "where": 
(0, 8)}, - [1, 2, 4, 5, 6, 7], - [-0.75, 0.25, 2.75, 2.375, 0.75, -0.5], - ), - ], -) -def test_s1_rolling_mean(s1_fix, kwargs, expected_index, expected_vals): - rm = s1_fix.rolling_mean(**kwargs) - assert list(rm.values) == expected_vals - assert list(rm.index) == expected_index - - -@pytest.mark.parametrize( - "closed, kwargs, expected_val", - [ - ( - "left", - {}, - -1.75, - ), - ( - "left", - {"where": (1, 6)}, - 0.25, - ), - ( - "right", - {"where": (1, 6), "closed": "left"}, - -1.75, - ), - ( - "left", - {"where": (1, 6), "closed": "right"}, - -0.5, - ), - ], -) -def test_s1_min(closed, kwargs, expected_val): - from staircase.core import stats - - assert stats.min(s1(closed=closed), **kwargs) == expected_val - - -@pytest.mark.parametrize( - "closed, kwargs, expected_val", - [ - ( - "left", - {}, - 2.75, - ), - ( - "left", - {"where": (-4, 1)}, - -1.75, - ), - ( - "right", - {"where": (-4, 1), "closed": "left"}, - 0.0, - ), - ( - "left", - {"where": (-4, 1), "closed": "right"}, - 0.25, - ), - ], -) -def test_s1_max(closed, kwargs, expected_val): - from staircase.core import stats - - assert stats.max(s1(closed=closed), **kwargs) == expected_val - - -@pytest.mark.parametrize( - "closed, kwargs, expected_val", - [ - ( - "left", - {}, - np.array([-1.75, -0.5, 0.0, 0.25, 2.0, 2.75]), - ), - ( - "left", - {"where": (-4, 10)}, - np.array([-1.75, -0.5, 0.25, 2.0, 2.75]), - ), - ( - "left", - {"where": (1, 6)}, - np.array([0.25, 2.0, 2.75]), - ), - ( - "right", - {"where": (1, 6), "closed": "left"}, - np.array([-1.75, 0.25, 2.0, 2.75]), - ), - ( - "left", - {"where": (1, 6), "closed": "right"}, - np.array([-0.5, 0.25, 2.0, 2.75]), - ), - ], -) -def test_s1_values_in_range(closed, kwargs, expected_val): - assert np.array_equal(s1(closed=closed).values_in_range(**kwargs), expected_val) - - -@pytest.mark.parametrize( - "x, kwargs, expected_val", - [ - ( - [-4, -2, 1, 3], - {"side": "right"}, - np.array([-1.75, -1.75, 0.25, 2.75]), - ), - ( - [-4, -2, 1, 3], - {"side": 
"right"}, - np.array([-1.75, -1.75, 0.25, 2.75]), - ), - ( - [-4, -2, 1, 3], - {"side": "left"}, - np.array([0.0, -1.75, -1.75, 0.25]), - ), - ], -) -def test_s1_sample(s1_fix, x, kwargs, expected_val): - assert np.array_equal(s1_fix.limit(x, **kwargs), expected_val) - - -@pytest.mark.parametrize( - "x, kwargs, expected_val", - [ - ( - [-4, -2, 1, 3], - {"aggfunc": "mean", "window": (-0.5, 0.5)}, - np.array([-0.875, -1.75, -0.75, 1.5]), - ), - ( - [-4, -2, 1, 3], - {"aggfunc": "mean", "window": (-1, 0)}, - np.array([0.0, -1.75, -1.75, 0.25]), - ), - ( - [-4, -2, 1, 3], - {"aggfunc": "mean", "window": (0, 1)}, - np.array([-1.75, -1.75, 0.25, 2.75]), - ), - ], -) -def test_s1_agg_mean(s1_fix, x, kwargs, expected_val): - window = kwargs["window"] - x = np.array(x) - ii = pd.IntervalIndex.from_arrays(x + window[0], x + window[1]) - assert np.array_equal(s1_fix.slice(ii).mean().values, expected_val) - - -@pytest.mark.parametrize( - "closed, x, kwargs, expected_val", - [ - ( - "left", - [0, 2, 7], - {"aggfunc": "max", "window": (-1, 1)}, - np.array([-1.75, 0.25, -0.5]), - ), - ( - "right", - [0, 2, 7], - {"aggfunc": "max", "window": (-1, 1), "closed": "left"}, - np.array([-1.75, 0.25, 2.0]), - ), - ( - "left", - [0, 2, 7], - {"aggfunc": "max", "window": (-1, 1), "closed": "right"}, - np.array([0.25, 2.75, -0.5]), - ), - ( - "right", - [0, 2, 7], - {"aggfunc": "max", "window": (-1, 1), "closed": "right"}, - np.array([-1.75, 0.25, -0.5]), - ), - ], -) -def test_s1_agg_max(closed, x, kwargs, expected_val): - window = kwargs["window"] - x = np.array(x) - ii = pd.IntervalIndex.from_arrays( - x + window[0], x + window[1], closed=kwargs.get("closed", "left") - ) - assert np.array_equal(s1(closed=closed).slice(ii).max().values, expected_val) - - -@pytest.mark.parametrize( - "kwargs", - [ - {}, - {"arrows": True, "style": "hlines"}, - {"arrows": False, "style": "hlines"}, - {"arrows": True, "style": "step"}, - {"arrows": False, "style": "step"}, - ], -) -def test_plot(s1_fix, 
kwargs): - s1_fix.plot(**kwargs) - - -def test_plot_trivial_1(): - Stairs().plot() - - -def test_plot_trivial_2(): - Stairs(initial_value=np.nan).plot() - - -def test_plot_ecdf(s1_fix): - s1_fix.plot.ecdf() - - -def test_plot_bad_backend(s1_fix): - with pytest.raises(ValueError): - s1_fix.plot(backend="") - - -def test_plot_ecdf_bad_backend(s1_fix): - with pytest.raises(ValueError): - s1_fix.plot.ecdf(backend="") - - -def test_add_1(s1_fix, s2_fix): - assert pd.Series.equals( - (s1_fix + s2_fix).step_changes, - pd.Series( - { - -4: -1.75, - -2: -1.75, - 1: 1.25, - 2: 4.5, - 2.5: -2.5, - 3: 2.5, - 4: 2.5, - 5: -5.25, - 6: -2.5, - 7: 2.5, - 8: 5, - 10: -4.5, - } - ), - ) - - -def test_add_2(s1_fix): - s = s1_fix + 3 - assert s.initial_value == 3 - assert pd.Series.equals( - s.step_changes, - s1_fix.step_changes, - ) - - -def test_add_3(s1_fix): - s = 3 + s1_fix - assert s.initial_value == 3 - assert pd.Series.equals( - s.step_changes, - s1_fix.step_changes, - ) - - -def test_sub_1(s1_fix, s2_fix): - assert pd.Series.equals( - (s1_fix - s2_fix).step_values, - pd.Series( - { - -4.0: -1.75, - -2.0: 0.0, - 1.0: 2.75, - 2.0: -1.75, - 2.5: 0.75, - 3.0: 3.25, - 4.0: 0.75, - 5.0: 4.5, - 6.0: 2.0, - 7.0: -0.5, - 8.0: -5.5, - 10.0: 0.0, - } - ), - ) - - -def test_sub_2(s1_fix): - s = s1_fix - 3 - assert s.initial_value == -3 - assert pd.Series.equals( - s.step_changes, - s1_fix.step_changes, - ) - - -def test_sub_3(s1_fix): - s = 3 - s1_fix - assert s.initial_value == 3 - assert pd.Series.equals( - s.step_changes, - -(s1_fix.step_changes), - ) - - -def test_divide(s1_fix, s2_fix): - assert pd.Series.equals( - (s1_fix / (s2_fix + 1)).step_changes, - pd.Series( - { - -4: -1.75, - -2: 4.083333333333334, - 1: -2.5, - 2: 0.25, - 2.5: 0.4166666666666667, - 3: 5.0, - 4: -4.583333333333333, - 5: -2.25, - 6: 1.6666666666666665, - 7: -0.8333333333333333, - 8: 0.4166666666666667, - 10: 0.08333333333333333, - } - ), - ) - - -def test_divide_scalar(s1_fix): - assert pd.Series.equals( - 
(s1_fix / 0.5).step_changes, - pd.Series( - { - -4: -3.5, - 1: 4.0, - 3: 5.0, - 5: -1.5, - 6: -5.0, - 10: 1.0, - } - ), - ) - - -def test_scalar_divide(): - s = Stairs().layer([1, 2, 5], [3, 4, 7], [1, -1, 2]) - assert pd.Series.equals( - (2 / s).step_values, - pd.Series( - { - 1: 2.0, - 2: np.nan, - 3: -2.0, - 4: np.nan, - 5: 1.0, - 7: np.nan, - } - ), - ) - - -def test_multiply(s1_fix, s2_fix): - assert pd.Series.equals( - (s1_fix * s2_fix).step_changes, - pd.Series( - { - -2: 3.0625, - 1: -3.6875, - 2: 1.125, - 2.5: -0.625, - 3: -1.25, - 4: 6.875, - 5: -10.5, - 6: 6.25, - 7: -1.25, - 8: -2.5, - 10: 2.5, - } - ), - ) - - -def test_multiply_scalar(s1_fix): - assert pd.Series.equals( - (s1_fix * 3).step_changes, - pd.Series( - { - -4: -5.25, - 1: 6.0, - 3: 7.5, - 5: -2.25, - 6: -7.5, - 10: 1.5, - } - ), - ) - - -def test_multiply_scalar_2(s1_fix): - assert pd.Series.equals( - (3 * s1_fix).step_changes, - pd.Series( - { - -4: -5.25, - 1: 6.0, - 3: 7.5, - 5: -2.25, - 6: -7.5, - 10: 1.5, - } - ), - ) - - -def test_eq_3(): - assert Stairs(initial_value=3) == 3 - - -def test_ne_3(s1_fix): - assert s1_fix != 3 - - -def test_diff(s1_fix): - assert pd.Series.equals( - s1_fix.diff(1).step_changes, - pd.Series( - { - -4: -1.75, - -3: 1.75, - 1: 2, - 2: -2, - 3: 2.5, - 4: -2.5, - 5: -0.75, - 6: -1.75, - 7: 2.5, - 10: 0.5, - 11: -0.5, - } - ), - ) - - -def test_str(s1_fix): - assert str(s1_fix) is not None - assert str(s1_fix) != "" - - -def test_repr(s1_fix): - assert repr(s1_fix) is not None - assert repr(s1_fix) != "" - - -def test_make_test_data(): - assert type(test_data.make_test_data()) == pd.DataFrame - - -def test_logical_and_scalar_1(s3_fix): - assert (s3_fix & 1).identical(s3_fix) - - -def test_logical_rand_scalar_1(s3_fix): - assert (1 & s3_fix).identical(s3_fix) - - -def test_logical_and_scalar_2(s3_fix): - assert (s3_fix & 0).identical(0) - - -def test_logical_rand_scalar_2(s3_fix): - assert (0 & s3_fix).identical(0) - - -def test_logical_and_scalar_3(s3_fix): - 
assert (s3_fix & np.nan).identical(np.nan) - - -def test_logical_rand_scalar_3(s3_fix): - assert (np.nan & s3_fix).identical(np.nan) - - -def test_logical_or_scalar_1(s3_fix): - assert (s3_fix | 1).identical(1) - - -def test_logical_ror_scalar_1(s3_fix): - assert (1 | s3_fix).identical(1) - - -def test_logical_or_scalar_2(s3_fix): - assert (s3_fix | 0).identical(s3_fix) - - -def test_logical_ror_scalar_2(s3_fix): - assert (0 | s3_fix).identical(s3_fix) - - -def test_logical_or_scalar_3(s3_fix): - assert (s3_fix | np.nan).identical(np.nan) - - -def test_logical_ror_scalar_3(s3_fix): - assert (np.nan | s3_fix).identical(np.nan) - - -def test_logical_xor_scalar_1(s3_fix): - assert (s3_fix ^ 1).identical(~s3_fix) - - -def test_logical_rxor_scalar_1(s3_fix): - assert (1 ^ s3_fix).identical(~s3_fix) - - -def test_logical_xor_scalar_2(s3_fix): - assert (s3_fix ^ 0).identical(s3_fix) - - -def test_logical_rxor_scalar_2(s3_fix): - assert (0 ^ s3_fix).identical(s3_fix) - - -def test_logical_xor_scalar_3(s3_fix): - assert (s3_fix ^ np.nan).identical(np.nan) - - -def test_logical_rxor_scalar_3(s3_fix): - assert (np.nan ^ s3_fix).identical(np.nan) - - -def test_slicing_mean(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).mean(), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -0.75, - pd.Interval(2, 4, closed="left"): 1.5, - pd.Interval(4, 6, closed="left"): 2.375, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_max(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).max(), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): 0.25, - pd.Interval(2, 4, closed="left"): 2.75, - pd.Interval(4, 6, closed="left"): 2.75, - pd.Interval(6, 
8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_min(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).min(), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -1.75, - pd.Interval(2, 4, closed="left"): 0.25, - pd.Interval(4, 6, closed="left"): 2.0, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_mode(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).mode(), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -1.75, - pd.Interval(2, 4, closed="left"): 0.25, - pd.Interval(4, 6, closed="left"): 2.0, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_median(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).median(), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -0.75, - pd.Interval(2, 4, closed="left"): 1.5, - pd.Interval(4, 6, closed="left"): 2.375, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_agg_min(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).agg("min")["min"], - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -1.75, - pd.Interval(2, 4, closed="left"): 0.25, - pd.Interval(4, 6, closed="left"): 2.0, - pd.Interval(6, 8, closed="left"): -0.5, - 
pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_apply_min(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(-4, 11, 2)).apply(Stairs.min), - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -1.75, - pd.Interval(2, 4, closed="left"): 0.25, - pd.Interval(4, 6, closed="left"): 2.0, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_agg_min_max(s1_fix): - result = s1_fix.slice(range(-4, 11, 2)).agg(["min", "max"]) - pd.testing.assert_series_equal( - result["min"], - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): -1.75, - pd.Interval(2, 4, closed="left"): 0.25, - pd.Interval(4, 6, closed="left"): 2.0, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - pd.testing.assert_series_equal( - result["max"], - pd.Series( - { - pd.Interval(-4, -2, closed="left"): -1.75, - pd.Interval(-2, 0, closed="left"): -1.75, - pd.Interval(0, 2, closed="left"): 0.25, - pd.Interval(2, 4, closed="left"): 2.75, - pd.Interval(4, 6, closed="left"): 2.75, - pd.Interval(6, 8, closed="left"): -0.5, - pd.Interval(8, 10, closed="left"): -0.5, - } - ), - check_names=False, - check_index_type=False, - ) - - -def test_slicing_resample_mean(s1_fix): - pd.testing.assert_series_equal( - s1_fix.slice(range(0, 7, 2)).resample("mean").step_values, - pd.Series({-4: -1.75, 0: -0.75, 2: 1.5, 4: 2.375, 6: -0.5, 10: 0.0}), - check_names=False, - check_index_type=False, - ) - - -def test_layering_index(s1_fix): - result = Stairs( - start=pd.Index([1, -4, 3, 6, 7]), - end=pd.Index([10, 5, 5, 7, 10]), - value=pd.Index([2, -1.75, 2.5, -2.5, 
-2.5]), - ) - assert result.identical(s1_fix) - - -def test_layering_frame(s1_fix): - df = pd.DataFrame( - { - "start": [1, -4, 3, 6, 7], - "end": [10, 5, 5, 7, 10], - "value": [2, -1.75, 2.5, -2.5, -2.5], - } - ) - assert Stairs(df, "start", "end", "value").identical(s1_fix) - - -def test_layering_trivial_1(s1_fix): - assert s1_fix.copy().layer(1, 1).identical(s1_fix) - - -def test_pipe(s1_fix): - def is_stairs(s): - return isinstance(s, Stairs) - - assert s1().pipe(is_stairs) - - -def test_value_sums(s1_fix): - pd.testing.assert_series_equal( - s1_fix.value_sums(), - pd.Series({-1.75: 5, -0.5: 4, 0.25: 2, 2.0: 1, 2.75: 2}), - check_names=False, - check_index_type=False, - ) - - -def test_step_changes(s1_fix): - pd.testing.assert_series_equal( - s1_fix.step_changes, - pd.Series({-4: -1.75, 1: 2.0, 3: 2.5, 5: -0.75, 6: -2.5, 10: 0.5}), - check_names=False, - check_index_type=False, - ) - - -def test_step_values(s1_fix): - pd.testing.assert_series_equal( - s1_fix.step_values, - pd.Series({-4: -1.75, 1: 0.25, 3: 2.75, 5: 2.0, 6: -0.5, 10: 0.0}), - check_names=False, - check_index_type=False, - ) - - -def test_step_points(s1_fix): - assert list(s1_fix.step_points) == [-4, 1, 3, 5, 6, 10] - - -def test_step_changes_stepless(): - pd.testing.assert_series_equal( - Stairs().step_changes, - pd.Series([], dtype="float64"), - check_names=False, - check_index_type=False, - ) - - -def test_step_values_stepless(): - pd.testing.assert_series_equal( - Stairs().step_values, - pd.Series([], dtype="float64"), - check_names=False, - check_index_type=False, - ) - - -def test_step_points_stepless(): - assert list(Stairs().step_points) == [] - - -def test_negate(s1_fix): - pd.testing.assert_series_equal( - (-s1_fix).step_values, - pd.Series({-4: 1.75, 1: -0.25, 3: -2.75, 5: -2.0, 6: 0.5, 10: 0.0}), - check_names=False, - check_index_type=False, - ) - - -def test_hist_frequency(s1_fix): - index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left") - pd.testing.assert_series_equal( 
- s1_fix.hist(bins=[-2, 0, 2, 3], stat="frequency"), - pd.Series([4.5, 1, 3], index=index), - check_names=False, - check_index_type=False, - ) - - -def test_hist_density(s1_fix): - index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left") - pd.testing.assert_series_equal( - s1_fix.hist(bins=[-2, 0, 2, 3], stat="density"), - pd.Series([0.36, 0.08, 0.12], index=index), - check_names=False, - check_index_type=False, - ) - - -def test_hist_probability(s1_fix): - index = pd.IntervalIndex.from_breaks([-2, 0, 2, 3], closed="left") - pd.testing.assert_series_equal( - s1_fix.hist(bins=[-2, 0, 2, 3], stat="probability"), - pd.Series([0.642857, 0.142857, 0.214286], index=index), - check_names=False, - check_index_type=False, - ) - - -def test_quantiles(s1_fix): - assert (s1_fix.quantiles(4) == np.array([-1.75, -0.5, 0.25])).all() - - -def test_fractile(s1_fix): - assert list(map(s1().fractile, (0.25, 0.5, 0.75))) == [ - -1.75, - -0.5, - 0.25, - ]