Skip to content

Commit

Permalink
Fix upcast issues and more tests for different chunks/blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Mar 22, 2024
1 parent d6e08ba commit c9cce06
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 19 deletions.
25 changes: 17 additions & 8 deletions blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,7 @@ def evaluate_chunks(expression: str, operands: dict, shape, dtype, **kwargs) ->
The output array.
"""
operand = operands["o0"]
chunks = operand.chunks
blocks = operand.blocks
# Due to padding, it is critical to have the same chunks and blocks as the operands
out = blosc2.empty(shape, chunks=chunks, blocks=blocks, dtype=dtype, **kwargs)
out = None
for info in operands["o0"].iterchunks_info():
# Iterate over the operands and get the chunks
chunk_operands = {}
Expand All @@ -335,9 +332,13 @@ def evaluate_chunks(expression: str, operands: dict, shape, dtype, **kwargs) ->
chunk_operands[key] = npbuff
# Evaluate the expression using chunks of operands
result = ne.evaluate(expression, chunk_operands)
# Sometimes, some constants make the result to be of a different dtype
if result.dtype != out.dtype:
result = result.astype(out.dtype)
# Sometimes NumPy/numexpr upcast the result
if result.dtype != dtype:
# Make the output array be of the same dtype as the numexpr result
dtype = result.dtype
if out is None:
# Due to padding, it is critical to have the same chunks and blocks as the operands
out = blosc2.empty(shape, chunks=operand.chunks, blocks=operand.blocks, dtype=dtype, **kwargs)
out.schunk.update_data(info.nchunk, result, copy=False)
return out

Expand Down Expand Up @@ -371,9 +372,9 @@ def evaluate_slices(
:ref:`NDArray`
The output array.
"""
out = blosc2.empty(shape, dtype=dtype, **kwargs)
operand = operands["o0"]
chunks = operand.chunks
out = None
for info in operand.iterchunks_info():
# Iterate over the operands and get the chunks
chunk_operands = {}
Expand All @@ -391,8 +392,16 @@ def evaluate_slices(
# Get the slice of each operand
for key, value in operands.items():
chunk_operands[key] = value[slice_]

# Evaluate the expression using chunks of operands
result = ne.evaluate(expression, chunk_operands)
# Sometimes NumPy/numexpr upcast the result
if result.dtype != dtype:
# Make the output array be of the same dtype as the numexpr result
dtype = result.dtype
if out is None:
# Use the same chunks as the first operand (automatic chunks would have worked too)
out = blosc2.empty(shape, chunks=chunks, dtype=dtype, **kwargs)
out[slice_] = result
return out

Expand Down
50 changes: 39 additions & 11 deletions tests/ndarray/test_lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import blosc2

NITEMS_SMALL = 1_000
NITEMS = 1_000_000
NITEMS = 100_000


@pytest.fixture(params=[np.float32, np.float64])
Expand All @@ -26,20 +26,48 @@ def shape_fixture(request):
return request.param


# Parametrized over (same_chunks, same_blocks): whether the two groups of
# operands built by array_fixture share the same chunk and block shapes.
@pytest.fixture(params=[(True, True), (True, False), (False, True), (False, False)])
def chunks_blocks_fixture(request):
return request.param


@pytest.fixture
def array_fixture(dtype_fixture, shape_fixture):
def array_fixture(dtype_fixture, shape_fixture, chunks_blocks_fixture):
nelems = np.prod(shape_fixture)
na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture)
# For generality, use partitions with padding
chunks = [c // 11 for c in na1.shape]
blocks = [c // 71 for c in na1.shape]
chunks = chunks1 = blocks = blocks1 = None
same_chunks_blocks = chunks_blocks_fixture[0] and chunks_blocks_fixture[1]
same_chunks = chunks_blocks_fixture[0]
same_blocks = chunks_blocks_fixture[1]
if same_chunks_blocks:
# For full generality, use partitions with padding
chunks = chunks1 = [c // 11 for c in na1.shape]
blocks = blocks1 = [c // 71 for c in na1.shape]
elif same_chunks:
chunks = [c // 11 for c in na1.shape]
blocks = [c // 71 for c in na1.shape]
chunks1 = [c // 11 for c in na1.shape]
blocks1 = [c // 51 for c in na1.shape]
elif same_blocks:
chunks = [c // 11 for c in na1.shape]
blocks = [c // 71 for c in na1.shape]
chunks1 = [c // 23 for c in na1.shape]
blocks1 = [c // 71 for c in na1.shape]
else:
# Different chunks and blocks
chunks = [c // 17 for c in na1.shape]
blocks = [c // 19 for c in na1.shape]
chunks1 = [c // 23 for c in na1.shape]
blocks1 = [c // 29 for c in na1.shape]
a1 = blosc2.asarray(na1, chunks=chunks, blocks=blocks)
na2 = np.copy(na1)
a2 = blosc2.asarray(na2)
a2 = blosc2.asarray(na2, chunks=chunks, blocks=blocks)
na3 = np.copy(na1)
a3 = blosc2.asarray(na3)
# Let other operands have chunks1 and blocks1
a3 = blosc2.asarray(na3, chunks=chunks1, blocks=blocks1)
na4 = np.copy(na1)
a4 = blosc2.asarray(na4)
a4 = blosc2.asarray(na4, chunks=chunks1, blocks=blocks1)
return a1, a2, a3, a4, na1, na2, na3, na4


Expand Down Expand Up @@ -92,10 +120,10 @@ def test_complex_getitem_slice(array_fixture):
# TODO: This stopped working when we added chunks and blocks with padding
def test_expression_with_constants(array_fixture):
a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
# Test with operands with same chunks and blocks
expr = a1 + 2 - a3 * 3.14
nres = ne.evaluate("na1 + 2 - na3 * 3.14")
res = expr.evaluate()
np.testing.assert_allclose(res[:], nres)
np.testing.assert_allclose(expr[:], nres)


# TODO: extend this to more expressions, functions and types
# TODO: extend this to more expressions, but especially functions and dtypes

0 comments on commit c9cce06

Please sign in to comment.