
Some adjustments for cache sizes for Apple Silicon
FrancescAlted committed Jan 4, 2025
1 parent 06350a0 commit 3616ded
Showing 3 changed files with 27 additions and 22 deletions.
bench/ndarray/cengine-expr.py (18 changes: 10 additions & 8 deletions)
@@ -3,16 +3,18 @@
 import numpy as np
 
 # Create some data operands
-N = 20_000 # working size of 6 GB
+N = 20_000 # working size of 3 GB
+dtype = "float32"
 chunks = (100, N)
 blocks = (1, N)
+chunks, blocks= None, None
 cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4)
 
 t0 = time()
-# a = blosc2.linspace(0, 1, N * N, dtype="float32", shape=(N, N), cparams=cparams)
-a = blosc2.linspace(0, 1, N * N, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
-b = blosc2.linspace(1, 2, N * N, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
-c = blosc2.linspace(-10, 10, N, cparams=cparams) # broadcasting is supported
+a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
+b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
+#c = blosc2.linspace(-10, 10, N, dtype=dtype, cparams=cparams) # broadcasting is supported
+c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams)
 print("Time to create data: ", time() - t0)
 print("a.chunks, a.blocks, a.schunk.cratio: ", a.chunks, a.blocks, a.schunk.cratio)

@@ -21,13 +23,13 @@
 expr = ((a ** 3 + blosc2.sin(a * 2)) < c) & (b > 0)
 print(f"Time to create expression: {time() - t0:.5f}")
 
-# Evaluate while reducing (yep, reductions are in) along axis 1
+# Compute while reducing (yep, reductions are in) along axis 1
 t0 = time()
 out = expr[:]
 t1 = time() - t0
 print(f"Time to compute with Blosc2: {t1:.5f}")
 
-# Evaluate using NumPy operands
+# Compute using NumPy operands
 na, nb, nc = a[:], b[:], c[:]
 
 @blosc2.cengine
@@ -39,7 +41,7 @@ def compute_expression(na, nb, nc):
 t1 = time() - t0
 print(f"Time to compute with NumPy operands and Blosc2 engine: {t1:.5f}")
 
-# Evaluate using NumPy compute engine
+# Compute using NumPy compute engine
 t0 = time()
 nout = ((na ** 3 + np.sin(na * 2)) < nc) & (nb > 0)
 t2 = time() - t0
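For orientation, here is a condensed, hedged sketch of the pattern cengine-expr.py times, scaled down so it runs in seconds. It reuses only names that appear in the diff above (blosc2.linspace, blosc2.sin, blosc2.CParams, blosc2.Codec.LZ4); the value N = 2_000 is illustrative, and chunks/blocks are left unset so the cache-size heuristics tuned in src/blosc2/core.py below take effect.

import blosc2

# Scaled-down variant of the benchmark above; the real script uses N = 20_000.
N = 2_000
dtype = "float32"
cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4)

# chunks/blocks are not passed, so the library picks them from the CPU cache sizes.
a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams)
b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams)
c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams)

# Building the expression is lazy; indexing with [:] triggers the computation.
expr = ((a ** 3 + blosc2.sin(a * 2)) < c) & (b > 0)
out = expr[:]

# Inspect the automatically chosen partitioning and the compression ratio.
print(a.chunks, a.blocks, a.schunk.cratio)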
bench/ndarray/cengine-reduc.py (18 changes: 10 additions & 8 deletions)
@@ -3,16 +3,18 @@
 import numpy as np
 
 # Create some data operands
-N = 20_000 # working size of 6 GB
+N = 20_000 # working size of 3 GB
+dtype = "float32"
 chunks = (100, N)
 blocks = (1, N)
+chunks, blocks= None, None
 cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4)
 
 t0 = time()
-# a = blosc2.linspace(0, 1, N * N, dtype="float32", shape=(N, N), cparams=cparams)
-a = blosc2.linspace(0, 1, N * N, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
-b = blosc2.linspace(1, 2, N * N, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
-c = blosc2.linspace(-10, 10, N, cparams=cparams) # broadcasting is supported
+a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
+b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams, chunks=chunks, blocks=blocks)
+c = blosc2.linspace(-10, 10, N, dtype=dtype, cparams=cparams) # broadcasting is supported
+#c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams)
 print("Time to create data: ", time() - t0)
 print("a.chunks, a.blocks, a.schunk.cratio: ", a.chunks, a.blocks, a.schunk.cratio)

@@ -21,13 +23,13 @@
 expr = ((a ** 3 + blosc2.sin(a * 2)) < c) & (b > 0)
 print(f"Time to create expression: {time() - t0:.5f}")
 
-# Evaluate while reducing (yep, reductions are in) along axis 1
+# Compute while reducing (yep, reductions are in) along axis 1
 t0 = time()
 out = blosc2.sum(expr, axis=1) # , cparams=cparams)
 t1 = time() - t0
 print(f"Time to compute with Blosc2: {t1:.5f}")
 
-# Evaluate using NumPy operands
+# Compute using NumPy operands
 na, nb, nc = a[:], b[:], c[:]
 
 @blosc2.cengine
@@ -42,7 +44,7 @@ def compute_expression(na, nb, nc):
 def compute_expression_numpy(na, nb, nc):
     return np.sum(((na ** 3 + np.sin(na * 2)) < nc) & (nb > 0), axis=1)
 
-# Evaluate using NumPy compute engine
+# Compute using NumPy compute engine
 t0 = time()
 nout = compute_expression_numpy(na, nb, nc)
 t2 = time() - t0
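Similarly, a hedged sketch of what cengine-reduc.py exercises: the same lazy expression reduced along axis 1, once directly with blosc2.sum and once through the @blosc2.cengine decorator applied to plain NumPy code. Everything below comes from the hunks above except the scaled-down N and the assumption that the decorated compute_expression mirrors compute_expression_numpy.

import blosc2
import numpy as np

N = 2_000  # illustrative; the benchmark uses N = 20_000
dtype = "float32"
a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N))
b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N))
c = blosc2.linspace(-10, 10, N, dtype=dtype)  # 1-D operand; broadcasting is supported

# Reduction computed by Blosc2 directly on the lazy expression.
expr = ((a ** 3 + blosc2.sin(a * 2)) < c) & (b > 0)
out = blosc2.sum(expr, axis=1)

# Same computation on NumPy operands, routed through the Blosc2 compute engine.
na, nb, nc = a[:], b[:], c[:]

@blosc2.cengine
def compute_expression(na, nb, nc):
    return np.sum(((na ** 3 + np.sin(na * 2)) < nc) & (nb > 0), axis=1)

nout = compute_expression(na, nb, nc)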
src/blosc2/core.py (13 changes: 7 additions & 6 deletions)
@@ -1253,10 +1253,11 @@ def get_chunksize(blocksize, l3_minimum=2**20, l3_maximum=2**26):
     if isinstance(l2_cache_size, int) and l2_cache_size > chunksize:
         chunksize = l2_cache_size
 
-    # When evaluating expressions, it is convenient to keep chunks for all operands in L3 cache,
-    # so let's divide by 4 (3 operands + result is a typical situation for moderately complex
-    # expressions)
-    chunksize //= 4
+    # When computing expressions on Intel arch, it is convenient to keep chunks for all operands
+    # in L3 cache, so let's divide by 4 (3 operands + result is a typical situation for moderately
+    # complex expressions)
+    if platform.machine() == "x86_64":
+        chunksize //= 4
 
     # Ensure a minimum size
     if chunksize < l3_minimum:
@@ -1407,8 +1408,8 @@ def compute_chunks_blocks( # noqa: C901
         # For modern Intel/AMD archs, experiments say to use half of the L2 cache size
         max_blocksize = blosc2.cpu_info["l2_cache_size"] // 2
     elif platform.system() == "Darwin" and "arm" in platform.machine():
-        # For Apple Silicon, experiments say to use half of the L1 cache size
-        max_blocksize = blosc2.cpu_info["l1_data_cache_size"] // 2
+        # For Apple Silicon, experiments say we can use the full L1 data cache size
+        max_blocksize = blosc2.cpu_info["l1_data_cache_size"]
     if "clevel" in cparams and cparams["clevel"] == 0:
         # Experiments show that, when no compression is used, it is not a good idea
         # to exceed half of private cache for the blocksize because speed suffers
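To make the intent of the two core.py hunks concrete, here is a hedged, standalone sketch of the heuristics they tune. It is not the actual blosc2 implementation: the function names, the chunksize argument, and the final clamping are illustrative, and it assumes blosc2.cpu_info behaves like a dict with the keys used in the diff; only the x86_64 divide-by-4 rule and the Apple Silicon L1 / Intel L2 block-size rules come from the commit.

import platform

import blosc2


def sketch_chunksize(chunksize, l3_minimum=2**20, l3_maximum=2**26):
    # Grow the chunk to at least the L2 cache size, as in the hunk above.
    l2 = blosc2.cpu_info.get("l2_cache_size")
    if isinstance(l2, int) and l2 > chunksize:
        chunksize = l2
    # Only on Intel/AMD: leave room in L3 for 3 operands plus the result.
    if platform.machine() == "x86_64":
        chunksize //= 4
    # Keep the result within the configured bounds (the clamping is assumed).
    return min(max(chunksize, l3_minimum), l3_maximum)


def sketch_max_blocksize():
    if platform.system() == "Darwin" and "arm" in platform.machine():
        # Apple Silicon: the full L1 data cache works well per this commit.
        return blosc2.cpu_info["l1_data_cache_size"]
    # Modern Intel/AMD: half of the L2 cache.
    return blosc2.cpu_info["l2_cache_size"] // 2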
