diff --git a/blosc/b2nd-private.h b/blosc/b2nd-private.h
new file mode 100644
index 00000000..be69acfc
--- /dev/null
+++ b/blosc/b2nd-private.h
@@ -0,0 +1,40 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Copyright (c) 2021  The Blosc Development Team
+  https://blosc.org
+  License: BSD 3-Clause (see LICENSE.txt)
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+
+#ifndef BLOSC_B2ND_PRIVATE_H
+#define BLOSC_B2ND_PRIVATE_H
+
+#include "b2nd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/*********************************************************************
+
+  Functions meant to be used internally.
+
+*********************************************************************/
+
+/**
+ * @brief Get the chunk indexes needed to get the slice.
+ *
+ * @param array The b2nd array.
+ * @param start The coordinates where the slice will begin.
+ * @param stop The coordinates where the slice will end.
+ * @param chunks_idx The pointer to the buffer where the indexes of the chunks will be written.
+ *
+ * @return The number of chunks needed to get the slice. If some problem is
+ * detected, a negative code is returned instead.
+ */
+int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx);
+
+#endif /* BLOSC_B2ND_PRIVATE_H */
diff --git a/blosc/b2nd.c b/blosc/b2nd.c
index ed367cc8..74e977c3 100644
--- a/blosc/b2nd.c
+++ b/blosc/b2nd.c
@@ -926,6 +926,114 @@ int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t
 }
 
 
+/* Collect the linear indexes of the chunks that intersect the slice [start, stop).
+ *
+ * On success, *chunks_idx receives a malloc'ed buffer that the caller must free and
+ * the number of indexes stored in it is returned. Otherwise a negative error code
+ * is returned. */
+int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx) {
+  BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(start, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(stop, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);
+
+  *chunks_idx = NULL;
+  int8_t ndim = array->ndim;
+
+  // 0-dim case
+  if (ndim == 0) {
+    int64_t *single = malloc(sizeof(int64_t));
+    BLOSC_ERROR_NULL(single, BLOSC2_ERROR_MEMORY_ALLOC);
+    single[0] = 0;
+    *chunks_idx = single;
+    return 1;
+  }
+
+  int64_t chunks_in_array[B2ND_MAX_DIM] = {0};
+  for (int i = 0; i < ndim; ++i) {
+    if (array->chunkshape[i] <= 0) {
+      // Avoid dividing by zero (and looping forever) below
+      BLOSC_TRACE_ERROR("The chunkshape must be positive in every dimension.");
+      return BLOSC2_ERROR_INVALID_PARAM;
+    }
+    chunks_in_array[i] = array->extshape[i] / array->chunkshape[i];
+  }
+
+  int64_t chunks_in_array_strides[B2ND_MAX_DIM];
+  chunks_in_array_strides[ndim - 1] = 1;
+  for (int i = ndim - 2; i >= 0; --i) {
+    chunks_in_array_strides[i] = chunks_in_array_strides[i + 1] * chunks_in_array[i + 1];
+  }
+
+  // Compute the box of chunks (in chunk coordinates) that may be touched by the slice
+  int64_t update_start[B2ND_MAX_DIM];
+  int64_t update_shape[B2ND_MAX_DIM];
+
+  int64_t update_nchunks = 1;
+  for (int i = 0; i < ndim; ++i) {
+    int64_t pos = 0;
+    while (pos <= start[i]) {
+      pos += array->chunkshape[i];
+    }
+    update_start[i] = pos / array->chunkshape[i] - 1;
+    while (pos < stop[i]) {
+      pos += array->chunkshape[i];
+    }
+    update_shape[i] = pos / array->chunkshape[i] - update_start[i];
+    update_nchunks *= update_shape[i];
+  }
+
+  // At most one index is stored per chunk of the box, so a buffer sized with the
+  // box can never be overrun, even when stop lies beyond the array shape
+  int64_t *ptr = malloc((size_t)update_nchunks * sizeof(int64_t));
+  BLOSC_ERROR_NULL(ptr, BLOSC2_ERROR_MEMORY_ALLOC);
+  int nchunks = 0;
+  for (int64_t update_nchunk = 0; update_nchunk < update_nchunks; ++update_nchunk) {
+    int64_t nchunk_ndim[B2ND_MAX_DIM] = {0};
+    blosc2_unidim_to_multidim(ndim, update_shape, update_nchunk, nchunk_ndim);
+    for (int i = 0; i < ndim; ++i) {
+      nchunk_ndim[i] += update_start[i];
+    }
+    int64_t nchunk;
+    blosc2_multidim_to_unidim(nchunk_ndim, ndim, chunks_in_array_strides, &nchunk);
+
+    // Check if the chunk is inside the slice domain
+    int64_t chunk_start[B2ND_MAX_DIM] = {0};
+    int64_t chunk_stop[B2ND_MAX_DIM] = {0};
+    for (int i = 0; i < ndim; ++i) {
+      chunk_start[i] = nchunk_ndim[i] * array->chunkshape[i];
+      chunk_stop[i] = chunk_start[i] + array->chunkshape[i];
+      if (chunk_stop[i] > array->shape[i]) {
+        chunk_stop[i] = array->shape[i];
+      }
+    }
+    bool chunk_empty = false;
+    for (int i = 0; i < ndim; ++i) {
+      chunk_empty |= (chunk_stop[i] <= start[i] || chunk_start[i] >= stop[i]);
+    }
+    if (chunk_empty) {
+      continue;
+    }
+
+    ptr[nchunks] = nchunk;
+    nchunks++;
+  }
+
+  // Give the unused tail back. realloc() is used instead of the BSD-only reallocf();
+  // a failed shrink is harmless (the buffer is still valid) and a zero-size request
+  // is skipped because its result is implementation-defined
+  if (nchunks > 0 && nchunks < update_nchunks) {
+    int64_t *shrunk = realloc(ptr, (size_t)nchunks * sizeof(int64_t));
+    if (shrunk != NULL) {
+      ptr = shrunk;
+    }
+  }
+  *chunks_idx = ptr;
+
+  return nchunks;
+}
+
+
 int b2nd_squeeze(b2nd_array_t *array) {
   BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER);
 
diff --git a/blosc/blosc2.c b/blosc/blosc2.c
index 5dea6bc7..9734c466 100644
--- a/blosc/blosc2.c
+++ b/blosc/blosc2.c
@@ -13,6 +13,8 @@
 #include "blosc-private.h"
 #include "../plugins/codecs/zfp/blosc2-zfp.h"
 #include "frame.h"
+#include "b2nd-private.h"
+#include "schunk-private.h"
 
 #if defined(USING_CMAKE)
   #include "config.h"
@@ -4656,3 +4658,29 @@ void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t
     *i += index[j] * strides[j];
   }
 }
+
+int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx) {
+  BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER);
+  // start and stop are dereferenced below for plain super-chunks
+  BLOSC_ERROR_NULL(start, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(stop, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);
+
+  // Without a b2nd metalayer (or a caterva one; we are meant to be backward compatible
+  // with it) this is a plain super-chunk, and start/stop point to single item indexes
+  if (blosc2_meta_exists(schunk, "b2nd") < 0 && blosc2_meta_exists(schunk, "caterva") < 0) {
+    return schunk_get_slice_nchunks(schunk, *start, *stop, chunks_idx);
+  }
+
+  b2nd_array_t *array;
+  int rc = b2nd_from_schunk(schunk, &array);
+  if (rc < 0) {
+    BLOSC_TRACE_ERROR("Could not get b2nd array from schunk.");
+    return rc;
+  }
+  rc = b2nd_get_slice_nchunks(array, start, stop, chunks_idx);
+  array->sc = NULL;  // Free only array struct
+  b2nd_free(array);
+
+  return rc;
+}
diff --git a/blosc/schunk-private.h b/blosc/schunk-private.h
new file mode 100644
index 00000000..51a5ae74
--- /dev/null
+++ b/blosc/schunk-private.h
@@ -0,0 +1,40 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Copyright (c) 2021  The Blosc Development Team
+  https://blosc.org
+  License: BSD 3-Clause (see LICENSE.txt)
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+
+#ifndef BLOSC_SCHUNK_PRIVATE_H
+#define BLOSC_SCHUNK_PRIVATE_H
+
+#include "b2nd.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/*********************************************************************
+
+  Functions meant to be used internally.
+
+*********************************************************************/
+
+/**
+ * @brief Get the chunk indexes needed to get the slice.
+ *
+ * @param schunk The super-chunk.
+ * @param start Index (0-based) where the slice begins.
+ * @param stop The first index (0-based) that is not in the selected slice.
+ * @param chunks_idx The pointer to the buffer where the indexes will be written.
+ *
+ *
+ * @return The number of chunks needed to get the slice. If some problem is
+ * detected, a negative code is returned instead.
+ */
+int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx);
+#endif /* BLOSC_SCHUNK_PRIVATE_H */
diff --git a/blosc/schunk.c b/blosc/schunk.c
index 20f0bf33..0cd1444b 100644
--- a/blosc/schunk.c
+++ b/blosc/schunk.c
@@ -1379,6 +1379,46 @@ int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t
 }
 
 
+/* Get the indexes of the chunks spanned by items [start, stop) of a plain super-chunk.
+ *
+ * On success, *chunks_idx receives a malloc'ed buffer that the caller must free and
+ * the number of indexes stored in it is returned. Otherwise a negative error code
+ * is returned. */
+int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx) {
+  BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER);
+  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);
+
+  *chunks_idx = NULL;
+  if (schunk->chunksize == 0) {
+    BLOSC_TRACE_ERROR("Cannot map a slice onto chunks of size 0.");
+    return BLOSC2_ERROR_INVALID_PARAM;
+  }
+
+  int64_t byte_start = start * schunk->typesize;
+  int64_t byte_stop = stop * schunk->typesize;
+  int64_t nchunk_start = byte_start / schunk->chunksize;
+  int64_t nchunk_stop = byte_stop / schunk->chunksize;
+  if (byte_stop % schunk->chunksize != 0) {
+    // The slice ends in the middle of a chunk, which is needed too
+    nchunk_stop++;
+  }
+  if (nchunk_stop < nchunk_start) {
+    BLOSC_TRACE_ERROR("Invalid slice bounds: a negative number of chunks would be selected.");
+    return BLOSC2_ERROR_INVALID_PARAM;
+  }
+
+  int nchunks = (int)(nchunk_stop - nchunk_start);
+  // Always request at least one item: malloc(0) may legitimately return NULL
+  int64_t *ptr = malloc((nchunks > 0 ? (size_t)nchunks : 1) * sizeof(int64_t));
+  BLOSC_ERROR_NULL(ptr, BLOSC2_ERROR_MEMORY_ALLOC);
+  for (int i = 0; i < nchunks; ++i) {
+    ptr[i] = nchunk_start + i;
+  }
+  *chunks_idx = ptr;
+
+  return nchunks;
+}
+
 /* Reorder the chunk offsets of an existing super-chunk. */
 int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order) {
   // Check that the offsets order are correct
diff --git a/include/blosc2.h b/include/blosc2.h
index de7e16db..10b3cab2 100644
--- a/include/blosc2.h
+++ b/include/blosc2.h
@@ -2486,6 +2486,21 @@ BLOSC_EXPORT void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_
  */
 BLOSC_EXPORT void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t *strides, int64_t *i);
 
+/**
+ * @brief Get the unidimensional chunk indexes needed to get a slice of a schunk or a b2nd array
+ *
+ * @param schunk The super-chunk (of b2nd array or not).
+ * @param start Index (0-based if it is a schunk) where the slice begins.
+ * @param stop The first index (0-based if it is a schunk) that is not in the selected slice.
+ * @param chunks_idx The pointer to the buffer where the indexes will be written. It is the user's responsibility
+ * to free the buffer.
+ *
+ *
+ * @return The number of chunks needed to get the slice. If some problem is
+ * detected, a negative code is returned instead.
+ */
+BLOSC_EXPORT int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tests/test_get_slice_nchunks.c b/tests/test_get_slice_nchunks.c
new file mode 100644
index 00000000..89a4f4d8
--- /dev/null
+++ b/tests/test_get_slice_nchunks.c
@@ -0,0 +1,174 @@
+/*
+  Copyright (c) 2021  The Blosc Development Team
+  https://blosc.org
+  License: BSD 3-Clause (see LICENSE.txt)
+
+  See LICENSE.txt for details about copyright and rights to use.
+*/
+
+#include <stdio.h>
+#include "test_common.h"
+
+#define CHUNKSIZE (200 * 1000)
+#define NTHREADS (2)
+
+/* Global vars */
+int tests_run = 0;
+
+typedef struct {
+  int nchunks;
+  int64_t start;
+  int64_t stop;
+  char* urlpath;
+  bool contiguous;
+  bool shorter_last_chunk;
+  int64_t nchunk_start;
+  int64_t nchunk_stop;
+} test_data;
+
+test_data tdata;
+
+typedef struct {
+  int nchunks;
+  int64_t start;
+  int64_t stop;
+  bool shorter_last_chunk;
+  int64_t nchunk_start;
+  int64_t nchunk_stop;
+} test_ndata;
+
+test_ndata tndata[] = {
+    {10, 0, 10 * CHUNKSIZE, false, 0, 10}, //whole schunk
+    {5,  3, 200, false, 0, 1}, //piece of 1 block
+    {33, 5, 679, false, 0, 1}, // blocks of same chunk
+    {12,  129 * 100, 134 * 100 * 3, false, 0, 1}, // blocks of different chunks
+    {2, 200 * 100, CHUNKSIZE * 2, false, 0, 2}, // 1 chunk
+    {5, 0, CHUNKSIZE * 5 + 200 * 100 + 300, true, 0, 6}, // last chunk shorter
+    {2, 10, CHUNKSIZE * 2 + 400, true, 0, 3}, // start != 0, last chunk shorter
+    {12, CHUNKSIZE * 1 + 300, CHUNKSIZE * 4 + 100, false, 1, 5}, // start not in first chunk
+};
+
+typedef struct {
+  bool contiguous;
+  char *urlpath;
+} test_storage;
+
+test_storage tstorage[] = {
+    {false, NULL},  // memory - schunk
+    {true, NULL},  // memory - cframe
+    {true, "test_get_slice_nchunks.b2frame"}, // disk - cframe
+    {false, "test_get_slice_nchunks.b2frame"}, // disk - sframe
+};
+
+
+static char* test_get_slice_nchunks(void) {
+  static int32_t data[CHUNKSIZE];
+  int32_t *data_ = NULL;
+  int32_t isize = CHUNKSIZE * sizeof(int32_t);
+  int rc;
+  blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS;
+  blosc2_dparams dparams = BLOSC2_DPARAMS_DEFAULTS;
+  blosc2_schunk* schunk;
+
+  /* Initialize the Blosc compressor */
+  blosc2_init();
+
+  /* Create a super-chunk container */
+  blosc2_remove_urlpath(tdata.urlpath);
+  cparams.typesize = sizeof(int32_t);
+  cparams.clevel = 5;
+  cparams.nthreads = NTHREADS;
+  dparams.nthreads = NTHREADS;
+  cparams.blocksize = 0;
+  blosc2_storage storage = {.cparams=&cparams, .dparams=&dparams,
+                            .urlpath=tdata.urlpath, .contiguous=tdata.contiguous};
+  schunk = blosc2_schunk_new(&storage);
+
+  // Feed it with data
+  if (!tdata.shorter_last_chunk) {
+    for (int nchunk = 0; nchunk < tdata.nchunks; nchunk++) {
+      for (int i = 0; i < CHUNKSIZE; i++) {
+        data[i] = i + nchunk * CHUNKSIZE;
+      }
+      int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data, isize);
+      mu_assert("ERROR: bad append in frame", nchunks_ > 0);
+    }
+  }
+  else {
+    data_ = malloc(sizeof(int32_t) * tdata.stop);
+    mu_assert("ERROR: cannot allocate the data buffer", data_ != NULL);
+    for (int64_t i = 0; i < tdata.stop; i++) {
+      data_[i] = (int32_t)i;
+    }
+    for (int nchunk = 0; nchunk < tdata.nchunks; nchunk++) {
+      int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data_ + nchunk * CHUNKSIZE, isize);
+      mu_assert("ERROR: bad append in frame", nchunks_ > 0);
+    }
+    int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data_ + tdata.nchunks * CHUNKSIZE,
+                                                   (tdata.stop % CHUNKSIZE) * sizeof(int32_t));
+    free(data_);  // everything has been copied into the super-chunk
+    mu_assert("ERROR: bad append in frame", nchunks_ > 0);
+  }
+
+  // Get slice nchunks
+  int64_t *chunks_indexes;
+  rc = blosc2_get_slice_nchunks(schunk, &tdata.start, &tdata.stop, &chunks_indexes);
+  mu_assert("ERROR: cannot get slice correctly.", rc >= 0);
+  mu_assert("ERROR: wrong number of chunks.", rc == (tdata.nchunk_stop - tdata.nchunk_start));
+  int64_t nchunk = tdata.nchunk_start;
+  for (int i = 0; i < rc; ++i) {
+    mu_assert("ERROR: wrong nchunk index retrieved.", chunks_indexes[i] == nchunk);
+    nchunk++;
+  }
+
+
+  /* Free resources */
+  free(chunks_indexes);
+  blosc2_schunk_free(schunk);
+  blosc2_remove_urlpath(tdata.urlpath);
+  /* Destroy the Blosc environment */
+  blosc2_destroy();
+
+
+  return EXIT_SUCCESS;
+}
+
+static char *all_tests(void) {
+  for (int i = 0; i < (int) ARRAY_SIZE(tstorage); ++i) {
+    for (int j = 0; j < (int) ARRAY_SIZE(tndata); ++j) {
+      tdata.contiguous = tstorage[i].contiguous;
+      tdata.urlpath = tstorage[i].urlpath;
+      tdata.nchunks = tndata[j].nchunks;
+      tdata.start = tndata[j].start;
+      tdata.stop = tndata[j].stop;
+      tdata.shorter_last_chunk = tndata[j].shorter_last_chunk;
+      tdata.nchunk_start = tndata[j].nchunk_start;
+      tdata.nchunk_stop = tndata[j].nchunk_stop;
+      mu_run_test(test_get_slice_nchunks);
+    }
+  }
+
+  return EXIT_SUCCESS;
+}
+
+
+int main(void) {
+  char *result;
+
+  install_blosc_callback_test(); /* optionally install callback test */
+  blosc2_init();
+
+  /* Run all the suite */
+  result = all_tests();
+  if (result != EXIT_SUCCESS) {
+    printf(" (%s)\n", result);
+  }
+  else {
+    printf(" ALL TESTS PASSED");
+  }
+  printf("\tTests run: %d\n", tests_run);
+
+  blosc2_destroy();
+
+  return result != EXIT_SUCCESS;
+}