Skip to content

Commit

Permalink
Add funcs for getting chunk indexs of slice
Browse files Browse the repository at this point in the history
  • Loading branch information
martaiborra committed Dec 21, 2023
1 parent 017b278 commit 6c24439
Show file tree
Hide file tree
Showing 7 changed files with 398 additions and 0 deletions.
40 changes: 40 additions & 0 deletions blosc/b2nd-private.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 The Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/


#ifndef BLOSC_B2ND_PRIVATE_H
#define BLOSC_B2ND_PRIVATE_H

#include "b2nd.h"

#include <stdbool.h>
#include <stdlib.h>
#include <stdint.h>

/*********************************************************************
Functions meant to be used internally.
*********************************************************************/

/**
* @brief Get the indexes of the chunks needed to get the slice.
*
* The indexes written are unidimensional (flattened) chunk indexes, i.e. the
* multidimensional chunk coordinates converted with the strides of the chunk grid.
*
* @param array The b2nd array.
* @param start The coordinates where the slice will begin (one value per dimension).
* @param stop The coordinates where the slice will end (one value per dimension).
* A chunk starting at or after @p stop in some dimension is not selected, so
* @p stop is exclusive.
* @param chunks_idx The pointer to the buffer where the indexes of the chunks will be written.
* The buffer is allocated by this function with malloc(); the caller is
* responsible for releasing it with free().
*
* @return The number of chunks needed to get the slice. If some problem is
* detected, a negative code is returned instead.
*/
int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx);

#endif /* BLOSC_B2ND_PRIVATE_H */
86 changes: 86 additions & 0 deletions blosc/b2nd.c
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,92 @@ int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t
}


/*
 * Collect the flattened indexes of the chunks that intersect the slice
 * [start, stop) of a b2nd array.
 *
 * array:      the b2nd array whose chunk grid is inspected.
 * start/stop: per-dimension slice bounds (ndim values each); stop is exclusive.
 * chunks_idx: out parameter; receives a malloc()ed buffer with the selected
 *             chunk indexes.  The caller owns the buffer and must free() it.
 *
 * Returns the number of indexes written, or a negative error code
 * (NULL argument or allocation failure).  On error *chunks_idx is not modified.
 */
int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx) {
  BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(start, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(stop, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);

  int8_t ndim = array->ndim;

  // 0-dim case: the only possible answer is chunk 0
  if (ndim == 0) {
    int64_t *single = malloc(sizeof(int64_t));
    BLOSC_ERROR_NULL(single, BLOSC2_ERROR_MEMORY_ALLOC);
    single[0] = 0;
    *chunks_idx = single;
    return 1;
  }

  // Number of chunks along every dimension of the (extended) array
  // NOTE(review): divides by chunkshape[i]; assumes it is never 0 -- confirm for empty arrays
  int64_t chunks_in_array[B2ND_MAX_DIM] = {0};
  for (int i = 0; i < ndim; ++i) {
    chunks_in_array[i] = array->extshape[i] / array->chunkshape[i];
  }

  // Strides of the chunk grid (last dimension varies fastest)
  int64_t chunks_in_array_strides[B2ND_MAX_DIM];
  chunks_in_array_strides[ndim - 1] = 1;
  for (int i = ndim - 2; i >= 0; --i) {
    chunks_in_array_strides[i] = chunks_in_array_strides[i + 1] * chunks_in_array[i + 1];
  }

  // Compute the box of candidate chunks (first chunk and extent per dimension)
  int64_t update_start[B2ND_MAX_DIM];
  int64_t update_shape[B2ND_MAX_DIM];

  int64_t update_nchunks = 1;
  for (int i = 0; i < ndim; ++i) {
    int64_t pos = 0;
    // Advance to the first chunk boundary strictly past start[i]
    while (pos <= start[i]) {
      pos += array->chunkshape[i];
    }
    update_start[i] = pos / array->chunkshape[i] - 1;
    // Advance to the first chunk boundary at or past stop[i]
    while (pos < stop[i]) {
      pos += array->chunkshape[i];
    }
    update_shape[i] = pos / array->chunkshape[i] - update_start[i];
    update_nchunks *= update_shape[i];
  }

  // The loop below stores at most one index per candidate chunk, so
  // update_nchunks entries is always enough (update_shape[i] >= 1 for every i).
  int64_t *buffer = malloc((size_t)update_nchunks * sizeof(int64_t));
  BLOSC_ERROR_NULL(buffer, BLOSC2_ERROR_MEMORY_ALLOC);

  int nchunks = 0;
  for (int64_t update_nchunk = 0; update_nchunk < update_nchunks; ++update_nchunk) {
    // Coordinates of the candidate chunk inside the chunk grid
    int64_t nchunk_ndim[B2ND_MAX_DIM] = {0};
    blosc2_unidim_to_multidim(ndim, update_shape, update_nchunk, nchunk_ndim);
    for (int i = 0; i < ndim; ++i) {
      nchunk_ndim[i] += update_start[i];
    }
    int64_t nchunk;
    blosc2_multidim_to_unidim(nchunk_ndim, ndim, chunks_in_array_strides, &nchunk);

    // Check if the chunk is inside the slice domain
    int64_t chunk_start[B2ND_MAX_DIM] = {0};
    int64_t chunk_stop[B2ND_MAX_DIM] = {0};
    for (int i = 0; i < ndim; ++i) {
      chunk_start[i] = nchunk_ndim[i] * array->chunkshape[i];
      chunk_stop[i] = chunk_start[i] + array->chunkshape[i];
      // Border chunks are clipped to the real (non-extended) shape
      if (chunk_stop[i] > array->shape[i]) {
        chunk_stop[i] = array->shape[i];
      }
    }
    bool chunk_empty = false;
    for (int i = 0; i < ndim; ++i) {
      chunk_empty |= (chunk_stop[i] <= start[i] || chunk_start[i] >= stop[i]);
    }
    if (chunk_empty) {
      continue;
    }

    buffer[nchunks] = nchunk;
    nchunks++;
  }

  // Give back the unused tail.  Plain realloc() is used because reallocf() is a
  // BSD extension; a failed shrink is harmless, so the original buffer is kept.
  // A zero-sized realloc is avoided since its behavior is implementation-defined.
  if (nchunks > 0 && nchunks < update_nchunks) {
    int64_t *shrunk = realloc(buffer, (size_t)nchunks * sizeof(int64_t));
    if (shrunk != NULL) {
      buffer = shrunk;
    }
  }

  *chunks_idx = buffer;
  return nchunks;
}


int b2nd_squeeze(b2nd_array_t *array) {
BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER);

Expand Down
24 changes: 24 additions & 0 deletions blosc/blosc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "blosc-private.h"
#include "../plugins/codecs/zfp/blosc2-zfp.h"
#include "frame.h"
#include "b2nd-private.h"
#include "schunk-private.h"

#if defined(USING_CMAKE)
#include "config.h"
Expand Down Expand Up @@ -4656,3 +4658,25 @@ void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t
*i += index[j] * strides[j];
}
}

/*
 * Get the chunk indexes needed to read a slice of a super-chunk, which may or
 * may not back a b2nd (or caterva) array.
 *
 * schunk:     the super-chunk.
 * start/stop: slice bounds.  For a plain super-chunk only the first element of
 *             each is read; for a b2nd array they are forwarded as
 *             per-dimension coordinates.
 * chunks_idx: out parameter; receives the buffer with the chunk indexes
 *             (allocated by the callee, to be freed by the caller).
 *
 * Returns the number of chunks, or a negative error code.
 */
int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx) {
  BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER);
  // start/stop are dereferenced below, so reject NULL up front
  BLOSC_ERROR_NULL(start, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(stop, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);

  // No "b2nd" metalayer and no "caterva" one either (we are meant to be
  // backward compatible with it): treat the super-chunk as unidimensional.
  if (blosc2_meta_exists(schunk, "b2nd") < 0 && blosc2_meta_exists(schunk, "caterva") < 0) {
    return schunk_get_slice_nchunks(schunk, *start, *stop, chunks_idx);
  }

  b2nd_array_t *array;
  int rc = b2nd_from_schunk(schunk, &array);
  if (rc < 0) {
    BLOSC_TRACE_ERROR("Could not get b2nd array from schunk.");
    return rc;
  }
  rc = b2nd_get_slice_nchunks(array, start, stop, chunks_idx);
  // Detach the caller's super-chunk before freeing, so that only the array
  // struct is released.
  array->sc = NULL;
  b2nd_free(array);

  return rc;
}
40 changes: 40 additions & 0 deletions blosc/schunk-private.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*********************************************************************
Blosc - Blocked Shuffling and Compression Library
Copyright (c) 2021 The Blosc Development Team <blosc@blosc.org>
https://blosc.org
License: BSD 3-Clause (see LICENSE.txt)
See LICENSE.txt for details about copyright and rights to use.
**********************************************************************/


#ifndef BLOSC_SCHUNK_PRIVATE_H
#define BLOSC_SCHUNK_PRIVATE_H

#include "b2nd.h"

#include <stdbool.h>
#include <stdlib.h>
#include <stdint.h>

/*********************************************************************
Functions meant to be used internally.
*********************************************************************/

/**
* @brief Get the indexes of the chunks needed to get the slice.
*
* @param schunk The super-chunk.
* @param start Index (0-based, in items) where the slice begins.
* @param stop The first index (0-based, in items) that is not in the selected slice.
* @param chunks_idx The pointer to the buffer where the indexes will be written.
* The buffer is allocated by this function with malloc(); the caller is
* responsible for releasing it with free().
*
* @return The number of chunks needed to get the slice. If some problem is
* detected, a negative code is returned instead.
*/
int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx);
#endif /* BLOSC_SCHUNK_PRIVATE_H */
21 changes: 21 additions & 0 deletions blosc/schunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,27 @@ int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t
}


/*
 * Get the indexes of the consecutive chunks that hold the items [start, stop)
 * of a unidimensional super-chunk.
 *
 * schunk:     the super-chunk; its typesize and chunksize are used to convert
 *             item indexes into chunk indexes.
 * start:      index (0-based, in items) where the slice begins.
 * stop:       first index (0-based, in items) not included in the slice.
 * chunks_idx: out parameter; receives a malloc()ed buffer with the chunk
 *             indexes that the caller must free().  It is set to NULL when no
 *             chunk is selected or when the slice bounds are rejected.
 *
 * Returns the number of chunks, or a negative error code.
 */
int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx) {
  BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER);
  BLOSC_ERROR_NULL(chunks_idx, BLOSC2_ERROR_NULL_POINTER);

  *chunks_idx = NULL;

  // chunksize is used as a divisor below
  if (schunk->chunksize <= 0) {
    BLOSC_TRACE_ERROR("Cannot compute slice chunks: chunksize is not positive.");
    return BLOSC2_ERROR_INVALID_PARAM;
  }

  int64_t byte_start = start * schunk->typesize;
  int64_t byte_stop = stop * schunk->typesize;
  int64_t nchunk_start = byte_start / schunk->chunksize;
  int64_t nchunk_stop = byte_stop / schunk->chunksize;
  // A slice ending in the middle of a chunk still needs that chunk
  if (byte_stop % schunk->chunksize != 0) {
    nchunk_stop++;
  }

  if (nchunk_stop < nchunk_start) {
    // A negative count would both look like an error code and turn into a
    // huge allocation size; report it explicitly instead.
    BLOSC_TRACE_ERROR("Slice stop is before slice start.");
    return BLOSC2_ERROR_INVALID_PARAM;
  }

  int nchunks = (int)(nchunk_stop - nchunk_start);
  if (nchunks == 0) {
    // Nothing to report; avoid malloc(0), whose result is implementation-defined
    return 0;
  }

  int64_t *ptr = malloc((size_t)nchunks * sizeof(int64_t));
  BLOSC_ERROR_NULL(ptr, BLOSC2_ERROR_MEMORY_ALLOC);

  for (int i = 0; i < nchunks; ++i) {
    ptr[i] = nchunk_start + i;
  }

  *chunks_idx = ptr;
  return nchunks;
}

/* Reorder the chunk offsets of an existing super-chunk. */
int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order) {
// Check that the offsets order are correct
Expand Down
15 changes: 15 additions & 0 deletions include/blosc2.h
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,21 @@ BLOSC_EXPORT void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_
*/
BLOSC_EXPORT void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t *strides, int64_t *i);

/**
* @brief Get the unidimensional chunk indexes needed to get a slice of a schunk or a b2nd array
*
* @param schunk The super-chunk (of b2nd array or not).
* @param start Where the slice begins. For a plain super-chunk, a pointer to a single
* 0-based index; for a b2nd array, the coordinates (one per dimension).
* @param stop The first position that is not in the selected slice. For a plain super-chunk,
* a pointer to a single 0-based index; for a b2nd array, the coordinates (one per dimension).
* @param chunks_idx The pointer to the buffer where the indexes will be written. The buffer is
* allocated by this function; it is the user responsibility to free the buffer.
*
*
* @return The number of chunks needed to get the slice. If some problem is
* detected, a negative code is returned instead.
*/
BLOSC_EXPORT int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit 6c24439

Please sign in to comment.