#ifndef GPUARRAY_ARRAY_H #define GPUARRAY_ARRAY_H /** * \file array.h * \brief Array functions. */ #include #ifdef _MSC_VER #ifndef inline #define inline __inline #endif #endif #ifdef __cplusplus extern "C" { #endif #ifdef CONFUSE_EMACS } #endif /** * Main array structure. */ typedef struct _GpuArray { /** * Device data buffer. */ gpudata *data; /** * Size of each dimension. The number of elements is #nd. */ size_t *dimensions; /** * Stride for each dimension. The number of elements is #nd. */ ssize_t *strides; /** * Offset to the first array element into the device data buffer. */ size_t offset; /** * Number of dimensions. */ unsigned int nd; /** * Flags for this array (see \ref aflags). */ int flags; /** * Type of the array elements. */ int typecode; /** * \defgroup aflags Array Flags * @{ */ /* Try to keep in sync with numpy values for now */ /** * Array is C-contiguous. */ #define GA_C_CONTIGUOUS 0x0001 /** * Array is Fortran-contiguous. */ #define GA_F_CONTIGUOUS 0x0002 /** * Buffer data is properly aligned for the type. This should always * be true for arrays allocated through this library. * * If this isn't true you can't use kernels on the data, since they * require aligned access. */ #define GA_ALIGNED 0x0100 /** * Can write to the data buffer. (This is always true for arrays * allocated through this library). */ #define GA_WRITEABLE 0x0400 /** * Array data is behaved (properly aligned and writable). */ #define GA_BEHAVED (GA_ALIGNED|GA_WRITEABLE) /** * Array layout is that of a C array. */ #define GA_CARRAY (GA_C_CONTIGUOUS|GA_BEHAVED) /** * Array layout is that of a Fortran array. */ #define GA_FARRAY (GA_F_CONTIGUOUS|GA_BEHAVED) /** * @} */ /* Numpy flags that will not be supported at this level (and why): NPY_OWNDATA: data is refcounted NPY_NOTSWAPPED: data is alway native endian NPY_FORCECAST: no casts NPY_ENSUREARRAY: no inherited classes NPY_UPDATEIFCOPY: cannot support without refcount (or somesuch) Maybe will define other flags later */ } GpuArray; /** * Type used to specify the desired order to some functions */ typedef enum _ga_order { /** * Any order is fine. */ GA_ANY_ORDER=-1, /** * C order is desired. */ GA_C_ORDER=0, /** * Fortran order is desired. */ GA_F_ORDER=1 } ga_order; /** * Checks if all the specified flags are set. * * \param a array * \param flags flags to check * * \returns true if all flags in `flags` are set and false otherwise. */ static inline int GpuArray_CHKFLAGS(const GpuArray *a, int flags) { return (a->flags & flags) == flags; } /* Add tests here when you need them */ /** * Checks if the array data is writable. * * \param a array * * \returns true if the data area of `a` is writable */ #define GpuArray_ISWRITEABLE(a) GpuArray_CHKFLAGS(a, GA_WRITEABLE) /** * Checks if the array elements are aligned. * * \param a array * * \returns true if the elements of `a` are aligned. */ #define GpuArray_ISALIGNED(a) GpuArray_CHKFLAGS(a, GA_ALIGNED) /** * Checks if the array elements are contiguous in memory. * * \param a array * * \returns true if the data area of `a` is contiguous */ #define GpuArray_ISONESEGMENT(a) ((a)->flags & (GA_C_CONTIGUOUS|GA_F_CONTIGUOUS)) /** * Checks if the array elements are c contiguous in memory. * * \param a array * * \returns true if the data area of `a` is contiguous */ #define GpuArray_IS_C_CONTIGUOUS(a) ((a)->flags & GA_C_CONTIGUOUS) /** * Checks if the array elements are f contiguous in memory. * * \param a array * * \returns true if the data area of `a` is contiguous */ #define GpuArray_IS_F_CONTIGUOUS(a) ((a)->flags & GA_F_CONTIGUOUS) /** * This is the same as GpuArray_IS_F_CONTIGUOUS, but not the same as PyArray_ISFORTRAN. * * PyArray_ISFORTRAN checks if the array elements are laid out if * Fortran order and NOT c order. * * \param a array * * \returns true if the data area of `a` is Fortran-contiguous */ #define GpuArray_ISFORTRAN(a) (GpuArray_CHKFLAGS(a, GA_F_CONTIGUOUS)) /** * Retrive the size of the elements in the array. * * \param a array * * \returns the size of the array elements. */ #define GpuArray_ITEMSIZE(a) gpuarray_get_elsize((a)->typecode) /** * Initialize and allocate a new empty (uninitialized data) array. * * \param a the GpuArray structure to initialize. Content will be * ignored so make sure to deallocate any previous array first. * \param ctx context in which to allocate array data. Must come from * the same backend as the operations vector. * \param typecode type of the elements in the array * \param nd desired order (number of dimensions) * \param dims size for each dimension. * \param ord desired layout of data. * * \returns A return of GA_NO_ERROR means that the structure is * properly initialized and that the memory requested is reserved on * the device. Any other error code means that the structure is * left uninitialized. */ GPUARRAY_PUBLIC int GpuArray_empty(GpuArray *a, gpucontext *ctx, int typecode, unsigned int nd, const size_t *dims, ga_order ord); /** * Initialize and allocate a new zero-initialized array. * * \param a the GpuArray structure to initialize. Content will be * ignored so make sure to deallocate any previous array first. * \param ctx context in which to allocate array data. Must come from * the same backend as the operations vector. * \param typecode type of the elements in the array * \param nd desired order (number of dimensions) * \param dims size for each dimension. * \param ord desired layout of data. * * \returns A return of GA_NO_ERROR means that the structure is * properly initialized and that the memory requested is reserved on * the device. Any other error code means that the structure is * left uninitialized. */ GPUARRAY_PUBLIC int GpuArray_zeros(GpuArray *a, gpucontext *ctx, int typecode, unsigned int nd, const size_t *dims, ga_order ord); /** * Initialize and allocate a new array structure from a pre-existing buffer. * * The array will be considered to own the gpudata structure after the * call is made and will free it when deallocated. An error return * from this function will deallocate `data`. * This increment the ref count of gpudata. This seem to contradict the above. * * \param a the GpuArray structure to initialize. Content will be * ignored so make sure to deallocate any previous array first. * \param data buffer to user. * \param offset position of the first data element of the array in the buffer. * \param typecode type of the elements in the array * \param nd order of the data (number of dimensions). * \param dims size for each dimension. * \param strides stride for each dimension. * \param writeable true if the buffer is writable false otherwise. * * \returns A return of GA_NO_ERROR means that the structure is * properly initialized. Any other error code means that the structure * is left uninitialized and the provided buffer is deallocated. */ GPUARRAY_PUBLIC int GpuArray_fromdata(GpuArray *a, gpudata *data, size_t offset, int typecode, unsigned int nd, const size_t *dims, const ssize_t *strides, int writeable); GPUARRAY_PUBLIC int GpuArray_copy_from_host(GpuArray *a, gpucontext *ctx, void *buf, int typecode, unsigned int nd, const size_t *dims, const ssize_t *strides); /** * Initialize an array structure to provide a view of another. * * The new structure will point to the same data area and have the * same values of properties as the source one. The data area is * shared and writes from one array will be reflected in the other. * The properties are copied and not shared and can be modified * independantly. * * \param v the result array * \param a the source array * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_view(GpuArray *v, const GpuArray *a); /** * Blocks until all operations (kernels, copies) involving `a` are finished. * * \param a the array to synchronize * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_sync(GpuArray *a); /** * Returns a sub-view of a source array. * * The indexing follows simple basic model where each dimension is * indexed separately. For a single dimension the indexing selects * from the start index (included) to the end index (excluded) while * selecting one over step elements. As an example for the array `[ 0 * 1 2 3 4 5 6 7 8 9 ]` indexed with start index 1 stop index 8 and * step 2 the result would be `[ 1 3 5 7 ]`. * * The special value 0 for step means that only one element * corresponding to the start index and the resulting array order will * be one smaller. * * \param r the result array * \param a the source array * \param starts the start of the subsection for each dimension (length must be a->nd) * \param stops the end of the subsection for each dimension (length must be a->nd) * \param steps the steps for the subsection for each dimension (length must be a->nd) * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_index(GpuArray *r, const GpuArray *a, const ssize_t *starts, const ssize_t *stops, const ssize_t *steps); GPUARRAY_PUBLIC int GpuArray_index_inplace(GpuArray *a, const ssize_t *starts, const ssize_t *stops, const ssize_t *steps); /** * Take a portion of an array along axis 0. * * This operation allows arbitrary indexing of an array along its * first axis. The indexed array `v` can be of any dimension or * strides. The result and index array (`a` and `i` respectively) need * to be C contiguous. * * The dimension 0 of `a` has to match dimension 0 of `i` and the * others have to match their equivalent on `v`. `i` has to have a * single dimension. * * If `check_error` is not 0, the function will check for indexing * errors in the kernel and will return GA_VALUE_ERROR in that * case. No other error will produce that error code. This is not * always done because it introduces a synchronization point which may * affect performance. * * \param a the result array (nd) * \param v the source array (nd) * \param i the index array (1d) * \param check_error whether to check for index errors or not * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i, int check_error); /** * Sets the content of an array to the content of another array. * * The value array must be smaller or equal in number of dimensions to * the destination array. Each of its dimensions' size must be either * exactly equal to the destination array's corresponding dimensions * or 1. Dimensions of size 1 will be repeated to fill the full size * of the destination array. Extra size 1 dimensions will be added at * the end to make the two arrays shape-equivalent. * * \param a the destination array * \param v the value array * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_setarray(GpuArray *a, const GpuArray *v); /** * Change the dimensions of an array. * * Return a new array with the desired dimensions. The new dimensions * must have the same total size as the old ones. A copy of the * underlying data may be performed if necessary, unless `nocopy` is * 0. * * \param res the result array * \param a the source array * \param nd new dimensions order * \param newdims new dimensions (length is nd) * \param ord the desired resulting order * \param nocopy if 0 error out if a data copy is required. * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_reshape(GpuArray *res, const GpuArray *a, unsigned int nd, const size_t *newdims, ga_order ord, int nocopy); GPUARRAY_PUBLIC int GpuArray_reshape_inplace(GpuArray *a, unsigned int nd, const size_t *newdims, ga_order ord); /** * Rearrange the axes of an array. * * Return a new array with its shape and strides swapped accordingly * to the `new_axes` parameter. If `new_axes` is NULL then the order * is reversed. The returned array is a view on the data of the old * one. * * \param res the result array * \param a the source array * \param new_axes either NULL or a list of a->nd elements * * \return GA_NO_ERROR if the operation was successful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_transpose(GpuArray *res, const GpuArray *a, const unsigned int *new_axes); GPUARRAY_PUBLIC int GpuArray_transpose_inplace(GpuArray *a, const unsigned int *new_axes); /** * Release all device and host memory associated with `a`. * * This function frees all host memory, and releases the device memory * if it is the owner. In case an array has views it is the * responsability of the caller to ensure a base array is not cleared * before its views. * * This function will also zero out the structure to prevent * accidental reuse. * * \param a the array to clear */ GPUARRAY_PUBLIC void GpuArray_clear(GpuArray *a); /** * Checks if two arrays may share device memory. * * \param a an array * \param b an array * * \returns 1 if `a` and `b` may share a portion of their data. */ GPUARRAY_PUBLIC int GpuArray_share(const GpuArray *a, const GpuArray *b); /** * Retursns the context of an array. * * \param a an array * * \returns the context in which `a` was allocated. */ GPUARRAY_PUBLIC gpucontext *GpuArray_context(const GpuArray *a); /** * Copies all the elements of one array to another. * * The arrays `src` and `dst` must have the same size (total number of * elements) and be in the same context. * * \param dst destination array * \param src source array * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_move(GpuArray *dst, const GpuArray *src); /** * Copy data from the host memory to the device memory. * * \param dst destination array (must be contiguous) * \param src source host memory (contiguous block) * \param src_sz size of data to copy (in bytes) * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_write(GpuArray *dst, const void *src, size_t src_sz); /** * Copy data from the device memory to the host memory. * * \param dst destination host memory (contiguous block) * \param dst_sz size of data to copy (in bytes) * \param src source array (must be contiguous) * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_read(void *dst, size_t dst_sz, const GpuArray *src); /** * Set all of an array's data to a byte pattern. * * \param a an array (must be contiguous) * \param data the byte to repeat * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_memset(GpuArray *a, int data); /** * Make a copy of an array. * * This is analogue to GpuArray_view() except it copies the device * memory and no data is shared. * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_copy(GpuArray *res, const GpuArray *a, ga_order order); /** * Copy between arrays in different contexts. * * This works like GpuArray_move() except it will work between arrays * that aren't in the same context. * * Source and target arrays must be contiguous. This restriction may * be lifted in the future. * * \param r result array * \param a array to transfer * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_transfer(GpuArray *res, const GpuArray *a); /** * Split an array into multiple views. * * The created arrays will be sub-portions of `a` where `axis` is * divided according to the values in `p`. No checks are performed on * the values in `p` except to make sure that they don't reference * values outside of the bounds of the source array. * * If an error occurs partway during the operation, the created arrays * will be cleared before returning. * * \param rs list of array pointers to store results (must be of length n+1) * \param a array to split * \param n number of splits (length of p) * \param p list of split points * \param axis axis to split * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_split(GpuArray **rs, const GpuArray *a, size_t n, size_t *p, unsigned int axis); /** * Concatenate the arrays in `as` along the axis `axis`. * * If an error occurs during the operation, the result array may be * cleared before returning. * * \param r the result array * \param as list of pointer to arrays to concatenate * \param n number of array in list `as` * \param axis the axis along which to concatenate * \param restype the typecode of the result array * * \return GA_NO_ERROR if the operation was succesful. * \return an error code otherwise */ GPUARRAY_PUBLIC int GpuArray_concatenate(GpuArray *r, const GpuArray **as, size_t n, unsigned int axis, int restype); /** * Get a description of the last error in the context of `a`. * * The description may reflect operations with other arrays in the * same context if other operations were performed between the * occurence of the error and the call to this function. * * Operations in other contexts, however have no incidence on the * return value. * * \param a an array * \param err the error code returned * * \returns A user-readable string describing the nature of the error. */ GPUARRAY_PUBLIC const char *GpuArray_error(const GpuArray *a, int err); /** * Print a textual description of `a` to the specified file * descriptor. * * \param fd a file descriptior open for writing * \param a an array */ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a); GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a); #ifdef __cplusplus } #endif #endif