Skip to content

Commit 8968327

Browse files
committed
Add documentation for gpuarray/collectives.h and implementation
1 parent 4d3ad15 commit 8968327

2 files changed

Lines changed: 73 additions & 35 deletions

File tree

src/gpuarray/collectives.h

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,65 +17,91 @@ extern "C" {
1717
************************************************************************************/
1818

1919
/**
20-
* \brief TODO
21-
* \param src [const GpuArray*] TODO
22-
* \param opcode [int] TODO
23-
* \param root [int] TODO
24-
* \param comm [gpucomm*] TODO
25-
* \return int TODO
20+
* \brief Reduce collective operation for non-root participant ranks in a
21+
* communicator world.
22+
* \param src [const GpuArray*] array to be reduced
23+
* \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
24+
* \param root [int] rank in `comm` which will collect result
25+
* \param comm [gpucomm*] gpu communicator world
26+
* \note Root rank of reduce operation must call \ref GpuArray_reduce.
27+
* \note Must be called separately for each rank in `comm`, except root rank.
28+
* \return int error code, GA_NO_ERROR if success
2629
*/
2730
GPUARRAY_PUBLIC int GpuArray_reduce_from(const GpuArray* src, int opcode, int root,
2831
gpucomm* comm);
2932

3033
/**
31-
* \brief TODO
32-
* \param src [const GpuArray*] TODO
33-
* \param dest [GpuArray*] TODO
34-
* \param opcode [int] TODO
35-
* \param root [int] TODO
36-
* \param comm [gpucomm*] TODO
37-
* \return int TODO
34+
* \brief Reduce collective operation for ranks in a communicator world.
35+
* \param src [const GpuArray*] array to be reduced
36+
* \param dest [GpuArray*] array to collect reduce operation result
37+
* \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
38+
* \param root [int] rank in `comm` which will collect result
39+
* \param comm [gpucomm*] gpu communicator world
40+
* \note Can be used by root and non-root ranks alike.
41+
* \note Non-root ranks can call this, using a NULL `dest`.
42+
* \note Must be called separately for each rank in `comm` (non-root can call \ref
43+
* GpuArray_reduce_from instead).
44+
* \return int error code, GA_NO_ERROR if success
3845
*/
3946
GPUARRAY_PUBLIC int GpuArray_reduce(const GpuArray* src, GpuArray* dest, int opcode,
4047
int root, gpucomm* comm);
4148

4249
/**
43-
* \brief TODO
44-
* \param src [const GpuArray*] TODO
45-
* \param dest [GpuArray*] TODO
46-
* \param opcode [int] TODO
47-
* \param comm [gpucomm*] TODO
48-
* \return int TODO
50+
* \brief AllReduce collective operation for ranks in a communicator world.
51+
*
52+
* Reduces `src` using `opcode` operation and leaves identical copies of result in `dest`
53+
* on each rank of `comm`.
54+
*
55+
* \param src [const GpuArray*] array to be reduced
56+
* \param dest [GpuArray*] array to collect reduce operation result
57+
* \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
58+
* \param comm [gpucomm*] gpu communicator world
59+
* \note Must be called separately for each rank in `comm`.
60+
* \return int error code, GA_NO_ERROR if success
4961
*/
5062
GPUARRAY_PUBLIC int GpuArray_all_reduce(const GpuArray* src, GpuArray* dest,
5163
int opcode, gpucomm* comm);
5264

5365
/**
54-
* \brief TODO
55-
* \param src [const GpuArray*] TODO
56-
* \param dest [GpuArray*] TODO
57-
* \param opcode [int] TODO
58-
* \param comm [gpucomm*] TODO
59-
* \return int TODO
66+
* \brief ReduceScatter collective operation for ranks in a communicator world.
67+
*
68+
* Reduces data in `src` using `opcode` operation and leaves reduced result scattered
69+
* over `dest` in the user-defined rank order in `comm`.
70+
*
71+
* \param src [const GpuArray*] array to be reduced
72+
* \param dest [GpuArray*] array to collect reduce operation scattered result
73+
* \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
74+
* \param comm [gpucomm*] gpu communicator world
75+
* \note Must be called separately for each rank in `comm`.
76+
* \return int error code, GA_NO_ERROR if success
6077
*/
6178
GPUARRAY_PUBLIC int GpuArray_reduce_scatter(const GpuArray* src, GpuArray* dest,
6279
int opcode, gpucomm* comm);
6380

6481
/**
65-
* \brief TODO
66-
* \param array [GpuArray*] TODO
67-
* \param root [int] TODO
68-
* \param comm [gpucomm*] TODO
69-
* \return int TODO
82+
* \brief Broadcast collective operation for ranks in a communicator world.
83+
*
84+
* Copies `array` to all ranks in `comm`.
85+
*
86+
* \param array [GpuArray*] array to broadcast on the root rank, array to receive into on other ranks
87+
* \param root [int] rank in `comm` which broadcasts its array
88+
* \param comm [gpucomm*] gpu communicator world
89+
* \note Must be called separately for each rank in `comm`.
90+
* \return int error code, GA_NO_ERROR if success
7091
*/
7192
GPUARRAY_PUBLIC int GpuArray_broadcast(GpuArray* array, int root, gpucomm* comm);
7293

7394
/**
74-
* \brief TODO
75-
* \param src [const GpuArray*] TODO
76-
* \param dest [GpuArray*] TODO
77-
* \param comm [gpucomm*] TODO
78-
* \return int TODO
95+
* \brief AllGather collective operation for ranks in a communicator world.
96+
*
97+
* Each rank receives all `src` arrays from every rank in the user-defined rank order
98+
* in `comm`.
99+
*
100+
* \param src [const GpuArray*] array to be gathered
101+
* \param dest [GpuArray*] array to receive all gathered arrays from ranks in `comm`
102+
* \param comm [gpucomm*] gpu communicator world
103+
* \note Must be called separately for each rank in `comm`.
104+
* \return int error code, GA_NO_ERROR if success
79105
*/
80106
GPUARRAY_PUBLIC int GpuArray_all_gather(const GpuArray* src, GpuArray* dest,
81107
gpucomm* comm);

src/gpuarray_array_collectives.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
#include "private.h"
77

8+
/**
9+
* \brief Finds total number of elements contained in `array`.
10+
*/
811
static inline int find_total_elems(const GpuArray* array)
912
{
1013
unsigned int i;
@@ -13,6 +16,15 @@ static inline int find_total_elems(const GpuArray* array)
1316
return (int)total_elems;
1417
}
1518

19+
/**
20+
* \brief Checks if `src` and `dest` arrays are appropriate to participate in a
21+
* collective operation.
22+
*
23+
* Checks to see if they contain the appropriate number of elements, if they are
24+
* properly aligned (contiguous) and writeable (for `dest`) and if they contain
25+
* elements of the same datatype. It returns the number of elements of the array with
26+
* the smaller length.
27+
*/
1628
static inline int check_gpuarrays(int times_src, const GpuArray* src, int times_dest,
1729
const GpuArray* dest, int* count)
1830
{

0 commit comments

Comments
 (0)