@@ -17,65 +17,91 @@ extern "C" {
1717************************************************************************************/
1818
1919/**
20- * \brief TODO
21- * \param src [const GpuArray*] TODO
22- * \param opcode [int] TODO
23- * \param root [int] TODO
24- * \param comm [gpucomm*] TODO
25- * \return int TODO
20+ * \brief Reduce collective operation for non-root participant ranks in a
21+ * communicator world.
22+ * \param src [const GpuArray*] array to be reduced
23+ * \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
24+ * \param root [int] rank in `comm` which will collect the result
25+ * \param comm [gpucomm*] GPU communicator world
26+ * \note Root rank of the reduce operation must call \ref GpuArray_reduce.
27+ * \note Must be called separately for each rank in `comm`, except the root rank.
28+ * \return int error code, GA_NO_ERROR on success
2629 */
2730GPUARRAY_PUBLIC int GpuArray_reduce_from (const GpuArray * src , int opcode , int root ,
2831 gpucomm * comm );
2932
3033/**
31- * \brief TODO
32- * \param src [const GpuArray*] TODO
33- * \param dest [GpuArray*] TODO
34- * \param opcode [int] TODO
35- * \param root [int] TODO
36- * \param comm [gpucomm*] TODO
37- * \return int TODO
34+ * \brief Reduce collective operation for ranks in a communicator world.
35+ * \param src [const GpuArray*] array to be reduced
36+ * \param dest [GpuArray*] array to collect the reduce operation result
37+ * \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
38+ * \param root [int] rank in `comm` which will collect the result
39+ * \param comm [gpucomm*] GPU communicator world
40+ * \note Can be used by root and non-root ranks alike.
41+ * \note Non-root ranks can call this with a NULL `dest`.
42+ * \note Must be called separately for each rank in `comm` (non-root ranks can call
43+ * \ref GpuArray_reduce_from instead).
44+ * \return int error code, GA_NO_ERROR on success
3845 */
3946GPUARRAY_PUBLIC int GpuArray_reduce (const GpuArray * src , GpuArray * dest , int opcode ,
4047 int root , gpucomm * comm );
4148
4249/**
43- * \brief TODO
44- * \param src [const GpuArray*] TODO
45- * \param dest [GpuArray*] TODO
46- * \param opcode [int] TODO
47- * \param comm [gpucomm*] TODO
48- * \return int TODO
50+ * \brief AllReduce collective operation for ranks in a communicator world.
51+ *
52+ * Reduces `src` using the `opcode` operation and leaves identical copies of the
53+ * result in `dest` on each rank of `comm`.
54+ *
55+ * \param src [const GpuArray*] array to be reduced
56+ * \param dest [GpuArray*] array to collect the reduce operation result
57+ * \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
58+ * \param comm [gpucomm*] GPU communicator world
59+ * \note Must be called separately for each rank in `comm`.
60+ * \return int error code, GA_NO_ERROR on success
4961 */
5062GPUARRAY_PUBLIC int GpuArray_all_reduce (const GpuArray * src , GpuArray * dest ,
5163 int opcode , gpucomm * comm );
5264
5365/**
54- * \brief TODO
55- * \param src [const GpuArray*] TODO
56- * \param dest [GpuArray*] TODO
57- * \param opcode [int] TODO
58- * \param comm [gpucomm*] TODO
59- * \return int TODO
66+ * \brief ReduceScatter collective operation for ranks in a communicator world.
67+ *
68+ * Reduces data in `src` using the `opcode` operation and leaves the reduced result
69+ * scattered over `dest` in the user-defined rank order in `comm`.
70+ *
71+ * \param src [const GpuArray*] array to be reduced
72+ * \param dest [GpuArray*] array to collect the scattered reduce operation result
73+ * \param opcode [int] reduce operation code, see enum \ref _gpucomm_reduce_ops
74+ * \param comm [gpucomm*] GPU communicator world
75+ * \note Must be called separately for each rank in `comm`.
76+ * \return int error code, GA_NO_ERROR on success
6077 */
6178GPUARRAY_PUBLIC int GpuArray_reduce_scatter (const GpuArray * src , GpuArray * dest ,
6279 int opcode , gpucomm * comm );
6380
6481/**
65- * \brief TODO
66- * \param array [GpuArray*] TODO
67- * \param root [int] TODO
68- * \param comm [gpucomm*] TODO
69- * \return int TODO
82+ * \brief Broadcast collective operation for ranks in a communicator world.
83+ *
84+ * Copies the root rank's `array` to all ranks in `comm`.
85+ *
86+ * \param array [GpuArray*] array to broadcast on the root rank, array to receive otherwise
87+ * \param root [int] rank in `comm` which broadcasts its array
88+ * \param comm [gpucomm*] GPU communicator world
89+ * \note Must be called separately for each rank in `comm`.
90+ * \return int error code, GA_NO_ERROR on success
7091 */
7192GPUARRAY_PUBLIC int GpuArray_broadcast (GpuArray * array , int root , gpucomm * comm );
7293
7394/**
74- * \brief TODO
75- * \param src [const GpuArray*] TODO
76- * \param dest [GpuArray*] TODO
77- * \param comm [gpucomm*] TODO
78- * \return int TODO
95+ * \brief AllGather collective operation for ranks in a communicator world.
96+ *
97+ * Each rank receives the `src` arrays of every rank, in the user-defined rank order
98+ * in `comm`.
99+ *
100+ * \param src [const GpuArray*] array to be gathered
101+ * \param dest [GpuArray*] array to receive all gathered arrays from ranks in `comm`
102+ * \param comm [gpucomm*] GPU communicator world
103+ * \note Must be called separately for each rank in `comm`.
104+ * \return int error code, GA_NO_ERROR on success
79105 */
80106GPUARRAY_PUBLIC int GpuArray_all_gather (const GpuArray * src , GpuArray * dest ,
81107 gpucomm * comm );
0 commit comments