Include more documentation

stijnh · stijnh · commit 83bf7033558c · 2023-03-08T10:52:56.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -9,4 +9,5 @@ build/
 docs/_doxygen
 cmake-build-debug/
 docs/_build
+docs/api/*.rst
 *.swp
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
@@ -2254,7 +2254,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
-PREDEFINED             = "KERNEL_FLOAT_FP16_AVAILABLE=1,KERNEL_FLOAT_BF16_AVAILABLE=1,KERNEL_FLOAT_FP8_AVAILABLE=1,__CUDA_ARCH__=800"
+PREDEFINED             = "KERNEL_FLOAT_FP16_AVAILABLE=1,KERNEL_FLOAT_BF16_AVAILABLE=1,KERNEL_FLOAT_FP8_AVAILABLE=1,__CUDA_ARCH__=800,DOXYGEN_SHOULD_SKIP_THIS=1"
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
diff --git a/docs/api.rst b/docs/api.rst
@@ -3,7 +3,6 @@ API Reference
 .. toctree::
    api/types.rst
    api/primitives.rst
-   api/iteration.rst
    api/unary_operators.rst
    api/binary_operators.rst
    api/reductions.rst
diff --git a/docs/build_api.py b/docs/build_api.py
@@ -88,14 +88,19 @@ def build_index_page(groups):
             "map",
             "reduce",
             "zip",
+            "zip_common",
             "cast",
             "broadcast",
+            "resize",
             "for_each",
         ],
         "Unary Operators": [
             "fill",
+            "fill_like",
             "zeros",
+            "zeros_like",
             "ones",
+            "ones_like",
             "negate",
             "bit_not",
             "logical_not",
diff --git a/include/kernel_float/binops.h b/include/kernel_float/binops.h
@@ -52,6 +52,16 @@ using zip_type = vector_storage<
     result_t<F, vector_value_type<L>, vector_value_type<R>>,
     common_vector_size<L, R>>;
 
+/**
+ * Applies ``fun`` to each pair of two elements from ``left`` and ``right`` and returns a new
+ * vector with the results.
+ *
+ * If ``left`` and ``right`` are not the same size, they will first be broadcast into a
+ * common size using ``resize``.
+ *
+ * Note that this function does **not** cast the input vectors to a common element type. See
+ * ``zip_common`` for that functionality.
+ */
 template<typename F, typename Left, typename Right, typename Output = zip_type<F, Left, Right>>
 KERNEL_FLOAT_INLINE Output zip(F fun, Left&& left, Right&& right) {
     static constexpr size_t N = vector_size<Output>;
@@ -66,6 +76,24 @@ using zip_common_type = vector_storage<
     result_t<F, common_vector_value_type<L, R>, common_vector_value_type<L, R>>,
     common_vector_size<L, R>>;
 
+/**
+ * Applies ``fun`` to each pair of two elements from ``left`` and ``right`` and returns a new
+ * vector with the results.
+ *
+ * If ``left`` and ``right`` are not the same size, they will first be broadcast into a
+ * common size using ``resize``.
+ *
+ * If ``left`` and ``right`` are not of the same type, they will first be cast into a common
+ * data type. For example, zipping ``float`` and ``double`` first casts both vectors to ``double``.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 4> x = {1, 2, 3, 4};
+ * vec<long, 1> y = {8};
+ * vec<long, 4> z = zip_common([](auto a, auto b){ return a + b; }, x, y); // [9, 10, 11, 12]
+ * ```
+ */
 template<
     typename F,
     typename Left,
diff --git a/include/kernel_float/interface.h b/include/kernel_float/interface.h
@@ -98,7 +98,7 @@ struct vector: public Storage {
     }
 
     /**
-     * Returns a reference to the ``index``-th item.
+     * Returns the ``index``-th item.
      */
     template<typename I>
     KERNEL_FLOAT_INLINE value_type operator[](I index) const noexcept {
diff --git a/include/kernel_float/iterate.h b/include/kernel_float/iterate.h
@@ -21,6 +21,13 @@ struct range_helper<F, V, index_sequence<Is...>> {
 /**
  * Generate vector of length ``N`` by applying the given function ``fun`` to
  * each index ``0...N-1``.
+ *
+ * Example
+ * =======
+ * ```
+ * // returns [0, 2, 4]
+ * vector<float, 3> vec = range<3>([](auto i) { return float(i * 2); });
+ * ```
  */
 template<size_t N, typename F, typename T = result_t<F, size_t>>
 KERNEL_FLOAT_INLINE vector_storage<T, N> range(F fun) {
@@ -29,28 +36,57 @@ KERNEL_FLOAT_INLINE vector_storage<T, N> range(F fun) {
 
 /**
  * Generate vector consisting of the numbers ``0...N-1`` of type ``T``.
+ *
+ * Each index is converted from ``size_t`` to ``T`` using ``ops::cast<size_t, T>``.
+ *
+ * Example
+ * =======
+ * ```
+ * // Returns [0, 1, 2]
+ * vector<float, 3> vec = range<float, 3>();
+ * ```
  */
 template<typename T, size_t N>
 KERNEL_FLOAT_INLINE vector_storage<T, N> range() {
-    return range(ops::cast<size_t, T> {});
+    // ``N`` cannot be deduced from the functor argument, so it is passed explicitly.
+    return range<N>(ops::cast<size_t, T> {});
 }
 
+/**
+ * Generate vector having same size and type as ``V``, but filled with the numbers ``0..N-1``.
+ *
+ * The ``vector`` argument is only used to deduce ``V``; its contents are not read.
+ */
+template<typename V>
+KERNEL_FLOAT_INLINE into_vector_type<V> range_like(V&& vector) {
+    return range<vector_value_type<V>, vector_size<V>>();
+}
+
 /**
  * Generate vector of `N` elements of type `T`
  *
+ * ``N`` defaults to 1 when it is not given explicitly, while ``T`` is deduced from ``value``.
+ *
+ * Example
+ * =======
  * ```
- * vector<float, 3> = fill(1.0);
+ * // Returns [1.0, 1.0, 1.0]
+ * vector<float, 3> x = fill<3>(1.0f);
  * ```
  */
 template<size_t N = 1, typename T>
 KERNEL_FLOAT_INLINE vector_storage<T, N> fill(T value) {
     return {value};
 }
 
+/**
+ * Generate vector having same size and type as ``V``, but filled with the given ``value``.
+ *
+ * The ``vector`` argument is only used to deduce ``V``; its contents are not read. The type
+ * ``T`` of ``value`` defaults to the element type of ``V``.
+ */
+template<typename V, typename T = vector_value_type<V>>
+KERNEL_FLOAT_INLINE into_vector_type<V> fill_like(V&& vector, T value) {
+    return {value};
+}
+
 /**
  * Generate vector of ``N`` zeros of type ``T``
  *
+ * Example
+ * =======
  * ```
+ * // Returns [0.0, 0.0, 0.0]
  * vector<float, 3> x = zeros<3, float>();
  * ```
  */
@@ -59,10 +95,22 @@ KERNEL_FLOAT_INLINE vector_storage<T, N> zeros() {
     return fill<N, T>(T(0));
 }
 
+/**
+ * Generate vector having same size and type as ``V``, but filled with zeros.
+ *
+ * Delegates to ``zeros``. The ``vector`` argument is only used to deduce ``V``; its contents are not read.
+ */
+template<typename V>
+KERNEL_FLOAT_INLINE into_vector_type<V> zeros_like(V&& vector) {
+    return zeros<vector_size<V>, vector_value_type<V>>();
+}
+
 /**
  * Generate vector of ``N`` ones of type ``T``
  *
+ * Example
+ * =======
  * ```
+ * // Returns [1.0, 1.0, 1.0]
  * vector<float, 3> x = ones<3, float>();
  * ```
  */
@@ -71,6 +119,15 @@ KERNEL_FLOAT_INLINE vector_storage<T, N> ones() {
     return fill<N, T>(T(1));
 }
 
+/**
+ * Generate vector having same size and type as ``V``, but filled with ones.
+ *
+ * Delegates to ``ones``. The ``vector`` argument is only used to deduce ``V``; its contents are not read.
+ */
+template<typename V>
+KERNEL_FLOAT_INLINE into_vector_type<V> ones_like(V&& vector) {
+    return ones<vector_size<V>, vector_value_type<V>>();
+}
+
 namespace detail {
 template<typename F, typename V, typename Indices = make_index_sequence<vector_size<V>>>
 struct iterate_helper;
@@ -93,6 +150,14 @@ struct iterate_helper<F, V, index_sequence<I, Rest...>> {
 
 /**
  * Apply the function ``fun`` for each element from ``input``.
+ *
+ * Example
+ * =======
+ * ```
+ * for_each(range<int, 3>(), [&](auto i) {
+ *    printf("element: %d\n", i);
+ * });
+ * ```
  */
 template<typename V, typename F>
 KERNEL_FLOAT_INLINE void for_each(V&& input, F fun) {
diff --git a/include/kernel_float/reduce.h b/include/kernel_float/reduce.h
@@ -48,43 +48,116 @@ struct reduce_helper<F, vector_compound<T, N>> {
 };
 }  // namespace detail
 
+/**
+ * Reduce the elements of the given vector ``input`` into a single value using
+ * the function ``fun``. This function should be a binary function that takes
+ * two elements and returns one element. The order in which the elements
+ * are reduced is not specified and depends on the reduction function and
+ * the vector type.
+ *
+ * Note that the function ``fun`` is the first argument and the vector ``input`` the second.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 3> x = {5, 2, 1};
+ * int y = reduce([](int a, int b) { return a + b; }, x); // returns 8
+ * ```
+ */
 template<typename F, typename V>
 KERNEL_FLOAT_INLINE vector_value_type<V> reduce(F fun, V&& input) {
     return detail::reduce_helper<F, into_vector_type<V>>::call(
         fun,
         into_vector(std::forward<V>(input)));
 }
 
+/**
+ * Find the minimum element in the given vector ``input``.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 5> x = {5, 0, 2, 1, 0};
+ * int y = min(x);  // Returns 0
+ * ```
+ */
 template<typename V, typename T = vector_value_type<V>>
 KERNEL_FLOAT_INLINE T min(V&& input) {
     return reduce(ops::min<T> {}, std::forward<V>(input));
 }
 
+/**
+ * Find the maximum element in the given vector ``input``.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 5> x = {5, 0, 2, 1, 0};
+ * int y = max(x);  // Returns 5
+ * ```
+ */
 template<typename V, typename T = vector_value_type<V>>
 KERNEL_FLOAT_INLINE T max(V&& input) {
     return reduce(ops::max<T> {}, std::forward<V>(input));
 }
 
+/**
+ * Sum the items in the given vector ``input``.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 5> x = {5, 0, 2, 1, 0};
+ * int y = sum(x);  // Returns 5+0+2+1+0 = 8
+ * ```
+ */
 template<typename V, typename T = vector_value_type<V>>
 KERNEL_FLOAT_INLINE T sum(V&& input) {
     return reduce(ops::add<T> {}, std::forward<V>(input));
 }
 
+/**
+ * Multiply the items in the given vector ``input``.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 5> x = {5, 0, 2, 1, 0};
+ * int y = product(x);  // Returns 5*0*2*1*0 = 0
+ * ```
+ */
 template<typename V, typename T = vector_value_type<V>>
 KERNEL_FLOAT_INLINE T product(V&& input) {
     return reduce(ops::multiply<T> {}, std::forward<V>(input));
 }
 
+/**
+ * Check if all elements in the given vector ``input`` are non-zero. An element ``v`` is considered
+ * non-zero if ``bool(v)`` returns ``true``.
+ *
+ * Implemented by casting ``input`` to ``bool`` and reducing the result with ``ops::bit_and<bool>``.
+ */
 template<typename V>
 KERNEL_FLOAT_INLINE bool all(V&& input) {
     return reduce(ops::bit_and<bool> {}, cast<bool>(input));
 }
 
+/**
+ * Check if any element in the given vector ``input`` is non-zero. An element ``v`` is considered
+ * non-zero if ``bool(v)`` returns ``true``.
+ *
+ * Implemented by casting ``input`` to ``bool`` and reducing the result with ``ops::bit_or<bool>``.
+ */
 template<typename V>
 KERNEL_FLOAT_INLINE bool any(V&& input) {
     return reduce(ops::bit_or<bool> {}, cast<bool>(input));
 }
 
+/**
+ * Count the number of non-zero items in the given vector ``input``. An element ``v`` is considered
+ * non-zero if ``bool(v)`` returns true.
+ *
+ * Example
+ * =======
+ * ```
+ * vec<int, 5> x = {5, 0, 2, 1, 0};
+ * int y = count(x);  // Returns 3
+ * ```
+ */
 template<typename V>
 KERNEL_FLOAT_INLINE int count(V&& input) {
     return sum(cast<int>(cast<bool>(input)));
diff --git a/include/kernel_float/unops.h b/include/kernel_float/unops.h
diff --git a/single_include/kernel_float.h b/single_include/kernel_float.h
diff --git a/tests/common.h b/tests/common.h

Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,7 @@ struct vector: public Storage {`
`98`	`98`	`}`
`99`	`99`
`100`	`100`	`/**`
`101`		- * Returns a reference to the ``index``-th item.
	`101`	+ * Returns the ``index``-th item.
`102`	`102`	`*/`
`103`	`103`	`template<typename I>`
`104`	`104`	`KERNEL_FLOAT_INLINE value_type operator[](I index) const noexcept {`