diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index b44155acab..6e342ae868 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -108,6 +108,7 @@ "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", + "awkward_ListOffsetArray_reduce_local_outoffsets_64", "awkward_UnionArray_flatten_length", "awkward_UnionArray_flatten_combine", "awkward_UnionArray_nestedfill_tags_index", @@ -123,6 +124,7 @@ "awkward_reduce_sum_int32_bool_64", "awkward_reduce_sum_int64_bool_64", "awkward_reduce_sum_bool", + "awkward_reduce_prod", "awkward_reduce_prod_bool", "awkward_reduce_countnonzero", "awkward_sorting_ranges", @@ -381,6 +383,8 @@ def kernel_signatures_cuda_py(specification): from awkward._connect.cuda import fetch_specialization from awkward._connect.cuda import import_cupy +import math + cupy = import_cupy("Awkward Arrays with CUDA") """ ) diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 12c65708ec..7c97628101 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -424,7 +424,6 @@ def genspectests(specdict): """ ) - f.write("import pytest\nimport kernels\n\n") f.write("import pytest\nimport numpy as np\nimport kernels\n\n") num = 1 if spec.tests == []: @@ -894,6 +893,7 @@ def gencpuunittests(specdict): "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", + "awkward_ListOffsetArray_reduce_local_outoffsets_64", "awkward_UnionArray_flatten_length", "awkward_UnionArray_flatten_combine", "awkward_UnionArray_nestedfill_tags_index", @@ -909,6 +909,7 @@ def gencpuunittests(specdict): "awkward_reduce_sum_int32_bool_64", "awkward_reduce_sum_int64_bool_64", "awkward_reduce_sum_bool", + "awkward_reduce_prod", "awkward_reduce_prod_bool", "awkward_reduce_countnonzero", 
"awkward_sorting_ranges", @@ -959,6 +960,8 @@ def gencudakerneltests(specdict): f.write( "import cupy\n" + "import cupy.testing as cpt\n" + "import numpy as np\n" "import pytest\n\n" "import awkward as ak\n" "import awkward._connect.cuda as ak_cu\n" @@ -1028,7 +1031,7 @@ def gencudakerneltests(specdict): if isinstance(val, list): f.write( " " * 4 - + f"assert cupy.array_equal({arg}[:len(pytest_{arg})], cupy.array(pytest_{arg}))\n" + + f"cpt.assert_allclose({arg}[:len(pytest_{arg})], cupy.array(pytest_{arg}))\n" ) else: f.write(" " * 4 + f"assert {arg} == pytest_{arg}\n") @@ -1088,6 +1091,7 @@ def gencudaunittests(specdict): f.write( "import re\n" "import cupy\n" + "import cupy.testing as cpt\n" "import pytest\n\n" "import awkward as ak\n" "import awkward._connect.cuda as ak_cu\n" @@ -1224,7 +1228,7 @@ def gencudaunittests(specdict): if isinstance(val, list): f.write( " " * 4 - + f"assert cupy.array_equal({arg}[:len(pytest_{arg})], cupy.array(pytest_{arg}))\n" + + f"cpt.assert_allclose({arg}[:len(pytest_{arg})], cupy.array(pytest_{arg}))\n" ) else: f.write(" " * 4 + f"assert {arg} == pytest_{arg}\n") diff --git a/kernel-test-data.json b/kernel-test-data.json index c9d778d692..b13f5fcc29 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -19046,7 +19046,7 @@ }, { "name": "awkward_ListOffsetArray_reduce_local_outoffsets_64", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23128,7 +23128,7 @@ }, { "name": "awkward_reduce_sum_bool", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23251,7 +23251,7 @@ }, { "name": "awkward_reduce_prod_bool", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23543,7 +23543,7 @@ }, { "name": "awkward_reduce_argmax", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23705,7 +23705,7 @@ }, { "name": "awkward_reduce_max", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23893,7 +23893,7 @@ }, { "name": 
"awkward_reduce_countnonzero", - "status": false, + "status": true, "tests": [ { "error": false, @@ -23990,7 +23990,7 @@ }, { "name": "awkward_reduce_count_64", - "status": false, + "status": true, "tests": [ { "error": false, @@ -24034,7 +24034,7 @@ "inputs": { "lenparents": 1696, "outlength": 331, - "parents": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 70, 70, 70, 
70, 70, 70, 70, 70, 70, 70, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 143, 143, 143, 143, 143, 
143, 143, 143, 143, 143, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16] + "parents": [194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 154, 154, 154, 154, 
154, 154, 154, 154, 154, 154, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 
56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }, "outputs": { "toptr": [626, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] @@ -24368,7 +24368,7 @@ }, { "name": "awkward_reduce_sum", - "status": false, + "status": true, "tests": [ { "error": false, @@ -24763,22 +24763,9 @@ ] }, { - "name": "awkward_reduce_prod", - "status": false, + "name": "awkward_reduce_sum_int32_bool_64", + "status": true, "tests": [ - { - "error": false, - "message": "", - "inputs": { - "fromptr": [1, 0, 0, 1, 0, 0], - "lenparents": 6, - "outlength": 4, - "parents": [0, 0, 0, 2, 2, 3] - }, - "outputs": { - "toptr": [0, 1, 0, 0] - } - }, { "error": false, "message": "", @@ -24796,208 +24783,195 @@ "error": false, "message": "", "inputs": { - "fromptr": [0, 1, 2, 3, 4, 5], - "lenparents": 6, - "outlength": 4, - "parents": [0, 0, 0, 2, 2, 3] + "fromptr": [0], + "lenparents": 1, + "outlength": 1, + "parents": [0] }, "outputs": { - "toptr": [0, 1, 12, 5] + "toptr": [0] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 31, 101, 3, 59, 37, 103, 5, 61, 41, 107, 7, 67, 43, 109, 11, 71, 47, 113], - "lenparents": 20, - "outlength": 15, - "parents": [0, 0, 10, 10, 1, 1, 11, 11, 2, 2, 12, 12, 3, 3, 13, 13, 4, 4, 14, 14] + "fromptr": [0, 5, 20, 1, 6, 21, 2, 7, 22, 3, 8, 23, 4, 9, 24], + "lenparents": 15, + "outlength": 10, + "parents": [0, 5, 5, 1, 6, 6, 2, 7, 7, 3, 8, 8, 4, 9, 9] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 1, 1, 1, 1, 1, 3131, 3811, 4387, 4687, 5311] + "toptr": [0, 1, 1, 1, 1, 2, 2, 2, 2, 2] } }, { "error": false, "message": "", 
"inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 47, 113], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 14, 14] + "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23], + "lenparents": 9, + "outlength": 6, + "parents": [0, 0, 0, 2, 2, 3, 4, 4, 5] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 1, 3131, 3811, 4387, 4687, 5311] + "toptr": [3, 0, 2, 1, 2, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 11, 71, 29, 97, 47], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 4, 4, 9, 9, 14] + "fromptr": [1, 0, 0, 1, 0, 0], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 43, 47] + "toptr": [1, 0, 1, 0] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 97], - "lenparents": 28, - "outlength": 14, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9] + "fromptr": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24], + "lenparents": 15, + "outlength": 3, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687] + "toptr": [4, 5, 5] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 97, 47], - "lenparents": 29, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 
7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14] + "fromptr": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + "lenparents": 30, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 47] + "toptr": [4, 5, 5, 5, 5, 5] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 97, 47, 113], - "lenparents": 30, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14, 14] + "fromptr": [0, 1, 3, 4, 5, 6], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 1, 3, 3, 3] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + "toptr": [1, 1, 0, 3] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 47], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 14] + "fromptr": [0, 5, 10, 15, 25, 1, 11, 16, 26, 2, 12, 17, 27, 8, 18, 28, 4, 9, 14, 29], + "lenparents": 20, + "outlength": 10, + "parents": [0, 0, 0, 5, 5, 1, 1, 6, 6, 2, 2, 7, 7, 3, 8, 8, 4, 4, 4, 9] }, "outputs": { - "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 29, 3131, 3811, 4387, 4687, 47] + "toptr": [2, 2, 2, 1, 3, 2, 2, 2, 2, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [0], - "lenparents": 1, - "outlength": 3, - "parents": [2] + "fromptr": [15, 20, 25, 16, 21, 26, 17, 22, 27, 18, 23, 28, 19, 24, 29], + "lenparents": 15, + "outlength": 15, + "parents": [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 
9, 14] }, "outputs": { - "toptr": [1, 1, 0] + "toptr": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [101, 103, 107, 109, 113, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 2, 3, 5, 7, 11], + "fromptr": [0, 15, 5, 10, 25, 1, 16, 11, 26, 2, 17, 12, 27, 18, 8, 28, 4, 9, 14, 29], "lenparents": 20, - "outlength": 6, - "parents": [0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5] + "outlength": 15, + "parents": [0, 0, 5, 10, 10, 1, 1, 11, 11, 2, 2, 12, 12, 3, 8, 13, 4, 9, 14, 14] }, "outputs": { - "toptr": [13710311357, 1, 907383479, 95041567, 1, 2310] + "toptr": [1, 2, 2, 1, 1, 1, 0, 0, 1, 1, 2, 2, 2, 1, 2] } }, { "error": false, "message": "", "inputs": { - "fromptr": [101, 103, 107, 109, 113, 73, 79, 83, 89, 97, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 13, 17, 19, 23, 29, 2, 3, 5, 7, 11], + "fromptr": [0, 15, 5, 20, 10, 25, 1, 16, 6, 21, 11, 26, 2, 17, 7, 22, 12, 27, 3, 18, 8, 23, 13, 28, 4, 19, 9, 24, 14, 29], "lenparents": 30, - "outlength": 6, - "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14, 14] }, "outputs": { - "toptr": [13710311357, 4132280413, 907383479, 95041567, 2800733, 2310] + "toptr": [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] } }, { "error": false, "message": "", "inputs": { - "fromptr": [101, 103, 107, 109, 113, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 2, 3, 5, 7, 11], - "lenparents": 20, - "outlength": 4, - "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3] + "fromptr": [0, 5, 10, 15, 20, 25, 1, 6, 11, 16, 21, 26, 2, 7, 12, 17, 22, 27, 3, 8, 13, 18, 23, 28, 4, 9, 14, 19, 24, 29], + "lenparents": 30, + "outlength": 10, + "parents": [0, 0, 0, 5, 5, 5, 1, 1, 1, 6, 6, 6, 2, 2, 2, 7, 7, 7, 3, 3, 3, 8, 8, 8, 4, 4, 4, 9, 9, 9] }, "outputs": { - "toptr": [13710311357, 907383479, 95041567, 
2310] + "toptr": [2, 3, 3, 3, 3, 3, 3, 3, 3, 3] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 7, 17, 29, 3, 11, 19, 31, 5, 13, 23, 37], + "fromptr": [1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0], "lenparents": 12, - "outlength": 6, - "parents": [0, 0, 3, 3, 1, 1, 4, 4, 2, 2, 5, 5] - }, - "outputs": { - "toptr": [14, 33, 65, 493, 589, 851] - } - }, - { - "error": false, - "message": "", - "inputs": { - "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + "outlength": 3, + "parents": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] }, "outputs": { - "toptr": [159, 295, 427, 67, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + "toptr": [4, 4, 0] } }, { "error": false, "message": "", "inputs": { - "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 11, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], - "lenparents": 29, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + "fromptr": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] }, "outputs": { - "toptr": [159, 295, 427, 737, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + "toptr": [5, 5] } }, { "error": false, "message": "", "inputs": { - "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 11, 67, 23, 89, 43, 109, 71, 97, 47, 113], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 9, 14, 14] + "fromptr": [1, 2, 3, 4, 5, 6], + "lenparents": 6, + "outlength": 1, + "parents": [0, 0, 0, 0, 0, 0] }, "outputs": { - "toptr": [159, 295, 427, 737, 71, 949, 1343, 1577, 2047, 97, 3131, 
3811, 4387, 4687, 5311] + "toptr": [6] } }, { @@ -25010,119 +24984,937 @@ "parents": [0, 0, 0, 6, 6, 1, 1, 7, 2] }, "outputs": { - "toptr": [182, 33, 5, 1, 1, 1, 391, 19] + "toptr": [3, 2, 1, 0, 0, 0, 2, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37], + "fromptr": [1, 16, 0, 2, 32, 0, 4, 64, 0, 8, 128, 0], "lenparents": 12, - "outlength": 3, - "parents": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] + "outlength": 4, + "parents": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] }, "outputs": { - "toptr": [210, 46189, 765049] + "toptr": [2, 2, 2, 2] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 101, 103, 107, 109, 113], - "lenparents": 20, - "outlength": 6, - "parents": [0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5] + "fromptr": [0, 1, 2, 3, 4, 5], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] }, "outputs": { - "toptr": [2310, 1, 95041567, 907383479, 1, 13710311357] + "toptr": [2, 0, 2, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113], - "lenparents": 30, - "outlength": 6, - "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + "fromptr": [0, 4, 1, 3, 5, 6], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 1, 1, 1, 3] }, "outputs": { - "toptr": [2310, 2800733, 95041567, 907383479, 4132280413, 13710311357] + "toptr": [1, 3, 0, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 101, 103, 107, 109, 113], - "lenparents": 20, - "outlength": 4, - "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3] + "fromptr": [1, 4, 9, 16, 25, 1, 4, 9, 16, 25], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] }, 
"outputs": { - "toptr": [2310, 95041567, 907383479, 13710311357] + "toptr": [5, 5] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 7, 3, 11, 5], - "lenparents": 5, - "outlength": 8, - "parents": [0, 6, 1, 7, 2] + "fromptr": [1, 4, 9, 16, 26, 1, 4, 10, 16, 24], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] }, "outputs": { - "toptr": [2, 3, 5, 1, 1, 1, 7, 11] + "toptr": [5, 5] } }, { "error": false, "message": "", "inputs": { - "fromptr": [5, 53, 13, 73, 31, 101, 7, 59, 17, 79, 37, 103, 11, 61, 19, 83, 41, 107, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], - "lenparents": 28, - "outlength": 15, - "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + "fromptr": [0, 5, 20, 1, 6, 21, 2, 7, 22, 3, 8, 23, 4, 9, 24], + "lenparents": 15, + "outlength": 10, + "parents": [0, 0, 5, 1, 1, 6, 2, 2, 7, 3, 3, 8, 4, 4, 9] }, "outputs": { - "toptr": [265, 413, 671, 67, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + "toptr": [1, 2, 2, 2, 2, 1, 1, 1, 1, 1] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37], - "lenparents": 12, - "outlength": 8, - "parents": [0, 0, 0, 3, 3, 3, 4, 4, 4, 7, 7, 7] + "fromptr": [15, 20, 25, 16, 21, 26, 17, 22, 27, 18, 23, 28, 19, 24, 29], + "lenparents": 15, + "outlength": 5, + "parents": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4] }, "outputs": { - "toptr": [30, 1, 1, 1001, 7429, 1, 1, 33263] + "toptr": [3, 3, 3, 3, 3] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 13], - "lenparents": 6, - "outlength": 4, - "parents": [0, 0, 0, 2, 2, 3] + "fromptr": [1, 2, 3], + "lenparents": 3, + "outlength": 1, + "parents": [0, 0, 0] }, "outputs": { - "toptr": [30, 1, 77, 13] + "toptr": [3] } }, { "error": false, "message": "", "inputs": { - "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23], - "lenparents": 9, + "fromptr": [0, 1, 2, 4, 5, 8, 9, 10, 11, 12, 14, 
15, 16, 17, 18, 25, 26, 27, 28, 29], + "lenparents": 20, + "outlength": 6, + "parents": [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [3, 3, 4, 4, 0, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 2, 4, 5, 5], + "lenparents": 5, + "outlength": 3, + "parents": [0, 0, 0, 2, 2] + }, + "outputs": { + "toptr": [3, 0, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + "lenparents": 15, + "outlength": 3, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] + }, + "outputs": { + "toptr": [5, 5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [4, 1, 0, 1, 4, 5, 1, 0, 1, 3], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [4, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [4, 1, 0, 1, 4, 4, 1, 0, 1, 4], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [4, 4] + } + } + ] + }, + { + "name": "awkward_reduce_sum_int64_bool_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromptr": [], + "lenparents": 0, + "outlength": 0, + "parents": [] + }, + "outputs": { + "toptr": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0], + "lenparents": 1, + "outlength": 1, + "parents": [0] + }, + "outputs": { + "toptr": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 5, 20, 1, 6, 21, 2, 7, 22, 3, 8, 23, 4, 9, 24], + "lenparents": 15, + "outlength": 10, + "parents": [0, 5, 5, 1, 6, 6, 2, 7, 7, 3, 8, 8, 4, 9, 9] + }, + "outputs": { + "toptr": [0, 1, 1, 1, 1, 2, 2, 2, 2, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23], + "lenparents": 9, + "outlength": 6, + "parents": 
[0, 0, 0, 2, 2, 3, 4, 4, 5] + }, + "outputs": { + "toptr": [3, 0, 2, 1, 2, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 0, 0, 1, 0, 0], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] + }, + "outputs": { + "toptr": [1, 0, 1, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24], + "lenparents": 15, + "outlength": 3, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] + }, + "outputs": { + "toptr": [4, 5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + "lenparents": 30, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [4, 5, 5, 5, 5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 3, 4, 5, 6], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 1, 3, 3, 3] + }, + "outputs": { + "toptr": [1, 1, 0, 3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 5, 10, 15, 25, 1, 11, 16, 26, 2, 12, 17, 27, 8, 18, 28, 4, 9, 14, 29], + "lenparents": 20, + "outlength": 10, + "parents": [0, 0, 0, 5, 5, 1, 1, 6, 6, 2, 2, 7, 7, 3, 8, 8, 4, 4, 4, 9] + }, + "outputs": { + "toptr": [2, 2, 2, 1, 3, 2, 2, 2, 2, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [15, 20, 25, 16, 21, 26, 17, 22, 27, 18, 23, 28, 19, 24, 29], + "lenparents": 15, + "outlength": 15, + "parents": [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14] + }, + "outputs": { + "toptr": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 15, 5, 10, 25, 1, 16, 11, 26, 2, 17, 12, 27, 18, 8, 28, 4, 9, 14, 29], + "lenparents": 20, + "outlength": 15, + "parents": [0, 0, 5, 10, 10, 
1, 1, 11, 11, 2, 2, 12, 12, 3, 8, 13, 4, 9, 14, 14] + }, + "outputs": { + "toptr": [1, 2, 2, 1, 1, 1, 0, 0, 1, 1, 2, 2, 2, 1, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 15, 5, 20, 10, 25, 1, 16, 6, 21, 11, 26, 2, 17, 7, 22, 12, 27, 3, 18, 8, 23, 13, 28, 4, 19, 9, 24, 14, 29], + "lenparents": 30, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14, 14] + }, + "outputs": { + "toptr": [1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 5, 10, 15, 20, 25, 1, 6, 11, 16, 21, 26, 2, 7, 12, 17, 22, 27, 3, 8, 13, 18, 23, 28, 4, 9, 14, 19, 24, 29], + "lenparents": 30, + "outlength": 10, + "parents": [0, 0, 0, 5, 5, 5, 1, 1, 1, 6, 6, 6, 2, 2, 2, 7, 7, 7, 3, 3, 3, 8, 8, 8, 4, 4, 4, 9, 9, 9] + }, + "outputs": { + "toptr": [2, 3, 3, 3, 3, 3, 3, 3, 3, 3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0], + "lenparents": 12, + "outlength": 3, + "parents": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] + }, + "outputs": { + "toptr": [4, 4, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 2, 3, 4, 5, 6], + "lenparents": 6, + "outlength": 1, + "parents": [0, 0, 0, 0, 0, 0] + }, + "outputs": { + "toptr": [6] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 7, 13, 17, 23, 3, 11, 19, 5], + "lenparents": 9, + "outlength": 8, + "parents": [0, 0, 0, 6, 6, 1, 1, 7, 2] + }, + "outputs": { + "toptr": [3, 2, 1, 0, 0, 0, 2, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 16, 0, 2, 32, 0, 4, 64, 0, 8, 128, 0], + "lenparents": 12, + 
"outlength": 4, + "parents": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] + }, + "outputs": { + "toptr": [2, 2, 2, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 2, 3, 4, 5], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] + }, + "outputs": { + "toptr": [2, 0, 2, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 4, 1, 3, 5, 6], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 1, 1, 1, 3] + }, + "outputs": { + "toptr": [1, 3, 0, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 4, 9, 16, 25, 1, 4, 9, 16, 25], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 4, 9, 16, 26, 1, 4, 10, 16, 24], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 5, 20, 1, 6, 21, 2, 7, 22, 3, 8, 23, 4, 9, 24], + "lenparents": 15, + "outlength": 10, + "parents": [0, 0, 5, 1, 1, 6, 2, 2, 7, 3, 3, 8, 4, 4, 9] + }, + "outputs": { + "toptr": [1, 2, 2, 2, 2, 1, 1, 1, 1, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [15, 20, 25, 16, 21, 26, 17, 22, 27, 18, 23, 28, 19, 24, 29], + "lenparents": 15, + "outlength": 5, + "parents": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4] + }, + "outputs": { + "toptr": [3, 3, 3, 3, 3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 2, 3], + "lenparents": 3, + "outlength": 1, + "parents": [0, 0, 0] + }, + "outputs": { + "toptr": [3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 2, 4, 5, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 25, 26, 27, 28, 29], + "lenparents": 20, + "outlength": 6, + "parents": [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 5] 
+ }, + "outputs": { + "toptr": [3, 3, 4, 4, 0, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 2, 4, 5, 5], + "lenparents": 5, + "outlength": 3, + "parents": [0, 0, 0, 2, 2] + }, + "outputs": { + "toptr": [3, 0, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + "lenparents": 15, + "outlength": 3, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] + }, + "outputs": { + "toptr": [5, 5, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [4, 1, 0, 1, 4, 5, 1, 0, 1, 3], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [4, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [4, 1, 0, 1, 4, 4, 1, 0, 1, 4], + "lenparents": 10, + "outlength": 2, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + }, + "outputs": { + "toptr": [4, 4] + } + } + ] + }, + { + "name": "awkward_reduce_prod", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromptr": [1, 0, 0, 1, 0, 0], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] + }, + "outputs": { + "toptr": [0, 1, 0, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [], + "lenparents": 0, + "outlength": 0, + "parents": [] + }, + "outputs": { + "toptr": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0, 1, 2, 3, 4, 5], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] + }, + "outputs": { + "toptr": [0, 1, 12, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 31, 101, 3, 59, 37, 103, 5, 61, 41, 107, 7, 67, 43, 109, 11, 71, 47, 113], + "lenparents": 20, + "outlength": 15, + "parents": [0, 0, 10, 10, 1, 1, 11, 11, 2, 2, 12, 12, 3, 3, 13, 13, 4, 4, 14, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 1, 1, 1, 1, 1, 3131, 
3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 47, 113], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 14, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 1, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 11, 71, 29, 97, 47], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 4, 4, 9, 9, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 43, 47] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 97], + "lenparents": 28, + "outlength": 14, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 97, 47], + "lenparents": 29, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 47] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 
29, 97, 47, 113], + "lenparents": 30, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 9, 14, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 53, 13, 73, 31, 101, 3, 59, 17, 79, 37, 103, 5, 61, 19, 83, 41, 107, 7, 67, 23, 89, 43, 109, 11, 71, 29, 47], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 4, 9, 14] + }, + "outputs": { + "toptr": [106, 177, 305, 469, 781, 949, 1343, 1577, 2047, 29, 3131, 3811, 4387, 4687, 47] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [0], + "lenparents": 1, + "outlength": 3, + "parents": [2] + }, + "outputs": { + "toptr": [1, 1, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [101, 103, 107, 109, 113, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 2, 3, 5, 7, 11], + "lenparents": 20, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [13710311357, 1, 907383479, 95041567, 1, 2310] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [101, 103, 107, 109, 113, 73, 79, 83, 89, 97, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 13, 17, 19, 23, 29, 2, 3, 5, 7, 11], + "lenparents": 30, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [13710311357, 4132280413, 907383479, 95041567, 2800733, 2310] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [101, 103, 107, 109, 113, 53, 59, 61, 67, 71, 31, 37, 41, 43, 47, 2, 3, 5, 7, 11], + "lenparents": 20, + "outlength": 4, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3] + }, + "outputs": { + "toptr": 
[13710311357, 907383479, 95041567, 2310] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 7, 17, 29, 3, 11, 19, 31, 5, 13, 23, 37], + "lenparents": 12, + "outlength": 6, + "parents": [0, 0, 3, 3, 1, 1, 4, 4, 2, 2, 5, 5] + }, + "outputs": { + "toptr": [14, 33, 65, 493, 589, 851] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + }, + "outputs": { + "toptr": [159, 295, 427, 67, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 11, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], + "lenparents": 29, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + }, + "outputs": { + "toptr": [159, 295, 427, 737, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [3, 53, 13, 73, 31, 101, 5, 59, 17, 79, 37, 103, 7, 61, 19, 83, 41, 107, 11, 67, 23, 89, 43, 109, 71, 97, 47, 113], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 3, 8, 8, 13, 13, 4, 9, 14, 14] + }, + "outputs": { + "toptr": [159, 295, 427, 737, 71, 949, 1343, 1577, 2047, 97, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 7, 13, 17, 23, 3, 11, 19, 5], + "lenparents": 9, + "outlength": 8, + "parents": [0, 0, 0, 6, 6, 1, 1, 7, 2] + }, + "outputs": { + "toptr": [182, 33, 5, 1, 1, 1, 391, 19] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37], + "lenparents": 12, + "outlength": 3, + "parents": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] + }, + "outputs": { + "toptr": [210, 46189, 765049] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 101, 103, 107, 109, 113], + "lenparents": 20, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [2310, 1, 95041567, 907383479, 1, 13710311357] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113], + "lenparents": 30, + "outlength": 6, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5] + }, + "outputs": { + "toptr": [2310, 2800733, 95041567, 907383479, 4132280413, 13710311357] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 101, 103, 107, 109, 113], + "lenparents": 20, + "outlength": 4, + "parents": [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3] + }, + "outputs": { + "toptr": [2310, 95041567, 907383479, 13710311357] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 7, 3, 11, 5], + "lenparents": 5, + "outlength": 8, + "parents": [0, 6, 1, 7, 2] + }, + "outputs": { + "toptr": [2, 3, 5, 1, 1, 1, 7, 11] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [5, 53, 13, 73, 31, 101, 7, 59, 17, 79, 37, 103, 11, 61, 19, 83, 41, 107, 67, 23, 89, 43, 109, 71, 29, 97, 47, 113], + "lenparents": 28, + "outlength": 15, + "parents": [0, 0, 5, 5, 10, 10, 1, 1, 6, 6, 11, 11, 2, 2, 7, 7, 12, 12, 3, 8, 8, 13, 13, 4, 9, 9, 14, 14] + }, + "outputs": { + "toptr": [265, 413, 671, 67, 71, 949, 1343, 1577, 2047, 2813, 3131, 3811, 4387, 4687, 5311] + } + }, + { + "error": 
false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37], + "lenparents": 12, + "outlength": 8, + "parents": [0, 0, 0, 3, 3, 3, 4, 4, 4, 7, 7, 7] + }, + "outputs": { + "toptr": [30, 1, 1, 1001, 7429, 1, 1, 33263] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 13], + "lenparents": 6, + "outlength": 4, + "parents": [0, 0, 0, 2, 2, 3] + }, + "outputs": { + "toptr": [30, 1, 77, 13] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromptr": [2, 3, 5, 7, 11, 13, 17, 19, 23], + "lenparents": 9, "outlength": 6, "parents": [0, 0, 0, 2, 2, 3, 4, 4, 5] }, @@ -25537,7 +26329,7 @@ }, { "name": "awkward_reduce_min", - "status": false, + "status": true, "tests": [ { "error": false, @@ -25557,7 +26349,7 @@ "error": false, "message": "", "inputs": { - "fromptr": [0, 4, 1, 3, 5, 6], + "fromptr": [0, 4, 1, 1, 5, 6], "identity": 9223372036854775807, "lenparents": 6, "outlength": 4, @@ -25739,7 +26531,7 @@ }, { "name": "awkward_reduce_argmin", - "status": false, + "status": true, "tests": [ { "error": false, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 494fca77d6..c8b2db7ff3 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -108,6 +108,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_rpad_axis1", "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", + "awkward_ListOffsetArray_reduce_local_outoffsets_64", "awkward_UnionArray_regular_index", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_ListOffsetArray_rpad_axis1", @@ -119,6 +120,7 @@ def fetch_template_specializations(kernel_dict): "awkward_reduce_sum_int32_bool_64", "awkward_reduce_sum_int64_bool_64", "awkward_reduce_sum_bool", + "awkward_reduce_prod", "awkward_reduce_prod_bool", "awkward_reduce_argmax", "awkward_reduce_argmin", diff --git 
a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_outoffsets_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_outoffsets_64.cu new file mode 100644 index 0000000000..42e8119d46 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_outoffsets_64.cu @@ -0,0 +1,100 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (outoffsets, parents, lenparents, outlength, invocation_index, err_code) = args +// if block[0] > 0: +// segment = math.floor((outlength + block[0] - 1) / block[0]) +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=cupy.int64) +// scan_in_array = cupy.zeros(outlength, dtype=cupy.uint64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_local_outoffsets_64_a", cupy.dtype(outoffsets.dtype).type, parents.dtype]))((grid_size,), block, (outoffsets, parents, lenparents, outlength, scan_in_array, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_local_outoffsets_64_b", cupy.dtype(outoffsets.dtype).type, parents.dtype]))((grid_size,), block, (outoffsets, parents, lenparents, outlength, scan_in_array, temp, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_local_outoffsets_64_c", cupy.dtype(outoffsets.dtype).type, parents.dtype]))((grid_size,), block, (outoffsets, parents, lenparents, outlength, scan_in_array, temp, invocation_index, err_code)) +// out["awkward_ListOffsetArray_reduce_local_outoffsets_64_a", {dtype_specializations}] = None +// out["awkward_ListOffsetArray_reduce_local_outoffsets_64_b", {dtype_specializations}] = None +// 
out["awkward_ListOffsetArray_reduce_local_outoffsets_64_c", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListOffsetArray_reduce_local_outoffsets_64_a( + T* outoffsets, + const C* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* scan_in_array, + int64_t* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < outlength) { + outoffsets[thread_id] = 0; + } + } +} + +template +__global__ void +awkward_ListOffsetArray_reduce_local_outoffsets_64_b( + T* outoffsets, + const C* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* scan_in_array, + int64_t* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = 1; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + int64_t val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } + + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&scan_in_array[parent], temp[thread_id]); + } + } + } +} + +template +__global__ void +awkward_ListOffsetArray_reduce_local_outoffsets_64_c( + T* outoffsets, + const C* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* scan_in_array, + int64_t* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + outoffsets[0] = 0; + + if (thread_id < outlength) { + outoffsets[thread_id + 1] = 
(T)(scan_in_array[thread_id]); + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu index 555420c3f8..df515f05a4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu @@ -3,10 +3,18 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmax_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmax_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// atomic_toptr = cupy.array(toptr, dtype=cupy.uint64) +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmax_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmax_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmax_c", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, 
atomic_toptr, temp, invocation_index, err_code)) // out["awkward_reduce_argmax_a", {dtype_specializations}] = None // out["awkward_reduce_argmax_b", {dtype_specializations}] = None +// out["awkward_reduce_argmax_c", {dtype_specializations}] = None // END PYTHON template @@ -17,12 +25,15 @@ awkward_reduce_argmax_a( const U* parents, int64_t lenparents, int64_t outlength, + uint64_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < outlength) { - toptr[thread_id] = -1; + atomic_toptr[thread_id] = -1; } } } @@ -35,17 +46,57 @@ awkward_reduce_argmax_b( const U* parents, int64_t lenparents, int64_t outlength, + uint64_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = thread_id; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + int64_t index = -1; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + index = temp[thread_id - stride]; + } + if (index != -1 && (temp[thread_id] == -1 || fromptr[index] > fromptr[temp[thread_id]] || + (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) { + temp[thread_id] = index; + } + __syncthreads(); + } if (thread_id < lenparents) { int64_t parent = parents[thread_id]; - if (toptr[parent] == -1 || - (fromptr[thread_id] > (fromptr[toptr[parent]]))) { - toptr[parent] = thread_id; // we need the last parent filled, thread random order problem, find max arg at that index + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicExch(&atomic_toptr[parent], temp[thread_id]); } } } } + +template 
+__global__ void +awkward_reduce_argmax_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < outlength) { + toptr[thread_id] = (T)(atomic_toptr[thread_id]); + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu index 282ebd11cc..af1d3fd93d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu @@ -3,10 +3,18 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmin_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmin_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// atomic_toptr = cupy.array(toptr, dtype=cupy.uint64) +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmin_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmin_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, 
parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_argmin_c", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) // out["awkward_reduce_argmin_a", {dtype_specializations}] = None // out["awkward_reduce_argmin_b", {dtype_specializations}] = None +// out["awkward_reduce_argmin_c", {dtype_specializations}] = None // END PYTHON template @@ -17,12 +25,15 @@ awkward_reduce_argmin_a( const U* parents, int64_t lenparents, int64_t outlength, + uint64_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < outlength) { - toptr[thread_id] = -1; + atomic_toptr[thread_id] = -1; } } } @@ -35,17 +46,57 @@ awkward_reduce_argmin_b( const U* parents, int64_t lenparents, int64_t outlength, + uint64_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = thread_id; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + int64_t index = -1; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + index = temp[thread_id - stride]; + } + if (index != -1 && (temp[thread_id] == -1 || fromptr[index] < fromptr[temp[thread_id]] || + (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) { + temp[thread_id] = index; + } + __syncthreads(); + } if (thread_id < lenparents) { int64_t parent = parents[thread_id]; - if 
(toptr[parent] == -1 || - (fromptr[thread_id] < (fromptr[toptr[parent]]))) { - toptr[parent] = thread_id; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicExch(&atomic_toptr[parent], temp[thread_id]); } } } } + +template +__global__ void +awkward_reduce_argmin_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < outlength) { + toptr[thread_id] = (T)(atomic_toptr[thread_id]); + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu index 311f04012b..9c55e69600 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu @@ -2,68 +2,71 @@ // BEGIN PYTHON // def f(grid, block, args): -// (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_count_64_a", toptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_count_64_b", toptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_count_64_c", toptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// (toptr, parents, lenparents, outlength, invocation_index, err_code) 
= args +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_count_64_a", cupy.dtype(toptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, parents, lenparents, outlength, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_count_64_b", cupy.dtype(toptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, parents, lenparents, outlength, temp, invocation_index, err_code)) // out["awkward_reduce_count_64_a", {dtype_specializations}] = None // out["awkward_reduce_count_64_b", {dtype_specializations}] = None -// out["awkward_reduce_count_64_c", {dtype_specializations}] = None // END PYTHON -template +template __global__ void awkward_reduce_count_64_a( T* toptr, - const bool* fromptr, - const U* parents, + const C* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = 0; + toptr[thread_id] = 0; } } } -template +template __global__ void awkward_reduce_count_64_b( T* toptr, - const bool* fromptr, - const U* parents, + const C* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], (uint64_t)1); + temp[thread_id] = 1; } - } -} + __syncthreads(); -template -__global__ void -awkward_reduce_count_64_c(T* toptr, - const bool* fromptr, - const U* 
parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + int64_t val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } + + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu index 6b07dfa208..ffcb0b8bd3 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu @@ -3,13 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_countnonzero_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_countnonzero_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_countnonzero_c", toptr.dtype, 
fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_countnonzero_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_countnonzero_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) // out["awkward_reduce_countnonzero_a", {dtype_specializations}] = None // out["awkward_reduce_countnonzero_b", {dtype_specializations}] = None -// out["awkward_reduce_countnonzero_c", {dtype_specializations}] = None // END PYTHON template @@ -20,14 +22,14 @@ awkward_reduce_countnonzero_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = 0; + toptr[thread_id] = 0; } } } @@ -40,34 +42,33 @@ awkward_reduce_countnonzero_b( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], - (uint64_t)(fromptr[thread_id] != 0)); + temp[thread_id] = 
(fromptr[thread_id] != 0) ? 1 : 0; } - } -} + __syncthreads(); -template -__global__ void -awkward_reduce_countnonzero_c(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + int64_t val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu index 3c20b653ac..6a3fe66055 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu @@ -3,8 +3,13 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code) = args -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] 
- 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.full(lenparents, identity, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // out["awkward_reduce_max_a", {dtype_specializations}] = None // out["awkward_reduce_max_b", {dtype_specializations}] = None // END PYTHON @@ -18,6 +23,7 @@ awkward_reduce_max_a( int64_t lenparents, int64_t outlength, T identity, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { @@ -37,15 +43,34 @@ awkward_reduce_max_b( int64_t lenparents, int64_t outlength, T identity, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = fromptr[thread_id]; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = identity; + + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] = val > temp[thread_id] ? val : temp[thread_id]; + __syncthreads(); + } if (thread_id < lenparents) { - C x = fromptr[thread_id]; - toptr[parents[thread_id]] = - (x > toptr[parents[thread_id]] ? 
x : toptr[parents[thread_id]]); + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicMax(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu index ae0e2dcb61..12a72b338f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu @@ -3,8 +3,13 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code) = args -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, identity, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.full(lenparents, identity, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // out["awkward_reduce_min_a", {dtype_specializations}] = None 
// out["awkward_reduce_min_b", {dtype_specializations}] = None // END PYTHON @@ -18,10 +23,12 @@ awkward_reduce_min_a( int64_t lenparents, int64_t outlength, T identity, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < outlength) { toptr[thread_id] = identity; } @@ -37,15 +44,33 @@ awkward_reduce_min_b( int64_t lenparents, int64_t outlength, T identity, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = fromptr[thread_id]; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = identity; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] = val < temp[thread_id] ? val : temp[thread_id]; + __syncthreads(); + } if (thread_id < lenparents) { - C x = fromptr[thread_id]; - toptr[parents[thread_id]] = - (x < toptr[parents[thread_id]] ? 
x : toptr[parents[thread_id]]); + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicMin(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod.cu new file mode 100644 index 0000000000..9248e20efc --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod.cu @@ -0,0 +1,100 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// atomic_toptr = cupy.array(toptr, dtype=toptr.dtype) +// temp = cupy.ones(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_c", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// out["awkward_reduce_prod_a", {dtype_specializations}] = None +// out["awkward_reduce_prod_b", {dtype_specializations}] = None +// out["awkward_reduce_prod_c", 
{dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_reduce_prod_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < outlength) { + atomic_toptr[thread_id] = 1; + } + } +} + +template +__global__ void +awkward_reduce_prod_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; + + if (thread_id < lenparents) { + temp[thread_id] = fromptr[thread_id]; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = 1; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] *= val; + __syncthreads(); + } + + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicMul(&atomic_toptr[parent], temp[thread_id]); + } + } + } +} + +template +__global__ void +awkward_reduce_prod_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < outlength) { + toptr[thread_id] = (T)(atomic_toptr[thread_id]); + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu 
index 74843af6c0..9d85b366c7 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu @@ -3,10 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// atomic_toptr = cupy.array(toptr, dtype=cupy.uint32) +// temp = cupy.ones(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_a", bool_, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_b", bool_, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_prod_bool_c", bool_, 
cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) // out["awkward_reduce_prod_bool_a", {dtype_specializations}] = None // out["awkward_reduce_prod_bool_b", {dtype_specializations}] = None // out["awkward_reduce_prod_bool_c", {dtype_specializations}] = None @@ -20,14 +25,15 @@ awkward_reduce_prod_bool_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + uint32_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = true; + atomic_toptr[thread_id] = 1; } } } @@ -40,34 +46,55 @@ awkward_reduce_prod_bool_b( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + uint32_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicAnd(atomicAdd_toptr + parents[thread_id], - (uint64_t)(fromptr[thread_id] != 0)); + temp[thread_id] = fromptr[thread_id]; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = 1; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] &= (val != 0); + __syncthreads(); + } + + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAnd(&atomic_toptr[parent], temp[thread_id]); + } } } } template __global__ void -awkward_reduce_prod_bool_c(T* toptr, - const C* fromptr, - const U* 
parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_prod_bool_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint32_t* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + toptr[thread_id] = (T)(atomic_toptr[thread_id]); } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu index 13c5a31dbf..8ce2b8159c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu @@ -3,13 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// 
cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) // out["awkward_reduce_sum_a", {dtype_specializations}] = None // out["awkward_reduce_sum_b", {dtype_specializations}] = None -// out["awkward_reduce_sum_c", {dtype_specializations}] = None // END PYTHON template @@ -20,14 +22,14 @@ awkward_reduce_sum_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = 0; + toptr[thread_id] = 0; } } } @@ -40,35 +42,33 @@ awkward_reduce_sum_b( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], - (uint64_t)fromptr[thread_id]); + temp[thread_id] = fromptr[thread_id]; } - } -} + __syncthreads(); -template -__global__ void -awkward_reduce_sum_c( - T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + for (int64_t 
stride = 1; stride < blockDim.x; stride *= 2) { + T val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu index 0e062a6c78..f85df8e20a 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu @@ -3,10 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// atomic_toptr = 
cupy.array(toptr, dtype=cupy.uint32) +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_a", bool_, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_b", bool_, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_bool_c", bool_, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, atomic_toptr, temp, invocation_index, err_code)) // out["awkward_reduce_sum_bool_a", {dtype_specializations}] = None // out["awkward_reduce_sum_bool_b", {dtype_specializations}] = None // out["awkward_reduce_sum_bool_c", {dtype_specializations}] = None @@ -20,14 +25,15 @@ awkward_reduce_sum_bool_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + uint32_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = false; + atomic_toptr[thread_id] = 0; } } } @@ -40,34 +46,55 @@ awkward_reduce_sum_bool_b( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + uint32_t* atomic_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicOr(atomicAdd_toptr + parents[thread_id], - 
(uint64_t)(fromptr[thread_id] != 0)); + temp[thread_id] = fromptr[thread_id]; + } + __syncthreads(); + + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] |= (val != 0); + __syncthreads(); + } + + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicOr(&atomic_toptr[parent], temp[thread_id]); + } } } } template __global__ void -awkward_reduce_sum_bool_c(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_bool_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint32_t* atomic_toptr, + T* temp, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + toptr[thread_id] = (T)(atomic_toptr[thread_id]); } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu index 8bdb3fccc2..f52b6fb21c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu @@ -3,13 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int32_bool_64_a", toptr.dtype, 
fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int32_bool_64_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int32_bool_64_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int32_bool_64_a", int32, bool_, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int32_bool_64_b", int32, bool_, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) // out["awkward_reduce_sum_int32_bool_64_a", {dtype_specializations}] = None // out["awkward_reduce_sum_int32_bool_64_b", {dtype_specializations}] = None -// out["awkward_reduce_sum_int32_bool_64_c", {dtype_specializations}] = None // END PYTHON template @@ -20,14 +22,14 @@ awkward_reduce_sum_int32_bool_64_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = 0; + toptr[thread_id] = 0; } } } @@ -40,34 +42,33 @@ awkward_reduce_sum_int32_bool_64_b( const U* parents, 
int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], - (uint64_t)(fromptr[thread_id] != 0)); + temp[thread_id] = (fromptr[thread_id] != 0) ? 1 : 0; - } -} + __syncthreads(); -template -__global__ void -awkward_reduce_sum_int32_bool_64_c(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu index 041558a663..7e220cccc0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu @@ -3,13 +3,15 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, 
dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int64_bool_64_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int64_bool_64_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int64_bool_64_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// if block[0] > 0: +// grid_size = math.floor((lenparents + block[0] - 1) / block[0]) +// else: +// grid_size = 1 +// temp = cupy.zeros(lenparents, dtype=toptr.dtype) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int64_bool_64_a", int64, bool_, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_int64_bool_64_b", int64, bool_, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, temp, invocation_index, err_code)) // out["awkward_reduce_sum_int64_bool_64_a", {dtype_specializations}] = None // out["awkward_reduce_sum_int64_bool_64_b", {dtype_specializations}] = None -// out["awkward_reduce_sum_int64_bool_64_c", {dtype_specializations}] = None // END PYTHON template @@ -20,14 +22,14 @@ awkward_reduce_sum_int64_bool_64_a( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - 
atomicAdd_toptr[thread_id] = 0; + toptr[thread_id] = 0; } } } @@ -40,34 +42,33 @@ awkward_reduce_sum_int64_bool_64_b( const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + T* temp, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t idx = threadIdx.x; + int64_t thread_id = blockIdx.x * blockDim.x + idx; if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], - (uint64_t)(fromptr[thread_id] != 0)); + temp[thread_id] = (fromptr[thread_id] != 0) ? 1 : 0;; } - } -} + __syncthreads(); -template -__global__ void -awkward_reduce_sum_int64_bool_64_c(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { + T val = 0; + if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { + val = temp[thread_id - stride]; + } + __syncthreads(); + temp[thread_id] += val; + __syncthreads(); + } - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + if (thread_id < lenparents) { + int64_t parent = parents[thread_id]; + if (idx == blockDim.x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1]) { + atomicAdd(&toptr[parent], temp[thread_id]); + } } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu b/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu index e101a7f3d8..9e8eb2bb35 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu @@ -32,6 +32,31 @@ typedef unsigned long long uintmax_t; atomicMin(err_code, \ invocation_index*(1 << ERROR_BITS) + (int)(ERROR_KERNEL_CODE)); 
+// BEGIN PYTHON +// def min_max_type(dtype): +// supported_types = { +// 'bool': cupy.int32, +// 'int8': cupy.int32, +// 'int16': cupy.int32, +// 'int32': cupy.int32, +// 'int64': cupy.int64, +// 'uint8': cupy.uint32, +// 'uint16': cupy.uint32, +// 'uint32': cupy.uint32, +// 'uint64': cupy.uint64, +// 'float32': cupy.float32, +// 'float64': cupy.float64 +// } +// if str(dtype) in supported_types: +// return supported_types[str(dtype)] +// else: +// raise ValueError("Unsupported dtype.", dtype) +// END PYTHON + + +// used by awkward_ListArray_getitem_next_range_carrylength +// and awkward_ListArray_getitem_next_range kernels + const int64_t kMaxInt64 = 9223372036854775806; // 2**63 - 2: see below const int64_t kSliceNone = kMaxInt64 + 1; // for Slice::none() @@ -43,29 +68,281 @@ awkward_regularize_rangeslice( bool hasstart, bool hasstop, int64_t length) { - if (posstep) { - if (!hasstart) *start = 0; - else if (*start < 0) *start += length; - if (*start < 0) *start = 0; - if (*start > length) *start = length; - - if (!hasstop) *stop = length; - else if (*stop < 0) *stop += length; - if (*stop < 0) *stop = 0; - if (*stop > length) *stop = length; - if (*stop < *start) *stop = *start; - } - - else { - if (!hasstart) *start = length - 1; - else if (*start < 0) *start += length; - if (*start < -1) *start = -1; - if (*start > length - 1) *start = length - 1; - - if (!hasstop) *stop = -1; - else if (*stop < 0) *stop += length; - if (*stop < -1) *stop = -1; - if (*stop > length - 1) *stop = length - 1; - if (*stop > *start) *stop = *start; - } + if (posstep) { + if (!hasstart) *start = 0; + else if (*start < 0) *start += length; + if (*start < 0) *start = 0; + if (*start > length) *start = length; + + if (!hasstop) *stop = length; + else if (*stop < 0) *stop += length; + if (*stop < 0) *stop = 0; + if (*stop > length) *stop = length; + if (*stop < *start) *stop = *start; } + + else { + if (!hasstart) *start = length - 1; + else if (*start < 0) *start += length; + if 
(*start < -1) *start = -1; + if (*start > length - 1) *start = length - 1; + + if (!hasstop) *stop = -1; + else if (*stop < 0) *stop += length; + if (*stop < -1) *stop = -1; + if (*stop > length - 1) *stop = length - 1; + if (*stop > *start) *stop = *start; + } +} + + +// atomicMin() specializations +template +__device__ T atomicMin(T* address, T val); + +// atomicMin() specialization for int8_t +template <> +__device__ int8_t atomicMin(int8_t* address, int8_t val) { + unsigned int *base_address = (unsigned int *)((size_t)address & ~3); + unsigned int selectors[] = {0x3214, 0x3240, 0x3410, 0x4210}; + unsigned int sel = selectors[(size_t)address & 3]; + unsigned int old, assumed, min_, new_; + old = *base_address; + do { + assumed = old; + min_ = min(val, (int8_t)__byte_perm(old, 0, ((size_t)address & 3))); + new_ = __byte_perm(old, min_, sel); + old = atomicCAS(base_address, assumed, new_); + } while (assumed != old); + return old; +} + +// atomicMin() specialization for uint8_t +template <> +__device__ uint8_t atomicMin(uint8_t* address, uint8_t val) { + unsigned int *base_address = (unsigned int *)((size_t)address & ~3); + unsigned int selectors[] = {0x3214, 0x3240, 0x3410, 0x4210}; + unsigned int sel = selectors[(size_t)address & 3]; + unsigned int old, assumed, min_, new_; + old = *base_address; + do { + assumed = old; + min_ = min(val, (uint8_t)__byte_perm(old, 0, ((size_t)address & 3))); + new_ = __byte_perm(old, min_, sel); + old = atomicCAS(base_address, assumed, new_); + } while (assumed != old); + return old; +} + +// atomicMin() specialization for int16_t +template <> +__device__ int16_t atomicMin(int16_t* address, int16_t val) { + uint16_t* address_as_ush = reinterpret_cast(address); + uint16_t old = *address_as_ush, assumed; + do { + assumed = old; + int16_t temp = min(val, reinterpret_cast(assumed)); + old = atomicCAS( + address_as_ush, assumed, reinterpret_cast(temp) + ); + } while (assumed != old); + return reinterpret_cast(old); +} + +// 
atomicMin() specialization for uint16_t +template <> +__device__ uint16_t atomicMin(uint16_t* address, uint16_t val) { + uint16_t old = *address, assumed; + do { + assumed = old; + old = atomicCAS(address, assumed, min(val, assumed)); + } while (assumed != old); + return old; +} + +// atomicMin() specialization for float +template <> +__device__ float atomicMin(float* addr, float value) { + float old; + old = !signbit(value) ? __int_as_float(atomicMin((int*)addr, __float_as_int(value))) : + __uint_as_float(atomicMax((unsigned int*)addr, __float_as_uint(value))); + return old; +} + +// atomicMin() specialization for double +template <> +__device__ double atomicMin(double* addr, double value) { + double old; + old = !signbit(value) ? __longlong_as_double(atomicMin((long long int*)addr, __double_as_longlong(value))) : + __ull2double_rz(atomicMax((unsigned long long int*)addr, __double2ull_ru(value))); + return old; +} + + +// atomicMax() specializations +template +__device__ T atomicMax(T* address, T val); + +// atomicMax() specialization for int8_t +template <> +__device__ int8_t atomicMax(int8_t* address, int8_t val) { + unsigned int *base_address = (unsigned int *)((size_t)address & ~3); + unsigned int selectors[] = {0x3214, 0x3240, 0x3410, 0x4210}; + unsigned int sel = selectors[(size_t)address & 3]; + unsigned int old, assumed, max_, new_; + old = *base_address; + do { + assumed = old; + max_ = max(val, (int8_t)__byte_perm(old, 0, ((size_t)address & 3))); + new_ = __byte_perm(old, max_, sel); + old = atomicCAS(base_address, assumed, new_); + } while (assumed != old); + return old; +} + +// atomicMax() specialization for uint8_t +template <> +__device__ uint8_t atomicMax(uint8_t* address, uint8_t val) { + unsigned int *base_address = (unsigned int *)((size_t)address & ~3); + unsigned int selectors[] = {0x3214, 0x3240, 0x3410, 0x4210}; + unsigned int sel = selectors[(size_t)address & 3]; + unsigned int old, assumed, max_, new_; + old = *base_address; + do { + 
assumed = old; + max_ = max(val, (uint8_t)__byte_perm(old, 0, ((size_t)address & 3))); + new_ = __byte_perm(old, max_, sel); + old = atomicCAS(base_address, assumed, new_); + } while (assumed != old); + return old; +} + +// atomicMax() specialization for int16_t +template <> +__device__ int16_t atomicMax(int16_t* address, int16_t val) { + uint16_t* address_as_ush = reinterpret_cast(address); + uint16_t old = *address_as_ush, assumed; + do { + assumed = old; + int16_t temp = max(val, reinterpret_cast(assumed)); + old = atomicCAS( + address_as_ush, assumed, reinterpret_cast(temp) + ); + } while (assumed != old); + return reinterpret_cast(old); +} + +// atomicMax() specialization for uint16_t +template <> +__device__ uint16_t atomicMax(uint16_t* address, uint16_t val) { + uint16_t old = *address, assumed; + do { + assumed = old; + old = atomicCAS(address, assumed, max(val, assumed)); + } while (assumed != old); + return old; +} + +// atomicMax() specialization for float +template <> +__device__ float atomicMax(float* addr, float value) { + float old; + old = !signbit(value) ? __int_as_float(atomicMax((int*)addr, __float_as_int(value))) : + __uint_as_float(atomicMin((unsigned int*)addr, __float_as_uint(value))); + return old; +} +// atomicMax() specialization for double +template <> +__device__ double atomicMax(double* addr, double value) { + double old; + old = !signbit(value) ? 
__longlong_as_double(atomicMax((long long int*)addr, __double_as_longlong(value))) : + __ull2double_rz(atomicMin((unsigned long long int*)addr, __double2ull_ru(value))); + return old; +} + + +// atomicAdd() specialization for int64_t +// uses 2's complement +__device__ int64_t atomicAdd(int64_t* address, int64_t val) { + uint64_t* address_as_ull = (uint64_t*)address; + uint64_t old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, assumed + (uint64_t)val); + } while (assumed != old); + return (int64_t)old; +} + + +// atomicMul() specializations +template +__device__ T atomicMul(T* address, T val); + +// atomicMul() specialization for int32_t +template <> +__device__ int32_t atomicMul(int32_t* address, int32_t val) { + int32_t old = *address, assumed; + do { + assumed = old; + old = atomicCAS(address, assumed, assumed * val); + } while (assumed != old); + return old; +} + +// atomicMul() specialization for uint32_t +template <> +__device__ uint32_t atomicMul(uint32_t* address, uint32_t val) { + uint32_t old = *address, assumed; + do { + assumed = old; + old = atomicCAS(address, assumed, assumed * val); + } while (assumed != old); + return old; +} + +// atomicMul() specialization for int64_t +template <> +__device__ int64_t atomicMul(int64_t* address, int64_t val) { + uint64_t* address_as_uint64 = reinterpret_cast(address); + uint64_t old = *address_as_uint64, assumed; + uint64_t val_as_uint64 = *reinterpret_cast(&val); + do { + assumed = old; + old = atomicCAS(address_as_uint64, assumed, assumed * val_as_uint64); + } while (assumed != old); + return *reinterpret_cast(&old); +} + +// atomicMul() specialization for uint64_t +template <> +__device__ uint64_t atomicMul(uint64_t* address, uint64_t val) { + uint64_t old = *address, assumed; + do { + assumed = old; + old = atomicCAS(address, assumed, assumed * val); + } while (assumed != old); + return old; +} + +// atomicMul() specialization for float +template <> +__device__ 
float atomicMul(float* address, float val) { + float old = *address, assumed; + do { + assumed = old; + old = __int_as_float(atomicCAS((int*)address, __float_as_int(assumed), __float_as_int(assumed * val))); + } while (assumed != old); + return old; +} + +// atomicMul() specialization for double +template <> +__device__ double atomicMul(double* address, double val) { + uint64_t* address_as_ull = (uint64_t*)address; + uint64_t old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(__longlong_as_double(assumed) * val)); + } while (assumed != old); + return __longlong_as_double(old); +} diff --git a/src/awkward/contents/regulararray.py b/src/awkward/contents/regulararray.py index 2d144a0b7a..a5a16fcdff 100644 --- a/src/awkward/contents/regulararray.py +++ b/src/awkward/contents/regulararray.py @@ -358,7 +358,6 @@ def _carry(self, carry: Index, allow_lazy: bool) -> Content: nextcarry = ak.index.Index64.empty( where.shape[0] * self._size, self._backend.index_nplike ) - assert nextcarry.nplike is self._backend.index_nplike self._maybe_index_error( self._backend[ @@ -472,6 +471,8 @@ def _getitem_next( nexthead, nexttail = ak._slicing.head_tail(tail) nextcarry = ak.index.Index64.empty(self._length, index_nplike) assert nextcarry.nplike is index_nplike + if ak.backend(head) == "cuda": + head = int(ak.to_backend(head, backend=self._backend)[0]) self._maybe_index_error( self._backend[ "awkward_RegularArray_getitem_next_at", nextcarry.dtype.type diff --git a/tests-cuda/test_2922a_new_cuda_kernels.py b/tests-cuda/test_2922a_new_cuda_kernels.py index feb800ecac..fa71d13e63 100644 --- a/tests-cuda/test_2922a_new_cuda_kernels.py +++ b/tests-cuda/test_2922a_new_cuda_kernels.py @@ -16,6 +16,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def 
test_0184_concatenate_operation_records(): one = ak.highlevel.Array([[1, 2, 3], [None, 4], None, [None, 5]]).layout two = ak.highlevel.Array([6, 7, 8]).layout diff --git a/tests-cuda/test_2922b_new_cuda_kernels.py b/tests-cuda/test_2922b_new_cuda_kernels.py index 5666dabf59..f03a5ffe71 100644 --- a/tests-cuda/test_2922b_new_cuda_kernels.py +++ b/tests-cuda/test_2922b_new_cuda_kernels.py @@ -10,6 +10,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_2651_parameter_union(): layout = ak.contents.IndexedArray( ak.index.Index64([0, 1, 2]), diff --git a/tests-cuda/test_3065a_cuda_kernels.py b/tests-cuda/test_3065a_cuda_kernels.py index de8b634da0..798d690a41 100644 --- a/tests-cuda/test_3065a_cuda_kernels.py +++ b/tests-cuda/test_3065a_cuda_kernels.py @@ -1,5 +1,6 @@ from __future__ import annotations +import cupy as cp import numpy as np import pytest @@ -9,6 +10,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0449_merge_many_arrays_in_one_pass_concatenate(): one = ak.highlevel.Array([1, 2, 3]).layout two = ak.highlevel.Array([4.4, 5.5]).layout diff --git a/tests-cuda/test_3065b_cuda_kernels.py b/tests-cuda/test_3065b_cuda_kernels.py index bad768249c..91e77bd37d 100644 --- a/tests-cuda/test_3065b_cuda_kernels.py +++ b/tests-cuda/test_3065b_cuda_kernels.py @@ -11,6 +11,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0582_propagate_context_in_broadcast_and_apply_firsts(): array = ak.Array([[[0, 1, 2], []], [[3, 4]], [], [[5], [6, 7, 8, 9]]]) cuda_array = ak.to_backend(array, "cuda") diff --git 
a/tests-cuda/test_3065c_cuda_kernels.py b/tests-cuda/test_3065c_cuda_kernels.py index 74ac927189..fef0b49181 100644 --- a/tests-cuda/test_3065c_cuda_kernels.py +++ b/tests-cuda/test_3065c_cuda_kernels.py @@ -9,6 +9,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0546_fill_none_replacement_value_type(): array = ak.operations.values_astype( ak.highlevel.Array([1.1, 2.2, None, 3.3]), np.float32 diff --git a/tests-cuda/test_3086_cuda_concatenate.py b/tests-cuda/test_3086_cuda_concatenate.py index e35206b55a..ccf06d22df 100644 --- a/tests-cuda/test_3086_cuda_concatenate.py +++ b/tests-cuda/test_3086_cuda_concatenate.py @@ -10,6 +10,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0184_concatenate_number(): a1 = ak.highlevel.Array([[1, 2, 3], [], [4, 5]]).layout a2 = ak.highlevel.Array([[[1.1], [2.2, 3.3]], [[]], [[4.4], [5.5]]]).layout diff --git a/tests-cuda/test_3130_cuda_listarray_getitem_next.py b/tests-cuda/test_3130_cuda_listarray_getitem_next.py index c26c8f9319..66783ad014 100644 --- a/tests-cuda/test_3130_cuda_listarray_getitem_next.py +++ b/tests-cuda/test_3130_cuda_listarray_getitem_next.py @@ -19,6 +19,13 @@ offsets2 = ak.index.IndexU32(np.array([0, 2, 3, 3, 5], np.uint32)) +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def tests_0020_support_unsigned_indexes_listarray_ellipsis(): array1 = ak.contents.ListArray(starts1, stops1, content) array2 = ak.contents.ListArray(starts2, stops2, array1) diff --git a/tests-cuda/test_3136_cuda_argmin_and_argmax.py b/tests-cuda/test_3136_cuda_argmin_and_argmax.py new file mode 100644 index 
0000000000..861ced70c5 --- /dev/null +++ b/tests-cuda/test_3136_cuda_argmin_and_argmax.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import cupy as cp +import pytest + +import awkward as ak + +to_list = ak.operations.to_list + + +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + +def test_0835_argmin_argmax_axis_None(): + array = ak.highlevel.Array( + [ + [ + [2022, 2023, 2025], + [], + [2027, 2011], + [2013], + ], + [], + [[2017, 2019], [2023]], + ], + ) + cuda_array = ak.to_backend(array, "cuda") + + assert ak.operations.argmin(cuda_array) == 4 + assert ak.operations.argmax(cuda_array) == 3 + + +def test_1106_argminmax_axis_None_missing_values(): + array = ak.highlevel.Array([1, 2, 3, None, 4]) + + cuda_array = ak.to_backend(array, "cuda") + + assert ak.operations.argmax(cuda_array) == 4 + + +def test_0070_argmin_and_argmax_jagged(): + v2_array = ak.operations.from_iter( + [[2.2, 1.1, 3.3], [], [4.4, 5.5], [5.5], [-4.4, -5.5, -6.6]], highlevel=False + ) + + cuda_v2_array = ak.to_backend(v2_array, "cuda", highlevel=False) + + assert to_list(ak.argmin(cuda_v2_array, axis=1, highlevel=False)) == [ + 1, + None, + 0, + 0, + 2, + ] + assert ( + ak.argmin(cuda_v2_array.to_typetracer(), axis=1, highlevel=False).form + == ak.argmin(cuda_v2_array, axis=1, highlevel=False).form + ) + + index2 = ak.index.Index64(cp.array([4, 3, 2, 1, 0], dtype=cp.int64)) + cuda_v2_array2 = ak.contents.IndexedArray(index2, cuda_v2_array) + + assert to_list(ak.argmin(cuda_v2_array2, axis=1, highlevel=False)) == [ + 2, + 0, + 0, + None, + 1, + ] + assert ( + ak.argmin(cuda_v2_array2.to_typetracer(), axis=1, highlevel=False).form + == ak.argmin(cuda_v2_array2, axis=1, highlevel=False).form + ) + + index3 = ak.index.Index64(cp.array([4, 3, -1, 4, 0], dtype=cp.int64)) + cuda_v2_array2 = ak.contents.IndexedOptionArray(index3, cuda_v2_array) + + assert 
to_list(ak.argmin(cuda_v2_array2, axis=1, highlevel=False)) == [ + 2, + 0, + None, + 2, + 1, + ] + assert ( + ak.argmin(cuda_v2_array2.to_typetracer(), axis=1, highlevel=False).form + == ak.argmin(cuda_v2_array2, axis=1, highlevel=False).form + ) + assert to_list(ak.argmin(cuda_v2_array2, axis=-1, highlevel=False)) == [ + 2, + 0, + None, + 2, + 1, + ] + assert ( + ak.argmin(cuda_v2_array2.to_typetracer(), axis=-1, highlevel=False).form + == ak.argmin(cuda_v2_array2, axis=-1, highlevel=False).form + ) + + +def test_0070_argmin_and_argmax_missing(): + array = ak.operations.from_iter( + [[[2.2, 1.1, 3.3]], [[]], [None, None, None], [[-4.4, -5.5, -6.6]]], + highlevel=False, + ) + + cuda_array = ak.to_backend(array, "cuda", highlevel=False) + + assert to_list(ak.argmin(cuda_array, axis=2, highlevel=False)) == [ + [1], + [None], + [None, None, None], + [2], + ] + assert ( + ak.argmin(cuda_array.to_typetracer(), axis=2, highlevel=False).form + == ak.argmin(cuda_array, axis=2, highlevel=False).form + ) + + +def test_0115_generic_reducer_operation_ByteMaskedArray(): + content = ak.operations.from_iter( + [ + [[1.1, 0.0, 2.2], [], [3.3, 4.4]], + [], + [[5.5]], + [[6.6, 9.9, 8.8, 7.7]], + [[], [12.2, 11.1, 10.0]], + ], + highlevel=False, + ) + mask = ak.index.Index8(cp.array([0, 0, 1, 1, 0], dtype=cp.int8)) + content = ak.to_backend(content, "cuda", highlevel=False) + + cuda_v2_array = ak.contents.ByteMaskedArray(mask, content, valid_when=False) + + assert to_list(cuda_v2_array) == [ + [[1.1, 0.0, 2.2], [], [3.3, 4.4]], + [], + None, + None, + [[], [12.2, 11.1, 10.0]], + ] + assert to_list(ak.argmin(cuda_v2_array, axis=-1, highlevel=False)) == [ + [1, None, 0], + [], + None, + None, + [None, 2], + ] + assert ( + ak.argmin(cuda_v2_array.to_typetracer(), axis=-1, highlevel=False).form + == ak.argmin(cuda_v2_array, axis=-1, highlevel=False).form + ) + + +@pytest.mark.parametrize( + "func", + [ + ak.argmin, + ak.argmax, + ], +) +def 
test_2754_highlevel_behavior_missing_reducers(func): + behavior_1 = {"foo": "bar"} + behavior_2 = {"baz": "bargh!"} + + array = ak.Array([[1, 2, 3, 4], [5], [10]]) + + cuda_array = ak.to_backend(array, "cuda") + + assert isinstance(func(cuda_array, axis=1, highlevel=True), ak.Array) + assert isinstance(func(cuda_array, axis=1, highlevel=False), ak.contents.Content) + assert ( + func( + ak.Array(cuda_array, behavior=behavior_1), + axis=1, + highlevel=True, + behavior=behavior_2, + ).behavior + == behavior_2 + ) + assert ( + func( + ak.Array(cuda_array, behavior=behavior_1), + axis=1, + highlevel=True, + ).behavior + == behavior_1 + ) diff --git a/tests-cuda/test_3136_cuda_reducers.py b/tests-cuda/test_3136_cuda_reducers.py new file mode 100644 index 0000000000..06ab47117a --- /dev/null +++ b/tests-cuda/test_3136_cuda_reducers.py @@ -0,0 +1,454 @@ +from __future__ import annotations + +import cupy as cp +import cupy.testing as cpt +import numpy as np +import pytest + +import awkward as ak + +to_list = ak.operations.to_list + + +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + +def prod(xs): + out = 1 + for x in xs: + out *= x + return out + + +def test_0115_generic_reducer_operation_sumprod_types(): + array = np.array([[True, False, False], [True, False, False]]) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_1(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.int8) + content2 = 
ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_2(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.uint8) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_3(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.int16) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, 
axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_4(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.uint16) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + + +def test_0115_generic_reducer_operation_sumprod_types_5(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.int32) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def 
test_0115_generic_reducer_operation_sumprod_types_6(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.uint32) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_7(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.int64) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_8(): + array = np.array([[0, 1, 2], [3, 4, 5]], dtype=np.uint64) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + 
np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + assert sum(to_list(np.sum(array, axis=-1))) == sum( + to_list(ak.sum(depth1, axis=-1, highlevel=False)) + ) + assert prod(to_list(np.prod(array, axis=-1))) == prod( + to_list(ak.prod(depth1, axis=-1, highlevel=False)) + ) + del depth1 + + +def test_0115_generic_reducer_operation_sumprod_types_FIXME(): + array = np.array([[True, False, False], [True, False, False]]) + content2 = ak.contents.NumpyArray(array.reshape(-1)) + offsets3 = ak.index.Index64(np.array([0, 3, 3, 5, 6], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets3, content2) + depth1 = ak.to_backend(depth1, "cuda") + + assert ( + np.sum(array, axis=-1).dtype + == ak.to_numpy(ak.sum(depth1, axis=-1, highlevel=False)).dtype + ) + assert ( + np.prod(array, axis=-1).dtype + == ak.to_numpy(ak.prod(depth1, axis=-1, highlevel=False)).dtype + ) + del depth1 + + +def test_2020_reduce_axis_none_sum(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + cpt.assert_allclose(ak.sum(array, axis=None), 63.0) + assert ak.almost_equal( + ak.sum(array, axis=None, keepdims=True), + ak.to_regular(ak.Array([[63.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.sum(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[63.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] + ), + ) + assert ak.sum(array[2], axis=None, mask_identity=True) is None + del array + + +def test_2020_reduce_axis_none_prod(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + cpt.assert_allclose(ak.prod(array[1:], axis=None), 4838400.0) + assert ak.prod(array, axis=None) == 0 + assert ak.almost_equal( + ak.prod(array, axis=None, keepdims=True), + 
ak.to_regular(ak.Array([[0.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.prod(array[1:], axis=None, keepdims=True), + ak.to_regular(ak.Array([[4838400.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.prod(array[1:], axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[4838400.0]], backend="cuda").mask[ + ak.Array([[True]], backend="cuda") + ] + ), + ) + assert ak.prod(array[2], axis=None, mask_identity=True) is None + del array + + +def test_2020_reduce_axis_none_min(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + cpt.assert_allclose(ak.min(array, axis=None), 0.0) + assert ak.almost_equal( + ak.min(array, axis=None, keepdims=True, mask_identity=False), + ak.to_regular(ak.Array([[0.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.min(array, axis=None, keepdims=True, initial=-100.0, mask_identity=False), + ak.to_regular(ak.Array([[-100.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.min(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[0.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] + ), + ) + assert ak.almost_equal( + ak.min(array[-1:], axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[ + ak.Array([[False]], backend="cuda") + ] + ), + ) + assert ak.min(array[2], axis=None, mask_identity=True) is None + del array + + +def test_2020_reduce_axis_none_max(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + cpt.assert_allclose(ak.max(array, axis=None), 10.0) + assert ak.almost_equal( + ak.max(array, axis=None, keepdims=True, mask_identity=False), + ak.to_regular(ak.Array([[10.0]], backend="cuda")), + ) + assert ak.almost_equal( + ak.max(array, axis=None, keepdims=True, initial=100.0, mask_identity=False), + ak.to_regular(ak.Array([[100.0]], backend="cuda")), + ) + assert 
ak.almost_equal( + ak.max(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[10.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] + ), + ) + assert ak.almost_equal( + ak.max(array[-1:], axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[ + ak.Array([[False]], backend="cuda") + ] + ), + ) + assert ak.max(array[2], axis=None, mask_identity=True) is None + del array + + +def test_2020_reduce_axis_none_count(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + assert ak.count(array, axis=None) == 12 + assert ak.almost_equal( + ak.count(array, axis=None, keepdims=True, mask_identity=False), + ak.to_regular(ak.Array([[12]], backend="cuda")), + ) + assert ak.almost_equal( + ak.count(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[12]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] + ), + ) + assert ak.almost_equal( + ak.count(array[-1:], axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] + ), + ) + assert ak.count(array[2], axis=None, mask_identity=True) is None + assert ak.count(array[2], axis=None, mask_identity=False) == 0 + del array + + +def test_2020_reduce_axis_none_count_nonzero(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + assert ak.count_nonzero(array, axis=None) == 11 + assert ak.almost_equal( + ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=False), + ak.to_regular(ak.Array([[11]], backend="cuda")), + ) + assert ak.almost_equal( + ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[11]], backend="cuda").mask[ak.Array([[True]], backend="cuda")] + ), + ) + assert ak.almost_equal( + ak.count_nonzero(array[-1:], axis=None, keepdims=True, 
mask_identity=True), + ak.to_regular( + ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] + ), + ) + assert ak.count_nonzero(array[2], axis=None, mask_identity=True) is None + assert ak.count_nonzero(array[2], axis=None, mask_identity=False) == 0 + del array + + +def test_2020_reduce_axis_none_std_no_mask_axis_none(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + out1 = ak.std(array[-1:], axis=None, keepdims=True, mask_identity=True) + out2 = ak.to_regular( + ak.Array([[0.0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")] + ) + assert ak.almost_equal(out1, out2) + + out3 = ak.std(array[2], axis=None, mask_identity=True) + assert out3 is None + del array + del out1, out2, out3 + + +def test_2020_reduce_axis_none_std(): + array = ak.Array( + [[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda" + ) + cpt.assert_allclose(ak.std(array, axis=None), 3.139134700306227) + cpt.assert_allclose( + ak.std(array, axis=None, keepdims=True, mask_identity=False), + ak.to_regular([[3.139134700306227]]), + ) + cpt.assert_allclose( + ak.std(array, axis=None, keepdims=True, mask_identity=True), + ak.to_regular( + ak.Array([[3.139134700306227]], backend="cuda").mask[ + ak.Array([[True]], backend="cuda") + ] + ), + ) + assert np.isnan(ak.std(array[2], axis=None, mask_identity=False)) + del array diff --git a/tests-cuda/test_3140_cuda_jagged_and_masked_getitem.py b/tests-cuda/test_3140_cuda_jagged_and_masked_getitem.py index fff5417c03..064a6a5763 100644 --- a/tests-cuda/test_3140_cuda_jagged_and_masked_getitem.py +++ b/tests-cuda/test_3140_cuda_jagged_and_masked_getitem.py @@ -9,6 +9,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0111_jagged_and_masked_getitem_bitmaskedarray2b(): array = ak.operations.from_iter( 
[[0.0, 1.1, 2.2], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], highlevel=False @@ -36,6 +43,8 @@ def test_0111_jagged_and_masked_getitem_bitmaskedarray2b(): ] assert maskedarray.to_typetracer()[cuda_array].form == maskedarray[cuda_array].form + del cuda_array + def test_0111_jagged_and_masked_getitem_bytemaskedarray2b(): array = ak.operations.from_iter( @@ -62,6 +71,7 @@ def test_0111_jagged_and_masked_getitem_bytemaskedarray2b(): [6.6, 9.9], ] assert maskedarray.to_typetracer()[cuda_array].form == maskedarray[cuda_array].form + del cuda_array def test_0111_jagged_and_masked_getitem_emptyarray(): @@ -113,6 +123,8 @@ def test_0111_jagged_and_masked_getitem_emptyarray(): with pytest.raises(IndexError): cuda_listoffsetarray[cuda_array5] + del cuda_listoffsetarray + def test_0111_jagged_and_masked_getitem_indexedarray(): array = ak.operations.from_iter( @@ -248,6 +260,9 @@ def test_0111_jagged_and_masked_getitem_indexedarray(): == cuda_indexedarray[cuda_array1].form ) + del cuda_indexedarray + del cuda_array1 + def test_0111_jagged_and_masked_getitem_indexedarray2(): array = ak.operations.from_iter( @@ -275,6 +290,8 @@ def test_0111_jagged_and_masked_getitem_indexedarray2(): cuda_indexedarray.to_typetracer()[cuda_array].form == cuda_indexedarray[cuda_array].form ) + del cuda_indexedarray + del cuda_array def test_0111_jagged_and_masked_getitem_indexedarray2b(): @@ -303,6 +320,8 @@ def test_0111_jagged_and_masked_getitem_indexedarray2b(): cuda_indexedarray.to_typetracer()[cuda_array].form == cuda_indexedarray[cuda_array].form ) + del cuda_indexedarray + del cuda_array def test_0111_jagged_and_masked_getitem_indexedarray3(): @@ -381,6 +400,13 @@ def test_0111_jagged_and_masked_getitem_indexedarray3(): with pytest.raises(IndexError): cuda_array[cuda_array6] + del cuda_array + del cuda_array2 + del cuda_array3 + del cuda_array4 + del cuda_array5 + del cuda_array6 + def test_0111_jagged_and_masked_getitem_jagged(): array = ak.highlevel.Array( @@ -402,6 +428,9 @@ def 
test_0111_jagged_and_masked_getitem_jagged(): ] assert cuda_array.to_typetracer()[cuda_array2].form == cuda_array[cuda_array2].form + del cuda_array + del cuda_array2 + def test_0111_jagged_and_masked_getitem_double_jagged(): array = ak.highlevel.Array( @@ -533,54 +562,53 @@ def test_0111_jagged_and_masked_getitem_array_boolean_to_int(): b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) assert to_list(b) == [[1, 2], [], [1], [], [1, 2, 3]] - # a = ak.operations.from_iter( - # [[True, True, None], [], [True, None], [None], [True, True, True, None]], - # highlevel=False, - # ) - # cuda_a = ak.to_backend(a, "cuda", highlevel=False) - # # b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) - # # error in _slicing line 553 - FIXME - # assert to_list(b) == [[0, 1, None], [], [0, None], [None], [0, 1, 2, None]] - # assert ( - # b.content.index.data[b.content.index.data >= 0].tolist() - # == np.arange(6).tolist() # kernels expect nonnegative entries to be arange - # ) - - # a = ak.operations.from_iter( - # [[None, True, True], [], [None, True], [None], [None, True, True, True]], - # highlevel=False, - # ) - # cuda_a = ak.to_backend(a, "cuda", highlevel=False) - # b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) - # assert to_list(b) == [[None, 1, 2], [], [None, 1], [None], [None, 1, 2, 3]] - # assert ( - # b.content.index.data[b.content.index.data >= 0].tolist() - # == np.arange(6).tolist() # kernels expect nonnegative entries to be arange - # ) - - # a = ak.operations.from_iter( - # [[False, True, None], [], [False, None], [None], [False, True, True, None]], - # highlevel=False, - # ) - # cuda_a = ak.to_backend(a, "cuda", highlevel=False) - # b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) - # assert to_list(b) == [[1, None], [], [None], [None], [1, 2, None]] - # assert ( - # b.content.index.data[b.content.index.data >= 0].tolist() - # == np.arange(3).tolist() # 
kernels expect nonnegative entries to be arange - # ) - - # a = ak.operations.from_iter( - # [[None, True, False], [], [None, False], [None], [None, True, True, False]], - # highlevel=False, - # ) - # cuda_a = ak.to_backend(a, "cuda", highlevel=False) - # b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) - # assert to_list(b) == [[None, 1], [], [None], [None], [None, 1, 2]] - # assert ( - # b.content.index.data[b.content.index.data >= 0].tolist() - # == np.arange(3).tolist() # kernels expect nonnegative entries to be arange - # ) + a = ak.operations.from_iter( + [[True, True, None], [], [True, None], [None], [True, True, True, None]], + highlevel=False, + ) + cuda_a = ak.to_backend(a, "cuda", highlevel=False) + b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) + assert to_list(b) == [[0, 1, None], [], [0, None], [None], [0, 1, 2, None]] + assert ( + b.content.index.data[b.content.index.data >= 0].tolist() + == np.arange(6).tolist() # kernels expect nonnegative entries to be arange + ) + + a = ak.operations.from_iter( + [[None, True, True], [], [None, True], [None], [None, True, True, True]], + highlevel=False, + ) + cuda_a = ak.to_backend(a, "cuda", highlevel=False) + b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) + assert to_list(b) == [[None, 1, 2], [], [None, 1], [None], [None, 1, 2, 3]] + assert ( + b.content.index.data[b.content.index.data >= 0].tolist() + == np.arange(6).tolist() # kernels expect nonnegative entries to be arange + ) + + a = ak.operations.from_iter( + [[False, True, None], [], [False, None], [None], [False, True, True, None]], + highlevel=False, + ) + cuda_a = ak.to_backend(a, "cuda", highlevel=False) + b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) + assert to_list(b) == [[1, None], [], [None], [None], [1, 2, None]] + assert ( + b.content.index.data[b.content.index.data >= 0].tolist() + == np.arange(3).tolist() # kernels expect 
nonnegative entries to be arange + ) + + a = ak.operations.from_iter( + [[None, True, False], [], [None, False], [None], [None, True, True, False]], + highlevel=False, + ) + cuda_a = ak.to_backend(a, "cuda", highlevel=False) + b = ak._slicing._normalise_item_bool_to_int(cuda_a, backend=cuda_a.backend) + assert to_list(b) == [[None, 1], [], [None], [None], [None, 1, 2]] + assert ( + b.content.index.data[b.content.index.data >= 0].tolist() + == np.arange(3).tolist() # kernels expect nonnegative entries to be arange + ) def test_0111_jagged_and_masked_getitem_array_slice(): diff --git a/tests-cuda/test_3140_cuda_slicing.py b/tests-cuda/test_3140_cuda_slicing.py index 047fc7977c..59e2cfcb67 100644 --- a/tests-cuda/test_3140_cuda_slicing.py +++ b/tests-cuda/test_3140_cuda_slicing.py @@ -1,12 +1,21 @@ from __future__ import annotations +import cupy as cp import numpy as np +import pytest import awkward as ak to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0315_integerindex_null_more(): f = ak.highlevel.Array([[0, None, 2], None, [3, 4], []], backend="cuda").layout g1 = ak.highlevel.Array([[1, 2, None], None, [], [None]], backend="cuda").layout diff --git a/tests-cuda/test_3141_cuda_misc.py b/tests-cuda/test_3141_cuda_misc.py index eb5adeb78e..7582788d9c 100644 --- a/tests-cuda/test_3141_cuda_misc.py +++ b/tests-cuda/test_3141_cuda_misc.py @@ -1,6 +1,8 @@ from __future__ import annotations +import cupy as cp import numpy as np +import pytest import awkward as ak from awkward.types import ArrayType, NumpyType, RegularType @@ -8,6 +10,13 @@ to_list = ak.operations.to_list +@pytest.fixture(scope="function", autouse=True) +def cleanup_cuda(): + yield + cp._default_memory_pool.free_all_blocks() + cp.cuda.Device().synchronize() + + def test_0150_ByteMaskedArray_flatten(): content = ak.operations.from_iter( [