Skip to content

Commit e04b01e

Browse files
committed
feat: add cumulative sum CUDA kernels
1 parent 28007e9 commit e04b01e

File tree

4 files changed

+137
-2
lines changed

4 files changed

+137
-2
lines changed

dev/generate-kernel-signatures.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,16 @@
1212

1313

1414
cuda_kernels_impl = [
15+
"awkward_Index_nones_as_index",
1516
"awkward_ListArray_min_range",
1617
"awkward_ListArray_validity",
1718
"awkward_BitMaskedArray_to_ByteMaskedArray",
1819
"awkward_ListArray_compact_offsets",
1920
"awkward_ListOffsetArray_flatten_offsets",
2021
"awkward_IndexedArray_overlay_mask",
22+
"awkward_ByteMaskedArray_numnull",
23+
"awkward_IndexedArray_numnull",
24+
"awkward_IndexedArray_numnull_parents",
2125
"awkward_IndexedArray_numnull_unique_64",
2226
"awkward_NumpyArray_fill",
2327
"awkward_ListArray_fill",

dev/generate-tests.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,12 +643,16 @@ def gencpuunittests(specdict):
643643

644644

645645
cuda_kernels_tests = [
646+
"awkward_Index_nones_as_index",
646647
"awkward_ListArray_min_range",
647648
"awkward_ListArray_validity",
648649
"awkward_BitMaskedArray_to_ByteMaskedArray",
649650
"awkward_ListArray_compact_offsets",
650651
"awkward_ListOffsetArray_flatten_offsets",
651652
"awkward_IndexedArray_overlay_mask",
653+
"awkward_ByteMaskedArray_numnull",
654+
"awkward_IndexedArray_numnull",
655+
"awkward_IndexedArray_numnull_parents",
652656
"awkward_IndexedArray_numnull_unique_64",
653657
"awkward_NumpyArray_fill",
654658
"awkward_ListArray_fill",

kernel-test-data.json

Lines changed: 125 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,7 +1073,7 @@
10731073
},
10741074
{
10751075
"name": "awkward_ByteMaskedArray_numnull",
1076-
"status": false,
1076+
"status": true,
10771077
"tests": [
10781078
{
10791079
"error": false,
@@ -1644,8 +1644,48 @@
16441644
},
16451645
{
16461646
"name": "awkward_IndexedArray_numnull",
1647-
"status": false,
1647+
"status": true,
16481648
"tests": [
1649+
{
1650+
"error": false,
1651+
"inputs": {
1652+
"fromindex": [1],
1653+
"lenindex": 1
1654+
},
1655+
"outputs": {
1656+
"numnull": [0]
1657+
}
1658+
},
1659+
{
1660+
"error": false,
1661+
"inputs": {
1662+
"fromindex": [-1],
1663+
"lenindex": 1
1664+
},
1665+
"outputs": {
1666+
"numnull": [1]
1667+
}
1668+
},
1669+
{
1670+
"error": false,
1671+
"inputs": {
1672+
"fromindex": [-1, -1, -1, -1],
1673+
"lenindex": 4
1674+
},
1675+
"outputs": {
1676+
"numnull": [4]
1677+
}
1678+
},
1679+
{
1680+
"error": false,
1681+
"inputs": {
1682+
"fromindex": [0, -1, 2, -1, -1, -1, -1],
1683+
"lenindex": 7
1684+
},
1685+
"outputs": {
1686+
"numnull": [5]
1687+
}
1688+
},
16491689
{
16501690
"error": false,
16511691
"inputs": {
@@ -1778,6 +1818,89 @@
17781818
}
17791819
]
17801820
},
1821+
{
1822+
"name": "awkward_IndexedArray_numnull_parents",
1823+
"status": true,
1824+
"tests": [
1825+
{
1826+
"error": false,
1827+
"inputs": {
1828+
"fromindex": [1],
1829+
"lenindex": 1
1830+
},
1831+
"outputs": {
1832+
"numnull": [0],
1833+
"tolength": [0]
1834+
}
1835+
},
1836+
{
1837+
"error": false,
1838+
"inputs": {
1839+
"fromindex": [-1],
1840+
"lenindex": 1
1841+
},
1842+
"outputs": {
1843+
"numnull": [1],
1844+
"tolength": [1]
1845+
}
1846+
},
1847+
{
1848+
"error": false,
1849+
"inputs": {
1850+
"fromindex": [-1, -1, -1, -1],
1851+
"lenindex": 4
1852+
},
1853+
"outputs": {
1854+
"numnull": [1, 1, 1, 1],
1855+
"tolength": [4]
1856+
}
1857+
},
1858+
{
1859+
"error": false,
1860+
"inputs": {
1861+
"fromindex": [0, -1, 2, -1, -1, -1, -1],
1862+
"lenindex": 7
1863+
},
1864+
"outputs": {
1865+
"numnull": [0, 1, 0, 1, 1, 1, 1],
1866+
"tolength": [5]
1867+
}
1868+
},
1869+
{
1870+
"error": false,
1871+
"inputs": {
1872+
"fromindex": [0, 1],
1873+
"lenindex": 2
1874+
},
1875+
"outputs": {
1876+
"numnull": [0, 0],
1877+
"tolength": [0]
1878+
}
1879+
},
1880+
{
1881+
"error": false,
1882+
"inputs": {
1883+
"fromindex": [0, 1, 2, 3],
1884+
"lenindex": 4
1885+
},
1886+
"outputs": {
1887+
"numnull": [0, 0, 0, 0],
1888+
"tolength": [0]
1889+
}
1890+
},
1891+
{
1892+
"error": false,
1893+
"inputs": {
1894+
"fromindex": [0, 1, -2, 3, -4, 5, -6],
1895+
"lenindex": 7
1896+
},
1897+
"outputs": {
1898+
"numnull": [0, 0, 1, 0, 1, 0, 1],
1899+
"tolength": [3]
1900+
}
1901+
}
1902+
]
1903+
},
17811904
{
17821905
"name": "awkward_IndexedArray_numnull_unique_64",
17831906
"status": true,

src/awkward/_connect/cuda/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,11 @@ def fetch_template_specializations(kernel_dict):
7171
# These cuda kernels consist of multiple kernels that don't have templated
7272
# specializations of the same name (e.g. '_a', '_b').
7373
kernel_exclusions = [
74+
"awkward_Index_nones_as_index",
7475
"awkward_ByteMaskedArray_getitem_nextcarry",
76+
"awkward_ByteMaskedArray_numnull",
77+
"awkward_IndexedArray_numnull",
78+
"awkward_IndexedArray_numnull_parents",
7579
"awkward_ByteMaskedArray_getitem_nextcarry_outindex",
7680
"awkward_ByteMaskedArray_reduce_next_64",
7781
"awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64",

0 commit comments

Comments
 (0)