
Commit 3afad1b

work in progress
1 parent 406a88b commit 3afad1b

6 files changed: 176 additions & 21 deletions

pyccel/ast/variable.py

Lines changed: 41 additions & 1 deletion
@@ -56,6 +56,11 @@ class Variable(TypedAstNode):
         'stack' if memory should be allocated on the stack, represents stack arrays and scalars.
         'alias' if object allows access to memory stored in another variable.
 
+    memory_location : str, default: 'host'
+        'host' if the variable can only be accessed by the CPU.
+        'device' if the variable can only be accessed by the GPU.
+        'managed' if the variable can be accessed by both the CPU and the GPU; memory transfers are handled implicitly by the CUDA API.
+
     is_const : bool, default: False
         Indicates if object is a const argument of a function.
@@ -98,7 +103,7 @@ class Variable(TypedAstNode):
     >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows'))
     matrix.n_rows
     """
-    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target',
+    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target',
                  '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp',
                  '_shape','_is_private','_class_type')
     _attribute_nodes = ()
@@ -109,6 +114,7 @@ def __init__(
             name,
             *,
             memory_handling='stack',
+            memory_location='host',
             is_const=False,
             is_target=False,
             is_optional=False,
@@ -141,6 +147,10 @@ def __init__(
             raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'")
         self._memory_handling = memory_handling
 
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
         if not isinstance(is_const, bool):
             raise TypeError('is_const must be a boolean.')
         self._is_const = is_const
@@ -323,6 +333,36 @@ def cls_base(self):
         """
         return self._cls_base
 
+    @property
+    def memory_location(self):
+        """ Indicates where the memory of the Variable is allocated: 'host', 'device' or 'managed'.
+        """
+        return self._memory_location
+
+    @memory_location.setter
+    def memory_location(self, memory_location):
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
+    @property
+    def on_host(self):
+        """ Indicates if the memory is only accessible by the CPU.
+        """
+        return self.memory_location == 'host'
+
+    @property
+    def on_device(self):
+        """ Indicates if the memory is only accessible by the GPU.
+        """
+        return self.memory_location == 'device'
+
+    @property
+    def is_managed(self):
+        """ Indicates if the memory is managed by the CUDA API.
+        """
+        return self.memory_location == 'managed'
+
     @property
     def is_const(self):
         """

pyccel/codegen/printing/ccode.py

Lines changed: 2 additions & 1 deletion
@@ -1313,7 +1313,8 @@ def get_declare_type(self, expr):
             self.add_import(c_imports['ndarrays'])
             dtype = 't_ndarray'
         elif isinstance(expr.class_type, CudaArrayType):
-            dtype = 'int *'
+            self.add_import(c_imports['ndarrays'])
+            dtype = 't_ndarray'
 
         else:
             errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')

pyccel/codegen/printing/cucode.py

Lines changed: 37 additions & 6 deletions
@@ -16,6 +16,14 @@
 
 from pyccel.errors.errors import Errors
 from pyccel.ast.core import Allocate, Deallocate
+from pyccel.ast.numpytypes import NumpyInt64Type
+from pyccel.ast.cudatypes import CudaArrayType
+from pyccel.ast.datatypes import HomogeneousContainerType
+from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map
+
+
 
 
@@ -24,7 +32,9 @@
 __all__ = ["CudaCodePrinter"]
 
 c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['cuda_ndarrays',]}
+                ['cuda_ndarrays',
+                 'ndarrays',
+                ]}
 
 class CudaCodePrinter(CCodePrinter):
     """
@@ -139,11 +149,32 @@ def _print_ModuleHeader(self, expr):
                 function_declaration,
                 "#endif // {name.upper()}_H\n"))
     def _print_Allocate(self, expr):
-
+        variable = expr.variable
+        shape = ", ".join(self._print(i) for i in expr.shape)
+        if isinstance(variable.class_type, CudaArrayType):
+            dtype = self.find_in_ndarray_type_registry(variable.dtype)
+        elif isinstance(variable.class_type, HomogeneousContainerType):
+            dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
+        else:
+            raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
+        shape_dtype = self.get_c_type(NumpyInt64Type())
+        shape_Assign = "(" + shape_dtype + "[]){" + shape + "}"
+        is_view = 'false' if variable.on_heap else 'true'
+        memory_location = variable.memory_location
+        if memory_location in ('device', 'host'):
+            memory_location = 'allocateMemoryOn' + memory_location.capitalize()
+        else:
+            memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
-        alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n"
+        self.add_import(c_imports['ndarrays'])
+        alloc_code = f"{self._print(variable)} = cuda_array_create({variable.rank}, {shape_Assign}, {dtype}, {is_view}, {memory_location});\n"
         return f'{alloc_code}'
-        # print(shape)
-
-        # return "hjsjkahsjkajskasjkasj"
+
+    def _print_Deallocate(self, expr):
+        var_code = self._print(expr.variable)
+
+        if expr.variable.memory_location == 'host':
+            return f"cuda_free_host({var_code});\n"
+        else:
+            return f"cuda_free({var_code});\n"
 
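
For orientation, this is roughly the C code the updated printers would emit for a hypothetical 2-D array of doubles placed on the device; the variable name 'a' and the shape values are illustrative assumptions, not taken from the commit:

    /* hypothetical generated code: declaration via get_declare_type, allocation via
       _print_Allocate, release via _print_Deallocate */
    t_ndarray a;
    a = cuda_array_create(2, (int64_t[]){32, 64}, nd_double, false, allocateMemoryOnDevice);
    /* ... kernels operate on a.raw_data ... */
    cuda_free(a);   /* 'device' and 'managed' arrays go through cuda_free; 'host' arrays use cuda_free_host */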

Lines changed: 88 additions & 13 deletions
@@ -1,22 +1,97 @@
 #include "cuda_ndarrays.h"
 
-void *cuda_array_create(int shape[])
+/* Thin wrappers so an allocation strategy can be picked through a function-pointer table. */
+void device_memory(void** devPtr, size_t size)
 {
-    size_t i = 0;
-    size_t alloc_size = 1;
+    cudaMalloc(devPtr, size);
+}
 
-    while (shape[i] != 0)
+void managed_memory(void** devPtr, size_t size)
+{
+    cudaMallocManaged(devPtr, size);
+}
+
+void host_memory(void** devPtr, size_t size)
+{
+    cudaMallocHost(devPtr, size);
+}
+
+t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view,
+        enum e_memory_locations location)
+{
+    t_ndarray arr;
+    /* order must match enum e_memory_locations: managedMemory, allocateMemoryOnHost, allocateMemoryOnDevice */
+    void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
+
+    arr.nd = nd;
+    arr.type = type;
+    switch (type)
     {
-        alloc_size *= shape[i];
-        i++;
+        case nd_int8:
+            arr.type_size = sizeof(int8_t);
+            break;
+        case nd_int16:
+            arr.type_size = sizeof(int16_t);
+            break;
+        case nd_int32:
+            arr.type_size = sizeof(int32_t);
+            break;
+        case nd_int64:
+            arr.type_size = sizeof(int64_t);
+            break;
+        case nd_float:
+            arr.type_size = sizeof(float);
+            break;
+        case nd_double:
+            arr.type_size = sizeof(double);
+            break;
+        case nd_bool:
+            arr.type_size = sizeof(bool);
+            break;
     }
-
-    void *array_ptr = malloc(alloc_size);
-    if (array_ptr == NULL)
+    arr.is_view = is_view;
+    arr.length = 1;
+    arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t));
+    arr.strides = NULL; /* strides are not computed yet; keep NULL so freeing them is safe */
+    for (int32_t i = 0; i < arr.nd; i++)
     {
-        cout << "Error allocating memory" << endl;
-        return NULL;
+        arr.length *= shape[i];
+        arr.shape[i] = shape[i];
     }
+    arr.buffer_size = arr.length * arr.type_size;
 
-    return array_ptr;
-}
+    if (!is_view)
+        (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size);
+    return (arr);
+}
+
+int32_t cuda_free_host(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFreeHost(arr.raw_data);
+    arr.raw_data = NULL;
+    free(arr.shape);    /* shape and strides are plain host allocations */
+    arr.shape = NULL;
+    free(arr.strides);
+    arr.strides = NULL;
+    return (1);
+}
+
+__host__ __device__
+int32_t cuda_free(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFree(arr.raw_data);
+    arr.raw_data = NULL;
+    free(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
+
+__host__ __device__
+int32_t cuda_free_pointer(t_ndarray arr)
+{
+    if (arr.is_view == false || arr.shape == NULL)
+        return (0);
+    free(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
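
As a sanity check on how these pieces compose, here is a minimal usage sketch (not part of the commit) that allocates a managed 1-D array, initialises it on the CPU, updates it from a kernel, and frees it; the kernel, sizes and variable names are assumptions for illustration only:

    #include "cuda_ndarrays.h"

    __global__ void scale(double *data, int64_t n)
    {
        int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= 2.0;                     /* the GPU touches the managed buffer directly */
    }

    int main(void)
    {
        /* managed memory is reachable from both host and device */
        t_ndarray a = cuda_array_create(1, (int64_t[]){1024}, nd_double, false, managedMemory);
        double *data = (double *)a.raw_data;

        for (int64_t i = 0; i < a.length; ++i)  /* the CPU initialises the same buffer */
            data[i] = (double)i;

        scale<<<(a.length + 255) / 256, 256>>>(data, a.length);
        cudaDeviceSynchronize();                /* wait for the kernel before touching the data on the host */

        cuda_free(a);
        return 0;
    }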

pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 
 # include <cuda_runtime.h>
 # include <iostream>
+#include "../ndarrays/ndarrays.h"
 
 using namespace std;
 

pyccel/stdlib/ndarrays/ndarrays.h

Lines changed: 7 additions & 0 deletions
@@ -80,6 +80,13 @@ typedef enum e_order
     order_c,
 } t_order;
 
+enum e_memory_locations
+{
+    managedMemory,
+    allocateMemoryOnHost,
+    allocateMemoryOnDevice
+};
+
 typedef struct s_ndarray
 {
     /* raw data buffer*/
