
Commit 3afad1b

work in progress
1 parent 406a88b commit 3afad1b

6 files changed: 176 additions & 21 deletions

pyccel/ast/variable.py

Lines changed: 41 additions & 1 deletion
@@ -56,6 +56,11 @@ class Variable(TypedAstNode):
         'stack' if memory should be allocated on the stack, represents stack arrays and scalars.
         'alias' if object allows access to memory stored in another variable.
 
+    memory_location : str, default: 'host'
+        'host' if the variable can only be accessed by the CPU.
+        'device' if the variable can only be accessed by the GPU.
+        'managed' if the variable can be accessed by both the CPU and the GPU; memory transfers are handled implicitly by the CUDA API.
+
     is_const : bool, default: False
         Indicates if object is a const argument of a function.
@@ -98,7 +103,7 @@ class Variable(TypedAstNode):
     >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows'))
     matrix.n_rows
     """
-    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target',
+    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target',
                  '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp',
                  '_shape','_is_private','_class_type')
     _attribute_nodes = ()
@@ -109,6 +114,7 @@ def __init__(
             name,
             *,
             memory_handling='stack',
+            memory_location='host',
             is_const=False,
             is_target=False,
             is_optional=False,
@@ -141,6 +147,10 @@ def __init__(
             raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'")
         self._memory_handling = memory_handling
 
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
         if not isinstance(is_const, bool):
             raise TypeError('is_const must be a boolean.')
         self._is_const = is_const
@@ -323,6 +333,36 @@ def cls_base(self):
         """
         return self._cls_base
 
+    @property
+    def memory_location(self):
+        """ Indicates where the memory of the Variable is allocated: 'host', 'device' or 'managed'.
+        """
+        return self._memory_location
+
+    @memory_location.setter
+    def memory_location(self, memory_location):
+        if memory_location not in ('host', 'device', 'managed'):
+            raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        self._memory_location = memory_location
+
+    @property
+    def on_host(self):
+        """ Indicates if the memory is only accessible by the CPU.
+        """
+        return self.memory_location == 'host'
+
+    @property
+    def on_device(self):
+        """ Indicates if the memory is only accessible by the GPU.
+        """
+        return self.memory_location == 'device'
+
+    @property
+    def is_managed(self):
+        """ Indicates if the memory is managed by the CUDA API.
+        """
+        return self.memory_location == 'managed'
+
     @property
     def is_const(self):
         """

pyccel/codegen/printing/ccode.py

Lines changed: 2 additions & 1 deletion
@@ -1313,7 +1313,8 @@ def get_declare_type(self, expr):
             self.add_import(c_imports['ndarrays'])
             dtype = 't_ndarray'
         elif isinstance(expr.class_type, CudaArrayType):
-            dtype = 'int *'
+            self.add_import(c_imports['ndarrays'])
+            dtype = 't_ndarray'
 
         else:
             errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal')

pyccel/codegen/printing/cucode.py

Lines changed: 37 additions & 6 deletions
@@ -16,6 +16,14 @@
 
 from pyccel.errors.errors import Errors
 from pyccel.ast.core import Allocate, Deallocate
+from pyccel.ast.numpytypes import NumpyInt64Type
+from pyccel.ast.cudatypes import CudaArrayType
+from pyccel.ast.datatypes import HomogeneousContainerType
+from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map
+
+
 
 
@@ -24,7 +32,9 @@
 __all__ = ["CudaCodePrinter"]
 
 c_imports = {n : Import(n, Module(n, (), ())) for n in
-                ['cuda_ndarrays',]}
+                ['cuda_ndarrays',
+                 'ndarrays',
+                ]}
 
 class CudaCodePrinter(CCodePrinter):
     """
@@ -139,11 +149,32 @@ def _print_ModuleHeader(self, expr):
                 function_declaration,
                 "#endif // {name.upper()}_H\n"))
     def _print_Allocate(self, expr):
-
+        variable = expr.variable
+        shape = ", ".join(self._print(i) for i in expr.shape)
+        if isinstance(variable.class_type, CudaArrayType):
+            dtype = self.find_in_ndarray_type_registry(variable.dtype)
+        elif isinstance(variable.class_type, HomogeneousContainerType):
+            dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)])
+        else:
+            raise NotImplementedError(f"Don't know how to index {variable.class_type} type")
+        shape_dtype = self.get_c_type(NumpyInt64Type())
+        shape_Assign = "(" + shape_dtype + "[]){" + shape + "}"
+        is_view = 'false' if variable.on_heap else 'true'
+        memory_location = variable.memory_location
+        if memory_location in ('device', 'host'):
+            memory_location = 'allocateMemoryOn' + memory_location.capitalize()
+        else:
+            memory_location = 'managedMemory'
         self.add_import(c_imports['cuda_ndarrays'])
-        alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n"
+        self.add_import(c_imports['ndarrays'])
+        alloc_code = f"{self._print(variable)} = cuda_array_create({variable.rank}, {shape_Assign}, {dtype}, {is_view}, {memory_location});\n"
         return f'{alloc_code}'
-        # print(shape)
-
-        # return "hjsjkahsjkajskasjkasj"
+
+    def _print_Deallocate(self, expr):
+        var_code = self._print(expr.variable)
+
+        if expr.variable.memory_location == 'host':
+            return f"cuda_free_host({var_code});\n"
+        else:
+            return f"cuda_free({var_code});\n"
 
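
For orientation, this is roughly the C code the updated printers would emit for a hypothetical 2-D array of doubles placed on the device; the variable name 'a' and the shape values are illustrative assumptions, not taken from the commit:

    /* hypothetical generated code: declaration via get_declare_type, allocation via
       _print_Allocate, release via _print_Deallocate */
    t_ndarray a;
    a = cuda_array_create(2, (int64_t[]){32, 64}, nd_double, false, allocateMemoryOnDevice);
    /* ... kernels operate on a.raw_data ... */
    cuda_free(a);   /* 'device' and 'managed' arrays go through cuda_free; 'host' arrays use cuda_free_host */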

Lines changed: 88 additions & 13 deletions
@@ -1,22 +1,97 @@
 #include "cuda_ndarrays.h"
 
-void *cuda_array_create(int shape[])
+/* Thin wrappers so an allocation strategy can be picked through a function-pointer table. */
+void device_memory(void** devPtr, size_t size)
 {
-    size_t i = 0;
-    size_t alloc_size = 1;
+    cudaMalloc(devPtr, size);
+}
 
-    while (shape[i] != 0)
+void managed_memory(void** devPtr, size_t size)
+{
+    cudaMallocManaged(devPtr, size);
+}
+
+void host_memory(void** devPtr, size_t size)
+{
+    cudaMallocHost(devPtr, size);
+}
+
+t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view,
+        enum e_memory_locations location)
+{
+    t_ndarray arr;
+    /* order must match enum e_memory_locations: managedMemory, allocateMemoryOnHost, allocateMemoryOnDevice */
+    void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory};
+
+    arr.nd = nd;
+    arr.type = type;
+    switch (type)
     {
-        alloc_size *= shape[i];
-        i++;
+        case nd_int8:
+            arr.type_size = sizeof(int8_t);
+            break;
+        case nd_int16:
+            arr.type_size = sizeof(int16_t);
+            break;
+        case nd_int32:
+            arr.type_size = sizeof(int32_t);
+            break;
+        case nd_int64:
+            arr.type_size = sizeof(int64_t);
+            break;
+        case nd_float:
+            arr.type_size = sizeof(float);
+            break;
+        case nd_double:
+            arr.type_size = sizeof(double);
+            break;
+        case nd_bool:
+            arr.type_size = sizeof(bool);
+            break;
     }
-
-    void *array_ptr = malloc(alloc_size);
-    if (array_ptr == NULL)
+    arr.is_view = is_view;
+    arr.length = 1;
+    arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t));
+    arr.strides = NULL; /* strides are not computed yet; keep NULL so freeing them is safe */
+    for (int32_t i = 0; i < arr.nd; i++)
     {
-        cout << "Error allocating memory" << endl;
-        return NULL;
+        arr.length *= shape[i];
+        arr.shape[i] = shape[i];
     }
+    arr.buffer_size = arr.length * arr.type_size;
 
-    return array_ptr;
-}
+    if (!is_view)
+        (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size);
+    return (arr);
+}
+
+int32_t cuda_free_host(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFreeHost(arr.raw_data);
+    arr.raw_data = NULL;
+    free(arr.shape);    /* shape and strides are plain host allocations */
+    arr.shape = NULL;
+    free(arr.strides);
+    arr.strides = NULL;
+    return (1);
+}
+
+__host__ __device__
+int32_t cuda_free(t_ndarray arr)
+{
+    if (arr.shape == NULL)
+        return (0);
+    cudaFree(arr.raw_data);
+    arr.raw_data = NULL;
+    free(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
+
+__host__ __device__
+int32_t cuda_free_pointer(t_ndarray arr)
+{
+    if (arr.is_view == false || arr.shape == NULL)
+        return (0);
+    free(arr.shape);
+    arr.shape = NULL;
+    return (0);
+}
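
As a sanity check on how these pieces compose, here is a minimal usage sketch (not part of the commit) that allocates a managed 1-D array, initialises it on the CPU, updates it from a kernel, and frees it; the kernel, sizes and variable names are assumptions for illustration only:

    #include "cuda_ndarrays.h"

    __global__ void scale(double *data, int64_t n)
    {
        int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= 2.0;                     /* the GPU touches the managed buffer directly */
    }

    int main(void)
    {
        /* managed memory is reachable from both host and device */
        t_ndarray a = cuda_array_create(1, (int64_t[]){1024}, nd_double, false, managedMemory);
        double *data = (double *)a.raw_data;

        for (int64_t i = 0; i < a.length; ++i)  /* the CPU initialises the same buffer */
            data[i] = (double)i;

        scale<<<(a.length + 255) / 256, 256>>>(data, a.length);
        cudaDeviceSynchronize();                /* wait for the kernel before touching the data on the host */

        cuda_free(a);
        return 0;
    }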

pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 
 # include <cuda_runtime.h>
 # include <iostream>
+#include "../ndarrays/ndarrays.h"
 
 using namespace std;
 

pyccel/stdlib/ndarrays/ndarrays.h

Lines changed: 7 additions & 0 deletions
@@ -80,6 +80,13 @@ typedef enum e_order
     order_c,
 } t_order;
 
+enum e_memory_locations
+{
+    managedMemory,
+    allocateMemoryOnHost,
+    allocateMemoryOnDevice
+};
+
 typedef struct s_ndarray
 {
     /* raw data buffer*/
