Skip to content

Commit 3988c85

Browse files
committed
feat(gpu): Implement fft128 in cuda backend
1 parent c1bf43e commit 3988c85

File tree

15 files changed

+17780
-2
lines changed

15 files changed

+17780
-2
lines changed

_typos.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,8 @@ extend-ignore-identifiers-re = [
1313
# Example in trivium
1414
"C9217BA0D762ACA1"
1515
]
16+
17+
[files]
18+
extend-exclude = [
19+
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu"
20+
]

backends/tfhe-cuda-backend/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ fn main() {
6262
"cuda/include/integer/integer.h",
6363
"cuda/include/keyswitch.h",
6464
"cuda/include/linear_algebra.h",
65+
"cuda/include/fft/fft128.h",
6566
"cuda/include/pbs/programmable_bootstrap.h",
6667
"cuda/include/pbs/programmable_bootstrap_multibit.h",
6768
];

backends/tfhe-cuda-backend/cuda/include/device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
5252

5353
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
5454

55-
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
55+
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
5656
cudaStream_t stream, uint32_t gpu_index);
5757

5858
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
backends/tfhe-cuda-backend/cuda/include/fft/fft128.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <stdint.h>
2+
extern "C" {
3+
void cuda_fourier_transform_forward_as_torus_f128_async(
4+
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
5+
void *im1, void const *standard, uint32_t const N,
6+
const uint32_t number_of_samples);
7+
8+
void cuda_fourier_transform_forward_as_integer_f128_async(
9+
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
10+
void *im1, void const *standard, uint32_t const N,
11+
const uint32_t number_of_samples);
12+
13+
void cuda_fourier_transform_backward_as_torus_f128_async(
14+
void *stream, uint32_t gpu_index, void *standard, void const *re0,
15+
void const *re1, void const *im0, void const *im1, uint32_t const N,
16+
const uint32_t number_of_samples);
17+
}

backends/tfhe-cuda-backend/cuda/src/device.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ bool cuda_check_support_thread_block_clusters() {
135135
}
136136

137137
/// Copy memory to the GPU asynchronously
138-
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
138+
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
139139
cudaStream_t stream, uint32_t gpu_index) {
140140
if (size == 0)
141141
return;

0 commit comments

Comments
 (0)