Skip to content

Commit 24500f1

Browse files
V4.2.9: Fix Double Free and switch to cudaMalloc (non-async) for T4 stability
1 parent 846987d commit 24500f1

File tree

4 files changed

+13
-14
lines changed

4 files changed

+13
-14
lines changed

Crayon_Colab_Notebook.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
XERV CRAYON V4.2.8 - Production Omni-Backend Tokenizer
2+
XERV CRAYON V4.2.9 - Production Omni-Backend Tokenizer
33
=======================================================
44
Copy this ENTIRE script into a Google Colab cell and run it.
55
@@ -13,7 +13,7 @@
1313
import time
1414

1515
print("=" * 70)
16-
print("XERV CRAYON V4.2.8 INSTALLATION")
16+
print("XERV CRAYON V4.2.9 INSTALLATION")
1717
print("=" * 70)
1818

1919
# ... (the rest of the script is the same until Verification)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "xerv-crayon"
7-
version = "4.2.8"
7+
version = "4.2.9"
88
description = "Omni-Backend Tokenizer - CPU (AVX2/512), CUDA (NVIDIA), ROCm (AMD) with automatic hardware detection"
99
readme = "README.md"
1010
requires-python = ">=3.10"

src/crayon/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
from __future__ import annotations
4747

48-
__version__ = "4.2.8"
48+
__version__ = "4.2.9"
4949
__author__ = "Xerv Research Engineering Division"
5050

5151
# ============================================================================

src/crayon/c_ext/gpu_engine_cuda.cu

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,20 +157,19 @@ static PyObject* load_gpu(PyObject* self, PyObject* args) {
157157
char* arr_ptr = raw + 12;
158158
size_t bytes = size * sizeof(int32_t);
159159

160-
// FIX: Free old + guard
160+
// FIX: Free old + guard (RAII handles actual free of old_ptrs upon exit)
161161
void* old_ptrs[3] = {d_cuda_base, d_cuda_check, d_cuda_values};
162162
CudaMemGuard guard(old_ptrs, 3);
163-
if (cuda_loaded) {
164-
CHECK_CUDA_ERR(cudaFree(d_cuda_base));
165-
CHECK_CUDA_ERR(cudaFree(d_cuda_check));
166-
CHECK_CUDA_ERR(cudaFree(d_cuda_values));
167-
}
168-
163+
164+
// FIX: No manual cudaFree here; the guard above owns old_ptrs, so freeing them here as well would double-free
165+
169166
// FIX: Stream init (allocation below is synchronous; only the copies use the stream)
170167
if (!stream) CHECK_CUDA_ERR(cudaStreamCreate(&stream));
171-
CHECK_CUDA_ERR(cudaMallocAsync(&d_cuda_base, bytes, stream));
172-
CHECK_CUDA_ERR(cudaMallocAsync(&d_cuda_check, bytes, stream));
173-
CHECK_CUDA_ERR(cudaMallocAsync(&d_cuda_values, bytes, stream));
168+
169+
// Use standard cudaMalloc for maximum compatibility
170+
CHECK_CUDA_ERR(cudaMalloc(&d_cuda_base, bytes));
171+
CHECK_CUDA_ERR(cudaMalloc(&d_cuda_check, bytes));
172+
CHECK_CUDA_ERR(cudaMalloc(&d_cuda_values, bytes));
174173

175174
CHECK_CUDA_ERR(cudaMemcpyAsync(d_cuda_base, arr_ptr, bytes, cudaMemcpyHostToDevice, stream));
176175
CHECK_CUDA_ERR(cudaMemcpyAsync(d_cuda_check, arr_ptr + bytes, bytes, cudaMemcpyHostToDevice, stream));

0 commit comments

Comments
 (0)