55
66#define PY_SSIZE_T_CLEAN
77#include <Python.h>
8+ #include <string.h>
9+ #include <stdlib.h>
810#include "newmm.h"
911
12+ /* Module-level dictionary cache */
13+ static struct {
14+ newmm_dict_t dict ;
15+ char * dict_path ;
16+ } dict_cache = {NULL , NULL };
17+
18+ /**
19+ * Load or retrieve cached dictionary
20+ */
21+ static newmm_dict_t get_or_load_dict (const char * dict_path ) {
22+ /* Check if we need to reload the dictionary */
23+ int need_reload = 0 ;
24+
25+ if (dict_cache .dict == NULL ) {
26+ /* No cached dict */
27+ need_reload = 1 ;
28+ } else if (dict_path == NULL && dict_cache .dict_path != NULL ) {
29+ /* Switching from custom to default */
30+ need_reload = 1 ;
31+ } else if (dict_path != NULL && dict_cache .dict_path == NULL ) {
32+ /* Switching from default to custom */
33+ need_reload = 1 ;
34+ } else if (dict_path != NULL && dict_cache .dict_path != NULL ) {
35+ /* Both custom, check if path changed */
36+ if (strcmp (dict_path , dict_cache .dict_path ) != 0 ) {
37+ need_reload = 1 ;
38+ }
39+ }
40+
41+ if (need_reload ) {
42+ /* Free old dictionary */
43+ if (dict_cache .dict ) {
44+ newmm_free_dict (dict_cache .dict );
45+ dict_cache .dict = NULL ;
46+ }
47+ if (dict_cache .dict_path ) {
48+ free (dict_cache .dict_path );
49+ dict_cache .dict_path = NULL ;
50+ }
51+
52+ /* Load new dictionary */
53+ dict_cache .dict = newmm_load_dict (dict_path );
54+ if (dict_cache .dict && dict_path ) {
55+ dict_cache .dict_path = strdup (dict_path );
56+ if (!dict_cache .dict_path ) {
57+ /* strdup failed, clean up and return NULL */
58+ newmm_free_dict (dict_cache .dict );
59+ dict_cache .dict = NULL ;
60+ return NULL ;
61+ }
62+ }
63+ }
64+
65+ return dict_cache .dict ;
66+ }
67+
1068/**
1169 * Python wrapper for newmm_segment function
1270 */
@@ -21,8 +79,15 @@ static PyObject* py_newmm_segment(PyObject* Py_UNUSED(self), PyObject* args, PyO
2179 return NULL ;
2280 }
2381
24- /* Call C function */
25- char * * tokens = newmm_segment (text , dict_path , & token_count );
82+ /* Get or load dictionary */
83+ newmm_dict_t dict = get_or_load_dict (dict_path );
84+ if (!dict ) {
85+ PyErr_SetString (PyExc_MemoryError , "Failed to load dictionary (out of memory)" );
86+ return NULL ;
87+ }
88+
89+ /* Call C function with cached dictionary */
90+ char * * tokens = newmm_segment_with_dict (text , dict , & token_count );
2691
2792 if (!tokens ) {
2893 PyErr_SetString (PyExc_RuntimeError , "Failed to segment text" );
@@ -52,6 +117,21 @@ static PyObject* py_newmm_segment(PyObject* Py_UNUSED(self), PyObject* args, PyO
52117 return result ;
53118}
54119
120+ /**
121+ * Clear cached dictionary
122+ */
123+ static PyObject * py_clear_cache (PyObject * Py_UNUSED (self ), PyObject * Py_UNUSED (args )) {
124+ if (dict_cache .dict ) {
125+ newmm_free_dict (dict_cache .dict );
126+ dict_cache .dict = NULL ;
127+ }
128+ if (dict_cache .dict_path ) {
129+ free (dict_cache .dict_path );
130+ dict_cache .dict_path = NULL ;
131+ }
132+ Py_RETURN_NONE ;
133+ }
134+
55135/**
56136 * Module method definitions
57137 */
@@ -72,6 +152,13 @@ static PyMethodDef CThaiNLPMethods[] = {
72152 " >>> print(tokens)\n"
73153 " ['ฉัน', 'ไป', 'โรงเรียน']\n"
74154 },
155+ {
156+ "clear_cache" ,
157+ py_clear_cache ,
158+ METH_NOARGS ,
159+ "Clear the cached dictionary.\n\n"
160+ "This forces the next tokenization to reload the dictionary.\n"
161+ },
75162 {NULL , NULL , 0 , NULL } /* Sentinel */
76163};
77164
@@ -90,9 +177,26 @@ static struct PyModuleDef cthainlp_module = {
90177 NULL /* m_free */
91178};
92179
180+ /**
181+ * Module cleanup function
182+ */
183+ static void module_free (void * Py_UNUSED (self )) {
184+ /* Clean up cached dictionary on module unload */
185+ if (dict_cache .dict ) {
186+ newmm_free_dict (dict_cache .dict );
187+ dict_cache .dict = NULL ;
188+ }
189+ if (dict_cache .dict_path ) {
190+ free (dict_cache .dict_path );
191+ dict_cache .dict_path = NULL ;
192+ }
193+ }
194+
93195/**
94196 * Module initialization function
95197 */
96198PyMODINIT_FUNC PyInit__cthainlp (void ) {
199+ /* Update module definition with cleanup function */
200+ cthainlp_module .m_free = module_free ;
97201 return PyModule_Create (& cthainlp_module );
98202}
0 commit comments