@@ -112,6 +112,7 @@ async def lint_docx_file(
112112 errors = []
113113 warnings = []
114114
115+ docxtpl_temp_file_path : Optional [str ] = None
115116 try :
116117 # Stage 1: Extract plaintext using python-docx (independent of docxtpl)
117118 logger .info (f"Step 1: Extracting plaintext from { filename } using python-docx" )
@@ -155,7 +156,7 @@ async def lint_docx_file(
155156
156157 # Stage 4: If syntax is clean, proceed with docxtpl processing
157158 logger .info (f"Step 4: Syntax clean, proceeding with docxtpl processing" )
158- doc_template , raw_xml = self ._extract_xml_with_docxtpl (file_content , filename )
159+ doc_template , raw_xml , docxtpl_temp_file_path = self ._extract_xml_with_docxtpl (file_content , filename )
159160
160161 # Stage 5: Use docxtpl to process extended docx tags
161162 logger .info (f"Step 5: Processing extended docx tags with docxtpl" )
@@ -209,8 +210,15 @@ async def lint_docx_file(
209210 except Exception as e :
210211 logger .error (f"Linting failed for { filename } : { str (e )} " )
211212 return self ._create_error_result (e , filename , start_time )
213+ finally :
214+ # Delete the docxtpl temp file only here, after all lazy operations (patch_xml, variable detection, etc.) have completed
215+ if docxtpl_temp_file_path and os .path .exists (docxtpl_temp_file_path ):
216+ try :
217+ os .unlink (docxtpl_temp_file_path )
218+ except Exception as e :
219+ logger .warning (f"Failed to delete temp file { docxtpl_temp_file_path } : { e } " )
212220
213- def _extract_xml_with_docxtpl (self , file_content : bytes , filename : str ) -> Tuple [DocxTemplate , str ]:
221+ def _extract_xml_with_docxtpl (self , file_content : bytes , filename : str ) -> Tuple [DocxTemplate , str , str ]:
214222 """
215223 Step 1: Use docxtpl to extract XML from docx.
216224
@@ -219,30 +227,34 @@ def _extract_xml_with_docxtpl(self, file_content: bytes, filename: str) -> Tuple
219227 filename: Original filename for error reporting
220228
221229 Returns:
222- Tuple of (DocxTemplate instance, raw XML string)
230+ Tuple of (DocxTemplate instance, raw XML string, temp file path).
231+ NOTE: The caller is responsible for deleting the returned temp file path
232+ after all docxtpl/python-docx lazy operations are complete.
223233 """
234+ temp_file_path : Optional [str ] = None
224235 try :
225- # Create temporary file
236+ # Create temporary file - must remain on disk while docxtpl lazily reads it
226237 with tempfile .NamedTemporaryFile (delete = False , suffix = '.docx' ) as temp_file :
227238 temp_file .write (file_content )
228239 temp_file_path = temp_file .name
229-
230- try :
231- # Create DocxTemplate instance
232- doc_template = DocxTemplate (temp_file_path )
233- doc_template .init_docx ()
234-
235- # Extract raw XML
236- raw_xml = doc_template .get_xml ()
237-
238- logger .debug (f"Successfully extracted XML from { filename } : { len (raw_xml )} characters" )
239- return doc_template , raw_xml
240-
241- finally :
242- if os .path .exists (temp_file_path ):
243- os .unlink (temp_file_path )
244-
240+
241+ # Create DocxTemplate instance
242+ doc_template = DocxTemplate (temp_file_path )
243+ doc_template .init_docx ()
244+
245+ # Extract raw XML
246+ raw_xml = doc_template .get_xml ()
247+
248+ logger .debug (f"Successfully extracted XML from { filename } : { len (raw_xml )} characters" )
249+ return doc_template , raw_xml , temp_file_path
250+
245251 except Exception as e :
252+ # Clean up the temp file on failure to avoid leaking /tmp files
253+ if temp_file_path and os .path .exists (temp_file_path ):
254+ try :
255+ os .unlink (temp_file_path )
256+ except Exception as cleanup_err :
257+ logger .warning (f"Failed to delete temp file { temp_file_path } : { cleanup_err } " )
246258 raise DocumentExtractionException (
247259 f"Failed to extract XML from { filename } using docxtpl: { str (e )} "
248260 )
@@ -281,40 +293,46 @@ def _extract_structured_text(self, file_content: bytes, filename: str) -> str:
281293 Returns:
282294 Structured text with proper line breaks
283295 """
296+ temp_file_path : Optional [str ] = None
284297 try :
285- # Create temporary file
298+ # Create temporary file - must remain on disk while python-docx lazily reads it
286299 with tempfile .NamedTemporaryFile (delete = False , suffix = '.docx' ) as temp_file :
287300 temp_file .write (file_content )
288301 temp_file_path = temp_file .name
289-
290- try :
291- doc = Document (temp_file_path )
292- full_text = []
293-
294- # Extract paragraph text
295- for paragraph in doc .paragraphs :
296- if paragraph .text .strip (): # Skip empty paragraphs
297- full_text .append (paragraph .text )
298-
299- # Extract table text
300- for table in doc .tables :
301- for row in table .rows :
302- row_text = []
303- for cell in row .cells :
304- row_text .append (cell .text .strip ())
302+
303+ doc = Document (temp_file_path )
304+ full_text = []
305+
306+ # Extract paragraph text
307+ for paragraph in doc .paragraphs :
308+ if paragraph .text .strip (): # Skip empty paragraphs
309+ full_text .append (paragraph .text )
310+
311+ # Extract table text
312+ for table in doc .tables :
313+ for row in table .rows :
314+ row_text = []
315+ for cell in row .cells :
316+ text = cell .text .strip ()
317+ if text :
318+ row_text .append (text )
319+ if row_text :
305320 full_text .append (' | ' .join (row_text ))
306-
307- structured_text = '\n ' .join (full_text )
308- logger .debug (f"Extracted structured text: { len (structured_text )} characters, { len (full_text )} lines" )
309- return structured_text
310-
311- finally :
312- if os .path .exists (temp_file_path ):
313- os .unlink (temp_file_path )
314-
321+
322+ structured_text = '\n ' .join (full_text )
323+ logger .debug (f"Extracted structured text: { len (structured_text )} characters, { len (full_text )} lines" )
324+ return structured_text
325+
315326 except Exception as e :
316327 logger .error (f"Failed to extract structured text from { filename } : { str (e )} " )
317328 return ""
329+ finally :
330+ # Clean up AFTER all operations complete
331+ if temp_file_path and os .path .exists (temp_file_path ):
332+ try :
333+ os .unlink (temp_file_path )
334+ except Exception as cleanup_err :
335+ logger .warning (f"Failed to delete temp file { temp_file_path } : { cleanup_err } " )
318336
319337 def _create_input_data (self , raw_xml : str , processed_xml : str , structured_text : str , filename : str ) -> dict :
320338 """
0 commit comments