Skip to content

Commit 983679f

Browse files
authored
[Comp-770] Tweak docx gen tables and images (#668)
1 parent 1c207a6 commit 983679f

File tree

1 file changed

+155
-1
lines changed

1 file changed

+155
-1
lines changed

compliance-api/src/compliance_api/services/inspection_record/inspection_record_doc_generator.py

Lines changed: 155 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,22 @@ def _set_cell_background(cell, fill):
5656
cell._element.get_or_add_tcPr().append(shading_elm)
5757

5858

59+
def set_cell_border(cell, **kwargs):
    """Set the borders of a table cell.

    Args:
        cell: Table cell whose ``w:tcPr`` element receives the borders.
        **kwargs: One optional mapping per edge, keyed by edge name
            (``top``, ``start``, ``bottom``, ``end``, ``insideH``,
            ``insideV``). Every item of a mapping is written as a
            ``w:``-prefixed attribute on that edge's element, e.g.
            ``top={"sz": 4, "val": "single", "color": "000000"}``.
            Edges that are not passed are left untouched.
    """
    tc_pr = cell._tc.get_or_add_tcPr()

    # Reuse an existing <w:tcBorders> so that calling this twice on the same
    # cell updates the borders in place instead of stacking duplicate
    # containers under <w:tcPr>.
    tc_borders = tc_pr.find(qn('w:tcBorders'))
    if tc_borders is None:
        tc_borders = OxmlElement('w:tcBorders')
        tc_pr.append(tc_borders)

    # Edges are visited in the order the WordprocessingML schema lists them
    # inside tcBorders (top, start, bottom, end, insideH, insideV), so newly
    # created children are appended in that order.
    for edge in ('top', 'start', 'bottom', 'end', 'insideH', 'insideV'):
        if edge not in kwargs:
            continue

        edge_el = tc_borders.find(qn(f'w:{edge}'))
        if edge_el is None:
            edge_el = OxmlElement(f'w:{edge}')
            tc_borders.append(edge_el)

        for key, value in kwargs[edge].items():
            edge_el.set(qn(f'w:{key}'), str(value))
73+
74+
5975
def _add_html_to_container(container, html_text, *, font_size=None, clear_first=True):
6076
if not html_text:
6177
return
@@ -89,6 +105,10 @@ def _add_html_to_container(container, html_text, *, font_size=None, clear_first=
89105
_add_list(container, element, font_size)
90106
first_para_used = True # Mark as used after adding list
91107

108+
elif element.name == "table":
109+
add_html_table_to_container(container, element)
110+
first_para_used = True # Mark as used after adding table
111+
92112

93113
def _add_paragraph(container, p_tag, font_size):
94114
text = p_tag.get_text(strip=True)
@@ -216,6 +236,115 @@ def _remove_cell_margins(cell):
216236
table_cell_pr.append(tc_margin)
217237

218238

239+
# Matches a standalone CSS ``height`` declaration given in pixels. Anchoring
# on the start of the string or a preceding ``;`` keeps ``line-height``,
# ``min-height`` and ``max-height`` from being mistaken for the row height.
_ROW_HEIGHT_PX_RE = re.compile(
    r'(?:^|;)\s*height\s*:\s*(\d+(?:\.\d+)?)px',
    re.IGNORECASE,
)

# Single-line black border spec applied to all four edges of every cell.
_TABLE_CELL_BORDER = {"sz": 4, "val": "single", "color": "000000"}


def add_html_table_to_container(container, table_element):
    """Convert an HTML table to a docx table.

    One docx row is created per ``<tr>`` found under ``table_element`` and
    one column per cell of the widest row. ``colspan``/``rowspan`` attributes
    are not interpreted, so no cells are merged.

    Args:
        container: Object exposing ``add_table(rows=..., cols=...)``; the
            new table is added to it.
        table_element: Parsed HTML ``<table>`` element.

    Returns:
        None. Nothing is added when the table has no rows or no cells.
    """
    rows = table_element.find_all('tr')
    if not rows:
        return

    cells_by_row = [tr.find_all(['th', 'td']) for tr in rows]
    max_cols = max(len(cells) for cells in cells_by_row)
    if max_cols == 0:
        # Rows without a single <td>/<th> would yield a zero-column table.
        return

    docx_table = container.add_table(rows=len(rows), cols=max_cols)
    docx_table.style = 'Table Grid'

    for docx_row, tr, cells in zip(docx_table.rows, rows, cells_by_row):
        # Carry over an explicit pixel row height from the inline style,
        # converting pixels to inches at roughly 96 DPI.
        height_match = _ROW_HEIGHT_PX_RE.search(tr.get('style', ''))
        if height_match:
            docx_row.height = Inches(float(height_match.group(1)) / 96)

        # Read the row's cells once instead of once per HTML cell.
        docx_cells = docx_row.cells
        for docx_cell, cell in zip(docx_cells, cells):
            # Clear the default paragraph every new cell starts with.
            first_para = docx_cell.paragraphs[0]
            first_para.text = ""

            has_block_content = False
            first_para_free = True
            for element in cell.children:
                tag_name = getattr(element, 'name', None)
                if tag_name == 'p':
                    # The first <p> reuses the cell's default paragraph;
                    # later ones get a paragraph of their own.
                    if first_para_free:
                        para = first_para
                        first_para_free = False
                    else:
                        para = docx_cell.add_paragraph()
                    _add_formatted_text_to_table_para(para, element)
                    has_block_content = True
                elif tag_name in ('ul', 'ol'):
                    _add_list_to_table_cell(docx_cell, element)
                    first_para_free = False
                    has_block_content = True

            if not has_block_content:
                # Cells such as <td>text <b>bold</b></td> hold their inline
                # content directly, without a <p> wrapper; render it into
                # the default paragraph so the cell does not come out empty.
                _add_formatted_text_to_table_para(first_para, cell)

            # Make header cells bold.
            if cell.name == 'th':
                for para in docx_cell.paragraphs:
                    for run in para.runs:
                        run.bold = True

            set_cell_border(
                docx_cell,
                top=_TABLE_CELL_BORDER,
                bottom=_TABLE_CELL_BORDER,
                start=_TABLE_CELL_BORDER,
                end=_TABLE_CELL_BORDER,
            )
307+
308+
309+
def _add_formatted_text_to_table_para(para, p_element):
    """Add the inline content of an HTML element to a paragraph as runs.

    Each direct child of ``p_element`` becomes one run at 11pt:

    * text nodes and ``<span>`` become plain runs (skipped when they hold
      only whitespace);
    * ``<strong>``/``<b>`` become bold runs, ``<em>``/``<i>`` italic runs;
    * ``<br>`` becomes a run containing a newline;
    * any other tag (for example ``<u>`` or ``<a>``) is rendered as plain
      text so that its content is not silently dropped.

    Formatting nested inside a child is flattened: only the child's own tag
    decides whether the run is bold or italic.

    Args:
        para: Paragraph that receives the runs.
        p_element: Parsed HTML element whose children are rendered.
    """
    for child in p_element.children:
        if isinstance(child, str):
            raw_text, bold, italic = child, False, False
        elif child.name == 'br':
            para.add_run('\n')
            continue
        else:
            raw_text = child.get_text()
            bold = child.name in ('strong', 'b')
            italic = child.name in ('em', 'i')

        # Collapse runs of spaces left over from HTML source formatting.
        text = re.sub(r' {2,}', ' ', raw_text)

        # Unstyled whitespace-only fragments (typically indentation between
        # tags) are dropped; bold/italic fragments are always emitted.
        if not (bold or italic) and not text.strip():
            continue

        run = para.add_run(text)
        if bold:
            run.bold = True
        if italic:
            run.italic = True
        run.font.size = Pt(11)
334+
335+
336+
def _add_list_to_table_cell(cell, list_element):
    """Add a list (ordered or unordered) to a table cell.

    Every direct ``<li>`` child of ``list_element`` becomes one paragraph in
    ``cell``, styled "List Number" for ``<ol>`` and "List Bullet" otherwise,
    holding the item's text in a single 11pt run. Inline formatting inside
    an item is flattened to plain text.

    Args:
        cell: Table cell that receives the list paragraphs.
        list_element: Parsed HTML ``<ul>`` or ``<ol>`` element.
    """
    style = "List Number" if list_element.name == "ol" else "List Bullet"

    for li in list_element.find_all('li', recursive=False):
        # get_text(strip=True) strips every text node *before* joining them,
        # which glues "Hello <b>world</b>" into "Helloworld". Join the raw
        # nodes and normalise the whitespace afterwards instead.
        text = re.sub(r'\s+', ' ', li.get_text()).strip()
        run = cell.add_paragraph(style=style).add_run(text)
        run.font.size = Pt(11)
346+
347+
219348
def _add_photo(photo, cell):
220349
photo_url = photo.get('photo_url')
221350
caption_text = f"Photo {photo.get('photo_number', '')}. {photo.get('photo_caption', '')}"
@@ -297,6 +426,7 @@ def _add_requirement_details_table(doc, req):
297426

298427
# Add title if present
299428
if source.get('title'):
429+
run = para.add_run('\n')
300430
run = para.add_run(source.get('title'))
301431
run.bold = True
302432

@@ -308,6 +438,30 @@ def _add_requirement_details_table(doc, req):
308438
clear_first=False,
309439
)
310440

441+
# Add req source images
442+
for img in source.get('requirement_source_images', []):
443+
image_url = img.get('image_url')
444+
if image_url:
445+
try:
446+
response = requests.get(image_url)
447+
response.raise_for_status()
448+
image_stream = BytesIO(response.content)
449+
450+
# Insert the image
451+
img_para = cell.add_paragraph()
452+
run = img_para.add_run()
453+
run.add_picture(image_stream, width=Inches(4))
454+
455+
# Add filename as caption if available
456+
if img.get('original_file_name'):
457+
caption_para = cell.add_paragraph()
458+
caption_run = caption_para.add_run(img.get('original_file_name'))
459+
caption_run.font.size = Pt(9)
460+
except RequestException:
461+
# If image fails, show placeholder text
462+
error_para = cell.add_paragraph()
463+
error_para.text = f"[Failed to load image: {img.get('original_file_name', 'unknown')}]"
464+
311465
# Add document details
312466
for doc_group in source.get('requirement_documents', []):
313467
run = para.add_run(doc_group.get('document_title', ''))
@@ -413,7 +567,7 @@ def generate_inspection_report_docx(preview_data):
413567
logo_para.alignment = WD_ALIGN_PARAGRAPH.LEFT
414568
logo_para.paragraph_format.space_before = Pt(0)
415569
logo_para.paragraph_format.space_after = Pt(0)
416-
logo_para.paragraph_format.left_indent = Inches(-0.35)
570+
logo_para.paragraph_format.left_indent = Inches(-0.25)
417571

418572
logo_run = logo_para.add_run()
419573
logo_path = Path(__file__).parent / "assets" / "EAO_Logo.png"

0 commit comments

Comments
 (0)