diff --git a/app.py b/app.py index 95d9213..24e0228 100644 --- a/app.py +++ b/app.py @@ -28,26 +28,33 @@ def embed_font_in_html(font_path, font_name, html_content): font-family: '{font_name}', Arial, sans-serif; margin: 0; padding: 0; - background-color: white; + background-color: #f5f5f5; + line-height: 1.6; }} .page {{ position: relative; width: 8.5in; + min-height: 11in; margin: 20px auto; - padding: 20px; + padding: 1in; box-sizing: border-box; background-color: white; - box-shadow: 0 0 10px rgba(0,0,0,0.1); + box-shadow: 0 2px 8px rgba(0,0,0,0.1); }} .paragraph {{ - margin: 0; + margin-bottom: 12pt; padding: 0; position: relative; + line-height: 1.5; + }} + .paragraph > div {{ + line-height: inherit; }} .image-container {{ display: inline-block; position: relative; vertical-align: middle; + margin: 10px 0; }} img {{ max-width: 100%; @@ -58,12 +65,13 @@ def embed_font_in_html(font_path, font_name, html_content): table {{ border-collapse: collapse; width: 100%; - margin: 10px 0; + margin: 15px 0; }} td, th {{ - border: 1px solid black; - padding: 8px; + border: 1px solid #ddd; + padding: 10px 12px; position: relative; + line-height: 1.4; }} """ @@ -112,24 +120,28 @@ def get_image_position(element): def process_paragraph(paragraph, images_dict): html_content = '
' - + + # Determine text alignment if paragraph.alignment == WD_ALIGN_PARAGRAPH.CENTER: html_content += '
' elif paragraph.alignment == WD_ALIGN_PARAGRAPH.RIGHT: html_content += '
' else: html_content += '
' - + + # Check if paragraph is empty + has_content = False + for run in paragraph.runs: style = [] if run.bold: style.append('font-weight: bold') if run.italic: style.append('font-style: italic') if run.underline: style.append('text-decoration: underline') if run.font.size: style.append(f'font-size: {run.font.size.pt}pt') - + drawing_elements = run._element.findall('.//w:drawing', {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) - + for drawing in drawing_elements: blip = drawing.find('.//a:blip', {'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) @@ -146,11 +158,19 @@ def process_paragraph(paragraph, images_dict): html_content += f'
' html_content += f'Document Image' html_content += '
' - + has_content = True + style_str = '; '.join(style) - if run.text.strip(): + # Preserve all text including whitespace for proper spacing + if run.text: html_content += f'{run.text}' - + if run.text.strip(): + has_content = True + + # If paragraph is empty, add a line break to preserve spacing + if not has_content: + html_content += '
' + html_content += '
' return html_content @@ -160,17 +180,21 @@ def process_table(table, images_dict): html_content += '' for cell in row.cells: html_content += '' - for paragraph in cell.paragraphs: + for para_idx, paragraph in enumerate(cell.paragraphs): + # Add spacing between multiple paragraphs in a cell + if para_idx > 0: + html_content += '
' + for run in paragraph.runs: style = [] if run.bold: style.append('font-weight: bold') if run.italic: style.append('font-style: italic') if run.underline: style.append('text-decoration: underline') if run.font.size: style.append(f'font-size: {run.font.size.pt}pt') - + drawing_elements = run._element.findall('.//w:drawing', {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) - + for drawing in drawing_elements: blip = drawing.find('.//a:blip', {'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) @@ -180,9 +204,10 @@ def process_table(table, images_dict): html_content += f'
' html_content += f'Table Cell Image' html_content += '
' - + style_str = '; '.join(style) - if run.text.strip(): + # Preserve text with proper spacing + if run.text: html_content += f'{run.text}' html_content += '' html_content += ''