-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_generator.py
More file actions
213 lines (180 loc) · 9.56 KB
/
word_generator.py
File metadata and controls
213 lines (180 loc) · 9.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from docx.shared import Inches, RGBColor
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from urllib.parse import urlparse
import os
import config
import time
import logging
def clean_rate(rate_text):
    """Extract a numeric rate from a raw rate value.

    Args:
        rate_text: The raw rate. Usually a string such as ``"5.25%"`` or
            ``"N/A"``, but plain numbers and ``None`` are tolerated so that
            callers do not have to pre-convert values loaded from JSON.

    Returns:
        The rate as a ``float``, or ``None`` when the value is missing,
        non-numeric, or not a finite number.
    """
    import math  # function-scope import keeps this block self-contained

    # bool is a subclass of int; a True/False "rate" is never meaningful.
    if rate_text is None or isinstance(rate_text, bool):
        return None

    if isinstance(rate_text, (int, float)):
        candidate = rate_text
    else:
        # Drop the percent sign and surrounding whitespace before parsing.
        candidate = str(rate_text).replace("%", "").strip()

    try:
        value = float(candidate)
    except (ValueError, OverflowError):
        # Covers "N/A", empty strings, and any other non-numeric text.
        return None

    # float() happily parses "nan" / "inf"; those would compare as neither
    # greater nor smaller and be misreported as an unchanged rate.
    return value if math.isfinite(value) else None
def determine_rate_change(old_rate, new_rate):
    """Classify the movement from *old_rate* to *new_rate*.

    Both values are parsed with ``clean_rate``.

    Returns:
        A ``(label, color)`` tuple where ``label`` is ``"Up"``, ``"Down"``,
        ``"Same"`` or ``"N/A"`` and ``color`` is the ``RGBColor`` used to
        render that label.
    """
    previous, current = clean_rate(old_rate), clean_rate(new_rate)

    # Without two parseable rates there is nothing to compare.
    if previous is None or current is None:
        return "N/A", RGBColor(0, 0, 0)

    if current > previous:
        return "Up", RGBColor(255, 0, 0)  # red
    if current < previous:
        return "Down", RGBColor(0, 128, 0)  # green
    return "Same", RGBColor(128, 128, 128)  # gray
# Non-ASCII glyphs are written as escapes so the source file stays pure ASCII
# and cannot be damaged by an encoding round-trip.
_EN_DASH = "\u2013"
_CHECK_MARK = "\u2705"
_WARNING_SIGN = "\u26a0\ufe0f"

# Glyphs treated as an affirmative "Match Status".
# NOTE(review): the exact marker written by the comparison step is not visible
# in this module; the common check-mark glyphs are accepted. Confirm against
# the code that produces "Match Status".
_MATCH_YES_MARKERS = ("\u2714", "\u2714\ufe0f", "\u2713", "\u2705")

# Column titles and widths (in inches) of each per-website table.
# The widths add up to 9.5 inches.
_REPORT_HEADERS = (
    "Date",
    "Product Key",
    "Rate Type",
    "Previous Rate",
    "New Rate",
    "Change",
    "Match?",
    "Screenshot",
    "Page URL",
)
_REPORT_COLUMN_WIDTHS = (0.8, 1.2, 0.7, 0.8, 0.8, 0.6, 0.6, 2.0, 2.0)

# Domain fragments mapped to the readable website name used in headings.
_WEBSITE_NAMES = (
    (("westpac",), "Westpac"),
    (("stgeorge", "st.george"), "St. George"),
    (("banksa",), "BankSA"),
    (("bankofmelbourne",), "Bank of Melbourne"),
    (("rams",), "RAMS"),
)

# URLs longer than this are shortened in the "Page URL" column.
_MAX_URL_DISPLAY_LENGTH = 50


def _website_name_for_url(url):
    """Return the readable website name for *url*, derived from its domain."""
    domain = urlparse(url).netloc.lower()
    if domain.startswith("www."):
        domain = domain[4:]
    if not domain:
        # No usable host (empty or malformed URL): still group the row.
        return "Unknown Website"
    for fragments, name in _WEBSITE_NAMES:
        if any(fragment in domain for fragment in fragments):
            return name
    # No specific mapping: fall back to the first label of the domain.
    return domain.split(".")[0].title()


def _product_key_for_url(url):
    """Return the product name shown in the "Product Key" column for *url*."""
    if "all-interest-rates" in url:
        return "Flexi First Option Home Loan"
    if "manage-home-loan/reference-rates" in url:
        return "Variable Rate Home Loan"
    return "Home Loan Product"


def _display_url(url):
    """Return *url* shortened to fit the "Page URL" column."""
    if len(url) <= _MAX_URL_DISPLAY_LENGTH:
        return url
    parsed = urlparse(url)
    compact = parsed.netloc + parsed.path
    if len(compact) > _MAX_URL_DISPLAY_LENGTH:
        # Truncate the combined text so a very long domain cannot produce a
        # negative slice bound (which would cut from the wrong end).
        compact = compact[:_MAX_URL_DISPLAY_LENGTH - 3] + "..."
    return compact


def _is_match(result):
    """Return True when the row's "Match Status" is an affirmative marker."""
    return result.get("Match Status") in _MATCH_YES_MARKERS


def _rate_change_for(result):
    """Return the ``(label, color)`` rate change for one comparison row."""
    # str() keeps the call safe when the JSON rates are numbers or None.
    return determine_rate_change(
        str(result.get("Old JSON Rate", "N/A")),
        str(result.get("New JSON Rate", "N/A")),
    )


def _save_report(doc, message):
    """Save *doc* under a timestamped name and announce where it was written."""
    report_filename = f"Rate_Comparison_Report_{time.strftime('%Y%m%d_%H%M%S')}.docx"
    # Create the output directory on first use instead of failing in save().
    os.makedirs(config.OUTPUT_REPORTS_DIR, exist_ok=True)
    report_path = os.path.join(config.OUTPUT_REPORTS_DIR, report_filename)
    doc.save(report_path)
    logging.info("%s: %s", message, report_path)
    print(f"{message}: {report_path}")


def _add_result_row(table, result, rate_change, date_str):
    """Append one comparison row to *table*."""
    cells = table.add_row().cells
    url = result.get("URL") or ""

    cells[0].text = date_str
    cells[1].text = _product_key_for_url(url)
    cells[2].text = str(result.get("Extracted Rate", "N/A"))
    cells[3].text = str(result.get("Old JSON Rate", "N/A"))
    cells[4].text = str(result.get("New JSON Rate", "N/A"))

    # The change label is added as its own run so it can be colored.
    change_text, change_color = rate_change
    change_paragraph = cells[5].paragraphs[0]
    change_paragraph.add_run(change_text).font.color.rgb = change_color
    change_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    cells[6].text = "Yes" if _is_match(result) else "No"
    cells[6].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Embed the screenshot when the file exists; otherwise say so in the cell.
    screenshot_path = result.get("Screenshot") or ""
    if screenshot_path and os.path.isfile(screenshot_path):
        cells[7].paragraphs[0].add_run().add_picture(screenshot_path, width=Inches(1.9))
    else:
        cells[7].text = "No Screenshot"
    cells[7].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    cells[8].text = _display_url(url)

    # Small, uniform font so every column fits on the page.
    for cell in cells:
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.size = Pt(8)


def _add_website_section(doc, website_name, results, timestamp, date_str):
    """Add the heading, summary line and rate table for one website."""
    heading = doc.add_heading(f"Rates Report {_EN_DASH} {website_name} - {timestamp}", level=1)
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Classify every row once; the result feeds both the summary and the table.
    rate_changes = [_rate_change_for(result) for result in results]
    labels = [label for label, _ in rate_changes]
    same_count = labels.count("Same")
    up_count = labels.count("Up")
    down_count = labels.count("Down")
    matches_yes = sum(1 for result in results if _is_match(result))
    # Everything that is not an affirmative match is shown as "No" in the
    # table, so the summary counts it the same way.
    matches_no = len(results) - matches_yes

    summary_text = f"Summary: Rate Changes - Same: {same_count}, Up: {up_count}, Down: {down_count}, Website Match - Yes: {matches_yes}, No: {matches_no}"
    doc.add_paragraph(summary_text).alignment = WD_ALIGN_PARAGRAPH.CENTER

    table = doc.add_table(rows=1, cols=len(_REPORT_HEADERS))
    table.style = 'Table Grid'
    for column, width in zip(table.columns, _REPORT_COLUMN_WIDTHS):
        column.width = Inches(width)

    # Header row: centered, bold, slightly larger than the data rows.
    for cell, title in zip(table.rows[0].cells, _REPORT_HEADERS):
        cell.text = title
        for paragraph in cell.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            for run in paragraph.runs:
                run.bold = True
                run.font.size = Pt(9)

    for result, rate_change in zip(results, rate_changes):
        _add_result_row(table, result, rate_change, date_str)


def generate_word_report(comparison_results):
    """Generate a landscape Word report of rate comparisons, grouped by website.

    Each website gets its own page with a heading, a one-line summary and a
    table that includes the screenshot of the page inside the table. The
    document is saved to ``config.OUTPUT_REPORTS_DIR`` under a timestamped
    file name. When *comparison_results* is empty, a short "no data" report
    is saved instead.

    Args:
        comparison_results: Sequence of dicts. The keys read here are
            ``"URL"``, ``"Extracted Rate"``, ``"Old JSON Rate"``,
            ``"New JSON Rate"``, ``"Match Status"`` and ``"Screenshot"``;
            missing keys fall back to placeholders.

    Returns:
        None. The report path is logged and printed.
    """
    # docx is already a dependency of this module; imported here so the
    # module-level import block does not need to change.
    from docx.enum.section import WD_ORIENT

    doc = Document()
    timestamp = time.strftime("%d%m%y")  # DDMMYY, shown in the headings

    # Landscape page: set the orientation flag and swap the page dimensions.
    section = doc.sections[0]
    section.orientation = WD_ORIENT.LANDSCAPE
    section.page_width, section.page_height = section.page_height, section.page_width
    section.left_margin = Inches(0.5)
    section.right_margin = Inches(0.5)
    section.top_margin = Inches(0.5)
    section.bottom_margin = Inches(0.5)

    if not comparison_results:
        doc.add_heading(f"Rates Report {_EN_DASH} No Data Available - {timestamp}", level=1)
        doc.add_paragraph(f"{_WARNING_SIGN} No rate changes were detected in this run.")
        _save_report(doc, f"{_CHECK_MARK} Empty report generated at")
        return

    # Group rows by website, keeping first-seen order.
    grouped_results = {}
    for result in comparison_results:
        website_name = _website_name_for_url(result.get("URL") or "")
        grouped_results.setdefault(website_name, []).append(result)

    # One date for the whole report, so rows cannot differ across midnight.
    date_str = time.strftime('%Y-%m-%d')

    for index, (website_name, results) in enumerate(grouped_results.items()):
        if index:
            # Start every website after the first on a new page.
            doc.add_page_break()
        _add_website_section(doc, website_name, results, timestamp, date_str)

    _save_report(doc, f"{_CHECK_MARK} Report generated successfully at")