Rate-Automation/word_generator.py at main · Mujeeb4/Rate-Automation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from docx.shared import Inches, RGBColor
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from urllib.parse import urlparse
import os
import config
import time
import logging

def clean_rate(rate_text):
    """Extract a numeric rate from text, removing unwanted characters.

    Args:
        rate_text: The rate to parse, e.g. ``"5.49%"`` or ``"N/A"``. Numbers
            and ``None`` are accepted as well, so callers do not have to
            stringify values before calling.

    Returns:
        The rate as a ``float``, or ``None`` when the input is missing,
        is the ``"N/A"`` placeholder, or cannot be parsed as a number.
    """
    if rate_text is None:
        return None

    # Real numbers pass straight through. bool is excluded on purpose: it is
    # an int subclass, but True/False is never a meaningful rate.
    if isinstance(rate_text, (int, float)) and not isinstance(rate_text, bool):
        return float(rate_text)

    # Anything else is parsed via its text form, with the percentage sign
    # and surrounding whitespace removed.
    cleaned = str(rate_text).replace("%", "").strip()
    if not cleaned or cleaned == "N/A":
        return None

    try:
        return float(cleaned)
    except ValueError:
        return None

def determine_rate_change(old_rate, new_rate):
    """Classify how a rate moved between two readings.

    Both arguments are parsed with ``clean_rate``. If either one does not
    yield a number, the change cannot be classified.

    Returns:
        A ``(label, colour)`` tuple where ``label`` is one of ``"Up"``,
        ``"Down"``, ``"Same"`` or ``"N/A"`` and ``colour`` is the
        ``RGBColor`` used to render that label in the report.
    """
    previous = clean_rate(old_rate)
    current = clean_rate(new_rate)

    # Guard clause: nothing to compare unless both sides parsed.
    if previous is None or current is None:
        return "N/A", RGBColor(0, 0, 0)  # black

    if current > previous:
        return "Up", RGBColor(255, 0, 0)  # red
    if current < previous:
        return "Down", RGBColor(0, 128, 0)  # green
    return "Same", RGBColor(128, 128, 128)  # gray

# Ordered (domain fragments, display name) pairs; the first match wins.
_WEBSITE_NAME_RULES = (
    (("westpac",), "Westpac"),
    (("stgeorge", "st.george"), "St. George"),
    (("banksa",), "BankSA"),
    (("bankofmelbourne",), "Bank of Melbourne"),
    (("rams",), "RAMS"),
)

# Column headers and their widths in inches (the widths sum to 9.5).
_REPORT_HEADERS = ["Date", "Product Key", "Rate Type", "Previous Rate", "New Rate", "Change", "Match?", "Screenshot", "Page URL"]
_REPORT_COL_WIDTHS = [0.8, 1.2, 0.7, 0.8, 0.8, 0.6, 0.6, 2.0, 2.0]


def _website_name_for(url):
    """Map a page URL to the readable website name used to group results."""
    domain = urlparse(url).netloc.lower()
    if domain.startswith("www."):
        domain = domain[4:]

    for fragments, name in _WEBSITE_NAME_RULES:
        if any(fragment in domain for fragment in fragments):
            return name

    # No specific mapping: use the first label of the domain. A URL without
    # a scheme has an empty netloc, so fall back to a visible placeholder
    # instead of producing an empty heading.
    return domain.split(".")[0].title() or "Unknown"


def _product_key_for(url):
    """Return the product name shown in the report for a page URL."""
    # "all-interest-rates/tables" pages map to the same product, and that
    # fragment already contains "all-interest-rates", so one test covers both.
    if "all-interest-rates" in url:
        return "Flexi First Option Home Loan"
    if "manage-home-loan/reference-rates" in url:
        return "Variable Rate Home Loan"
    return "Home Loan Product"  # Default value


def _display_url(url, max_length=50):
    """Shorten a URL for display: keep the domain and truncate the path."""
    if len(url) <= max_length:
        return url

    parsed = urlparse(url)
    domain, path = parsed.netloc, parsed.path
    if len(domain + path) > max_length:
        # Clamp at zero so a very long domain cannot turn this into a
        # negative slice, which would cut from the end of the path instead.
        keep = max(0, max_length - len(domain) - 3)
        path = path[:keep] + "..."
    return domain + path


def _rate_change_for(result):
    """Return (old_text, new_text, change_label, change_color) for a result."""
    # Stringify before comparing so a missing key or a numeric value is
    # handled the same way as in the table cells.
    old_text = str(result.get("Old JSON Rate", "N/A"))
    new_text = str(result.get("New JSON Rate", "N/A"))
    label, color = determine_rate_change(old_text, new_text)
    return old_text, new_text, label, color


def _fill_result_row(row_cells, result, change, date_str):
    """Write one comparison result into the nine cells of a table row."""
    old_text, new_text, change_text, change_color = change
    url = str(result.get("URL") or "")

    row_cells[0].text = date_str
    row_cells[1].text = _product_key_for(url)
    row_cells[2].text = str(result.get("Extracted Rate", "N/A"))
    row_cells[3].text = old_text
    row_cells[4].text = new_text

    # Rate change (Up, Down, Same) rendered in its colour.
    change_run = row_cells[5].paragraphs[0].add_run(change_text)
    change_run.font.color.rgb = change_color
    row_cells[5].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    row_cells[6].text = "Yes" if result.get("Match Status", "✘") == "✔" else "No"
    row_cells[6].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Screenshot: embed it only when the path points at an actual file.
    # A missing/None value or a directory falls through to the placeholder.
    screenshot_path = result.get("Screenshot") or ""
    screenshot_paragraph = row_cells[7].paragraphs[0]
    if screenshot_path and os.path.isfile(screenshot_path):
        screenshot_paragraph.add_run().add_picture(screenshot_path, width=Inches(1.9))
    else:
        row_cells[7].text = "No Screenshot"
    row_cells[7].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    row_cells[8].text = _display_url(url)

    # Apply a consistent, small font size to every cell so the row fits.
    for cell in row_cells:
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.size = Pt(8)


def _save_report(doc, message_prefix):
    """Save the document under a timestamped name, then log and print it."""
    # Create the output directory on demand so a fresh checkout does not
    # fail on the first run.
    if config.OUTPUT_REPORTS_DIR:
        os.makedirs(config.OUTPUT_REPORTS_DIR, exist_ok=True)

    report_filename = f"Rate_Comparison_Report_{time.strftime('%Y%m%d_%H%M%S')}.docx"
    report_path = os.path.join(config.OUTPUT_REPORTS_DIR, report_filename)
    doc.save(report_path)

    message = f"{message_prefix} {report_path}"
    logging.info(message)
    print(message)


def generate_word_report(comparison_results):
    """Generate a Word document with rate comparisons and screenshots inside the table.

    Results are grouped by website (derived from each result's URL). Every
    website gets its own heading, summary line and table, separated by page
    breaks. The document is saved as ``Rate_Comparison_Report_<timestamp>.docx``
    inside ``config.OUTPUT_REPORTS_DIR``; the path is logged and printed.

    Args:
        comparison_results: A sequence of dicts. The keys read are ``"URL"``,
            ``"Old JSON Rate"``, ``"New JSON Rate"``, ``"Extracted Rate"``,
            ``"Match Status"`` (``"✔"`` counts as a match) and
            ``"Screenshot"`` (path to an image file). Missing keys fall back
            to placeholders. An empty or ``None`` value produces a short
            "no data" report instead.

    Returns:
        None.
    """
    # Imported locally because it is only needed for the page setup below.
    from docx.enum.section import WD_ORIENT

    doc = Document()
    timestamp = time.strftime("%d%m%y")

    # Landscape page with narrow margins. The orientation flag is set in
    # addition to swapping the dimensions, as the python-docx docs describe.
    section = doc.sections[0]
    section.orientation = WD_ORIENT.LANDSCAPE
    section.page_width, section.page_height = section.page_height, section.page_width
    section.left_margin = Inches(0.5)
    section.right_margin = Inches(0.5)
    section.top_margin = Inches(0.5)
    section.bottom_margin = Inches(0.5)

    if not comparison_results:
        doc.add_heading(f"Rates Report – No Data Available - {timestamp}", level=1)
        doc.add_paragraph("⚠️ No rate changes were detected in this run.")
        _save_report(doc, "✅ Empty report generated at:")
        return

    # Group results by website name, preserving first-seen order.
    grouped_results = {}
    for result in comparison_results:
        website_name = _website_name_for(str(result.get("URL") or ""))
        grouped_results.setdefault(website_name, []).append(result)

    # Date only (no time) to save space; identical for every row.
    date_str = time.strftime('%Y-%m-%d')
    last_index = len(grouped_results) - 1

    for index, (website_name, results) in enumerate(grouped_results.items()):
        # Report title with website name.
        heading = doc.add_heading(f"Rates Report – {website_name} - {timestamp}", level=1)
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Classify each result once; reused for the summary and the rows.
        changes = [_rate_change_for(r) for r in results]
        labels = [change[2] for change in changes]
        same_count = labels.count("Same")
        up_count = labels.count("Up")
        down_count = labels.count("Down")
        matches_yes = sum(1 for r in results if r.get("Match Status") == "✔")
        matches_no = sum(1 for r in results if r.get("Match Status") == "✘")

        summary_text = f"Summary: Rate Changes - Same: {same_count}, Up: {up_count}, Down: {down_count}, Website Match - Yes: {matches_yes}, No: {matches_no}"
        summary_para = doc.add_paragraph(summary_text)
        summary_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        table = doc.add_table(rows=1, cols=len(_REPORT_HEADERS))
        table.style = 'Table Grid'

        # The widths total 9.5 inches, leaving slack inside the landscape
        # page after the two 0.5-inch side margins (presumably 10 inches
        # usable on the default Letter-sized template; confirm if the
        # template changes).
        for column, width in zip(table.columns, _REPORT_COL_WIDTHS):
            column.width = Inches(width)

        # Header row: centered, bold, slightly smaller font.
        for cell, header in zip(table.rows[0].cells, _REPORT_HEADERS):
            cell.text = header
            for paragraph in cell.paragraphs:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in paragraph.runs:
                    run.bold = True
                    run.font.size = Pt(9)

        for result, change in zip(results, changes):
            _fill_result_row(table.add_row().cells, result, change, date_str)

        # Page break between websites (except after the last one).
        if index != last_index:
            doc.add_page_break()

    _save_report(doc, "✅ Report generated successfully at:")