-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomparator.py
More file actions
150 lines (122 loc) · 6.03 KB
/
comparator.py
File metadata and controls
150 lines (122 loc) · 6.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import json
import os
import config
from json_handler import load_json_files, find_matching_item_by_url, find_matching_rate_by_product, get_json_value
import logging
def compare_rates_simple(scraped_data):
    """
    A simplified comparison function that doesn't rely on product names.
    It compares rates at the same position in the JSON structure.

    Args:
        scraped_data: dict mapping URL -> page data; each page dict may carry
            a "rates" list (dicts with a "rate" key) and a "screenshot" path.

    Returns:
        list of per-rate comparison dicts (URL, Product Key, Extracted Rate,
        Old JSON Rate, New JSON Rate, Match Status, Screenshot). Empty list
        when no previous data exists.
    """
    old_json_data = load_json_files(config.OLD_JSON_DIR)
    logging.info(f"Loaded {len(old_json_data)} items from old_json directory")
    if not old_json_data:
        logging.warning("⚠️ No previous data found in old_json/. Using basic report.")
        return []
    comparison_results = []
    # Index the already-loaded old items by URL. (Previously this function
    # re-read and re-parsed every file in OLD_JSON_DIR from disk a second
    # time, duplicating the work load_json_files had just done.)
    # NOTE(review): assumes load_json_files returns the parsed per-file dicts
    # unchanged — compare_rates below relies on the same 'url'/'rates' shape.
    old_data_by_url = {}
    for item in old_json_data:
        url = item.get('url')
        if url:
            old_data_by_url[url] = item
    # Now compare by URL and rate position
    for url, data in scraped_data.items():
        new_rates = data.get("rates", [])
        # Check if we have this URL in old data
        if url in old_data_by_url:
            old_rates = old_data_by_url[url].get("rates", [])
            # Compare rates by position (index)
            for i, new_rate in enumerate(new_rates):
                new_rate_value = new_rate.get("rate", "N/A")
                old_rate_value = "N/A"
                # If we have a rate at this index in old data
                if i < len(old_rates):
                    old_rate_value = old_rates[i].get("rate", "N/A")
                # Create comparison result
                comparison_results.append({
                    "URL": url,
                    "Product Key": f"Product {i+1}",  # Just number them
                    "Extracted Rate": new_rate_value,
                    "Old JSON Rate": old_rate_value,
                    "New JSON Rate": new_rate_value,
                    "Match Status": "✔" if old_rate_value == new_rate_value else "✘",
                    "Screenshot": data.get("screenshot", "No Screenshot")
                })
        else:
            # No previous data for this URL
            for i, rate in enumerate(new_rates):
                comparison_results.append({
                    "URL": url,
                    "Product Key": f"Product {i+1}",
                    "Extracted Rate": rate.get("rate", "N/A"),
                    "Old JSON Rate": "N/A",  # No previous data
                    "New JSON Rate": rate.get("rate", "N/A"),
                    "Match Status": "N/A",  # Can't determine
                    "Screenshot": data.get("screenshot", "No Screenshot")
                })
    if not comparison_results:
        logging.error("⚠️ No valid comparisons could be made.")
    else:
        logging.info(f"✅ Generated {len(comparison_results)} comparison results")
    return comparison_results
def compare_rates(scraped_data):
    """Compare extracted website rates with Old and New JSON files, handling empty data safely.

    Args:
        scraped_data: dict mapping URL -> page data; each page dict may carry
            a "rates" list (dicts with "rate" and "product_name" keys) and a
            "screenshot" path.

    Returns:
        list of per-product comparison dicts; empty when no old data exists.
    """
    # Load both JSON snapshots up front (the new snapshot is only reported on).
    old_json_data = load_json_files(config.OLD_JSON_DIR)
    new_json_data = load_json_files(config.NEW_JSON_DIR)

    logging.info(f"Loaded {len(old_json_data)} items from old_json directory")
    logging.info(f"Loaded {len(new_json_data)} items from new_json directory")

    # Without a previous snapshot there is nothing to compare against.
    if not old_json_data:
        logging.warning("⚠️ No previous data found in old_json/. Skipping comparison.")
        return []

    results = []
    for url, page in scraped_data.items():
        scraped_rates = page.get("rates", [])

        # Nothing was extracted for this page — note it and move on.
        if not scraped_rates:
            logging.warning(f"⚠️ No extracted rates found for {url}, skipping comparison.")
            continue

        # Locate this URL's entry in the previous snapshot.
        baseline = find_matching_item_by_url(url, old_json_data)
        if not baseline:
            logging.warning(f"⚠️ No previous data for {url}, skipping entry.")
            continue
        baseline_rates = baseline.get("rates", [])

        screenshot = page.get("screenshot", "No Screenshot")
        for entry in scraped_rates:
            current = entry.get("rate", "N/A")
            product = entry.get("product_name", "Unknown Product")

            # Match by product name against the old snapshot's rates.
            previous = find_matching_rate_by_product(product, baseline_rates)
            if previous == "N/A":
                logging.warning(f"⚠️ No previous rate found for {product} at {url}, skipping entry.")
                continue

            results.append({
                "URL": url,
                "Product Key": product,
                "Extracted Rate": current,
                "Old JSON Rate": previous,
                "New JSON Rate": current,
                "Match Status": "✔" if previous == current else "✘",
                "Screenshot": screenshot,
            })

    # Report the outcome before returning (possibly empty) results.
    if not results:
        logging.error("⚠️ No valid comparisons could be made. No report will be generated.")
    else:
        logging.info(f"✅ Generated {len(results)} comparison results")
        logging.info(f"✅ Comparison completed for {len(results)} URLs.")
    return results