1515 MenuSubCategoryCreate ,
1616 MenuItemCreate
1717)
18+ from app .api .routes .utils import transform_restaurant_data
1819
# Module-wide logging setup.
# NOTE(review): calling logging.basicConfig() at import time configures the
# ROOT logger for the whole process and is usually reserved for application
# entry points, not route modules — confirm this module is the intended place
# for it (and that DEBUG level is not left on in production).
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@@ -26,10 +27,9 @@ class ZomatoUrl(BaseModel):
2627
2728def fetch_zomato_data (url : str ) -> str :
2829 headers = {
29- 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 ' ,
30+ 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' ,
3031 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' ,
3132 'Accept-Language' : 'en-US,en;q=0.5' ,
32- 'Connection' : 'keep-alive' ,
3333 }
3434
3535 try :
@@ -43,33 +43,25 @@ def fetch_zomato_data(url: str) -> str:
4343
def parse_zomato_page(html_content: str) -> dict:
    """Extract the embedded PRELOADED_STATE object from a Zomato page.

    Zomato inlines its state as:
        window.__PRELOADED_STATE__ = JSON.parse("<escaped JSON string>");

    Args:
        html_content: Raw HTML of the restaurant page.

    Returns:
        The decoded state as a dict.

    Raises:
        ValueError: if no script containing PRELOADED_STATE is found.
        json.JSONDecodeError: if the payload cannot be decoded.
    """
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
        scripts = soup.find_all('script')

        for script in scripts:
            if script.string and 'window.__PRELOADED_STATE__' in script.string:
                raw = script.string.split('window.__PRELOADED_STATE__ = JSON.parse(')[1]
                raw = raw.split(');')[0].strip()
                # The argument to JSON.parse is itself a JSON string literal
                # (double-quoted, with escaped quotes/backslashes). Decoding
                # twice handles every escape sequence (\" \\ \n \uXXXX ...)
                # correctly, unlike the previous chained str.replace calls,
                # which mis-decoded e.g. an escaped backslash before a quote.
                return json.loads(json.loads(raw))

        raise ValueError("Could not find PRELOADED_STATE in page")
    except Exception as e:
        logger.error(f"Error parsing page: {str(e)}")
        raise
6960
7061def extract_menu_data (json_data : dict ) -> dict :
7162 try :
72- logger .debug ("Starting menu data extraction..." )
63+ with open ('data.json' , 'w' ) as f :
64+ json .dump (json_data , f , indent = 4 )
7365 restaurant_data = json_data .get ('pages' , {}).get ('current' , {})
7466 restaurant_details = json_data .get ('pages' , {}).get ('restaurant' , {})
7567
@@ -79,56 +71,99 @@ def extract_menu_data(json_data: dict) -> dict:
7971
8072 res_info = restaurant_details [restaurant_id ].get ('sections' , {})
8173 basic_info = res_info .get ('SECTION_BASIC_INFO' , {})
82-
74+ menu_widget = res_info .get ('SECTION_MENU_WIDGET' , {})
75+
76+
77+ # Validate required name field
78+ if not basic_info .get ('name' ):
79+ raise ValueError ("Restaurant name is required" )
80+
8381 restaurant_info = {
84- 'name' : basic_info .get ('name' ),
85- 'cuisines' : basic_info .get ('cuisine_string' ),
86- 'rating' : {
87- 'aggregate_rating' : basic_info .get ('rating' , {}).get ('aggregate_rating' ),
88- 'votes' : basic_info .get ('rating' , {}).get ('votes' ),
89- 'rating_text' : basic_info .get ('rating' , {}).get ('rating_text' )
90- },
91- 'location' : {
92- 'locality' : restaurant_data .get ('pageDescription' , '' ),
93- 'url' : basic_info .get ('resUrl' )
94- },
95- 'timing' : {
96- 'description' : basic_info .get ('timing' , {}).get ('timing_desc' ),
97- 'hours' : basic_info .get ('timing' , {}).get ('customised_timings' , {}).get ('opening_hours' , [])
82+ 'cuisine_type' : basic_info .get ('cuisine_string' , '' ),
83+ 'venue' : {
84+ 'name' : basic_info .get ('name' , '' ),
85+ 'address' : basic_info .get ('address' , '' ),
86+ 'locality' : basic_info .get ('locality_verbose' , '' ),
87+ 'city' : basic_info .get ('city' , '' ),
88+ 'latitude' : basic_info .get ('latitude' , '0' ),
89+ 'longitude' : basic_info .get ('longitude' , '0' ),
90+ 'zipcode' : basic_info .get ('zipcode' , '' ),
91+ 'rating' : basic_info .get ('rating' , {}).get ('aggregate_rating' , '0' ),
92+ 'timing' : basic_info .get ('timing' , {}).get ('timing' , '' ),
93+ 'avg_cost_for_two' : basic_info .get ('average_cost_for_two' , 0 )
9894 }
9995 }
100-
96+
97+
10198 menu_categories = []
102- menu_data = res_info .get ('SECTION_MENU_WIDGET' , {})
103-
104- for category in menu_data .get ('categories' , []):
105- category_items = {
106- 'category' : category .get ('name' , '' ),
107- 'items' : []
99+ print ("Catorgies" , menu_widget .get ('menu' , {}).get ('categories' , []))
100+ for category in menu_widget .get ('menu' , {}).get ('categories' , []):
101+ print ("category" , category )
102+ category_data = {
103+ 'name' : category .get ('name' , '' ),
104+ 'description' : category .get ('description' , '' ),
105+ 'subcategories' : []
108106 }
109107
108+ # Group items by subcategory
109+ subcategories = {}
110110 for item in category .get ('items' , []):
111+ subcategory_name = item .get ('category' , 'Other' )
112+
113+ if subcategory_name not in subcategories :
114+ subcategories [subcategory_name ] = {
115+ 'name' : subcategory_name ,
116+ 'description' : '' ,
117+ 'items' : []
118+ }
119+
111120 menu_item = {
112- 'name' : item .get ('name' ),
113- 'description' : item .get ('description' , '' ),
114- 'price' : float (item .get ('price' , 0 )),
115- 'image_url' : item .get ('imageUrl' , '' ),
121+ 'name' : item .get ('name' , '' ),
122+ 'description' : item .get ('desc' , '' ),
116123 'is_veg' : item .get ('isVeg' , True ),
117- 'spice_level' : item .get ('spiceLevel' , 'None' )
124+ 'image_url' : item .get ('itemImage' , '' ),
125+ 'variants' : []
118126 }
119- category_items ['items' ].append (menu_item )
127+
128+ # Handle variants
129+ if item .get ('variantsV2' ):
130+ for variant in item ['variantsV2' ]:
131+ menu_item ['variants' ].append ({
132+ 'name' : variant .get ('variantName' , '' ),
133+ 'price' : float (variant .get ('price' , 0 )) / 100 ,
134+ 'is_default' : variant .get ('isDefault' , False )
135+ })
136+ else :
137+ menu_item ['variants' ].append ({
138+ 'name' : 'Regular' ,
139+ 'price' : float (item .get ('defaultPrice' , 0 )) / 100 ,
140+ 'is_default' : True
141+ })
142+
143+ subcategories [subcategory_name ]['items' ].append (menu_item )
120144
121- if category_items ['items' ]:
122- menu_categories .append (category_items )
123-
145+ # Add non-empty subcategories to category
146+ category_data ['subcategories' ] = [
147+ subcat for subcat in subcategories .values ()
148+ if subcat ['items' ]
149+ ]
150+
151+ if category_data ['subcategories' ]:
152+ menu_categories .append (category_data )
153+
124154 return {
155+ 'status' : 'success' ,
125156 'restaurant_info' : restaurant_info ,
126157 'menu' : menu_categories
127158 }
128-
159+
129160 except Exception as e :
130161 logger .error (f"Error extracting menu data: { str (e )} " )
131- raise
162+ raise HTTPException (
163+ status_code = 500 ,
164+ detail = f"Failed to extract menu data: { str (e )} "
165+ )
166+
132167
133168async def create_restaurant (client : httpx .AsyncClient , restaurant_data : RestaurantCreate ):
134169 response = await client .post ("/venue/restaurants/" , json = restaurant_data .dict ())
@@ -161,18 +196,21 @@ async def scrape_and_create_menu(
161196 current_user : UserBusiness = Depends (get_business_user )
162197):
163198 try :
164- # 1. Scrape data
199+ # 1. Scrape and log data
165200 html_content = fetch_zomato_data (str (request .url ))
166201 json_data = parse_zomato_page (html_content )
167202 scraped_data = extract_menu_data (json_data )
203+
204+ logger .info ("Scraped Restaurant Info:" )
205+ logger .info (f"Name: { scraped_data ['restaurant_info' ]['name' ]} " )
206+ logger .info (f"Cuisines: { scraped_data ['restaurant_info' ]['cuisines' ]} " )
168207
169208 async with httpx .AsyncClient () as client :
170209 # 2. Create Restaurant
171210 venue_data = VenueCreate (
172211 name = scraped_data ['restaurant_info' ]['name' ],
173212 description = "Restaurant imported from Zomato" ,
174- opening_time = scraped_data ['restaurant_info' ]['timing' ]['opening_time' ],
175- avg_expense_for_two = float (scraped_data ['restaurant_info' ]['avg_cost_for_two' ]),
213+ avg_expense_for_two = scraped_data ['restaurant_info' ]['avg_cost_for_two' ],
176214 zomato_link = str (request .url )
177215 )
178216
@@ -196,15 +234,13 @@ async def scrape_and_create_menu(
196234
197235 # 4. Create Categories, Subcategories, and Items sequentially
198236 for category in scraped_data ['menu' ]:
199- # Create Category
200237 category_data = MenuCategoryCreate (
201238 name = category ['category' ],
202239 menu_id = menu_id
203240 )
204241 category_id = await create_category (client , category_data )
205242 logger .info (f"Created category: { category ['category' ]} " )
206243
207- # Create default subcategory
208244 subcategory_data = MenuSubCategoryCreate (
209245 name = f"{ category ['category' ]} Items" ,
210246 category_id = category_id ,
@@ -213,7 +249,6 @@ async def scrape_and_create_menu(
213249 subcategory_id = await create_subcategory (client , subcategory_data )
214250 logger .info (f"Created subcategory for { category ['category' ]} " )
215251
216- # Create items one by one
217252 for item in category ['items' ]:
218253 item_data = MenuItemCreate (
219254 name = item ['name' ],
@@ -230,9 +265,45 @@ async def scrape_and_create_menu(
230265 "message" : "Menu successfully created" ,
231266 "venue_id" : str (venue_id ),
232267 "menu_id" : str (menu_id ),
233- "restaurant_name" : scraped_data ['restaurant_info' ]['name' ]
268+ "restaurant_name" : scraped_data ['restaurant_info' ]['name' ],
269+ "scraped_data" : scraped_data
234270 }
235271
236272 except Exception as e :
237273 logger .error (f"Menu creation failed: { str (e )} " )
238274 raise HTTPException (status_code = 500 , detail = f"Failed to create menu: { str (e )} " )
275+
276+
@router.get("/menu/scrape")
async def get_scraped_menu(url: str):
    """Scrape a Zomato restaurant page and return its transformed menu data.

    Args:
        url: Zomato restaurant page URL (validated through ZomatoUrl).

    Returns:
        dict with "status", "restaurant_info" (full transformed payload,
        kept as-is for backward compatibility) and "menu" (its menu section).

    Raises:
        HTTPException: 400 for an invalid URL, 500 for any scraping failure.
    """
    try:
        # Pydantic validation; a bad URL raises ValueError (handled below).
        zomato_url = ZomatoUrl(url=url)

        # Reuse the existing scrape pipeline, then normalize the raw state.
        html_content = fetch_zomato_data(str(zomato_url.url))
        json_data = parse_zomato_page(html_content)
        scraped_data = transform_restaurant_data(json_data)

        # NOTE: "restaurant_info" already contains "menu"; the duplicate key
        # is preserved so existing consumers of this endpoint keep working.
        return {
            "status": "success",
            "restaurant_info": scraped_data,
            "menu": scraped_data['menu']
        }

    except ValueError as ve:
        logger.error(f"Invalid URL format: {str(ve)}")
        raise HTTPException(
            status_code=400,
            detail=f"Invalid Zomato URL: {str(ve)}"
        )
    except Exception as e:
        logger.error(f"Scraping failed: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to scrape menu data: {str(e)}"
        )
0 commit comments