11"""
2- Service for bulk creating/updating competitions with multilingual data
2+ Service for bulk creating/updating competitions with multilingual info
33"""
44from typing import Optional
55from app .repositories import d1_competition_crud as competition_crud
66from app .services .scrape .competitions import links_service
77from app .services .unify .function import find_original_sentence
8+ from app .services .scrape .competitions .scrape import (
9+ get_competition_name ,
10+ get_competition_description ,
11+ get_competition_image_link ,
12+ get_competition_application_link )
813from app .services .unify .lists import (
914 ar_names_list , tr_names_list , en_names_list ,
1015 tr_links_list , en_links_list , min_members_list , max_members_list ,
1722from datetime import datetime
1823
1924
20- def get_competition_name ( soup ):
21- """Extract competition name from BeautifulSoup object """
25+ def scrape_competition_info ( link : str ):
26+ """Scrape competition info from a single link """
2227 try :
23- competition_name = soup .find ('div' , class_ = 'container' ).find ('h1' ).text .strip ()
24- return competition_name
25- except :
26- return None
27-
28-
29- def get_competition_description (soup ):
30- """Extract competition description from BeautifulSoup object"""
31- try :
32- # Get the description from the first tab content
33- description = soup .find ('div' , id = 'tabsNavigation1' ).text .strip ()
34- return description if description else None
35- except :
36- pass
37- return None
38-
39-
40- def get_competition_image_link (soup ):
41- """Extract competition image link from BeautifulSoup object"""
42- try :
43- from urllib .parse import unquote
44- image_element = soup .find ('div' , id = 'tabsNavigation1' ).find ('img' )
45- if image_element :
46- img_src = image_element .get ('src' )
47- if img_src :
48- img_src = unquote (img_src )
49- if img_src .startswith ('http' ):
50- return img_src
51- else :
52- return f"https://teknofest.org{ img_src } "
53- except :
54- pass
55- return None
56-
57-
58- def get_competition_application_link (soup ):
59- """Extract competition application link from BeautifulSoup object"""
60- try :
61- application_link = soup .find ('div' , id = 'tabsNavigation1' ).find ('a' )['href' ]
62- return application_link
63- except :
64- return None
65-
28+ if __name__ == "__main__" :
29+ print (f" Scraping competition info from { link } " )
6630
67- def scrape_competition_data (link : str ):
68- """Scrape competition data from a single link"""
69- try :
7031 response = requests .get (link , timeout = 10 )
7132 response .raise_for_status ()
7233 soup = BeautifulSoup (response .text , 'html.parser' )
@@ -77,6 +38,11 @@ def scrape_competition_data(link: str):
7738 application_link = get_competition_application_link (soup )
7839
7940 print (f" Scraped: { name or 'N/A' } " )
41+
42+ if __name__ == "__main__" :
43+ print (f" Description: { description [:100 ] + '...' if description else 'N/A' } " )
44+ print (f" Image Link: { image_link or 'N/A' } " )
45+ print (f" Application Link: { application_link or 'N/A' } " )
8046
8147 return {
8248 'name' : name ,
@@ -214,9 +180,9 @@ def find_competition_in_db(tr_name: Optional[str] = None, en_name: Optional[str]
214180 return None
215181
216182
217- def merge_competition_data (idx : int , tr_data : dict , en_data : dict ):
183+ def merge_competition_info (idx : int , tr_info : dict , en_info : dict ):
218184 """
219- Merge competition data from Turkish and English sources along with predefined list data
185+ Merge competition info from Turkish and English sources along with predefined list info
220186 into a single Competition object
221187 """
222188 competition = Competition ()
@@ -238,7 +204,7 @@ def merge_competition_data(idx: int, tr_data: dict, en_data: dict):
238204 if idx < len (t3kys_number_list ) and t3kys_number_list [idx ] is not None :
239205 competition .t3kys_number = str (t3kys_number_list [idx ])
240206
241- # Use predefined data from lists (these are already properly matched by index)
207+ # Use predefined info from lists (these are already properly matched by index)
242208 if idx < len (tr_names_list ):
243209 competition .tr_name = tr_names_list [idx ].strip ()
244210 if idx < len (en_names_list ):
@@ -247,9 +213,9 @@ def merge_competition_data(idx: int, tr_data: dict, en_data: dict):
247213 competition .ar_name = ar_names_list [idx ].strip ()
248214
249215 if idx < len (tr_links_list ):
250- competition .tr_link = f"https://teknofest.org/tr/yarismalar/ { tr_links_list [idx ].strip ()} /"
216+ competition .tr_link = f"{ tr_links_list [idx ].strip ()} /"
251217 if idx < len (en_links_list ):
252- competition .en_link = f"https://teknofest.org/en/competitions/ { en_links_list [idx ].strip ()} /"
218+ competition .en_link = f"{ en_links_list [idx ].strip ()} /"
253219
254220 # Set min and max members from CSV if available
255221 if idx < len (min_members_list ) and min_members_list [idx ] is not None :
@@ -258,22 +224,22 @@ def merge_competition_data(idx: int, tr_data: dict, en_data: dict):
258224 competition .max_member = max_members_list [idx ]
259225
260226 # Add scraped descriptions if available
261- if en_data .get ('description' ):
262- competition .en_description = en_data ['description' ]
263- if tr_data .get ('description' ):
264- competition .tr_description = tr_data ['description' ]
227+ if en_info .get ('description' ):
228+ competition .en_description = en_info ['description' ]
229+ if tr_info .get ('description' ):
230+ competition .tr_description = tr_info ['description' ]
265231
266232 # Add scraped application links if available
267- if en_data .get ('application_link' ):
268- competition .application_link_en = en_data ['application_link' ]
269- if tr_data .get ('application_link' ):
270- competition .application_link_tr = tr_data ['application_link' ]
233+ if en_info .get ('application_link' ):
234+ competition .application_link_en = en_info ['application_link' ]
235+ if tr_info .get ('application_link' ):
236+ competition .application_link_tr = tr_info ['application_link' ]
271237
272238 # Use image from whichever source has it
273- if en_data .get ('image_link' ):
274- competition .image_path = en_data ['image_link' ]
275- elif tr_data .get ('image_link' ):
276- competition .image_path = tr_data ['image_link' ]
239+ if en_info .get ('image_link' ):
240+ competition .image_path = en_info ['image_link' ]
241+ elif tr_info .get ('image_link' ):
242+ competition .image_path = tr_info ['image_link' ]
277243
278244 return competition
279245
@@ -324,12 +290,12 @@ def bulk_create_update_competitions_from_remote(year: str = None):
324290 print (f"Found { len (en_links )} English competition links" )
325291
326292 except Exception as e :
327- print (f"Error fetching competition data : { str (e )} " )
293+ print (f"Error fetching competition info : { str (e )} " )
328294 return {
329295 'created' : 0 ,
330296 'updated' : 0 ,
331297 'failed' : 1 ,
332- 'details' : [{'error' : f'Failed to fetch competition data : { str (e )} ' }]
298+ 'details' : [{'error' : f'Failed to fetch competition info : { str (e )} ' }]
333299 }
334300
335301 # Match TR and EN competitions by position
@@ -353,41 +319,41 @@ def bulk_create_update_competitions_from_remote(year: str = None):
353319
354320 print (f"\n ({ idx + 1 } /{ max_competitions } ): { identifier } " )
355321
356- # Scrape data from each language version (with session cookie if available)
322+ # Scrape info from each language version (with session cookie if available)
357323 if tr_link :
358324 response = requests .get (tr_link , cookies = session_cookies , timeout = 10 ) if session_cookies else requests .get (tr_link , timeout = 10 )
359325 tr_soup = BeautifulSoup (response .content , 'html.parser' )
360- tr_data = {
326+ tr_info = {
361327 'name' : get_competition_name (tr_soup ),
362328 'description' : get_competition_description (tr_soup ),
363329 'image_link' : get_competition_image_link (tr_soup ),
364330 'application_link' : get_competition_application_link (tr_soup ),
365331 'link' : tr_link
366332 }
367333 else :
368- tr_data = {}
334+ tr_info = {}
369335
370336 if en_link :
371337 response = requests .get (en_link , cookies = session_cookies , timeout = 10 ) if session_cookies else requests .get (en_link , timeout = 10 )
372338 en_soup = BeautifulSoup (response .content , 'html.parser' )
373- en_data = {
339+ en_info = {
374340 'name' : get_competition_name (en_soup ),
375341 'description' : get_competition_description (en_soup ),
376342 'image_link' : get_competition_image_link (en_soup ),
377343 'application_link' : get_competition_application_link (en_soup ),
378344 'link' : en_link
379345 }
380346 else :
381- en_data = {}
347+ en_info = {}
382348
383349 # Create competition object
384350 competition = Competition ()
385351
386- # Set names from scraped data
387- if tr_data .get ('name' ):
388- competition .tr_name = tr_data ['name' ]
389- if en_data .get ('name' ):
390- competition .en_name = en_data ['name' ]
352+ # Set names from scraped info
353+ if tr_info .get ('name' ):
354+ competition .tr_name = tr_info ['name' ]
355+ if en_info .get ('name' ):
356+ competition .en_name = en_info ['name' ]
391357
392358 # Set links
393359 if tr_link :
@@ -396,22 +362,22 @@ def bulk_create_update_competitions_from_remote(year: str = None):
396362 competition .en_link = en_link
397363
398364 # Set descriptions
399- if tr_data .get ('description' ):
400- competition .tr_description = tr_data ['description' ]
401- if en_data .get ('description' ):
402- competition .en_description = en_data ['description' ]
365+ if tr_info .get ('description' ):
366+ competition .tr_description = tr_info ['description' ]
367+ if en_info .get ('description' ):
368+ competition .en_description = en_info ['description' ]
403369
404370 # Set application links
405- if tr_data .get ('application_link' ):
406- competition .application_link_tr = tr_data ['application_link' ]
407- if en_data .get ('application_link' ):
408- competition .application_link_en = en_data ['application_link' ]
371+ if tr_info .get ('application_link' ):
372+ competition .application_link_tr = tr_info ['application_link' ]
373+ if en_info .get ('application_link' ):
374+ competition .application_link_en = en_info ['application_link' ]
409375
410376 # Set image
411- if en_data .get ('image_link' ):
412- competition .image_path = en_data ['image_link' ]
413- elif tr_data .get ('image_link' ):
414- competition .image_path = tr_data ['image_link' ]
377+ if en_info .get ('image_link' ):
378+ competition .image_path = en_info ['image_link' ]
379+ elif tr_info .get ('image_link' ):
380+ competition .image_path = tr_info ['image_link' ]
415381
416382 # set id
417383
@@ -429,7 +395,7 @@ def bulk_create_update_competitions_from_remote(year: str = None):
429395 # Create or update
430396 if existing_competition :
431397 print (f" Found existing competition (ID: { existing_competition .id } )" )
432- # Merge with existing data
398+ # Merge with existing info
433399 for field in ['tr_name' , 'tr_description' , 'tr_link' , 'en_name' , 'en_description' , 'en_link' ,
434400 'image_path' , 'application_link_tr' , 'application_link_en' ]:
435401 new_value = getattr (competition , field )
@@ -485,14 +451,17 @@ def bulk_create_update_competitions_from_remote(year: str = None):
485451
486452def bulk_create_update_competitions_multilingual (source : str = "lists" , year : str = None ):
487453 """
488- Create or update all competitions in the database with multilingual data .
454+ Create or update all competitions in the database with multilingual info .
489455
490456 Args:
491457 source: 'lists' for local CSV or 'remote' for website scraping
492458 year: Competition year (used for tracking which years competitions are held)
493459
494460 Returns a summary of the operation.
495461 """
462+ if __name__ == "__main__" :
463+ print (f"Running bulk_create_update_competitions_multilingual with source='{ source } ' and year='{ year } '" )
464+
496465 if source == "remote" :
497466 return bulk_create_update_competitions_from_remote (year = year )
498467
@@ -523,28 +492,36 @@ def bulk_create_update_competitions_multilingual(source: str = "lists", year: st
523492
524493 for idx in range (max_competitions ):
525494 try :
526- # Get data from predefined lists
495+ # Get info from predefined lists
527496 tr_name = tr_names_list [idx ].strip () if idx < len (tr_names_list ) else None
528497 en_name = en_names_list [idx ].strip () if idx < len (en_names_list ) else None
529498 ar_name = ar_names_list [idx ].strip () if idx < len (ar_names_list ) else None
530499 tr_link = tr_links_list [idx ].strip () if idx < len (tr_links_list ) else None
531500 en_link = en_links_list [idx ].strip () if idx < len (en_links_list ) else None
532501
533502 # Build full URLs
534- tr_url = f"https://teknofest.org/tr/yarismalar/{ tr_link } /" if tr_link else None
535- en_url = f"https://teknofest.org/en/competitions/{ en_link } /" if en_link else None
503+ tr_url = f"{ tr_link } /" if tr_link else None
504+ en_url = f"{ en_link } /" if en_link else None
505+
506+ if __name__ == "__main__" :
507+ print (f"\n Processing competition index { idx } :" )
508+ print (f" TR Name: { tr_name } " )
509+ print (f" TR Link: { tr_url } " )
510+ print (f" EN Name: { en_name } " )
511+ print (f" EN Link: { en_url } " )
512+ print (f" AR Name: { ar_name } " )
536513
537514 # Generate identifier from available names
538515 identifier = en_name or tr_name or ar_name or f"competition_{ idx } "
539516
540517 print (f"\n ({ idx + 1 } /{ max_competitions } ): { identifier } " )
541518
542- # Scrape data from each language version for descriptions and images
543- tr_data = scrape_competition_data (tr_url ) if tr_url else {}
544- en_data = scrape_competition_data (en_url ) if en_url else {}
519+ # Scrape info from each language version for descriptions and images
520+ tr_info = scrape_competition_info (tr_url ) if tr_url else {}
521+ en_info = scrape_competition_info (en_url ) if en_url else {}
545522
546- # Merge the data (passing index to use list data )
547- competition = merge_competition_data (idx , tr_data , en_data )
523+ # Merge the info (passing index to use list info )
524+ competition = merge_competition_info (idx , tr_info , en_info )
548525
549526 # Add year to years list
550527 competition .years = [year ]
@@ -559,10 +536,18 @@ def bulk_create_update_competitions_multilingual(source: str = "lists", year: st
559536 en_link = en_url
560537 )
561538
539+ if __name__ == "__main__" :
540+ if existing_competition :
541+ print (f" Found existing competition in DB (ID: { existing_competition .id } )" )
542+ continue
543+ else :
544+ print (f" No existing competition found in DB" )
545+ continue
546+
562547 # Create or update
563548 if existing_competition :
564549 print (f" Found existing competition (ID: { existing_competition .id } )" )
565- # Merge with existing data , preserving fields not set in new data
550+ # Merge with existing info , preserving fields not set in new info
566551 for field in ['tr_name' , 'tr_description' , 'tr_link' , 'en_name' , 'en_description' , 'en_link' ,
567552 'ar_name' , 'ar_description' , 'ar_link' , 'image_path' , 'min_member' , 'max_member' ,
568553 'application_link_tr' , 'application_link_en' ]:
@@ -621,3 +606,7 @@ def bulk_create_update_competitions_multilingual(source: str = "lists", year: st
621606 print (f"{ '=' * 60 } \n " )
622607
623608 return results
609+
610+ if __name__ == "__main__" :
611+ # Example usage: bulk create/update competitions from lists for current year
612+ bulk_create_update_competitions_multilingual (source = "lists" , year = "2026" )
0 commit comments