@@ -146,13 +146,13 @@ def get_quest_blueprint(quest_id: str) -> list:
146146 {'id' : 'fSytzGwwBVw' , 'title' : 'What is Data Leakage? (StatQuest)' , 'channel' : 'StatQuest' },
147147 ],
148148 'implementation' : [
149- {'id' : 'rmEa9_8GKQY ' , 'title' : 'Sklearn Pipeline으로 Data Leakage 방지 (Krish Naik)' , 'channel' : 'Krish Naik' },
149+ {'id' : 'GX5giNNEpgY ' , 'title' : 'Sklearn Pipeline Tutorial (Krish Naik)' , 'channel' : 'Krish Naik' },
150150 ],
151151 'abstraction' : [
152152 {'id' : 'gJo0uNL-5Lw' , 'title' : 'K-Fold Cross Validation (StatQuest)' , 'channel' : 'StatQuest' },
153153 ],
154154 'edgeCase' : [
155- {'id' : 'Gmq7mXv6M-c ' , 'title' : 'Saving & Loading ML Models — Pickle & Joblib (NeuralNine)' , 'channel' : 'NeuralNine' },
155+ {'id' : 'Ki4s8mWZ4ME ' , 'title' : 'Saving & Loading ML Models — Pickle & Joblib (NeuralNine)' , 'channel' : 'NeuralNine' },
156156 ],
157157 'default' : [
158158 {'id' : 'fSytzGwwBVw' , 'title' : 'What is Data Leakage?' , 'channel' : 'StatQuest' },
@@ -200,18 +200,18 @@ def get_quest_blueprint(quest_id: str) -> list:
200200 {'id' : 'geZDkTfGT-I' , 'title' : 'Imbalanced Data 처리 전략 설계 (StatQuest)' , 'channel' : 'StatQuest' },
201201 ],
202202 'edgeCase' : [
203- {'id' : 'pDw_JHHvj-0 ' , 'title' : 'Class Weights & Cost-Sensitive Learning (Krish Naik)' , 'channel' : 'Krish Naik' },
203+ {'id' : 'FE-AiGF_HFk ' , 'title' : 'Class Weights & Cost-Sensitive Learning (Krish Naik)' , 'channel' : 'Krish Naik' },
204204 ],
205205 'abstraction' : [
206206 {'id' : 'gJo0uNL-5Lw' , 'title' : 'StratifiedKFold & 계층화 분할 (StatQuest)' , 'channel' : 'StatQuest' },
207207 ],
208208 'implementation' : [
209- {'id' : 'U3X98xZ4_no ' , 'title' : 'SMOTE 완전 구현 (imbalanced-learn )' , 'channel' : 'imbalanced-learn ' },
209+ {'id' : 'adBBIaNFSmw ' , 'title' : 'SMOTE Oversampling Python Tutorial (Krish Naik )' , 'channel' : 'Krish Naik ' },
210210 ],
211211 'default' : [
212212 {'id' : 'geZDkTfGT-I' , 'title' : 'Handling Imbalanced Data' , 'channel' : 'StatQuest' },
213213 {'id' : '4jRBRDbJemM' , 'title' : 'ROC AUC' , 'channel' : 'StatQuest' },
214- {'id' : 'U3X98xZ4_no ' , 'title' : 'SMOTE' , 'channel' : 'imbalanced-learn ' },
214+ {'id' : 'adBBIaNFSmw ' , 'title' : 'SMOTE' , 'channel' : 'Krish Naik ' },
215215 ],
216216 },
217217
@@ -230,15 +230,15 @@ def get_quest_blueprint(quest_id: str) -> list:
230230 {'id' : 'A88rDEf-pfk' , 'title' : 'StandardScaler fit/transform 분리 (StatQuest)' , 'channel' : 'StatQuest' },
231231 ],
232232 'abstraction' : [
233- {'id' : 'viZrOnJclY0 ' , 'title' : 'Polynomial Features & 상호작용 특성 (StatQuest)' , 'channel' : 'StatQuest' },
233+ {'id' : 'Hk7RaHvJ0RE ' , 'title' : 'Polynomial Features & 상호작용 특성 (StatQuest)' , 'channel' : 'StatQuest' },
234234 ],
235235 'edgeCase' : [
236- {'id' : 'FgakZw6K1QQ' , 'title' : 'Curse of Dimensionality & PCA (StatQuest)' , 'channel' : 'StatQuest' },
236+ {'id' : 'FgakZw6K1QQ' , 'title' : 'PCA Step-by-Step (StatQuest)' , 'channel' : 'StatQuest' },
237237 ],
238238 'default' : [
239239 {'id' : 'md8IrSMPi6o' , 'title' : 'Feature Engineering' , 'channel' : 'Kaggle' },
240240 {'id' : '68ABAU_V8qI' , 'title' : 'Feature Selection' , 'channel' : 'StatQuest' },
241- {'id' : 'FgakZw6K1QQ' , 'title' : 'Curse of Dimensionality ' , 'channel' : 'StatQuest' },
241+ {'id' : 'FgakZw6K1QQ' , 'title' : 'PCA Step-by-Step ' , 'channel' : 'StatQuest' },
242242 ],
243243 },
244244
@@ -257,15 +257,15 @@ def get_quest_blueprint(quest_id: str) -> list:
257257 {'id' : 'HdlDYng7g58' , 'title' : 'GridSearchCV 완전 구현 (sklearn)' , 'channel' : 'StatQuest' },
258258 ],
259259 'abstraction' : [
260- {'id' : 'Np8h_U9PmFw ' , 'title' : '파라미터 상호작용 & Warm-start (W&B )' , 'channel' : 'W&B ' },
260+ {'id' : 'WhnkT0lbiqw ' , 'title' : 'Hyperparameter Tuning with Optuna (Krish Naik )' , 'channel' : 'Krish Naik ' },
261261 ],
262262 'edgeCase' : [
263- {'id' : 'Np8h_U9PmFw ' , 'title' : 'Bayesian Optimization & Optuna — 효율적 탐색 (W&B )' , 'channel' : 'W&B ' },
263+ {'id' : 'WhnkT0lbiqw ' , 'title' : 'Bayesian Optimization & Optuna — 효율적 탐색 (Krish Naik )' , 'channel' : 'Krish Naik ' },
264264 ],
265265 'default' : [
266266 {'id' : 'HdlDYng7g58' , 'title' : 'Hyperparameter Tuning' , 'channel' : 'StatQuest' },
267267 {'id' : 'gJo0uNL-5Lw' , 'title' : 'K-Fold Cross Validation' , 'channel' : 'StatQuest' },
268- {'id' : 'Np8h_U9PmFw ' , 'title' : 'Bayesian Optimization' , 'channel' : 'W&B ' },
268+ {'id' : 'WhnkT0lbiqw ' , 'title' : 'Bayesian Optimization' , 'channel' : 'Krish Naik ' },
269269 ],
270270 },
271271
@@ -281,18 +281,18 @@ def get_quest_blueprint(quest_id: str) -> list:
281281 {'id' : 'B-c8tIgchu0' , 'title' : 'SHAP Summary Plot — 일관된 해석 기준 (StatQuest)' , 'channel' : 'StatQuest' },
282282 ],
283283 'design' : [
284- {'id' : 'GfGpXMBjOBg ' , 'title' : 'Counterfactual Explanation & Actionable AI (Google )' , 'channel' : 'Google ' },
284+ {'id' : 'HdlDYng7g58 ' , 'title' : 'Model Interpretability & Explainability (StatQuest )' , 'channel' : 'StatQuest ' },
285285 ],
286286 'implementation' : [
287- {'id' : 'C80SQe16Rao ' , 'title' : 'LIME 구현 — 개별 예측 설명 (Towards Data Science )' , 'channel' : 'Towards Data Science ' },
287+ {'id' : 'IBWHDySoXB0 ' , 'title' : 'LIME Explanation Python Tutorial (Krish Naik )' , 'channel' : 'Krish Naik ' },
288288 ],
289289 'edgeCase' : [
290- {'id' : 'GfGpXMBjOBg ' , 'title' : 'AI Fairness & Proxy Bias 감지 (Google Developers )' , 'channel' : 'Google ' },
290+ {'id' : 'B-c8tIgchu0 ' , 'title' : 'SHAP Values 완전 이해 (StatQuest )' , 'channel' : 'StatQuest ' },
291291 ],
292292 'default' : [
293293 {'id' : 'B-c8tIgchu0' , 'title' : 'SHAP Values' , 'channel' : 'StatQuest' },
294- {'id' : 'C80SQe16Rao ' , 'title' : 'LIME' , 'channel' : 'Towards Data Science ' },
295- {'id' : 'GfGpXMBjOBg ' , 'title' : 'AI Fairness ' , 'channel' : 'Google ' },
294+ {'id' : 'IBWHDySoXB0 ' , 'title' : 'LIME' , 'channel' : 'Krish Naik ' },
295+ {'id' : 'HdlDYng7g58 ' , 'title' : 'Feature Importance ' , 'channel' : 'StatQuest ' },
296296 ],
297297 },
298298}
@@ -534,30 +534,26 @@ def generate_fallback_deep_dive(quest_id: str) -> dict:
534534# ============================================================================
535535
536536def get_recommended_videos_legacy (
537- quest_id : str ,
538- dimensions : dict ,
537+ quest_id : str ,
538+ dimensions : dict ,
539539 max_count : int = 3 ,
540540 quest_title : str = ""
541541) -> list :
542542 """
543- [2026-02-23 업그레이드]
544- 1. 정적 큐레이션 데이터(QUEST_VIDEOS) 매핑
545- 2. 데이터 부족 시 YouTube Search API를 통한 실시간 검색 폴백 수행
543+ [2026-03-04 개편] 하드코딩 제거 - LLM 실시간 검색 전용
544+ - 취약 차원(70점 미만) → 기초/튜토리얼 영상
545+ - 강한 차원(70점 이상) → 퀘스트 주제 심화 영상
546546 """
547547 try :
548- # quest_id 정규화: 'unit01_02' 같은 형태는 숫자 부분만 추출
548+ from core .utils .youtube_helper import generate_llm_search_queries , search_youtube_multi_query
549+
550+ # quest_id 정규화
549551 if isinstance (quest_id , str ):
550- # '언더스코어' 형태 (e.g., 'unit01_02' -> '2', 'unit01_04' -> '4')
551552 if '_' in quest_id :
552553 parts = quest_id .split ('_' )
553554 last_nums = re .findall (r'\d+' , parts [- 1 ])
554- if last_nums :
555- n = int (last_nums [- 1 ])
556- quest_id_normalized = str (n ) if 1 <= n <= 6 else '1'
557- else :
558- quest_id_normalized = '1'
555+ quest_id_normalized = str (int (last_nums [- 1 ])) if last_nums and 1 <= int (last_nums [- 1 ]) <= 6 else '1'
559556 else :
560- # 순수 숫자 (e.g., '2', '3')
561557 nums = re .findall (r'\d+' , quest_id )
562558 quest_id_normalized = '1'
563559 for n_str in nums :
@@ -568,114 +564,59 @@ def get_recommended_videos_legacy(
568564 else :
569565 n = int (quest_id ) if quest_id else 1
570566 quest_id_normalized = str (n ) if 1 <= n <= 6 else '1'
571-
572- quest_videos = get_quest_videos (quest_id_normalized )
573- quest_int = int (quest_id_normalized )
567+
568+ # 차원 점수 정렬 (낮은 순)
574569 priority = get_dimension_priority (quest_id_normalized )
575-
576- # 취약 차원 정렬
577570 dim_ratios = []
578571 for dim in priority :
579572 d = dimensions .get (dim , {})
580573 pct = d .get ('percentage' , 100 ) if isinstance (d , dict ) else 100
581574 dim_ratios .append ((dim , pct ))
582575 dim_ratios .sort (key = lambda x : x [1 ])
583-
584- # [수정일: 2026-02-23] 유튜브 큐레이션 동적화: 하이브리드 방식 (정적 1개 + 동적 2개)
585- candidates = []
586- used_ids = set ()
587-
588- # 1. 정적 큐레이션 데이터에서 가장 취약한 차원의 영상을 1개만 무작위로 선택
589- import random
590- for dim , _ in dim_ratios :
591- videos = quest_videos .get (dim , [])
592- if videos and isinstance (videos , list ):
593- # 셔플하여 '하드코딩된 느낌' 방지
594- random_video = random .choice (videos )
595- candidates .append ({** random_video , '_dim' : dim , '_source' : 'curated' })
596- used_ids .add (random_video ['id' ])
597- break # 1개만 뽑고 종료
598-
599- # 2. 부족한 부분은 LLM이 생성한 검색어로 YouTube 실시간 검색
600- from core .utils .youtube_helper import generate_llm_search_queries , search_youtube_multi_query
601576
602- needed = max_count - len (candidates )
603- if needed > 0 :
604- # 취약 지표 상위 2개 추출
577+ # 취약/강한 차원 분리
578+ weak_dims = [d for d , pct in dim_ratios if pct < 70 ][:2 ]
579+ strong_dims = [d for d , pct in dim_ratios if pct >= 70 ][:1 ]
580+ if not weak_dims :
605581 weak_dims = [d for d , _ in dim_ratios [:2 ]]
606582
607- # LLM으로 검색어 3개 생성
608- llm_queries = generate_llm_search_queries (
609- quest_title = quest_title or f"Quest { quest_id_normalized } " ,
610- weak_dimensions = weak_dims ,
611- )
612- logger .info (f"[QuestResources] LLM 생성 쿼리: { llm_queries } " )
613-
614- # 멀티 쿼리로 YouTube 검색
615- live_videos = search_youtube_multi_query (llm_queries , max_per_query = 2 )
616-
617- for lv in live_videos :
618- if len (candidates ) >= max_count :
619- break
620- vid_id = lv .get ('videoId' ) or lv .get ('id' )
621- if vid_id and vid_id not in used_ids :
622- candidates .append ({
623- ** lv ,
624- '_dim' : f"live_{ weak_dims [0 ] if weak_dims else 'default' } " ,
625- '_source' : 'llm_live' ,
626- })
627- used_ids .add (vid_id )
628-
629- # 3. 그래도 부족하면 마지막으로 default 정적 데이터로 보완 (셔플 적용)
630- if len (candidates ) < max_count :
631- default_videos = list (quest_videos .get ('default' , []))
632- random .shuffle (default_videos )
633- for video in default_videos :
634- if len (candidates ) >= max_count :
635- break
636- if video ['id' ] not in used_ids :
637- candidates .append ({** video , '_dim' : 'default' , '_source' : 'fallback' })
638- used_ids .add (video ['id' ])
639-
640- # [수정 2026-03-04] filter_valid_videos는 API 실패 시 전체를 날려버리는 문제가 있어
641- # 실패해도 원본 candidates를 유지하도록 방어 처리
642- try :
643- from core .utils .youtube_helper import filter_valid_videos
644- filtered = filter_valid_videos (candidates )
645- # 필터 결과가 너무 적으면(0개) 원본 유지
646- valid_candidates = filtered if len (filtered ) > 0 else candidates
647- except Exception as fe :
648- logger .warning (f"[get_recommended_videos_legacy] filter_valid_videos 실패, 원본 사용: { fe } " )
649- valid_candidates = candidates
650-
651- # 필터 후 부족한 경우 default로 보완
652- if len (valid_candidates ) < max_count :
653- default_videos = list (quest_videos .get ('default' , []))
654- random .shuffle (default_videos )
655- existing_ids = {v .get ('id' ) or v .get ('videoId' ) for v in valid_candidates }
656- for video in default_videos :
657- if len (valid_candidates ) >= max_count :
658- break
659- vid = video .get ('id' ) or video .get ('videoId' )
660- if vid and vid not in existing_ids :
661- valid_candidates .append ({** video , '_dim' : 'default' , '_source' : 'fallback_recheck' })
662- existing_ids .add (vid )
663-
664- # [수정 2026-03-04] 프론트 일관성: videoId/thumbnail/url/channelTitle 필드 보장
583+ dim_scores = {d : pct for d , pct in dim_ratios }
584+
585+ # LLM으로 검색어 생성
586+ llm_queries = generate_llm_search_queries (
587+ quest_title = quest_title or f"Quest { quest_id_normalized } " ,
588+ weak_dimensions = weak_dims ,
589+ strong_dimensions = strong_dims ,
590+ dimension_scores = dim_scores ,
591+ )
592+ logger .info (f"[QuestResources] LLM 생성 쿼리: { llm_queries } " )
593+
594+ # YouTube 실시간 검색
595+ live_videos = search_youtube_multi_query (llm_queries , max_per_query = 2 )
596+ logger .info (f"[QuestResources] YouTube 검색 결과: { len (live_videos )} 개" )
597+
598+ # 결과 정리
665599 result_videos = []
666- for v in valid_candidates [:max_count ]:
600+ used_ids = set ()
601+ for v in live_videos :
602+ if len (result_videos ) >= max_count :
603+ break
667604 vid_id = v .get ('videoId' ) or v .get ('id' , '' )
605+ if not vid_id or vid_id in used_ids :
606+ continue
607+ used_ids .add (vid_id )
668608 result_videos .append ({
669609 'videoId' : vid_id ,
670610 'id' : vid_id ,
671611 'title' : v .get ('title' , '' ),
672612 'channelTitle' : v .get ('channelTitle' ) or v .get ('channel' , '' ),
673- 'thumbnail' : v .get ('thumbnail' ) or ( f'https://img.youtube.com/vi/{ vid_id } /mqdefault.jpg' if vid_id else '' ) ,
674- 'url' : v .get ('url' ) or ( f'https://www.youtube.com/watch?v={ vid_id } ' if vid_id else '#' ) ,
613+ 'thumbnail' : v .get ('thumbnail' ) or f'https://img.youtube.com/vi/{ vid_id } /mqdefault.jpg' ,
614+ 'url' : v .get ('url' ) or f'https://www.youtube.com/watch?v={ vid_id } ' ,
675615 'description' : v .get ('description' ) or v .get ('desc' , '' ),
676616 })
677617
678618 return result_videos
619+
679620 except Exception as e :
680621 logger .error (f"[get_recommended_videos_legacy] Error: { e } " )
681622 return []
0 commit comments