@@ -135,6 +135,201 @@ def _get_client() -> AsyncOpenAI:
135135 {"label" : "break" , "correct" : False },
136136 ],
137137 },
138+ {
139+ "title" : "pandas 컬럼 대소문자 불일치" , "bug_type_name" : "KeyError" ,
140+ "file_name" : "preprocess.py" , "bug_line" : 4 ,
141+ "buggy_code" : 'import pandas as pd\n df = pd.read_csv("data.csv")\n # 컬럼명: Age, Name, Score\n print(df["age"].mean())' ,
142+ "error_log" : "KeyError: 'age'\n File \" preprocess.py\" , line 4" ,
143+ "hint" : "CSV 컬럼명의 대소문자를 확인해보세요." ,
144+ "choices" : [
145+ {"label" : 'print(df["Age"].mean())' , "correct" : True },
146+ {"label" : 'print(df.age.mean())' , "correct" : False },
147+ {"label" : 'print(df[["age"]].mean())' , "correct" : False },
148+ {"label" : 'print(df.get("age").mean())' , "correct" : False },
149+ ],
150+ },
151+ {
152+ "title" : "numpy shape 불일치 행렬곱" , "bug_type_name" : "ValueError" ,
153+ "file_name" : "model_layer.py" , "bug_line" : 4 ,
154+ "buggy_code" : 'import numpy as np\n A = np.ones((3, 4))\n B = np.ones((3, 5))\n result = np.dot(A, B)\n print(result.shape)' ,
155+ "error_log" : "ValueError: shapes (3,4) and (3,5) not aligned\n File \" model_layer.py\" , line 4" ,
156+ "hint" : "행렬곱에서 앞 행렬의 열 수와 뒤 행렬의 행 수가 일치해야 합니다." ,
157+ "choices" : [
158+ {"label" : "B = np.ones((4, 5))" , "correct" : True },
159+ {"label" : "B = np.ones((3, 4))" , "correct" : False },
160+ {"label" : "result = np.dot(A, B.T)" , "correct" : False },
161+ {"label" : "result = A * B" , "correct" : False },
162+ ],
163+ },
164+ {
165+ "title" : "문자열 포맷 타입 에러" , "bug_type_name" : "TypeError" ,
166+ "file_name" : "report.py" , "bug_line" : 3 ,
167+ "buggy_code" : 'accuracy = 0.9234\n epochs = 10\n print("Accuracy: " + accuracy + " after " + epochs + " epochs")' ,
168+ "error_log" : 'TypeError: can only concatenate str (not "float") to str\n File "report.py", line 3' ,
169+ "hint" : "숫자를 문자열과 합치려면 변환이 필요합니다." ,
170+ "choices" : [
171+ {"label" : 'print(f"Accuracy: {accuracy} after {epochs} epochs")' , "correct" : True },
172+ {"label" : 'print("Accuracy: ", accuracy, " after ", epochs)' , "correct" : False },
173+ {"label" : 'print("Accuracy: " + str(accuracy + epochs))' , "correct" : False },
174+ {"label" : 'print("Accuracy: " + accuracy)' , "correct" : False },
175+ ],
176+ },
177+ {
178+ "title" : "환경변수 누락으로 API 키 없음" , "bug_type_name" : "TypeError" ,
179+ "file_name" : "openai_client.py" , "bug_line" : 4 ,
180+ "buggy_code" : 'import os\n from openai import OpenAI\n api_key = os.getenv("OPENAI_API_KEY")\n client = OpenAI(api_key=api_key)\n response = client.chat.completions.create(model="gpt-4o")' ,
181+ "error_log" : "openai.AuthenticationError: No API key provided\n File \" openai_client.py\" , line 4" ,
182+ "hint" : "getenv는 키가 없으면 None을 반환합니다." ,
183+ "choices" : [
184+ {"label" : 'api_key = os.getenv("OPENAI_API_KEY") or raise ValueError("키 없음")' , "correct" : False },
185+ {"label" : 'if not api_key: raise ValueError("OPENAI_API_KEY not set")' , "correct" : True },
186+ {"label" : 'api_key = os.environ["OPENAI_API_KEY"]' , "correct" : False },
187+ {"label" : 'api_key = "hardcoded-key"' , "correct" : False },
188+ ],
189+ },
190+ {
191+ "title" : "DataFrame 필터링 후 인덱스 접근" , "bug_type_name" : "KeyError" ,
192+ "file_name" : "filter_data.py" , "bug_line" : 5 ,
193+ "buggy_code" : 'import pandas as pd\n df = pd.DataFrame({"score": [80, 90, 70], "pass": [True, True, False]})\n passed = df[df["pass"] == True]\n first_score = passed[0]["score"]\n print(first_score)' ,
194+ "error_log" : "KeyError: 0\n File \" filter_data.py\" , line 4" ,
195+ "hint" : "필터링 후 인덱스는 원본 DataFrame의 인덱스를 유지합니다." ,
196+ "choices" : [
197+ {"label" : 'first_score = passed.iloc[0]["score"]' , "correct" : True },
198+ {"label" : 'first_score = passed.loc[0]["score"]' , "correct" : False },
199+ {"label" : 'first_score = passed["score"][0]' , "correct" : False },
200+ {"label" : 'first_score = passed.reset_index()[0]' , "correct" : False },
201+ ],
202+ },
203+ {
204+ "title" : "리스트 컴프리헨션 조건 위치 오류" , "bug_type_name" : "SyntaxError" ,
205+ "file_name" : "filter_list.py" , "bug_line" : 2 ,
206+ "buggy_code" : 'numbers = [1, 2, 3, 4, 5, 6]\n evens = [if x % 2 == 0 x for x in numbers]\n print(evens)' ,
207+ "error_log" : "SyntaxError: invalid syntax\n File \" filter_list.py\" , line 2" ,
208+ "hint" : "리스트 컴프리헨션에서 if 조건의 위치를 확인하세요." ,
209+ "choices" : [
210+ {"label" : "evens = [x for x in numbers if x % 2 == 0]" , "correct" : True },
211+ {"label" : "evens = [x if x % 2 == 0 for x in numbers]" , "correct" : False },
212+ {"label" : "evens = [x for x in numbers where x % 2 == 0]" , "correct" : False },
213+ {"label" : "evens = filter(x % 2 == 0, numbers)" , "correct" : False },
214+ ],
215+ },
216+ {
217+ "title" : "sklearn train_test_split 비율 오류" , "bug_type_name" : "ValueError" ,
218+ "file_name" : "split_data.py" , "bug_line" : 4 ,
219+ "buggy_code" : 'from sklearn.model_selection import train_test_split\n import numpy as np\n X = np.arange(100)\n X_train, X_test = train_test_split(X, test_size=80)\n print(len(X_train))' ,
220+ "error_log" : "ValueError: test_size=80 should be either positive and smaller than the number of samples 100\n File \" split_data.py\" , line 4" ,
221+ "hint" : "test_size에 정수를 넣으면 샘플 수, 소수를 넣으면 비율입니다." ,
222+ "choices" : [
223+ {"label" : "X_train, X_test = train_test_split(X, test_size=0.2)" , "correct" : True },
224+ {"label" : "X_train, X_test = train_test_split(X, test_size=0.8)" , "correct" : False },
225+ {"label" : "X_train, X_test = train_test_split(X, train_size=80)" , "correct" : False },
226+ {"label" : "X_train, X_test = train_test_split(X, test_size=20)" , "correct" : False },
227+ ],
228+ },
229+ {
230+ "title" : "PyTorch 텐서 gradient 누적" , "bug_type_name" : "LogicError" ,
231+ "file_name" : "train_loop.py" , "bug_line" : 6 ,
232+ "buggy_code" : 'import torch\n model = torch.nn.Linear(10, 1)\n optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n for i in range(3):\n loss = model(torch.randn(10)).sum()\n loss.backward()\n optimizer.step()' ,
233+ "error_log" : "RuntimeError: gradient accumulates across iterations\n Gradient grows unexpectedly" ,
234+ "hint" : "매 iteration마다 gradient를 초기화해야 합니다." ,
235+ "choices" : [
236+ {"label" : "optimizer.zero_grad() 를 loss.backward() 전에 추가" , "correct" : True },
237+ {"label" : "optimizer.step() 을 제거" , "correct" : False },
238+ {"label" : "loss.detach() 후 backward()" , "correct" : False },
239+ {"label" : "model.zero_grad() 를 optimizer.step() 후에 추가" , "correct" : False },
240+ ],
241+ },
242+ {
243+ "title" : "파일 경로 역슬래시 에러" , "bug_type_name" : "SyntaxError" ,
244+ "file_name" : "load_model.py" , "bug_line" : 2 ,
245+ "buggy_code" : 'import torch\n path = "C:\\ Users\\ model\\ best.pt"\n model = torch.load(path)' ,
246+ "error_log" : "SyntaxError: (unicode error) 'unicodeescape' codec\n File \" load_model.py\" , line 2" ,
247+ "hint" : "Windows 경로에서 역슬래시는 이스케이프 문자로 처리됩니다." ,
248+ "choices" : [
249+ {"label" : 'path = r"C:\\ Users\\ model\\ best.pt"' , "correct" : True },
250+ {"label" : 'path = "C://Users//model//best.pt"' , "correct" : False },
251+ {"label" : 'path = "C:/Users/model/best.pt"' , "correct" : False },
252+ {"label" : 'path = os.path.join("C:", "Users", "model", "best.pt")' , "correct" : False },
253+ ],
254+ },
255+ {
256+ "title" : "재귀 함수 기저 조건 누락" , "bug_type_name" : "RecursionError" ,
257+ "file_name" : "factorial.py" , "bug_line" : 2 ,
258+ "buggy_code" : 'def factorial(n):\n return n * factorial(n - 1)\n \n print(factorial(5))' ,
259+ "error_log" : "RecursionError: maximum recursion depth exceeded\n File \" factorial.py\" , line 2" ,
260+ "hint" : "재귀 함수에는 반드시 종료 조건이 필요합니다." ,
261+ "choices" : [
262+ {"label" : "if n <= 1: return 1 을 첫 줄에 추가" , "correct" : True },
263+ {"label" : "if n == 0: return n 을 첫 줄에 추가" , "correct" : False },
264+ {"label" : "return n * factorial(n + 1)" , "correct" : False },
265+ {"label" : "sys.setrecursionlimit(10000) 추가" , "correct" : False },
266+ ],
267+ },
268+ {
269+ "title" : "f-string 중괄호 이스케이프" , "bug_type_name" : "ValueError" ,
270+ "file_name" : "template.py" , "bug_line" : 2 ,
271+ "buggy_code" : 'name = "Alice"\n msg = f"Hello {name}! Your score is {85}%"\n print(msg)' ,
272+ "error_log" : "정상 동작하지만 % 기호가 dict format으로 오해될 수 있음" ,
273+ "hint" : "f-string 안에서 중괄호를 리터럴로 출력하려면 두 번 써야 합니다." ,
274+ "choices" : [
275+ {"label" : 'msg = f"Hello {name}! Score: {{85}}%"' , "correct" : False },
276+ {"label" : 'msg = f"Hello {name}! Score: {85}%"' , "correct" : True },
277+ {"label" : 'msg = "Hello %s! Score: %d%%" % (name, 85)' , "correct" : False },
278+ {"label" : 'msg = f"Hello " + name + "! Score: 85%"' , "correct" : False },
279+ ],
280+ },
281+ {
282+ "title" : "glob 패턴 파일 없음" , "bug_type_name" : "FileNotFoundError" ,
283+ "file_name" : "batch_load.py" , "bug_line" : 4 ,
284+ "buggy_code" : 'import glob\n files = glob.glob("data/*.CSV")\n for f in files:\n print(f)' ,
285+ "error_log" : "No files matched — files list is empty\n File \" batch_load.py\" , line 3" ,
286+ "hint" : "Linux/Mac에서 glob 패턴은 대소문자를 구분합니다." ,
287+ "choices" : [
288+ {"label" : 'files = glob.glob("data/*.csv")' , "correct" : True },
289+ {"label" : 'files = glob.glob("data/**")' , "correct" : False },
290+ {"label" : 'files = glob.glob("data/")' , "correct" : False },
291+ {"label" : 'files = glob.glob("*.CSV", recursive=True)' , "correct" : False },
292+ ],
293+ },
294+ {
295+ "title" : "zip 길이 불일치 데이터 손실" , "bug_type_name" : "LogicError" ,
296+ "file_name" : "pair_data.py" , "bug_line" : 3 ,
297+ "buggy_code" : 'labels = [0, 1, 2, 3, 4]\n features = [[1,2], [3,4], [5,6]]\n pairs = list(zip(labels, features))\n print(len(pairs))' ,
298+ "error_log" : "출력: 3 (기대값: 5)\n 데이터 손실 발생" ,
299+ "hint" : "zip은 가장 짧은 iterable 기준으로 멈춥니다." ,
300+ "choices" : [
301+ {"label" : "from itertools import zip_longest; pairs = list(zip_longest(labels, features))" , "correct" : True },
302+ {"label" : "pairs = list(zip(features, labels))" , "correct" : False },
303+ {"label" : "pairs = [(labels[i], features[i]) for i in range(len(labels))]" , "correct" : False },
304+ {"label" : "pairs = list(map(zip, labels, features))" , "correct" : False },
305+ ],
306+ },
307+ {
308+ "title" : "클래스 메서드 self 누락" , "bug_type_name" : "TypeError" ,
309+ "file_name" : "model_class.py" , "bug_line" : 4 ,
310+ "buggy_code" : 'class Predictor:\n def __init__(self, threshold):\n self.threshold = threshold\n def predict(score):\n return score > self.threshold\n \n p = Predictor(0.5)\n print(p.predict(0.8))' ,
311+ "error_log" : "TypeError: predict() takes 1 positional argument but 2 were given\n File \" model_class.py\" , line 4" ,
312+ "hint" : "인스턴스 메서드의 첫 번째 인자는 항상 self여야 합니다." ,
313+ "choices" : [
314+ {"label" : "def predict(self, score):" , "correct" : True },
315+ {"label" : "def predict(score, self):" , "correct" : False },
316+ {"label" : "@staticmethod\n def predict(score):" , "correct" : False },
317+ {"label" : "def predict(cls, score):" , "correct" : False },
318+ ],
319+ },
320+ {
321+ "title" : "JSON dumps 직렬화 불가 타입" , "bug_type_name" : "TypeError" ,
322+ "file_name" : "serialize.py" , "bug_line" : 4 ,
323+ "buggy_code" : 'import json\n import numpy as np\n result = {"scores": np.array([0.9, 0.8, 0.7])}\n print(json.dumps(result))' ,
324+ "error_log" : "TypeError: Object of type ndarray is not JSON serializable\n File \" serialize.py\" , line 4" ,
325+ "hint" : "numpy 배열은 기본 JSON 직렬화가 되지 않습니다." ,
326+ "choices" : [
327+ {"label" : 'result = {"scores": np.array([0.9, 0.8, 0.7]).tolist()}' , "correct" : True },
328+ {"label" : 'print(json.dumps(result, indent=2))' , "correct" : False },
329+ {"label" : 'print(json.dumps(str(result)))' , "correct" : False },
330+ {"label" : 'result["scores"] = list(result["scores"])' , "correct" : False },
331+ ],
332+ },
138333]
139334
140335
@@ -202,11 +397,17 @@ async def generate_bug_problems(count: int = 10, difficulty: int = 1) -> List[Di
202397 except Exception as e :
203398 logger .error (f"[BugGen] GPT 호출 실패: { e } " )
204399
205- if len (problems ) < 5 :
400+ if len (problems ) < 20 :
206401 fallback = [_clean_problem (dict (p ), len (problems ) + i + 1 )
207402 for i , p in enumerate (FALLBACK_PROBLEMS )]
208403 random .shuffle (fallback )
209- problems .extend (fallback [:max (0 , 5 - len (problems ))])
404+ needed = 20 - len (problems )
405+ # If fallback holds fewer than `needed` items, keep appending reshuffled copies of FALLBACK_PROBLEMS
406+ while len (fallback ) < needed :
407+ extra = [_clean_problem (dict (p ), 0 ) for p in FALLBACK_PROBLEMS ]
408+ random .shuffle (extra )
409+ fallback .extend (extra )
410+ problems .extend (fallback [:needed ])
210411
211412 random .shuffle (problems )
212413 return problems
0 commit comments