Skip to content

Commit 6271b51

Browse files
committed
Update tool
1 parent f18a131 commit 6271b51

File tree

1 file changed

+31
-5
lines changed

1 file changed

+31
-5
lines changed

backend/question/tools/qn_cleaner.py

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import json
33
from typing import List
44
from pymongo import MongoClient
5+
import re
56
from dotenv import load_dotenv
67
import os
78

@@ -17,8 +18,9 @@
1718
def clean():
1819
with open(TARGET_FILE, "r") as f:
1920
data = json.load(f)
20-
for entry in data:
21+
for entry in data[1:]:
2122
questionid = entry.get("questionid")
23+
print("Processing questionid", questionid)
2224
try:
2325
if questionid is not None:
2426
qn_doc = collection.find_one({"questionid": questionid})
@@ -28,10 +30,34 @@ def clean():
2830
examples = qn_doc.get("examples", [])
2931
solution = entry.get("solution")
3032

31-
cleaned_examples = html.unescape(
32-
examples
33-
) # Converts &quot; to ", &lt; to <, &gt; to >, etc.
34-
cleaned_solution = html.unescape(solution)
33+
cleaned_examples = []
34+
cleaned_solution = ""
35+
if examples:
36+
for example in examples:
37+
new_example = {}
38+
new_example["expected_input"] = html.unescape(
39+
example.get("expected_input")
40+
)
41+
new_example["expected_input"] = re.sub(
42+
r"<[^>]+>", "", new_example["expected_input"]
43+
)
44+
new_example["expected_output"] = html.unescape(
45+
example.get("expected_output")
46+
)
47+
new_example["expected_output"] = re.sub(
48+
r"<[^>]+>", "", new_example["expected_output"]
49+
)
50+
if example.get("explanation"):
51+
new_example["explanation"] = html.unescape(
52+
example.get("explanation")
53+
)
54+
new_example["explanation"] = re.sub(
55+
r"<[^>]+>", "", new_example["explanation"]
56+
)
57+
58+
cleaned_examples.append(new_example)
59+
if solution:
60+
cleaned_solution = html.unescape(solution)
3561

3662
result = collection.update_one(
3763
{"questionid": questionid},

0 commit comments

Comments (0)