2
2
import json
3
3
from typing import List
4
4
from pymongo import MongoClient
5
+ import re
5
6
from dotenv import load_dotenv
6
7
import os
7
8
17
18
def clean ():
18
19
with open (TARGET_FILE , "r" ) as f :
19
20
data = json .load (f )
20
- for entry in data :
21
+ for entry in data [ 1 :] :
21
22
questionid = entry .get ("questionid" )
23
+ print ("Processing questionid" , questionid )
22
24
try :
23
25
if questionid is not None :
24
26
qn_doc = collection .find_one ({"questionid" : questionid })
@@ -28,10 +30,34 @@ def clean():
28
30
examples = qn_doc .get ("examples" , [])
29
31
solution = entry .get ("solution" )
30
32
31
- cleaned_examples = html .unescape (
32
- examples
33
- ) # Converts &quot; to ", &lt; to <, &gt; to >, etc.
34
- cleaned_solution = html .unescape (solution )
33
+ cleaned_examples = []
34
+ cleaned_solution = ""
35
+ if examples :
36
+ for example in examples :
37
+ new_example = {}
38
+ new_example ["expected_input" ] = html .unescape (
39
+ example .get ("expected_input" )
40
+ )
41
+ new_example ["expected_input" ] = re .sub (
42
+ r"<[^>]+>" , "" , new_example ["expected_input" ]
43
+ )
44
+ new_example ["expected_output" ] = html .unescape (
45
+ example .get ("expected_output" )
46
+ )
47
+ new_example ["expected_output" ] = re .sub (
48
+ r"<[^>]+>" , "" , new_example ["expected_output" ]
49
+ )
50
+ if example .get ("explanation" ):
51
+ new_example ["explanation" ] = html .unescape (
52
+ example .get ("explanation" )
53
+ )
54
+ new_example ["explanation" ] = re .sub (
55
+ r"<[^>]+>" , "" , new_example ["explanation" ]
56
+ )
57
+
58
+ cleaned_examples .append (new_example )
59
+ if solution :
60
+ cleaned_solution = html .unescape (solution )
35
61
36
62
result = collection .update_one (
37
63
{"questionid" : questionid },
0 commit comments