Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 95c4bb2

Browse files
committed
add to test script
1 parent dd14d5b commit 95c4bb2

File tree

2 files changed

+156
-140
lines changed

2 files changed

+156
-140
lines changed

makeTestdataAll.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,9 @@ getTestDataInputForTldAndDomain()
7878
return 1
7979
}
8080
}
81+
82+
# parse the input and annotate it and split the body in sections
83+
./test2.py -C "$d/input" > "$d/input.out"
8184
}
8285

8386
getTestDataOutputForTldAndDomain()

test2.py

Lines changed: 153 additions & 140 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,155 @@
1414
IgnoreReturncode = False
1515

1616

17+
class ResponseCleaner:
    """Load a saved whois response and split it into labelled parts.

    The raw text is read from a file when the object is created.
    ``cleanupWhoisResponse`` passes it through
    ``whois._2_parse.cleanupWhoisResponse`` and sorts the resulting lines into
    preamble, percent, body and postamble parts; ``printMe`` prints that split.
    """

    data: Optional[str] = None  # decoded file content; None when the path is not a file
    rDict: Dict = {}  # class-level fallback only; __init__ gives each instance its own dict

    def __init__(self, pathToTestFile: str):
        """Read ``pathToTestFile`` and keep its decoded content in ``self.data``."""
        self.rDict = {}  # avoid sharing the mutable class-level dict between instances
        self.data = self.readInputFile(pathToTestFile)

    def readInputFile(self, pathToTestFile: str) -> Optional[str]:
        """Return the content of ``pathToTestFile`` as text, or None if it is not a file.

        The file is read as bytes and decoded with ``errors="ignore"`` so that
        undecodable bytes are dropped instead of raising.
        """
        # isfile (not exists): a directory would otherwise make open() raise
        if not os.path.isfile(pathToTestFile):
            return None

        with open(pathToTestFile, mode="rb") as f:  # switch to binary mode as that is what Popen uses
            # make sure the data is treated exactly the same as the output of Popen
            return f.read().decode(errors="ignore")

    def cleanSection(self, section: List) -> List:
        """Strip leading and trailing whitespace-only lines from ``section``.

        The list is modified in place and the same list object is returned.
        A section that is empty, or that holds only blank lines, ends up empty.
        """
        blank = re.compile(r"^\s*$")

        # the `section and` guard stops the loops once the list runs empty
        while section and blank.match(section[0]):
            section.pop(0)

        while section and blank.match(section[-1]):
            section.pop()

        return section

    def splitBodyInSections(self, body: List) -> List:
        """Split ``body`` on whitespace-only lines.

        Returns a list of strings, one per non-empty section, each being the
        section's lines joined with a newline. Empty sections are dropped.
        """
        sections: List[List] = [[]]
        for line in body:
            if re.match(r"^\s*$", line):
                sections.append([])  # a blank line closes the current section
            else:
                sections[-1].append(line)

        # blank lines never enter a section, so no per-section trimming is needed
        return ["\n".join(section) for section in sections if section]

    def cleanupWhoisResponse(
        self,
        verbose: bool = False,
        with_cleanup_results: bool = False,
    ):
        """Clean ``self.data`` and classify every line of the result.

        Returns a tuple ``(text, rDict)``:

        * ``text`` is the body lines joined with a newline; a line containing a
          tab is prefixed with ``TAB;`` and a line ending in ``\\r`` is prefixed
          with ``CR;``. Preamble, percent and postamble lines are not included.
        * ``rDict`` (also stored as ``self.rDict``) holds the ``Preamble``,
          ``Percent`` and ``Postamble`` line lists and ``Body``, the list of
          sections produced by ``splitBodyInSections``.
        """
        # NOTE(review): `verbose` and `with_cleanup_results` are accepted but not
        # forwarded; the parser is always called with False for both. Confirm
        # whether that is intended before wiring them through.
        # NOTE(review): self.data is None when the input path was not a file and
        # is passed on unchanged here.
        result = whois._2_parse.cleanupWhoisResponse(
            self.data,
            verbose=False,
            with_cleanup_results=False,
        )

        self.rDict = {
            "BodyHasSections": False,  # never changed in this method
            "Preamble": [],  # leading lines that start with "["
            "Percent": [],  # lines starting with "%" that directly follow the preamble
            "Body": [],  # the body of the whois, split in sections on empty lines
            "Postamble": [],  # everything from the first postamble marker onwards
        }
        body = []
        annotated = []
        postambleMarkers = ("--", ">>> ", "Copyright notice")
        preambleSeen = False
        percentSeen = False
        postambleSeen = False

        for line in result.split("\n"):
            if not preambleSeen:
                if line.startswith("["):
                    self.rDict["Preamble"].append(line)
                    continue
                preambleSeen = True  # first non-"[" line ends the preamble for good

            if not percentSeen:
                if line.startswith("%"):
                    self.rDict["Percent"].append(line)
                    continue
                percentSeen = True  # later "%" lines belong to the body

            if not postambleSeen and line.startswith(postambleMarkers):
                postambleSeen = True

            if postambleSeen:
                self.rDict["Postamble"].append(line)
                continue

            body.append(line)  # the unmarked line goes to the body

            if "\t" in line:
                line = "TAB;" + line  # mark lines having tabs

            if line.endswith("\r"):
                line = "CR;" + line  # mark lines having CR (\r)

            annotated.append(line)

        body = self.cleanSection(body)
        self.rDict["Body"] = self.splitBodyInSections(body)
        return "\n".join(annotated), self.rDict

    def printMe(self):
        """Print the classified response held in ``self.rDict``.

        Preamble, percent and postamble lines are printed one per line with
        their key; each body section is printed under a numbered header.
        Parts that are missing from ``self.rDict`` are skipped.
        """
        for k in ("Preamble", "Percent", "Postamble"):
            for lines in self.rDict.get(k, []):
                tab = " [TAB] " if "\t" in lines else ""  # tabs are present in this line
                cr = " [CR] " if "\r" in lines else ""  # \r is present in this line
                print(k, cr, tab, lines)

        k = "Body"
        for n, lines in enumerate(self.rDict.get(k, [])):
            tab = " [TAB] " if "\t" in lines else "-------"  # tabs are present in this section
            cr = " [CR] " if "\r" in lines else "------"  # \r is present in this section
            print(f"# ------------- {k} Section: {n} {cr}{tab}---------")
            print(lines)
164+
165+
17166
def prepItem(d):
18167
print("")
19168
print(f"test domain: <<<<<<<<<< {d} >>>>>>>>>>>>>>>>>>>>")
@@ -165,131 +314,6 @@ def ShowRuleset(tld):
165314
print(key, rule, "IGNORECASE")
166315

167316

168-
def readInputFile(pathToTestFile: str) -> Optional[str]:
    """Return the content of ``pathToTestFile`` as text, or None if it is not a file.

    The file is read as bytes and decoded with ``errors="ignore"`` so that
    undecodable bytes are dropped instead of raising.
    """
    # isfile (not exists): a directory would otherwise make open() raise
    if not os.path.isfile(pathToTestFile):
        return None

    with open(pathToTestFile, mode="rb") as f:  # switch to binary mode as that is what Popen uses
        # make sure the data is treated exactly the same as the output of Popen
        return f.read().decode(errors="ignore")
175-
176-
177-
def cleanSection(section: List) -> List:
    """Strip leading and trailing whitespace-only lines from ``section``.

    The list is modified in place and the same list object is returned.
    A section that is empty, or that holds only blank lines, ends up empty.
    """
    blank = re.compile(r"^\s*$")

    # the `section and` guard stops the loops once the list runs empty
    while section and blank.match(section[0]):
        section.pop(0)

    while section and blank.match(section[-1]):
        section.pop()

    return section
195-
196-
197-
def splitBodyInSections(body: List) -> List:
    """Split ``body`` on whitespace-only lines.

    Returns a list of sections, each a list of the consecutive non-blank lines
    found between blank lines. Empty sections are dropped.
    """
    sections: List[List] = [[]]
    for line in body:
        if re.match(r"^\s*$", line):
            sections.append([])  # a blank line closes the current section
        else:
            sections[-1].append(line)

    # blank lines never enter a section, so no per-section trimming is needed
    return [section for section in sections if section]
225-
226-
227-
def cleanupWhoisResponse(
    response: str,
    verbose: bool = False,
    with_cleanup_results: bool = False,
):
    """Clean ``response`` and classify every line of the result.

    Returns a tuple ``(text, rDict)``:

    * ``text`` is the body lines joined with a newline; a line containing a tab
      is prefixed with ``TAB;`` and a line ending in ``\\r`` is prefixed with
      ``CR;``. Preamble, percent and postamble lines are not included.
    * ``rDict`` holds the ``Preamble``, ``Percent`` and ``Postamble`` line
      lists and ``Body``, the result of ``splitBodyInSections``.
    """
    # NOTE(review): `verbose` and `with_cleanup_results` are accepted but not
    # forwarded; the parser is always called with False for both. Confirm
    # whether that is intended before wiring them through.
    result = whois._2_parse.cleanupWhoisResponse(
        response,
        verbose=False,
        with_cleanup_results=False,
    )

    rDict = {
        "BodyHasSections": False,  # never changed in this function
        "Preamble": [],  # leading lines that start with "["
        "Percent": [],  # lines starting with "%" that directly follow the preamble
        "Body": [],  # the body of the whois, split in sections on empty lines
        "Postamble": [],  # everything from the first postamble marker onwards
    }
    body = []
    annotated = []
    postambleMarkers = ("--", ">>> ", "Copyright notice")
    preambleSeen = False
    percentSeen = False
    postambleSeen = False

    for line in result.split("\n"):
        if not preambleSeen:
            if line.startswith("["):
                rDict["Preamble"].append(line)
                continue
            preambleSeen = True  # first non-"[" line ends the preamble for good

        if not percentSeen:
            if line.startswith("%"):
                rDict["Percent"].append(line)
                continue
            percentSeen = True  # later "%" lines belong to the body

        if not postambleSeen and line.startswith(postambleMarkers):
            postambleSeen = True

        if postambleSeen:
            rDict["Postamble"].append(line)
            continue

        body.append(line)  # the unmarked line goes to the body

        if "\t" in line:
            line = "TAB;" + line  # mark lines having tabs

        if line.endswith("\r"):
            line = "CR;" + line  # mark lines having CR (\r)

        annotated.append(line)

    body = cleanSection(body)
    rDict["Body"] = splitBodyInSections(body)
    return "\n".join(annotated), rDict
291-
292-
293317
def usage():
294318
print(
295319
"""
@@ -448,25 +472,14 @@ def main(argv):
448472

449473
if opt in ("-C", "--Cleanup"):
450474
inFile = arg
451-
isFile = os.path.isfile(inFile)
475+
isFile = os.path.isfile(arg)
452476
if isFile is False:
453477
print(f"{inFile} cannot be found or is not a file", file=sys.stderr)
454478
sys.exit(101)
455-
whois_str = readInputFile(inFile)
456-
d1, rDict = cleanupWhoisResponse(whois_str)
457-
458-
print(d1) # the data without pre and postamble or percent section
459-
print(rDict)
460-
461-
k = "Body"
462-
if len(rDict[k]):
463-
n = 0
464-
for section in rDict[k]:
465-
print(f"# ------------- {k} Section: {n} ----------------------")
466-
n += 1
467-
for line in section:
468-
print(line)
469479

480+
rc = ResponseCleaner(inFile)
481+
d1, rDict = rc.cleanupWhoisResponse()
482+
rc.printMe()
470483
sys.exit(0)
471484

472485
if opt in ("-f", "--file"):

0 commit comments

Comments
 (0)