|
14 | 14 | IgnoreReturncode = False |
15 | 15 |
|
16 | 16 |
|
class ResponseCleaner:
    """Split a whois response stored in a file into labelled parts.

    The file content is read once at construction time. Calling
    cleanupWhoisResponse() runs it through whois._2_parse.cleanupWhoisResponse
    and then partitions the result into a preamble, a percent block, a body
    (split into sections on blank lines) and a postamble. printMe() dumps the
    most recent partitioning to stdout.
    """

    # A line that is empty or holds only whitespace; such lines separate body sections.
    _BLANK_LINE = re.compile(r"^\s*$")

    # A line starting with one of these opens the postamble; every later line belongs to it too.
    _POSTAMBLE_MARKERS = ("--", ">>> ", "Copyright notice")

    # Decoded file content, or None when the input file does not exist.
    data: Optional[str] = None

    # Class-level fallback only: it is never mutated in place, every instance
    # rebinds its own dict in __init__ and in cleanupWhoisResponse.
    rDict: Dict = {}

    def __init__(self, pathToTestFile: str):
        """Read pathToTestFile and start with an empty result dict."""
        self.data = self.readInputFile(pathToTestFile)
        # Per-instance dict, so printMe() is safe before the first cleanup
        # and instances never share state through the class attribute.
        self.rDict = self._emptyResult()

    @staticmethod
    def _emptyResult() -> Dict:
        """Return a fresh result dict with every part empty."""
        return {
            "BodyHasSections": False,  # never set to True by this class
            "Preamble": [],  # the lines telling what whois servers were contacted
            "Percent": [],  # lines starting with %, often not present but may contain hints
            "Body": [],  # the body of the whois, one string per blank-line separated section
            "Postamble": [],  # copyright and other info not relevant for actual parsing
        }

    def readInputFile(self, pathToTestFile: str) -> Optional[str]:
        """Return the decoded content of pathToTestFile, or None if it does not exist.

        The file is read in binary mode and decoded with errors="ignore" so the
        data is treated exactly like the output of Popen.
        """
        # Try the open and handle the failure: this avoids the race between an
        # existence check and the open itself.
        try:
            with open(pathToTestFile, mode="rb") as f:
                return f.read().decode(errors="ignore")
        except FileNotFoundError:
            return None

    def cleanSection(self, section: List) -> List:
        """Strip leading and trailing blank lines from section, in place.

        The same list object is returned. A section consisting only of blank
        lines becomes empty (the previous implementation raised IndexError).
        Blank lines between non-blank lines are kept.
        """
        isBlank = self._BLANK_LINE.match

        first = 0
        last = len(section)
        while first < last and isBlank(section[first]):
            first += 1
        while last > first and isBlank(section[last - 1]):
            last -= 1

        # Slice assignment keeps the caller's list object, as before.
        section[:] = section[first:last]
        return section

    def splitBodyInSections(self, body: List) -> List:
        """Split body on blank lines and return the non-empty sections.

        Each returned item is one section with its lines joined by a newline.
        Runs of blank lines never produce empty sections.
        """
        isBlank = self._BLANK_LINE.match

        sections = []
        current = []
        for line in body:
            if isBlank(line):
                # A blank line closes the running section; because blanks are
                # the separators a section can never contain one, so no
                # further trimming is needed.
                if current:
                    sections.append("\n".join(current))
                    current = []
                continue
            current.append(line)

        if current:
            sections.append("\n".join(current))

        return sections

    def cleanupWhoisResponse(
        self,
        verbose: bool = False,
        with_cleanup_results: bool = False,
    ):
        """Clean the loaded response and partition it into labelled parts.

        Returns a tuple (text, rDict): text holds the body lines joined by
        newlines, where a line containing a tab is prefixed with "TAB;" and a
        line ending in a carriage return is prefixed with "CR;"; rDict is the
        dict that is also stored on self.rDict.

        When no data was loaded (the input file was missing) an empty text and
        an empty result dict are returned.

        NOTE(review): verbose and with_cleanup_results are accepted but not
        forwarded; the upstream call always gets False for both. Confirm
        whether that is intentional before wiring them through.
        """
        self.rDict = self._emptyResult()
        if self.data is None:
            return "", self.rDict

        result = whois._2_parse.cleanupWhoisResponse(
            self.data,
            verbose=False,
            with_cleanup_results=False,
        )

        body = []
        marked = []
        preambleSeen = False
        percentSeen = False
        postambleSeen = False

        for line in result.split("\n"):
            # Phase 1: leading lines starting with "[" form the preamble.
            if not preambleSeen:
                if line.startswith("["):
                    self.rDict["Preamble"].append(line)
                    continue
                preambleSeen = True

            # Phase 2: directly after the preamble, lines starting with "%".
            if not percentSeen:
                if line.startswith("%"):
                    self.rDict["Percent"].append(line)
                    continue
                percentSeen = True

            # Phase 3: once a postamble marker is seen, all remaining lines are postamble.
            if not postambleSeen and line.startswith(self._POSTAMBLE_MARKERS):
                postambleSeen = True

            if postambleSeen:
                self.rDict["Postamble"].append(line)
                continue

            body.append(line)

            if "\t" in line:
                line = "TAB;" + line  # mark lines having tabs

            if line.endswith("\r"):
                line = "CR;" + line  # mark lines having CR (\r)

            marked.append(line)

        self.rDict["Body"] = self.splitBodyInSections(self.cleanSection(body))
        return "\n".join(marked), self.rDict

    def printMe(self):
        """Print the parts of the most recent cleanup to stdout.

        Preamble, percent and postamble lines are printed one per line, tagged
        when they contain a tab or a carriage return. Body sections follow,
        each under a numbered header carrying the same tags.
        """
        for k in ("Preamble", "Percent", "Postamble"):
            for lines in self.rDict.get(k, []):
                tab = " [TAB] " if "\t" in lines else ""  # tabs are present in this line
                cr = " [CR] " if "\r" in lines else ""  # \r is present in this line
                print(k, cr, tab, lines)

        k = "Body"
        for n, lines in enumerate(self.rDict.get(k, [])):
            tab = " [TAB] " if "\t" in lines else "-------"  # tabs are present in this section
            cr = " [CR] " if "\r" in lines else "------"  # \r is present in this section
            print(f"# ------------- {k} Section: {n} {cr}{tab}---------")
            print(lines)
| 164 | + |
| 165 | + |
17 | 166 | def prepItem(d): |
18 | 167 | print("") |
19 | 168 | print(f"test domain: <<<<<<<<<< {d} >>>>>>>>>>>>>>>>>>>>") |
@@ -165,131 +314,6 @@ def ShowRuleset(tld): |
165 | 314 | print(key, rule, "IGNORECASE") |
166 | 315 |
|
167 | 316 |
|
168 | | -def readInputFile(pathToTestFile: str): |
169 | | - if not os.path.exists(pathToTestFile): |
170 | | - return None |
171 | | - |
172 | | - with open(pathToTestFile, mode="rb") as f: # switch to binary mode as that is what Popen uses |
173 | | - # make sure the data is treated exactly the same as the output of Popen |
174 | | - return f.read().decode(errors="ignore") |
175 | | - |
176 | | - |
177 | | -def cleanSection(section: List) -> List: |
178 | | - # cleanup any beginning and ending empty lines from the section |
179 | | - |
180 | | - if len(section) == 0: |
181 | | - return section |
182 | | - |
183 | | - rr = r"^\s*$" |
184 | | - n = 0 # remove empty lines from the start of section |
185 | | - while re.match(rr, section[n]): |
186 | | - section.pop(n) |
187 | | - # n stays 0 |
188 | | - |
189 | | - n = len(section) - 1 # remove empty lines from the end of the section |
190 | | - while re.match(rr, section[n]): |
191 | | - section.pop(n) |
192 | | - n = len(section) - 1 # remove empty lines from the end of section |
193 | | - |
194 | | - return section |
195 | | - |
196 | | - |
197 | | -def splitBodyInSections(body: List) -> List: |
198 | | - # split the body on empty line, cleanup all sections, remove empty sections |
199 | | - # return list of body's |
200 | | - |
201 | | - sections = [] |
202 | | - n = 0 |
203 | | - sections.append([]) |
204 | | - for line in body: |
205 | | - if re.match(r"^\s*$", line): |
206 | | - n += 1 |
207 | | - sections.append([]) |
208 | | - continue |
209 | | - sections[n].append(line) |
210 | | - |
211 | | - m = 0 |
212 | | - while m < len(sections): |
213 | | - sections[m] = cleanSection(sections[m]) |
214 | | - m += 1 |
215 | | - |
216 | | - # now remove ampty sections and return |
217 | | - sections2 = [] |
218 | | - m = 0 |
219 | | - while m < len(sections): |
220 | | - if len(sections[m]) > 0: |
221 | | - sections2.append(sections[m]) |
222 | | - m += 1 |
223 | | - |
224 | | - return sections2 |
225 | | - |
226 | | - |
227 | | -def cleanupWhoisResponse( |
228 | | - response: str, |
229 | | - verbose: bool = False, |
230 | | - with_cleanup_results: bool = False, |
231 | | -): |
232 | | - result = whois._2_parse.cleanupWhoisResponse( |
233 | | - response, |
234 | | - verbose=False, |
235 | | - with_cleanup_results=False, |
236 | | - ) |
237 | | - |
238 | | - rDict = { |
239 | | - "BodyHasSections": False, # if this is true the body is not a list of lines but a list of sections with lines |
240 | | - "Preamble": [], # the lines telling what whois servers wwere contacted |
241 | | - "Percent": [], # lines staring with %% , often not present but may contain hints |
242 | | - "Body": [], # the body of the whois, may be in sections separated by empty lines |
243 | | - "Postamble": [], # copyright and other not relevant info for actual parsing whois |
244 | | - } |
245 | | - body = [] |
246 | | - |
247 | | - rr = [] |
248 | | - z = result.split("\n") |
249 | | - preambleSeen = False |
250 | | - postambleSeen = False |
251 | | - percentSeen = False |
252 | | - for line in z: |
253 | | - if preambleSeen is False: |
254 | | - if line.startswith("["): |
255 | | - rDict["Preamble"].append(line) |
256 | | - line = "PRE;" + line |
257 | | - continue |
258 | | - else: |
259 | | - preambleSeen = True |
260 | | - |
261 | | - if preambleSeen is True and percentSeen is False: |
262 | | - if line.startswith("%"): |
263 | | - rDict["Percent"].append(line) |
264 | | - line = "PERCENT;" + line |
265 | | - continue |
266 | | - else: |
267 | | - percentSeen = True |
268 | | - |
269 | | - if postambleSeen is False: |
270 | | - if line.startswith("--") or line.startswith(">>> ") or line.startswith("Copyright notice"): |
271 | | - postambleSeen = True |
272 | | - |
273 | | - if postambleSeen is True: |
274 | | - rDict["Postamble"].append(line) |
275 | | - line = "POST;" + line |
276 | | - continue |
277 | | - |
278 | | - body.append(line) |
279 | | - |
280 | | - if "\t" in line: |
281 | | - line = "TAB;" + line # mark lines having tabs |
282 | | - |
283 | | - if line.endswith("\r"): |
284 | | - line = "CR;" + line # mark lines having CR (\r) |
285 | | - |
286 | | - rr.append(line) |
287 | | - |
288 | | - body = cleanSection(body) |
289 | | - rDict["Body"] = splitBodyInSections(body) |
290 | | - return "\n".join(rr), rDict |
291 | | - |
292 | | - |
293 | 317 | def usage(): |
294 | 318 | print( |
295 | 319 | """ |
@@ -448,25 +472,14 @@ def main(argv): |
448 | 472 |
|
449 | 473 | if opt in ("-C", "--Cleanup"): |
450 | 474 | inFile = arg |
451 | | - isFile = os.path.isfile(inFile) |
| 475 | + isFile = os.path.isfile(arg) |
452 | 476 | if isFile is False: |
453 | 477 | print(f"{inFile} cannot be found or is not a file", file=sys.stderr) |
454 | 478 | sys.exit(101) |
455 | | - whois_str = readInputFile(inFile) |
456 | | - d1, rDict = cleanupWhoisResponse(whois_str) |
457 | | - |
458 | | - print(d1) # the data without pre and postamble or percent section |
459 | | - print(rDict) |
460 | | - |
461 | | - k = "Body" |
462 | | - if len(rDict[k]): |
463 | | - n = 0 |
464 | | - for section in rDict[k]: |
465 | | - print(f"# ------------- {k} Section: {n} ----------------------") |
466 | | - n += 1 |
467 | | - for line in section: |
468 | | - print(line) |
469 | 479 |
|
| 480 | + rc = ResponseCleaner(inFile) |
| 481 | + d1, rDict = rc.cleanupWhoisResponse() |
| 482 | + rc.printMe() |
470 | 483 | sys.exit(0) |
471 | 484 |
|
472 | 485 | if opt in ("-f", "--file"): |
|
0 commit comments