|
4 | 4 | import re |
5 | 5 | import getopt |
6 | 6 | import sys |
| 7 | +from typing import Optional, List, Dict |
7 | 8 |
|
8 | 9 | Verbose = False |
9 | 10 | PrintGetRawWhoisResult = False |
@@ -174,58 +175,126 @@ def readInputFile(pathToTestFile: str): |
174 | 175 | return f.read().decode(errors="ignore") |
175 | 176 |
|
176 | 177 |
|
def cleanSection(section: List) -> List:
    """Strip leading and trailing blank lines from ``section`` in place.

    A line counts as blank when it is empty or consists only of
    whitespace. Blank lines in the middle of the section are kept.

    Args:
        section: list of text lines; it is modified in place.

    Returns:
        The same list object, with blank lines removed from both ends.
        A section consisting only of blank lines becomes an empty list.
    """
    blank = re.compile(r"^\s*$")

    # Locate the non-blank core [start, end). Both scans are bounded so a
    # section made up entirely of blank lines cannot index past the list.
    start = 0
    end = len(section)
    while start < end and blank.match(section[start]):
        start += 1
    while end > start and blank.match(section[end - 1]):
        end -= 1

    # Trim the tail first so that ``start`` is still a valid index.
    del section[end:]
    del section[:start]
    return section
| 196 | + |
| 197 | + |
def splitBodyInSections(body: List) -> List:
    """Split ``body`` into sections separated by blank lines.

    A line counts as blank when it is empty or consists only of
    whitespace. Blank lines act purely as separators: they never appear
    in the output, and runs of consecutive blank lines do not produce
    empty sections.

    Args:
        body: list of text lines.

    Returns:
        A list of sections, each a non-empty list of consecutive
        non-blank lines in their original order. Returns an empty list
        when ``body`` has no non-blank lines.
    """
    blank = re.compile(r"^\s*$")

    sections = []
    current = []
    for line in body:
        if blank.match(line):
            # A separator closes the section being collected, if any.
            if current:
                sections.append(current)
                current = []
            continue
        current.append(line)

    # The last section has no trailing separator to close it.
    if current:
        sections.append(current)

    # Sections hold no blank lines by construction, so there is nothing
    # left to trim at either end.
    return sections
| 231 | + |
| 232 | + |
def cleanupWhoisResponse(
    response: str,
    verbose: bool = False,
    with_cleanup_results: bool = False,
) -> tuple:
    """Clean a raw whois response and split its body into sections.

    The response is first passed through
    ``whois._2_parse.cleanupWhoisResponse``. The cleaned text is then
    classified line by line:

    * leading lines starting with ``[`` are the preamble,
    * the following lines starting with ``%`` are the percent block,
    * everything from the first line starting with ``--``, ``>>> `` or
      ``Copyright notice`` onwards is the postamble,
    * all remaining lines form the body.

    Args:
        response: raw whois response text.
        verbose: accepted for interface compatibility; ``False`` is
            always passed to the underlying cleanup call.
        with_cleanup_results: accepted for interface compatibility;
            ``False`` is always passed to the underlying cleanup call.

    Returns:
        A 2-tuple ``(marked_body, sections)``. ``marked_body`` is the
        body lines joined with newlines, where a line containing a tab
        is prefixed with ``TAB;`` and a line ending in a carriage return
        is prefixed with ``CR;``. ``sections`` is the unmarked body
        split on blank lines by ``splitBodyInSections``.
    """
    result = whois._2_parse.cleanupWhoisResponse(
        response,
        verbose=False,
        with_cleanup_results=False,
    )

    # Only "Body" is returned at present; the other parts are collected
    # alongside it but not handed back to the caller.
    rDict = {
        "BodyHasSections": False,  # if true, the body is a list of sections rather than a list of lines
        "Preamble": [],  # the lines telling which whois servers were contacted
        "Percent": [],  # lines starting with %, often not present but may contain hints
        "Body": [],  # the body of the whois, may be in sections separated by empty lines
        "Postamble": [],  # copyright and other info not relevant for parsing the whois data
    }
    body = []  # unmarked body lines
    rr = []  # body lines carrying TAB;/CR; markers

    preambleSeen = False
    percentSeen = False
    postambleSeen = False
    for line in result.split("\n"):
        if not preambleSeen:
            if line.startswith("["):
                rDict["Preamble"].append(line)
                continue
            preambleSeen = True

        if not percentSeen:
            if line.startswith("%"):
                rDict["Percent"].append(line)
                continue
            percentSeen = True

        # Once a postamble marker is seen, every remaining line is postamble.
        if not postambleSeen and line.startswith(("--", ">>> ", "Copyright notice")):
            postambleSeen = True

        if postambleSeen:
            rDict["Postamble"].append(line)
            continue

        body.append(line)

        if "\t" in line:
            line = "TAB;" + line  # mark lines having tabs
        if line.endswith("\r"):
            line = "CR;" + line  # mark lines having CR (\r)
        rr.append(line)

    # splitBodyInSections discards blank lines itself, so the body needs
    # no separate trimming first; this also keeps a body made up only of
    # blank lines (e.g. an empty cleaned response) from raising.
    rDict["Body"] = splitBodyInSections(body)
    return "\n".join(rr), rDict["Body"]
229 | 298 |
|
230 | 299 |
|
231 | 300 | def usage(): |
@@ -391,7 +460,11 @@ def main(argv): |
391 | 460 | print(f"{inFile} cannot be found or is not a file", file=sys.stderr) |
392 | 461 | sys.exit(101) |
393 | 462 | whois_str = readInputFile(inFile) |
394 | | - cleanupWhoisResponse(whois_str) |
| 463 | + d1, rDict = cleanupWhoisResponse(whois_str) |
| 464 | + |
| 465 | + print(d1) # the data without pre and postamble or percent section |
| 466 | + print(rDict) |
| 467 | + |
395 | 468 | sys.exit(0) |
396 | 469 |
|
397 | 470 | if opt in ("-f", "--file"): |
|
0 commit comments