Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 6791a5b

Browse files
committed
analyze body of response, split and clean up
1 parent 0498291 commit 6791a5b

File tree

1 file changed

+90
-17
lines changed

1 file changed

+90
-17
lines changed

test2.py

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import re
55
import getopt
66
import sys
7+
from typing import Optional, List, Dict
78

89
Verbose = False
910
PrintGetRawWhoisResult = False
@@ -174,58 +175,126 @@ def readInputFile(pathToTestFile: str):
174175
return f.read().decode(errors="ignore")
175176

176177

178+
def cleanSection(section: List) -> List:
    """Strip leading and trailing blank lines from a section, in place.

    A line counts as blank when it is empty or consists only of whitespace.
    Blank lines between non-blank lines are kept.

    Args:
        section: list of text lines; it is modified in place.

    Returns:
        The same list object that was passed in, after trimming. A section
        that is empty or contains only blank lines comes back empty.
    """
    blank = re.compile(r"^\s*$")

    # Count the leading blank lines first and drop them with one slice
    # deletion. The bounds check keeps an all-blank section from running
    # off the end of the list.
    start = 0
    while start < len(section) and blank.match(section[start]):
        start += 1
    del section[:start]

    # Trim trailing blank lines; the truthiness check stops once the list
    # is exhausted.
    while section and blank.match(section[-1]):
        section.pop()

    return section
196+
197+
198+
def splitBodyInSections(body: List) -> List:
    """Split a list of lines into sections separated by blank lines.

    A line counts as blank when it is empty or consists only of whitespace.
    Blank lines act purely as separators: they never end up inside a
    section, and runs of consecutive blank lines do not produce empty
    sections.

    Args:
        body: list of text lines.

    Returns:
        A list of sections, each a non-empty list of non-blank lines, in
        their original order. An empty or all-blank body gives ``[]``.
    """
    blank = re.compile(r"^\s*$")

    sections = []
    current = []
    for line in body:
        if blank.match(line):
            # A separator closes the section collected so far, if any.
            if current:
                sections.append(current)
                current = []
            continue
        current.append(line)

    # Flush the final section when the body does not end on a blank line.
    if current:
        sections.append(current)

    # No per-section trimming is needed: blank lines are never appended to
    # a section, so there are no leading or trailing blanks to remove.
    return sections
231+
232+
177233
def cleanupWhoisResponse(
    response: str,
    verbose: bool = False,
    with_cleanup_results: bool = False,
):
    """Classify the lines of a whois response and split its body in sections.

    The response is first passed through
    ``whois._2_parse.cleanupWhoisResponse`` and the result is split on
    newlines. Each line is then assigned to one of four groups:

    * preamble: leading lines that start with ``[``
    * percent: the lines starting with ``%`` that directly follow the preamble
    * postamble: everything from the first line that starts with ``--``,
      ``>>> `` or ``Copyright notice`` to the end
    * body: all remaining lines

    Args:
        response: raw whois response text.
        verbose: accepted for signature compatibility; not forwarded, the
            library call always receives ``verbose=False``.
        with_cleanup_results: accepted for signature compatibility; not
            forwarded, the library call always receives
            ``with_cleanup_results=False``.

    Returns:
        A 2-tuple ``(marked_body, sections)``. ``marked_body`` is the body
        lines joined with newlines, where a line containing a tab is
        prefixed with ``TAB;`` and a line ending in a carriage return is
        prefixed with ``CR;``. ``sections`` is the body split on blank
        lines by ``splitBodyInSections``.
    """
    # NOTE(review): presumably returns a str; it is split on newlines below.
    result = whois._2_parse.cleanupWhoisResponse(
        response,
        verbose=False,
        with_cleanup_results=False,
    )

    rDict = {
        # if this is true the body is not a list of lines but a list of sections with lines
        # NOTE(review): never set to True here, although "Body" ends up holding sections.
        "BodyHasSections": False,
        "Preamble": [],  # the lines telling what whois servers were contacted
        "Percent": [],  # lines starting with %, often not present but may contain hints
        "Body": [],  # the body of the whois, may be in sections separated by empty lines
        "Postamble": [],  # copyright and other info not relevant for actual parsing of whois
    }
    body = []  # unmarked body lines, input for the section split
    rr = []  # body lines with TAB;/CR; markers, joined for the first return value

    preambleSeen = False
    percentSeen = False
    postambleSeen = False
    for line in result.split("\n"):
        if not preambleSeen:
            if line.startswith("["):
                rDict["Preamble"].append(line)
                continue
            preambleSeen = True

        # Reaching this point means the preamble has ended.
        if not percentSeen:
            if line.startswith("%"):
                rDict["Percent"].append(line)
                continue
            percentSeen = True

        if not postambleSeen and line.startswith(("--", ">>> ", "Copyright notice")):
            postambleSeen = True

        # Once the postamble has started, every remaining line belongs to it.
        if postambleSeen:
            rDict["Postamble"].append(line)
            continue

        body.append(line)

        if "\t" in line:
            line = "TAB;" + line  # mark lines having tabs

        if line.endswith("\r"):
            line = "CR;" + line  # mark lines having CR (\r)

        rr.append(line)

    body = cleanSection(body)
    sections = splitBodyInSections(body)
    rDict["Body"] = sections
    # Only the body sections are returned for now; the other groups are
    # collected in rDict but not exposed to the caller.
    return "\n".join(rr), rDict["Body"]
229298

230299

231300
def usage():
@@ -391,7 +460,11 @@ def main(argv):
391460
print(f"{inFile} cannot be found or is not a file", file=sys.stderr)
392461
sys.exit(101)
393462
whois_str = readInputFile(inFile)
394-
cleanupWhoisResponse(whois_str)
463+
d1, rDict = cleanupWhoisResponse(whois_str)
464+
465+
print(d1) # the data without pre and postamble or percent section
466+
print(rDict)
467+
395468
sys.exit(0)
396469

397470
if opt in ("-f", "--file"):

0 commit comments

Comments
 (0)