@@ -94,38 +94,28 @@ Resources:
9494 import boto3
9595
def clean_html(html_content):
    """Convert an HTML fragment to plain text with numbered link references.

    Tags are dropped and their text content is kept. Each ``<a>`` element that
    carries a non-empty ``href`` gets a ``[n]`` marker appended right after its
    link text, and a matching ``[n]: url`` line is listed after the text.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The stripped text. When at least one link was found, the text is
        followed by a blank line and one ``[n]: url`` line per link, in
        document order. When no link was found, only the text is returned.
    """

    class _LinkReferenceParser(HTMLParser):
        """Collect text chunks and number hyperlinks as they are encountered."""

        def __init__(self):
            super().__init__()
            self.parts = []      # text chunks and "[n]" markers, in document order
            self.refs = {}       # reference number -> resolved URL
            self.open_refs = []  # one entry per open <a>: its number, or None

        def handle_starttag(self, tag, attrs):
            if tag != 'a':
                return
            href = next((value for attr, value in attrs if attr == 'href'), None)
            if not href:
                # Anchors without a target (e.g. <a name="...">) must not
                # consume a reference number or emit a marker.
                self.open_refs.append(None)
                return
            if href.startswith('//'):
                # Protocol-relative URL: it already names its own host.
                href = f"https:{href}"
            elif href.startswith('/'):
                href = f"https://aws.amazon.com{href}"
            number = len(self.refs) + 1
            self.refs[number] = href
            self.open_refs.append(number)

        def handle_endtag(self, tag):
            # Ignore a stray </a> that has no matching start tag.
            if tag != 'a' or not self.open_refs:
                return
            number = self.open_refs.pop()
            if number is not None:
                self.parts.append(f"[{number}]")

        def handle_data(self, data):
            self.parts.append(data)

    parser = _LinkReferenceParser()
    parser.feed(html_content)
    # close() flushes text the parser is still buffering (for example a
    # trailing "AT&T", which feed() holds back as a possible partial charref).
    parser.close()

    text = ''.join(parser.parts).strip()
    if not parser.refs:
        return text
    reference_lines = '\n'.join(
        f"[{number}]: {url}" for number, url in parser.refs.items()
    )
    return f"{text}\n\n{reference_lines}"
129119
130120 def lambda_handler(event, context):
131121 feed_url = os.environ['FEED_URL']
@@ -532,38 +522,28 @@ Resources:
532522 from dateutil.parser import parse, ParserError
533523
def clean_html(html_content):
    """Convert an HTML fragment to plain text with numbered link references.

    Tags are dropped and their text content is kept. Each ``<a>`` element that
    carries a non-empty ``href`` gets a ``[n]`` marker appended right after its
    link text, and a matching ``[n]: url`` line is listed after the text.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The stripped text. When at least one link was found, the text is
        followed by a blank line and one ``[n]: url`` line per link, in
        document order. When no link was found, only the text is returned.
    """

    class _LinkReferenceParser(HTMLParser):
        """Collect text chunks and number hyperlinks as they are encountered."""

        def __init__(self):
            super().__init__()
            self.parts = []      # text chunks and "[n]" markers, in document order
            self.refs = {}       # reference number -> resolved URL
            self.open_refs = []  # one entry per open <a>: its number, or None

        def handle_starttag(self, tag, attrs):
            if tag != 'a':
                return
            href = next((value for attr, value in attrs if attr == 'href'), None)
            if not href:
                # Anchors without a target (e.g. <a name="...">) must not
                # consume a reference number or emit a marker.
                self.open_refs.append(None)
                return
            if href.startswith('//'):
                # Protocol-relative URL: it already names its own host.
                href = f"https:{href}"
            elif href.startswith('/'):
                href = f"https://aws.amazon.com{href}"
            number = len(self.refs) + 1
            self.refs[number] = href
            self.open_refs.append(number)

        def handle_endtag(self, tag):
            # Ignore a stray </a> that has no matching start tag.
            if tag != 'a' or not self.open_refs:
                return
            number = self.open_refs.pop()
            if number is not None:
                self.parts.append(f"[{number}]")

        def handle_data(self, data):
            self.parts.append(data)

    parser = _LinkReferenceParser()
    parser.feed(html_content)
    # close() flushes text the parser is still buffering (for example a
    # trailing "AT&T", which feed() holds back as a possible partial charref).
    parser.close()

    text = ''.join(parser.parts).strip()
    if not parser.refs:
        return text
    reference_lines = '\n'.join(
        f"[{number}]: {url}" for number, url in parser.refs.items()
    )
    return f"{text}\n\n{reference_lines}"
567547
568548 def lambda_handler(event, context):
569549 feed_url = os.environ['FEED_URL']
@@ -662,7 +642,7 @@ Resources:
662642 Variables :
663643 BUCKET_NAME : !Ref DestinationBucket
664644 BUCKET_PATH : " aws-feeds/aws-feeds-security-bulletin"
665- FEED_URL : " https://aws.amazon.com/security/security-bulletins/rss/feed/"
645+ FEED_URL : " https://aws.amazon.com/security/security-bulletins/rss/feed/"
666646 Metadata :
667647 cfn_nag :
668648 rules_to_suppress :
0 commit comments