|
| 1 | +import urllib.request as req |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import xml.dom.minidom |
| 4 | +from django.utils.feedgenerator import Rss201rev2Feed |
| 5 | +from datetime import datetime |
| 6 | +import locale |
| 7 | +import re |
| 8 | +import html |
| 9 | +import hashlib |
| 10 | + |
def main():
    """Scrape the Contrast on-premises release notes and write them as RSS.

    Downloads the Japanese release-notes page, turns every ``section.section``
    element whose id and ``h3.title`` text match the expected
    "<month>月-<version>-" / "<month>月(<version>)" shapes into one feed item,
    and writes the resulting RSS 2.0 document to
    ``/feeds/contrast_rlsnote_eop.xml`` as UTF-8.

    Sections without an id or an ``h3.title`` heading, or whose id/title do
    not match the patterns, are skipped. An item whose
    ``data-time-modified`` attribute is missing or cannot be parsed is
    published without a publication date.
    """
    page_url = 'https://docs.contrastsecurity.jp/ja/release-on-premises.html'

    # BeautifulSoup consumes the response while constructing the tree, so the
    # connection can be closed as soon as parsing is done. The timeout keeps
    # an unresponsive server from hanging the run indefinitely.
    with req.urlopen(page_url, timeout=30) as res:
        soup = BeautifulSoup(res, 'lxml')

    feed = Rss201rev2Feed(
        title='Contrast Release Note(On-premises)',
        link='https://contrastsecurity.dev/contrast-documentation-rss',
        description='Contrast Release Note(On-premises)',
        language='ja',
        author_name="Contrast Security Japan G.K.",
        feed_url='https://contrastsecurity.dev/contrast-documentation-rss/contrast_rlsnote_eop.xml',
        feed_copyright='Copyright 2023 Contrast Security Japan G.K.'
    )

    id_ptn = re.compile(r'^[0-9]{1,2}月-[0-9\-]+-$')
    title_ptn = re.compile(r'^[0-9]{1,2}月\([0-9\.]+\)$')

    for section in soup.select('section.section'):
        # A section without an id yields '' here, which never matches id_ptn.
        section_id = (section.get("id") or '').strip()
        heading = section.select_one('h3.title')
        if heading is None:
            continue
        title = heading.text.strip()
        if not id_ptn.search(section_id) or not title_ptn.search(title):
            continue

        pubdate = None
        pubdate_str = section.get("data-time-modified")  # e.g. "November 6, 2023"
        if pubdate_str:
            try:
                pubdate = datetime.strptime(pubdate_str, '%B %d, %Y')
            except ValueError:
                # Unexpected date format: keep the item, just without a date,
                # instead of aborting the whole feed.
                pubdate = None

        # The description is an HTML fragment, so the scraped plain text must
        # be escaped before being wrapped in tags.
        desc_lines = []
        for sub_section in section.select('section.section'):
            sub_title = sub_section.select_one('div.titlepage')
            if sub_title is not None:
                desc_lines.append(
                    '<b>%s</b>' % html.escape(sub_title.text, quote=False))
            for list_item in sub_section.select('li.listitem'):
                paragraph = list_item.select_one('p')
                # Fall back to the list item's own text when it has no <p>.
                item_text = list_item.text if paragraph is None else paragraph.text
                desc_lines.append('・%s' % html.escape(item_text, quote=False))
        description = ''.join('<p>{0}</p>'.format(line) for line in desc_lines)

        # The link uses the readable anchor; the GUID uses the MD5 hex digest
        # of that same id.
        id_hash = hashlib.md5(section_id.encode()).hexdigest()
        link = '%s#%s' % (page_url, section_id)
        guid = '%s#%s' % (page_url, id_hash)

        feed.add_item(title=title, link=link, description=description,
                      pubdate=pubdate, unique_id=guid)

    feed_xml = feed.writeString('utf-8')
    dom = xml.dom.minidom.parseString(feed_xml)
    # writexml's ``encoding`` only sets the XML declaration; the file itself
    # must be opened as UTF-8 so the bytes on disk match that declaration.
    with open('/feeds/contrast_rlsnote_eop.xml', 'w', encoding='utf-8') as fp:
        dom.writexml(fp, encoding='utf-8', newl='\n', indent='', addindent=' ')
| 66 | + |
# Build the feed only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
| 69 | + |
0 commit comments