|
30 | 30 | # to update that. |
31 | 31 | # |
32 | 32 |
|
33 | | -from datetime import datetime |
34 | | -from dateutil import parser |
35 | | -from markdownify import markdownify as md |
| 33 | +from bs4 import BeautifulSoup |
36 | 34 | from datadog_api_client import ApiClient, Configuration |
37 | 35 | from datadog_api_client.v2.api.metrics_api import MetricsApi |
38 | 36 | from datadog_api_client.v2.model.metric_intake_type import MetricIntakeType |
39 | 37 | from datadog_api_client.v2.model.metric_payload import MetricPayload |
40 | 38 | from datadog_api_client.v2.model.metric_point import MetricPoint |
41 | 39 | from datadog_api_client.v2.model.metric_resource import MetricResource |
42 | 40 | from datadog_api_client.v2.model.metric_series import MetricSeries |
| 41 | +from datetime import datetime |
| 42 | +from dateutil import parser |
| 43 | +from markdownify import markdownify as md |
43 | 44 | import click |
44 | 45 | import json |
45 | 46 | import logging |
@@ -201,6 +202,16 @@ def _load_event_json(self, raw): |
201 | 202 | if room != "": |
202 | 203 | self.rooms.add(room) |
203 | 204 | clean_session = {k: v.strip() for k, v in session.items()} |
| 205 | + if clean_session["LongAbstract"] != "": |
| 206 | + html = BeautifulSoup( |
| 207 | + clean_session["LongAbstract"], "html.parser" |
| 208 | + ) |
| 209 | + # Remove all images from the HTML: Guidebook doesn't support |
| 210 | + # them and escapes the tags, which makes the session always |
| 211 | + # compare as different, so we would update it on every run. |
| 212 | + for img in html.find_all("img"): |
| 213 | + img.decompose() |
| 214 | + clean_session["LongAbstract"] = str(html) |
204 | 215 | data_by_name[name] = clean_session |
205 | 216 | data_by_nid[session["nid"]] = clean_session |
206 | 217 | return (data_by_name, data_by_nid) |
@@ -661,6 +672,7 @@ def normalize_html(self, html): |
661 | 672 | that which gives us a lot of information about formatting without |
662 | 673 | being sensitive to exact HTML. |
663 | 674 | """ |
| 675 | + |
664 | 676 | markdown = md(html) |
665 | 677 | # Normalize whitespace and quotes |
666 | 678 | markdown = markdown.replace("\u2018", "'").replace("\u2019", "'") |
@@ -692,14 +704,14 @@ def session_needs_update(self, new_data, original_session): |
692 | 704 | ] |
693 | 705 | for key in all_keys: |
694 | 706 | if "time" in key: |
695 | | - a = self.normalize_time(new_data[key]) |
696 | | - b = self.normalize_time(original_session[key]) |
| 707 | + a = self.normalize_time(original_session[key]) |
| 708 | + b = self.normalize_time(new_data[key]) |
697 | 709 | elif "html" in key: |
698 | | - a = self.normalize_html(new_data[key]) |
699 | | - b = self.normalize_html(original_session[key]) |
| 710 | + a = self.normalize_html(original_session[key]) |
| 711 | + b = self.normalize_html(new_data[key]) |
700 | 712 | else: |
701 | | - a = new_data[key] |
702 | | - b = original_session[key] |
| 713 | + a = original_session[key] |
| 714 | + b = new_data[key] |
703 | 715 | if a != b: |
704 | 716 | self.logger.info( |
705 | 717 | "Session '%s' needs update because '%s' changed: '%s' !=" |
|
0 commit comments