Skip to content

Commit 2f8e1e7

Browse files
authored
Workaround guidebook image behavior (#47)
If there's an `img` tag, Guidebook escapes it so that raw HTML shows up for that tag. We then detect that as a difference and update the session, which Guidebook then munges again, so we end up updating it every time. This strips all `img` tags first. Signed-off-by: Phil Dibowitz <phil@ipom.com>
1 parent 4315e98 commit 2f8e1e7

File tree

1 file changed

+21
-9
lines changed

1 file changed

+21
-9
lines changed

guidebook/sync_guidebook.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,17 @@
3030
# to update that.
3131
#
3232

33-
from datetime import datetime
34-
from dateutil import parser
35-
from markdownify import markdownify as md
33+
from bs4 import BeautifulSoup
3634
from datadog_api_client import ApiClient, Configuration
3735
from datadog_api_client.v2.api.metrics_api import MetricsApi
3836
from datadog_api_client.v2.model.metric_intake_type import MetricIntakeType
3937
from datadog_api_client.v2.model.metric_payload import MetricPayload
4038
from datadog_api_client.v2.model.metric_point import MetricPoint
4139
from datadog_api_client.v2.model.metric_resource import MetricResource
4240
from datadog_api_client.v2.model.metric_series import MetricSeries
41+
from datetime import datetime
42+
from dateutil import parser
43+
from markdownify import markdownify as md
4344
import click
4445
import json
4546
import logging
@@ -201,6 +202,16 @@ def _load_event_json(self, raw):
201202
if room != "":
202203
self.rooms.add(room)
203204
clean_session = {k: v.strip() for k, v in session.items()}
205+
if clean_session["LongAbstract"] != "":
206+
html = BeautifulSoup(
207+
clean_session["LongAbstract"], "html.parser"
208+
)
209+
# nuke all images from the HTML because Guidebook doesn't
210+
# support them and will escape the tags in a way that makes
211+
# us forever update the sessions as different
212+
for img in html.find_all("img"):
213+
img.decompose()
214+
clean_session["LongAbstract"] = str(html)
204215
data_by_name[name] = clean_session
205216
data_by_nid[session["nid"]] = clean_session
206217
return (data_by_name, data_by_nid)
@@ -661,6 +672,7 @@ def normalize_html(self, html):
661672
that which gives us a lot of information about formatting without
662673
being sensitive to exact HTML.
663674
"""
675+
664676
markdown = md(html)
665677
# Normalize whitespace and quotes
666678
markdown = markdown.replace("\u2018", "'").replace("\u2019", "'")
@@ -692,14 +704,14 @@ def session_needs_update(self, new_data, original_session):
692704
]
693705
for key in all_keys:
694706
if "time" in key:
695-
a = self.normalize_time(new_data[key])
696-
b = self.normalize_time(original_session[key])
707+
a = self.normalize_time(original_session[key])
708+
b = self.normalize_time(new_data[key])
697709
elif "html" in key:
698-
a = self.normalize_html(new_data[key])
699-
b = self.normalize_html(original_session[key])
710+
a = self.normalize_html(original_session[key])
711+
b = self.normalize_html(new_data[key])
700712
else:
701-
a = new_data[key]
702-
b = original_session[key]
713+
a = original_session[key]
714+
b = new_data[key]
703715
if a != b:
704716
self.logger.info(
705717
"Session '%s' needs update because '%s' changed: '%s' !="

0 commit comments

Comments
 (0)