1- #!/usr/bin/python3
1+ #!/usr/bin/env python3
22
33#
44# Copyright 2018-present Southern California Linux Expo
3232
3333from datetime import datetime
3434from dateutil import parser
35+ from markdownify import markdownify as md
3536import click
3637import json
3738import logging
@@ -500,6 +501,30 @@ def add_session(self, session, original_session=None):
500501 self .sessions_by_nid [session ["nid" ]] = s
501502 self .sessions_by_name [name ] = s
502503
def normalize_html(self, html):
    """
    Reduce an HTML fragment to a canonical Markdown string for comparison.

    The HTML supported by Drupal vs Guidebook is different and
    GB normalizes it upon import, so comparing the raw HTML can get us
    in a state where we always detect a difference.

    Stripping HTML is lossy, so instead we convert to MD and compare
    that, which gives us a lot of information about formatting without
    being sensitive to exact HTML.

    :param html: HTML string to normalize.
    :return: Markdown text with typographic quotes replaced by their
        ASCII equivalents and every run of whitespace collapsed to a
        single space.
    """
    markdown = md(html)
    # Normalize quotes. Each key is the bare quote character: matching
    # "quote + space" would miss quotes that are followed by a letter or
    # punctuation, and would swallow the space when it did match.
    straight_quotes = str.maketrans(
        {
            "\u2018": "'",
            "\u2019": "'",
            "\u201c": '"',
            "\u201d": '"',
        }
    )
    markdown = markdown.translate(straight_quotes)
    # Collapse whitespace (including newlines) to single spaces.
    return " ".join(markdown.split())
521+
def normalize_time(self, time_str):
    """
    Parse a timestamp string and return it as a datetime in UTC.

    A "+0000" offset is rewritten as "+00:00" before the string is
    handed to the ISO-8601 parser; the parsed value is then converted
    to the UTC timezone so two timestamps can be compared directly.

    :param time_str: timestamp string to parse.
    :return: the parsed datetime, converted to UTC.
    """
    iso_text = time_str.replace("+0000", "+00:00")
    return parser.isoparse(iso_text).astimezone(pytz.utc)
527+
503528 def session_needs_update (self , new_data , original_session ):
504529 """
505530 Compare the new session data to the original session data, and return
@@ -517,12 +542,11 @@ def session_needs_update(self, new_data, original_session):
517542 ]
518543 for key in all_keys :
519544 if "time" in key :
520- a = new_data [key ].replace ("+0000" , "+00:00" )
521- b = original_session [key ].replace ("+0000" , "+00:00" )
522- a = parser .isoparse (a )
523- b = parser .isoparse (b )
524- a = a .astimezone (pytz .utc )
525- b = b .astimezone (pytz .utc )
545+ a = self .normalize_time (new_data [key ])
546+ b = self .normalize_time (original_session [key ])
547+ elif "html" in key :
548+ a = self .normalize_html (new_data [key ])
549+ b = self .normalize_html (original_session [key ])
526550 else :
527551 a = new_data [key ]
528552 b = original_session [key ]
0 commit comments