11# -- coding: utf-8 --
22
3- import datetime
3+ from datetime import datetime
44import codecs
55import re
6- import os
7- from html . parser import HTMLParser
6+ import html
7+ import urllib . request
88
99from pymysql import connect
1010
@@ -22,11 +22,15 @@ def _clean_file(filepath, log):
2222 :param filepath: Path to ARCHIVE_DB.pl
2323 :return: Python dictionary keyed by original story id
2424 """
25- h = HTMLParser ()
26- archive_db = codecs .open (filepath , "r" , encoding = "utf-8" ).read ()
25+ encoding = input (
26+ 'Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): '
27+ )
28+ if encoding is None or encoding == "" :
29+ encoding = "utf-8"
30+ archive_db = codecs .open (filepath , "r" , encoding = encoding ).read ()
2731
2832 # Manually escape single quote entity and reformat file as a Python dictionary
29- step1 = h .unescape (archive_db .replace ("'" , "\\ '" ))
33+ step1 = html .unescape (archive_db .replace ("'" , "\\ '" ))
3034
3135 # Indent the file with a single tab instead of whatever is currently used
3236 step15 = re .sub (r"^\s+" , "\t " , step1 )
@@ -122,8 +126,32 @@ def _extract_fandoms(args, record):
122126 return tags .strip (", " )
123127
124128
def _extract_date(args, record, log):
    """Return the publication date of ``record`` formatted as ``YYYY-MM-DD``.

    The date is read from the first of the keys ``PrintTime``, ``DatePrint``
    or ``Date`` that is present in the record; when none is present, today's
    date is used. The value may be either a Unix timestamp (all digits) or a
    date in ``%m/%d/%y`` form.

    :param args: Command-line arguments; not used by this function, kept so
        the signature matches the other ``_extract_*`` helpers.
    :param record: Dictionary describing one story.
    :param log: Logger-like object; ``log.error`` is called when the value
        cannot be parsed.
    :return: The date as a ``YYYY-MM-DD`` string, or ``""`` when the stored
        value cannot be parsed.
    """
    for key in ("PrintTime", "DatePrint", "Date"):
        if key in record:
            raw_value = record[key]
            break
    else:
        # No date recorded at all: fall back to today's date.
        raw_value = datetime.now().strftime("%m/%d/%y")

    # Values may arrive as integers (numeric timestamps) or carry stray
    # whitespace; normalise to a clean string before inspecting it.
    date_string = str(raw_value).strip()

    try:
        if date_string.isdigit():
            # All digits: treat as a Unix timestamp (converted to local time).
            parsed = datetime.fromtimestamp(int(date_string))
        else:
            parsed = datetime.strptime(date_string, "%m/%d/%y")
    except (ValueError, OverflowError, OSError) as e:
        # strptime raises ValueError on a format mismatch; fromtimestamp
        # raises OverflowError/OSError/ValueError for out-of-range values.
        log.error(
            f"Failed to parse date value '{date_string}' due to exception: {e}"
        )
        return ""

    return parsed.strftime("%Y-%m-%d")
151+
152+
125153def _create_mysql (args , FILES , log ):
126- db = connect (args .db_host , args .db_user , args .db_password , "" )
154+ db = connect (host = args .db_host , user = args .db_user , password = args .db_password , db = "" )
127155 cursor = db .cursor ()
128156 DATABASE_NAME = args .temp_db_database
129157
@@ -132,12 +160,13 @@ def _create_mysql(args, FILES, log):
132160 cursor .execute ("create database {0};" .format (DATABASE_NAME ))
133161 cursor .execute ("use {0}" .format (DATABASE_NAME ))
134162
135- sql = Sql (args )
136- codepath = os .path .dirname (os .path .realpath (__file__ ))
163+ # Instead of duplicating this file in the repo grab it from the master branch of eFiction
164+ url = "https://raw.githubusercontent.com/otwcode/open-doors-eFiction/refs/heads/master/opendoors/open-doors-tables-working.sql"
165+ with urllib .request .urlopen (url ) as response :
166+ script = response .read ().decode ()
137167
138- sql .run_script_from_file (
139- codepath + "/shared_python/create-open-doors-tables.sql" , database = DATABASE_NAME
140- )
168+ sql = Sql (args , log )
169+ sql .run_sql_file (script , database = DATABASE_NAME )
141170 db .commit ()
142171
143172 authors = [
@@ -164,26 +193,17 @@ def _create_mysql(args, FILES, log):
164193 FILES [i ].get ("Summary" , "" ).replace ("'" , "\\ '" ),
165194 _extract_tags (args , FILES [i ]),
166195 _extract_characters (args , FILES [i ]),
167- datetime .datetime .strptime (
168- FILES [i ].get (
169- "PrintTime" ,
170- FILES [i ].get (
171- "DatePrint" ,
172- FILES [i ].get (
173- "Date" , str (datetime .datetime .now ().strftime ("%m/%d/%y" ))
174- ),
175- ),
176- ),
177- "%m/%d/%y" ,
178- ).strftime ("%Y-%m-%d" ),
196+ _extract_date (args , FILES [i ], log ),
179197 FILES [i ].get ("Location" , "" ).replace ("'" , "\\ '" ),
180198 FILES [i ]
181199 .get ("LocationURL" , FILES [i ].get ("StoryURL" , "" ))
182200 .replace ("'" , "\\ '" ),
183201 FILES [i ].get ("Notes" , "" ).replace ("'" , "\\ '" ),
184202 _extract_relationships (args , FILES [i ]),
185203 FILES [i ].get ("Rating" , "" ),
186- FILES [i ].get ("Warnings" , "" ).replace ("'" , "\\ '" ),
204+ FILES [i ]
205+ .get ("Warnings" , FILES [i ].get ("OptionalWarnings" , "" ))
206+ .replace ("'" , "\\ '" ),
187207 FILES [i ].get ("Author" , "" ).strip (),
188208 FILES [i ].get ("Email" , FILES [i ].get ("EmailAuthor" , "" )).lower ().strip (),
189209 FILES [i ].get ("FileType" , args .chapters_file_extensions )
@@ -196,6 +216,7 @@ def _create_mysql(args, FILES, log):
196216
197217 cur = 0
198218 total = len (FILES )
219+ item_dict = {}
199220 for (
200221 original_id ,
201222 title ,
@@ -225,7 +246,7 @@ def _create_mysql(args, FILES, log):
225246 table_name = "stories"
226247 else :
227248 filename = url
228- table_name = "bookmarks "
249+ table_name = "story_links "
229250
230251 # Clean up fandoms and add default fandom if it exists
231252 final_fandoms = fandoms .replace ("'" , r"\'" )
@@ -241,10 +262,14 @@ def _create_mysql(args, FILES, log):
241262 if element [1 ] == author and element [2 ] == email
242263 ]
243264 authorid = result [0 ][0 ]
265+ item_dict [original_id ] = {
266+ "authorid" : authorid ,
267+ "itemtype" : "story_link" if table_name == "story_links" else "story" ,
268+ }
244269
245270 stor = """
246- INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings, author_id )
247- VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}', '{13}' );\n """ .format (
271+ INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings)
272+ VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}');\n """ .format (
248273 table_name ,
249274 original_id ,
250275 final_fandoms .replace (r"\\" , "\\ " ),
@@ -258,7 +283,6 @@ def _create_mysql(args, FILES, log):
258283 pairings ,
259284 rating ,
260285 warnings ,
261- authorid ,
262286 )
263287 cursor .execute (stor )
264288 except :
@@ -285,6 +309,21 @@ def _create_mysql(args, FILES, log):
285309 raise
286310 db .commit ()
287311
312+ for itemid , item_info in item_dict .items ():
313+ try :
314+ item_auth = """
315+ INSERT INTO item_authors (author_id, item_id, item_type)
316+ VALUES({0}, {1}, '{2}');\n """ .format (
317+ item_info ["authorid" ], itemid , item_info ["itemtype" ]
318+ )
319+ cursor .execute (item_auth )
320+ except :
321+ log .error (
322+ f"Failed to insert item_authors for { item_info ['itemtype' ]} { itemid } with author { item_info ['authorid' ]} "
323+ )
324+ raise
325+ db .commit ()
326+
288327
289328def clean_and_load_data (args , log ):
290329 data = _clean_file (args .db_input_file , log )
0 commit comments