|
| 1 | +""" |
| 2 | +Google Keep to Evernote Converter |
| 3 | +---------------------------------- |
| 4 | +
|
| 5 | +Converts notes from a Google Takeout Keep export into Evernote-compatible .enex files. |
| 6 | +
|
| 7 | +Features: |
| 8 | +- Converts note content, labels, timestamps, and pinned status |
| 9 | +- Converts checklists to Evernote <en-todo> format |
| 10 | +- Embeds images inline |
| 11 | +- Handles Unicode and emoji properly |
| 12 | +- Splits output into multiple .enex files (100 notes each) |
| 13 | +- Logs skipped or problematic items to a migration log |
| 14 | +
|
| 15 | +Tested with over 5,000 notes and 500MB of export data. |
| 16 | +
|
| 17 | +Author: Tal Tabakman (https://github.com/tabakman) |
| 18 | +Project: https://github.com/tabakman/google-keep-to-evernote |
| 19 | +License: MIT |
| 20 | +""" |
| 21 | + |
| 22 | +import os |
| 23 | +import sys |
| 24 | +import json |
| 25 | +import base64 |
| 26 | +import hashlib |
| 27 | +import mimetypes |
| 28 | +import html |
| 29 | +import shutil |
| 30 | +from bs4 import BeautifulSoup |
| 31 | +from datetime import datetime, timezone |
| 32 | +from math import ceil |
| 33 | + |
# Configuration
# Folder holding the flat Google Takeout Keep export (HTML, JSON, images).
SOURCE_DIR = "keep_source"
# Folder that receives the generated .enex files.
OUTPUT_DIR = "evernote_output"
# Maximum number of notes written into a single .enex file.
CHUNK_SIZE = 100
# Warnings about skipped or problematic items are appended to this file.
LOG_FILE = os.path.join(OUTPUT_DIR, "migration_log.txt")
| 39 | + |
def escape_xml(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by their entities.

    Quote characters are left as they are, so the result is meant for
    element text rather than attribute values.
    """
    escaped = html.escape(text, quote=False)
    return escaped
| 42 | + |
def format_keep_time(usec):
    """Convert a microsecond Unix timestamp to a ``YYYYMMDDTHHMMSSZ`` string.

    Args:
        usec: Microseconds since the Unix epoch, as an int or anything
            ``int()`` accepts (e.g. a numeric string).

    Returns:
        The UTC time formatted as ``%Y%m%dT%H%M%SZ``. When *usec* cannot be
        converted or is outside the range the platform supports, the
        current UTC time is returned instead.
    """
    try:
        # Timezone-aware conversion: utcfromtimestamp() is deprecated since
        # Python 3.12 and yields a naive datetime.
        moment = datetime.fromtimestamp(int(usec) / 1_000_000, tz=timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        # Best-effort fallback for missing, malformed or out-of-range values.
        moment = datetime.now(timezone.utc)
    return moment.strftime('%Y%m%dT%H%M%SZ')
| 49 | + |
def log_warning(msg):
    """Append *msg*, followed by a newline, to the file named by ``LOG_FILE``."""
    line = msg + '\n'
    with open(LOG_FILE, mode='a', encoding='utf-8') as handle:
        handle.write(line)
| 53 | + |
def _cdata_safe(markup):
    """Return *markup* with every ``]]>`` split across two CDATA sections.

    A literal ``]]>`` inside the note body would otherwise terminate the
    ``<![CDATA[ ... ]]>`` wrapper early and corrupt the .enex file.
    """
    return markup.replace(']]>', ']]]]><![CDATA[>')


def _convert_checklists(soup, content):
    """Rewrite Keep checklist items inside *content* as ``<en-todo>`` entries.

    Returns the number of list items that were converted.
    """
    converted = 0
    for checklist in content.find_all('ul', class_='list'):
        for li in checklist.find_all('li', class_='listitem'):
            bullet = li.find('span', class_='bullet')
            text = li.find('span', class_='text')
            if not (bullet and text):
                continue
            is_checked = bullet.get_text(strip=True) in ('☑', '✓', '✔')
            todo = soup.new_tag('en-todo', checked='true' if is_checked else 'false')
            # Read the label before clear() detaches the span from the item.
            label = text.get_text(strip=True)
            li.clear()
            li.append(todo)
            li.append(' ' + label)
            converted += 1
    return converted


def _read_image(source_dir, src, filename):
    """Load the bytes and MIME type behind an ``<img src>`` value.

    Handles ``data:image/...`` URIs and paths relative to *source_dir*.
    Returns ``(data, mime_type)``; both are ``None`` when the image cannot
    be resolved. Decode and read failures are written to the migration log.
    """
    if src.startswith('data:image/'):
        try:
            header, encoded = src.split(',', 1)
            mime_type = header.split(';')[0].split(':')[1]
            return base64.b64decode(encoded), mime_type
        except (ValueError, IndexError) as e:
            # binascii.Error (bad base64) is a ValueError subclass.
            log_warning(f"[WARN] Failed to decode base64 image in {filename}: {e}")
            return None, None

    full_path = os.path.join(source_dir, src)
    if os.path.isfile(full_path):
        try:
            with open(full_path, 'rb') as img_file:
                image_data = img_file.read()
        except OSError as e:
            log_warning(f"[WARN] Failed to load local image '{src}' in {filename}: {e}")
            return None, None
        mime_type, _ = mimetypes.guess_type(full_path)
        return image_data, mime_type

    return None, None


def _collect_resources(soup, source_dir, filename):
    """Build the ``<resource>`` and ``<en-media>`` markup for a note's images.

    Returns ``(resources_xml, media_tags, image_count)``.
    """
    resource_parts = []
    media_parts = []
    for img in soup.find_all('img'):
        src = img.get('src', '')
        image_data, mime_type = _read_image(source_dir, src, filename)
        if not (image_data and mime_type):
            continue

        encoded = base64.b64encode(image_data).decode('utf-8')
        file_ext = mimetypes.guess_extension(mime_type) or '.bin'
        md5_hash = hashlib.md5(image_data).hexdigest()

        if src.startswith('data:'):
            # basename() of a data URI would return a slice of the base64
            # payload (it contains '/'), so use a generated name instead.
            original_filename = f'image{file_ext}'
        else:
            original_filename = os.path.basename(src) or f'image{file_ext}'

        resource_parts.append(f'''
<resource>
  <data encoding="base64">
{encoded}
  </data>
  <mime>{mime_type}</mime>
  <resource-attributes>
    <file-name>{escape_xml(original_filename)}</file-name>
  </resource-attributes>
</resource>''')
        media_parts.append(f'<en-media type="{mime_type}" hash="{md5_hash}"/>\n')
        print(f"[IMG] Embedded image '{original_filename}' as {mime_type}")

    return ''.join(resource_parts), ''.join(media_parts), len(media_parts)


def _read_note_metadata(json_path, filename):
    """Read labels, pinned flag and timestamps from a note's JSON sidecar.

    Returns ``(tags, created_time, updated_time, is_pinned)``. Timestamps
    that are missing or zero fall back to the current UTC time. A sidecar
    that cannot be read or parsed is logged and whatever was collected up
    to that point is returned.
    """
    now = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    tags = []
    created_time = updated_time = now
    is_pinned = False

    if not os.path.exists(json_path):
        return tags, created_time, updated_time, is_pinned

    try:
        with open(json_path, 'r', encoding='utf-8') as jf:
            json_data = json.load(jf)

        for label in json_data.get('labels') or []:
            name = label.get('name')
            if name:  # skip labels with a missing, null or empty name
                tags.append(str(name))

        if json_data.get('isPinned'):
            tags.append('pinned')
            is_pinned = True

        created_raw = json_data.get('createdTimestampUsec')
        if created_raw:
            created_time = format_keep_time(created_raw)
        edited_raw = json_data.get('userEditedTimestampUsec')
        if edited_raw:
            updated_time = format_keep_time(edited_raw)
    except Exception as e:
        # Deliberately broad: a bad sidecar must not abort the whole export.
        log_warning(f"[WARN] Failed to parse JSON for {filename}: {e}")

    return tags, created_time, updated_time, is_pinned


def create_enex_chunks(source_dir, output_dir, chunk_size=100):
    """Convert the Keep HTML notes in *source_dir* into chunked .enex files.

    Every ``*.html`` file in *source_dir* becomes one note. Metadata is read
    from the ``.json`` file with the same base name when it exists, and
    images referenced by ``<img>`` tags are attached as base64 resources.
    Notes are written to ``output_NNN.enex`` files in *output_dir* and a
    summary is printed at the end.

    Args:
        source_dir: Folder containing the Keep HTML/JSON/image files.
        output_dir: Folder for the .enex files; created if it is missing.
        chunk_size: Maximum number of notes per .enex file.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    os.makedirs(output_dir, exist_ok=True)

    html_files = sorted(f for f in os.listdir(source_dir) if f.endswith('.html'))
    enex_notes = []
    media_count = 0
    pinned_count = 0
    checklist_count = 0

    for filename in html_files:
        base_name = os.path.splitext(filename)[0]
        html_path = os.path.join(source_dir, filename)
        json_path = os.path.join(source_dir, f"{base_name}.json")

        try:
            with open(html_path, 'r', encoding='utf-8') as f:
                soup = BeautifulSoup(f, 'html.parser')
        except Exception as e:
            # Deliberately broad: one unreadable note must not stop the run.
            log_warning(f"[ERROR] Failed to parse HTML: {filename} — {e}")
            continue

        title_tag = soup.find('title')
        # An absent or blank <title> falls back to a placeholder.
        title = (title_tag.text.strip() if title_tag else '') or 'Untitled'

        content = soup.find('div', {'class': 'content'}) or soup.body
        if content is None:
            log_warning(f"[SKIP] No usable content found in {filename}")
            continue

        # Convert Google Keep checklist HTML to Evernote-compatible todos
        checklist_count += _convert_checklists(soup, content)
        content_html = content.decode_contents()

        # Process embedded images
        resources, media_tags, embedded = _collect_resources(soup, source_dir, filename)
        media_count += embedded

        # Extract metadata from JSON (tags, timestamps, pinned)
        tags, created_time, updated_time, is_pinned = _read_note_metadata(json_path, filename)
        if is_pinned:
            pinned_count += 1

        tag_elements = ''.join(f'<tag>{escape_xml(tag)}</tag>' for tag in tags)
        full_en_note = _cdata_safe(f"{content_html}\n{media_tags}")

        note = f'''
<note>
  <title>{escape_xml(title)}</title>
  {tag_elements}
  <content><![CDATA[<en-note>{full_en_note}</en-note>]]></content>
  <created>{created_time}</created>
  <updated>{updated_time}</updated>
  {resources}
</note>'''

        enex_notes.append(note)
        print(f"[INFO] Parsed note: {title.encode('utf-8', errors='replace').decode('utf-8')}")

    # Split into .enex chunks
    total_chunks = ceil(len(enex_notes) / chunk_size)
    for i in range(total_chunks):
        chunk_notes = enex_notes[i * chunk_size:(i + 1) * chunk_size]
        chunk_filename = f'output_{i+1:03}.enex'
        chunk_path = os.path.join(output_dir, chunk_filename)

        with open(chunk_path, 'w', encoding='utf-8') as f:
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write('<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">\n')
            f.write(f'<en-export export-date="{datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")}" application="KeepToEvernoteScript" version="1.0">\n')
            for note in chunk_notes:
                # Round-trip through UTF-8 so lone surrogates cannot abort the write.
                f.write(note.encode('utf-8', errors='replace').decode('utf-8'))
            f.write('\n</en-export>')

        print(f"[OK] Wrote {chunk_filename} with {len(chunk_notes)} notes")

    # Final summary
    print("\n=== EXPORT SUMMARY ===")
    print(f"📝 Notes exported : {len(enex_notes)}")
    print(f"📦 ENEX files created : {total_chunks}")
    print(f"🖼️ Images embedded : {media_count}")
    print(f"☑️ Checkboxes converted: {checklist_count}")
    print(f"📌 Pinned notes : {pinned_count}")
    print(f"📄 Migration log : {LOG_FILE if os.path.exists(LOG_FILE) else 'No issues logged.'}")
    print("✅ Done! Your Evernote files are in:", output_dir)
| 212 | + |
def main():
    """Check the source and output folders, then run the conversion.

    Exits with status 1 when ``SOURCE_DIR`` is not a folder or holds no
    recognisable Keep files, and with status 0 when the user declines to
    clear a non-empty ``OUTPUT_DIR``.
    """
    # --- Initial folder checks ---
    # isdir() rather than exists(): a regular file with that name would
    # otherwise reach os.listdir() and raise.
    if not os.path.isdir(SOURCE_DIR):
        print(f"[ERROR] Source folder '{SOURCE_DIR}' not found.")
        sys.exit(1)

    keep_suffixes = ('.html', '.json', '.png', '.jpg', '.jpeg', '.gif')
    if not any(name.endswith(keep_suffixes) for name in os.listdir(SOURCE_DIR)):
        print(f"[ERROR] Folder '{SOURCE_DIR}' appears empty or missing Keep data.")
        print("Please place your Google Takeout Keep HTML, JSON, and image files into it (flat structure).")
        sys.exit(1)

    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    elif os.listdir(OUTPUT_DIR):
        try:
            choice = input(f"[WARN] Output folder '{OUTPUT_DIR}' is not empty. Clear it? [Y/n]: ").strip().lower()
        except EOFError:
            # No interactive stdin: never delete data without an explicit answer.
            choice = 'n'
        if choice in ("y", "yes", ""):
            shutil.rmtree(OUTPUT_DIR)
            os.makedirs(OUTPUT_DIR)
            print(f"[OK] Output folder '{OUTPUT_DIR}' cleared.")
        else:
            print("[EXIT] Aborted by user.")
            sys.exit(0)

    # --- Go ---
    create_enex_chunks(SOURCE_DIR, OUTPUT_DIR, CHUNK_SIZE)


# Guarded so that importing this module does not prompt, delete or exit.
if __name__ == "__main__":
    main()
0 commit comments