diff --git a/.gitignore b/.gitignore index 73a7a40..7016e94 100644 --- a/.gitignore +++ b/.gitignore @@ -112,7 +112,7 @@ venv.bak/ .DS_Store # dev testing -setup.py +# setup.py test* .olauth .olignore diff --git a/README.md b/README.md index 466751e..ae4a3e7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ + +olsync is for overleaf. +olcesync is for the community edition. +Remember to click "accept all cookies" when logging in. + + # Overleaf-Sync ### Easy Overleaf Two-Way Synchronization diff --git a/olsync/__init__.py b/olcesync/olcesync/__init__.py similarity index 100% rename from olsync/__init__.py rename to olcesync/olcesync/__init__.py diff --git a/olcesync/olcesync/comm.py b/olcesync/olcesync/comm.py new file mode 100644 index 0000000..13474ef --- /dev/null +++ b/olcesync/olcesync/comm.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# created on 2024-04-30 17:25 by J. Zhao + +# Where to get the CSRF Token and where to send the login request to +# JS snippet to get the first link +JAVASCRIPT_EXTRACT_PROJECT_URL = "document.getElementsByClassName('dash-cell-name')[1].firstChild.href" +# JS snippet to extract the csrfToken +JAVASCRIPT_CSRF_EXTRACTOR = "document.getElementsByName('ol-csrfToken')[0].content" +# Name of the cookies we want to extract +COOKIE_NAMES = ["overleaf.sid"] diff --git a/olcesync/olcesync/olbrowserlogin.py b/olcesync/olcesync/olbrowserlogin.py new file mode 100644 index 0000000..e8f44ab --- /dev/null +++ b/olcesync/olcesync/olbrowserlogin.py @@ -0,0 +1,122 @@ +"""Ol Browser Login Utility""" +################################################## +# MIT License +################################################## +# File: olbrowserlogin.py +# Description: Overleaf Browser Login Utility +# Author: Moritz Glöckl +# License: MIT +# Version: 1.2.0 +################################################## + +from PySide6.QtCore import QCoreApplication, QUrl +from PySide6.QtWebEngineCore import (QWebEnginePage, 
def on_cert_error(e):
    """Certificate-error handler connected to the login page.

    NOTE(review): every certificate error is accepted unconditionally, so
    the browser login performs no TLS certificate validation.
    """
    e.acceptCertificate()


class OlBrowserLoginWindow(QMainWindow):
    """
    Overleaf Browser Login Utility

    Shows the server's login page in an embedded web view, records the
    cookies listed in COOKIE_NAMES as they are set, and reads the CSRF
    token from a project page once the dashboard has been reached.
    """

    def __init__(self, server_ip, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Login results, exposed through the read-only properties below.
        self._cookies = {}
        self._csrf = ""
        self._login_success = False

        self.LOGIN_URL = "https://{}/login".format(server_ip)
        self.PROJECT_URL = "https://{}/project".format(server_ip)

        self.webview = QWebEngineView()

        # Dedicated profile: watch cookies as they arrive, keep none on disk.
        self.profile = QWebEngineProfile(self.webview)
        self.cookie_store = self.profile.cookieStore()
        self.cookie_store.cookieAdded.connect(self.handle_cookie_added)
        self.profile.setPersistentCookiesPolicy(
            QWebEngineProfile.NoPersistentCookies)
        self.profile.settings().setAttribute(
            QWebEngineSettings.JavascriptEnabled, True)

        page = QWebEnginePage(self.profile, self)
        page.certificateError.connect(on_cert_error)
        self.webview.setPage(page)
        self.webview.load(QUrl.fromUserInput(self.LOGIN_URL))
        self.webview.loadFinished.connect(self.handle_load_finished)

        self.setCentralWidget(self.webview)
        self.resize(600, 700)

    def handle_load_finished(self):
        """Start the CSRF extraction once the dashboard URL has loaded."""
        # Any page other than the dashboard (e.g. the login form) is ignored.
        if self.webview.url().toString() != self.PROJECT_URL:
            return

        def store_csrf(token):
            # Final step: keep the token, flag success, leave the event loop.
            self._csrf = token
            self._login_success = True
            QCoreApplication.quit()

        def extract_csrf(_ok):
            self.webview.page().runJavaScript(JAVASCRIPT_CSRF_EXTRACTOR, 0,
                                              store_csrf)

        def open_project(project_url):
            # Navigate to the link found on the dashboard, then read the
            # token from that page when it finishes loading.
            self.webview.load(QUrl.fromUserInput(project_url))
            self.webview.loadFinished.connect(extract_csrf)

        self.webview.page().runJavaScript(JAVASCRIPT_EXTRACT_PROJECT_URL, 0,
                                          open_project)

    def handle_cookie_added(self, cookie):
        """Remember the cookie when its name is listed in COOKIE_NAMES."""
        name = cookie.name().data().decode('utf-8')
        if name not in COOKIE_NAMES:
            return
        self._cookies[name] = cookie.value().data().decode('utf-8')

    @property
    def cookies(self):
        return self._cookies

    @property
    def csrf(self):
        return self._csrf

    @property
    def login_success(self):
        return self._login_success


def login(server_ip):
    """Run the browser login against `server_ip`.

    Returns a dict with the keys "cookie" and "csrf", or None when the
    window was closed before the login succeeded.
    """
    from PySide6.QtCore import QLoggingCategory
    QLoggingCategory.setFilterRules('''\
        qt.webenginecontext.info=false
    ''')

    app = QApplication([])
    window = OlBrowserLoginWindow(server_ip)
    window.show()
    app.exec()

    if window.login_success:
        return {"cookie": window.cookies, "csrf": window.csrf}
    return None
PATH_SEP = "/"  # Use hardcoded path separator for both windows and posix system


def search_dic(name, dic):
    """Search `name` in dic['docs'] and dic['fileRefs'].

    Returns a `(file_id, file_type)` tuple where file_type is 'doc' or
    'file', or `(None, None)` when no entry carries that name.
    """
    for file_type, key in (('doc', 'docs'), ('file', 'fileRefs')):
        for entry in dic[key]:
            if entry['name'] == name:
                return entry['_id'], file_type
    return None, None


class OverleafClient(object):
    """
    Overleaf API Wrapper
    Supports login, querying all projects, querying a specific project, downloading a project and
    uploading a file to a project.

    Every request is sent with `verify=False`, i.e. without TLS certificate
    verification.
    """

    @staticmethod
    def filter_projects(json_content, more_attrs=None):
        """Yield the projects that are neither archived nor trashed.

        Params:
            json_content: iterable of project dicts
            more_attrs: optional dict; a project is only yielded when
                        `project.get(key) == value` holds for every item
        """
        more_attrs = more_attrs or {}
        for p in json_content:
            if p.get("archived") or p.get("trashed"):
                continue
            if all(p.get(k) == v for k, v in more_attrs.items()):
                yield p

    def __init__(self, server_ip, cookie=None, csrf=None):
        self._cookie = cookie  # Store the cookie for authenticated requests
        self._csrf = csrf  # Store the CSRF token since it is needed for some requests

        self.BASE_URL = "https://" + server_ip
        self.LOGIN_URL = self.BASE_URL + "/login"
        self.PROJECT_URL = self.BASE_URL + "/project"
        self.DOWNLOAD_URL = self.BASE_URL + "/project/{}/download/zip"
        self.UPLOAD_URL = self.BASE_URL + "/project/{}/upload"
        # The URL to create folders
        self.FOLDER_URL = self.BASE_URL + "/project/{}/folder"
        # The URL to delete files: project id, entity type ('doc'/'file'), entity id
        self.DELETE_URL = self.BASE_URL + "/project/{}/{}/{}"
        # The URL to compile the project
        self.COMPILE_URL = self.BASE_URL + "/project/{}/compile?enable_pdf_caching=true"

    def login(self, username, password):
        """
        WARNING - DEPRECATED - Not working as Overleaf introduced captchas
        Login to the Overleaf Service with a username and a password
        Params: username, password
        Returns: Dict of cookie and CSRF, or None when the login request
                 does not answer with status 200
        """
        # verify=False on every request, like the rest of this client.
        get_login = reqs.get(self.LOGIN_URL, verify=False)
        self._csrf = BeautifulSoup(get_login.content,
                                   'html.parser').find('input', {
                                       'name': '_csrf'
                                   }).get('value')
        login_json = {
            "_csrf": self._csrf,
            "email": username,
            "password": password
        }
        post_login = reqs.post(self.LOGIN_URL,
                               json=login_json,
                               cookies=get_login.cookies,
                               verify=False)

        # On a successful authentication the Overleaf API returns a new authenticated cookie.
        if post_login.status_code != 200:
            return None

        self._cookie = post_login.cookies

        # Enrich cookie with the GCLB cookie from the GET request above.
        # Only copied when the server actually set it, so a server that
        # sends no such cookie does not raise KeyError here.
        if 'GCLB' in get_login.cookies:
            self._cookie['GCLB'] = get_login.cookies['GCLB']

        # CSRF changes after making the login request, new CSRF token will be on the projects page
        projects_page = reqs.get(self.PROJECT_URL,
                                 cookies=self._cookie,
                                 verify=False)
        self._csrf = BeautifulSoup(projects_page.content, 'html.parser').find(
            'meta', {
                'name': 'ol-csrfToken'
            }).get('content')

        return {"cookie": self._cookie, "csrf": self._csrf}

    # Convert cookie from CookieJar to string
    def get_cookie_str(self):
        """Return the cookies named in COOKIE_NAMES as a `Cookie` header value."""
        return "; ".join([f"{n}={self._cookie[n]}" for n in COOKIE_NAMES])

    def _fetch_projects(self):
        """Load the dashboard page and return the raw project list embedded in it."""
        projects_page = reqs.get(self.PROJECT_URL,
                                 cookies=self._cookie,
                                 verify=False)
        blob = BeautifulSoup(projects_page.content,
                             'html.parser').find('meta', {
                                 'name': 'ol-prefetchedProjectsBlob'
                             }).get('content')
        return json.loads(blob)['projects']

    def all_projects(self):
        """
        Get all of a user's active projects (= not archived and not trashed)
        Returns: List of project objects
        """
        return list(OverleafClient.filter_projects(self._fetch_projects()))

    def get_project(self, project_name):
        """
        Get a specific project by project_name
        Params: project_name, the name of the project
        Returns: project object, or None when no active project has that name
        """
        return next(
            OverleafClient.filter_projects(self._fetch_projects(),
                                           {"name": project_name}), None)

    def download_project(self, project_id):
        """
        Download project in zip format
        Params: project_id, the id of the project
        Returns: bytes string (zip file)
        """
        r = reqs.get(self.DOWNLOAD_URL.format(project_id),
                     stream=True,
                     cookies=self._cookie,
                     verify=False)
        return r.content

    def create_folder(self, project_id, parent_folder_id, folder_name):
        """
        Create a new folder in a project

        Params:
        project_id: the id of the project
        parent_folder_id: the id of the parent folder, root is the project_id
        folder_name: how the folder will be named

        Returns: the decoded JSON response on success, or None when the
                 server answers 400 (folder already exists)
        Raises: requests.HTTPError for any other unsuccessful response
        """
        params = {"parent_folder_id": parent_folder_id, "name": folder_name}
        headers = {"X-Csrf-Token": self._csrf}
        r = reqs.post(self.FOLDER_URL.format(project_id),
                      cookies=self._cookie,
                      headers=headers,
                      json=params,
                      verify=False)

        if r.ok:
            return json.loads(r.content)
        # status_code is an int; comparing it with a str never matched.
        if r.status_code == 400:
            # Folder already exists
            return None
        raise reqs.HTTPError()

    def get_project_infos(self, project_id):
        """
        Get detailed project infos about the project

        Params:
        project_id: the id of the project

        Returns: project details
        """
        project_infos = None

        # Callback function for the joinProjectResponse event
        def set_project_infos(project_infos_dict):
            # Set project_infos variable in outer scope
            nonlocal project_infos
            project_infos = project_infos_dict.get("project", {})

        # Connect to Overleaf Socket.IO, send a time parameter and the cookies
        socket_io = SocketIO(self.BASE_URL,
                             params={
                                 't': int(time.time()),
                                 'projectId': project_id
                             },
                             headers={'Cookie': self.get_cookie_str()},
                             verify=False)

        # Wait until we connect to the socket
        socket_io.on('connect', lambda: None)
        socket_io.wait_for_callbacks()

        # Receive the project infos.
        # NOTE(review): this loop has no upper bound; it keeps waiting for
        # as long as no joinProjectResponse event arrives.
        socket_io.on('joinProjectResponse', set_project_infos)
        while project_infos is None:
            socket_io.wait(1)

        # Disconnect from the socket if still connected
        if socket_io.connected:
            socket_io.disconnect()

        return project_infos

    def upload_file(self, project_id, project_infos, file_name, file_size, file):
        """
        Upload a file to the project

        Params:
        project_id: the id of the project
        project_infos: project details as returned by get_project_infos
        file_name: how the file will be named, folders separated by PATH_SEP
        file_size: the size of the file in bytes (not used by this method)
        file: the file itself

        Returns: True on success, False on fail
        Raises: requests.HTTPError when a missing remote folder cannot be created
        """
        # Everything before the last separator is the folder path.
        *local_folders, only_file_name = file_name.split(PATH_SEP)

        # Start at the root folder and descend, creating folders as needed.
        folder_id = project_infos['rootFolder'][0]['_id']
        current_overleaf_folder = project_infos['rootFolder'][0]['folders']

        for local_folder in local_folders:
            for remote_folder in current_overleaf_folder:
                # Folder names are matched case-insensitively
                if local_folder.lower() == remote_folder['name'].lower():
                    break
            else:
                # Create the folder if it doesn't exist
                remote_folder = self.create_folder(project_id, folder_id,
                                                   local_folder)
                if remote_folder is None:
                    # The server says the folder exists although
                    # project_infos does not list it; without its id the
                    # upload cannot continue.
                    raise reqs.HTTPError(
                        "Folder {} already exists on remote but is not part "
                        "of the project infos".format(local_folder))
                current_overleaf_folder.append(remote_folder)
            folder_id = remote_folder['_id']
            current_overleaf_folder = remote_folder['folders']

        # Upload the file to the predefined folder
        params = {'folder_id': folder_id}
        data = {
            "relativePath": "null",
            "name": only_file_name,
        }
        files = {"qqfile": (file_name, file)}
        headers = {
            "X-CSRF-TOKEN": self._csrf,
        }

        r = reqs.post(self.UPLOAD_URL.format(project_id),
                      cookies=self._cookie,
                      headers=headers,
                      params=params,
                      data=data,
                      files=files,
                      verify=False)

        # status_code is an int; the former comparison with str(200) was
        # always False, so every upload was reported as failed.
        if r.status_code != 200:
            return False
        return bool(json.loads(r.content).get("success"))

    def delete_file(self, project_id, project_infos, file_name):
        """
        Deletes a project's file

        Params:
        project_id: the id of the project
        project_infos: project details as returned by get_project_infos
        file_name: name of the file to delete, folders separated by PATH_SEP

        Returns: True on success, False on fail
        """
        *folder_names, only_file_name = file_name.split(PATH_SEP)

        # Walk down from the root folder; names are matched exactly.
        remote_folder = project_infos['rootFolder'][0]
        for folder_name in folder_names:
            remote_folder = next((f for f in remote_folder['folders']
                                  if f['name'] == folder_name), None)
            if remote_folder is None:
                print("Local folder {} does not exist in remote!".format(
                    folder_name))
                return False

        file_id, file_type = search_dic(only_file_name, remote_folder)

        # File not found!
        if file_id is None:
            return False

        headers = {"X-Csrf-Token": self._csrf}

        r = reqs.delete(self.DELETE_URL.format(project_id, file_type, file_id),
                        cookies=self._cookie,
                        headers=headers,
                        verify=False)

        # status_code is an int; the former comparison with '204' was
        # always False.
        return r.status_code == 204

    def download_pdf(self, project_id):
        """
        Compiles and returns a project's PDF

        Params:
        project_id: the id of the project

        Returns: PDF file name and content on success, None when the PDF
                 download request fails
        Raises: requests.HTTPError when the compile request fails, the
                compile status is not "success", or no PDF is among the
                output files
        """
        headers = {"X-Csrf-Token": self._csrf}

        body = {
            "check": "silent",
            "draft": False,
            "incrementalCompilesEnabled": True,
            "rootDoc_id": "",
            "stopOnFirstError": False
        }

        r = reqs.post(self.COMPILE_URL.format(project_id),
                      cookies=self._cookie,
                      headers=headers,
                      json=body,
                      verify=False)

        if not r.ok:
            raise reqs.HTTPError()

        compile_result = json.loads(r.content)

        if compile_result["status"] != "success":
            raise reqs.HTTPError()

        # Report a missing PDF like the other compile failures instead of
        # leaking StopIteration from next().
        pdf_file = next((v for v in compile_result['outputFiles']
                         if v['type'] == 'pdf'), None)
        if pdf_file is None:
            raise reqs.HTTPError()

        download_req = reqs.get(self.BASE_URL + pdf_file['url'],
                                cookies=self._cookie,
                                headers=headers,
                                verify=False)

        if download_req.ok:
            return pdf_file['path'], download_req.content

        return None
+) +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), + help="Relative path to load the persisted Overleaf cookie.") +@click.option('-p', + '--path', + 'sync_path', + default=".", + type=click.Path(exists=True), + help="Path of the project to sync.") +@click.option( + '-i', + '--olignore', + 'olignore_path', + default=".olignore", + type=click.Path(exists=False), + help= + "Path to the .olignore file relative to sync path (ignored if syncing from remote to local). See " + "fnmatch / unix filename pattern matching for information on how to use it.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") +@click.version_option(package_name='overleaf-sync') +@click.pass_context +def main(ctx, local, remote, project_name, cookie_path, sync_path, olignore_path, + verbose): + if ctx.invoked_subcommand is None: + for i in range(5): + if not os.path.isfile(cookie_path): + os.chdir('..') + print("Current directory:", os.getcwd()) + else: + break + if not os.path.isfile(cookie_path): + raise click.ClickException("Persisted Overleaf cookie not found. 
Please login or check store path.") + + with open(cookie_path, 'rb') as f: + store = pickle.load(f) + + server_ip = get_key("server") + overleaf_client = OverleafClient(server_ip, store["cookie"], + store["csrf"]) + + # Change the current directory to the specified sync path + os.chdir(sync_path) + + if project_name: update_info(project=project_name) + else: + project_name = get_key("project") + if not project_name: + project_name = os.path.basename(os.getcwd()) + update_info(project=project_name) + + print("Using project name:", project_name) + project = execute_action( + lambda: overleaf_client.get_project(project_name), "Querying project", + "Project queried successfully.", "Project could not be queried.", + verbose) + + project_infos = execute_action( + lambda: overleaf_client.get_project_infos(project["id"]), + "Querying project details", "Project details queried successfully.", + "Project details could not be queried.", verbose) + + zip_file = execute_action( + lambda: zipfile.ZipFile( + io.BytesIO(overleaf_client.download_project(project["id"]))), + "Downloading project", "Project downloaded successfully.", + "Project could not be downloaded.", verbose) + + sync = not (local or remote) + + if remote or sync: + sync_func( + files_from=zip_file.namelist(), + deleted_files=[ + f for f in olignore_keep_list(olignore_path) + if f not in zip_file.namelist() and not sync + ], + create_file_at_to=lambda name: write_file( + name, zip_file.read(name)), + delete_file_at_to=lambda name: delete_file(name), + create_file_at_from=lambda name: overleaf_client.upload_file( + project["id"], project_infos, name, os.path.getsize(name), + open(name, 'rb')), + from_exists_in_to=lambda name: os.path.isfile(name), + from_equal_to_to=lambda name: open(name, 'rb').read( + ) == zip_file.read(name), + from_newer_than_to=lambda name: dateutil.parser.isoparse(project[ + "lastUpdated"]).timestamp() > os.path.getmtime(name), + from_name="remote", + to_name="local", + verbose=verbose) + if 
local or sync: + sync_func( + files_from=olignore_keep_list(olignore_path), + deleted_files=[ + f for f in zip_file.namelist() + if f not in olignore_keep_list(olignore_path) and not sync + ], + create_file_at_to=lambda name: overleaf_client.upload_file( + project["id"], project_infos, name, os.path.getsize(name), + open(name, 'rb')), + delete_file_at_to=lambda name: overleaf_client.delete_file( + project["id"], project_infos, name), + create_file_at_from=lambda name: write_file( + name, zip_file.read(name)), + from_exists_in_to=lambda name: name in zip_file.namelist(), + from_equal_to_to=lambda name: open(name, 'rb').read() == zip_file.read(name), + from_newer_than_to=lambda name: os.path.getmtime(name) > dateutil. + parser.isoparse(project["lastUpdated"]).timestamp(), + from_name="local", + to_name="remote", + verbose=verbose) + + +@main.command() +@click.option('-s', '--server_ip', help="Server IP.") +@click.option('--path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), + help="Path to store the persisted Overleaf cookie.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") +def login(server_ip, cookie_path, verbose): + update_info(server=server_ip) + if os.path.isfile(cookie_path) and not click.confirm( + 'Persisted Overleaf cookie already exist. Do you want to override it?' + ): + return + click.clear() + execute_action( + lambda: login_handler(server_ip, cookie_path), "Login", + "Login successful. Cookie persisted as `" + + click.format_filename(cookie_path) + "`. You may now sync your project.", + "Login failed. 
Please try again.", verbose) + + +@main.command(name='list') +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), + help="Relative path to load the persisted Overleaf cookie.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") +def list_projects(cookie_path, verbose): + + def query_projects(): + for index, p in enumerate( + sorted(overleaf_client.all_projects(), + key=lambda x: x['lastUpdated'], + reverse=True)): + if not index: + click.echo("\n") + click.echo( + f"{dateutil.parser.isoparse(p['lastUpdated']).strftime('%m/%d/%Y, %H:%M:%S')} - {p['name']}" + ) + return True + + if not os.path.isfile(cookie_path): + raise click.ClickException( + "Persisted Overleaf cookie not found. Please login or check store path." + ) + + with open(cookie_path, 'rb') as f: + store = pickle.load(f) + + server_ip = get_key("server") + overleaf_client = OverleafClient(server_ip, store["cookie"], store["csrf"]) + + click.clear() + execute_action(query_projects, "Querying all projects", + "Querying all projects successful.", + "Querying all projects failed. Please try again.", verbose) + + +@main.command(name='download') +@click.option( + '-n', + '--name', + 'project_name', + default="", + help= + "Specify the Overleaf project name instead of the default name of the sync directory." 
+) +@click.option('--download-path', + 'download_path', + default=".", + type=click.Path(exists=True)) +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), + help="Relative path to load the persisted Overleaf cookie.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") +def download_pdf(project_name, download_path, cookie_path, verbose): + + def download_project_pdf(): + nonlocal project_name + project_name = project_name or os.path.basename(os.getcwd()) + project = execute_action( + lambda: overleaf_client.get_project(project_name), "Querying project", + "Project queried successfully.", "Project could not be queried.", + verbose) + + file_name, content = overleaf_client.download_pdf( + project["id"]) # type:ignore + + if file_name and content: + # Change the current directory to the specified sync path + os.chdir(download_path) + open(file_name, 'wb').write(content) + + return True + + if not os.path.isfile(cookie_path): + raise click.ClickException( + "Persisted Overleaf cookie not found. Please login or check store path." + ) + + with open(cookie_path, 'rb') as f: + store = pickle.load(f) + + server_ip = get_key("server") + overleaf_client = OverleafClient(server_ip, store["cookie"], store["csrf"]) + + click.clear() + + execute_action(download_project_pdf, "Downloading project's PDF", + "Downloading project's PDF successful.", + "Downloading project's PDF failed. 
def login_handler(server, path):
    """Run the browser login for `server` and pickle the result to `path`.

    Returns True when the login data was written, False when the login
    returned nothing.
    """
    store = olbrowserlogin.login(server)
    if store is None:
        return False
    with open(path, 'wb+') as f:
        pickle.dump(store, f)
    return True


def delete_file(path):
    """Remove the local file at `path`.

    Does nothing when `path` is its own directory name or when its parent
    directory does not exist; otherwise the file is removed with os.remove.
    """
    parent = os.path.dirname(path)
    if parent == path:
        return
    if parent and not os.path.exists(parent):
        return
    os.remove(path)


def write_file(path, content):
    """Write the bytes `content` to `path`, creating parent directories as needed."""
    parent = os.path.dirname(path)
    if parent == path:
        return

    # path is a file
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(path, 'wb') as f:
        f.write(content)


def _apply_to_files(names, header, action, error_message, verbose):
    """Echo `header`, then call `action(name)` for every name in `names`.

    Nothing is printed for an empty list. The first failing action aborts
    with a click.ClickException carrying `error_message`; the traceback is
    printed first when `verbose` is set.
    """
    if not names:
        return
    click.echo(header)
    for name in names:
        click.echo("\t%s" % name)
        try:
            action(name)
        except Exception as err:
            if verbose:
                print(traceback.format_exc())
            raise click.ClickException(error_message) from err


def sync_func(files_from,
              deleted_files,
              create_file_at_to,
              delete_file_at_to,
              create_file_at_from,
              from_exists_in_to,
              from_equal_to_to,
              from_newer_than_to,
              from_name,
              to_name,
              verbose=False):
    """Sync files one way, from the [from] side to the [to] side.

    Params:
    files_from: names of the files present on the [from] side
    deleted_files: names that exist on [to] but no longer on [from]; the
                   user is asked per file to delete, restore or ignore it
    create_file_at_to: callable(name) creating/overwriting the file on [to]
    delete_file_at_to: callable(name) deleting the file on [to]
    create_file_at_from: callable(name) restoring the file on [from]
    from_exists_in_to: callable(name) -> bool
    from_equal_to_to: callable(name) -> bool, True when both sides match
    from_newer_than_to: callable(name) -> bool; when False the user must
                        confirm before the [to] copy is overwritten
    from_name, to_name: labels used in the messages
    verbose: print tracebacks of failing file operations

    Raises: click.ClickException when a file operation fails
    """
    click.echo("\nSyncing files from [%s] to [%s]" % (from_name, to_name))
    click.echo('=' * 40)

    newly_add_list = []
    update_list = []
    delete_list = []
    restore_list = []
    not_restored_list = []
    not_sync_list = []

    # Classify every file of the [from] side.
    for name in files_from:
        if not from_exists_in_to(name):
            newly_add_list.append(name)
            continue
        if from_equal_to_to(name):
            continue  # already in sync
        if not from_newer_than_to(name) and not click.confirm(
                '\n-> Warning: last-edit time stamp of file <%s> from [%s] is older than [%s].\nContinue to '
                'overwrite with an older version?' %
            (name, from_name, to_name)):
            not_sync_list.append(name)
            continue
        update_list.append(name)

    # Ask what to do with files that vanished from the [from] side.
    choice_targets = {
        "d": delete_list,
        "r": restore_list,
        "i": not_restored_list
    }
    for name in deleted_files:
        delete_choice = click.prompt(
            '\n-> Warning: file <%s> does not exist on [%s] anymore (but it still exists on [%s]).'
            '\nShould the file be [d]eleted, [r]estored or [i]gnored?' %
            (name, from_name, to_name),
            default="i",
            type=click.Choice(['d', 'r', 'i']))
        if delete_choice in choice_targets:
            choice_targets[delete_choice].append(name)

    _apply_to_files(
        newly_add_list,
        "\n[NEW] Following files created on [%s]" % to_name,
        create_file_at_to,
        "\n[ERROR] An error occurred while creating new file(s) on [%s]" %
        to_name, verbose)

    _apply_to_files(
        restore_list,
        "\n[NEW] Following files created on [%s]" % from_name,
        create_file_at_from,
        "\n[ERROR] An error occurred while creating new file(s) on [%s]" %
        from_name, verbose)

    _apply_to_files(
        update_list,
        "\n[UPDATE] Following files updated on [%s]" % to_name,
        create_file_at_to,
        "\n[ERROR] An error occurred while updating file(s) on [%s]" % to_name,
        verbose)

    # The failure message used to read "creating new file(s)" here.
    _apply_to_files(
        delete_list,
        "\n[DELETE] Following files deleted on [%s]" % to_name,
        delete_file_at_to,
        "\n[ERROR] An error occurred while deleting file(s) on [%s]" % to_name,
        verbose)

    if not_sync_list:
        click.echo(
            "\n[SKIP] Following files on [%s] have not been synced to [%s]" %
            (from_name, to_name))
        for name in not_sync_list:
            click.echo("\t%s" % name)

    if not_restored_list:
        click.echo(
            "\n[SKIP] Following files on [%s] have not been synced to [%s]" %
            (to_name, from_name))
        for name in not_restored_list:
            click.echo("\t%s" % name)

    click.echo("")
    click.echo("✅  Synced files from [%s] to [%s]" % (from_name, to_name))
    click.echo("")


def execute_action(action,
                   progress_message,
                   success_message,
                   fail_message,
                   verbose_error_logging=False):
    """Run `action()` behind a spinner and report the outcome.

    Returns the (truthy) result of `action`. A falsy result or an exception
    raised by `action` ends with click.ClickException(fail_message); the
    traceback is printed first when `verbose_error_logging` is set.
    """
    with yaspin(text=progress_message, color="green") as spinner:
        try:
            success = action()
        except Exception:
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            if verbose_error_logging:
                print(traceback.format_exc())
            success = False

        if success:
            spinner.write(success_message)
            spinner.ok("✅ ")
        else:
            spinner.fail("💥 ")
            raise click.ClickException(fail_message)

        return success


def olignore_keep_list(olignore_path):
    """
    The list of files to keep synced, with support for sub-folders.
    Should only be called when syncing from local to remote.

    Paths are relative to the current directory and returned in POSIX form.
    When `olignore_path` is a file, each of its lines is an fnmatch pattern
    and matching paths are left out.
    """
    # get list of files recursively (ignore .* files)
    files = glob.glob('**', recursive=True)

    if os.path.isfile(olignore_path):
        with open(olignore_path, 'r') as f:
            ignore_patterns = f.read().splitlines()
        files = [
            f for f in files
            if not any(fnmatch.fnmatch(f, p) for p in ignore_patterns)
        ]

    # Directories are dropped last, so patterns are matched against them too.
    return [Path(item).as_posix() for item in files if not os.path.isdir(item)]


def read_info():
    """Return the contents of `.olinfo` in the current directory, or {} when absent."""
    if not os.path.isfile(".olinfo"):
        return {}
    with open(".olinfo", 'r') as f:
        return json.load(f)


def get_key(key):
    """Return the value stored under `key` in `.olinfo`, or None when missing."""
    return read_info().get(key)


def update_info(**args):
    """Merge the keyword arguments into `.olinfo` and write the file back."""
    info = read_info()
    info.update(args)
    with open(".olinfo", "w") as fw:
        json.dump(info, fw)
+requests==2.* +beautifulsoup4==4.* +yaspin==2.* +python-dateutil~=2.8.1 +click==8.* +socketIO-client==0.5.7.4 +PySide6==6.* diff --git a/olcesync/setup.py b/olcesync/setup.py new file mode 100644 index 0000000..d69601e --- /dev/null +++ b/olcesync/setup.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# created on 2018-08-12 19:22 + +import setuptools + +setuptools.setup( + name="olcesync", + version="0.0.1", + author="J. Zhao", + author_email="author@example.com", + description="A small example package", + packages=setuptools.find_packages(), +) diff --git a/olsync/olsync/__init__.py b/olsync/olsync/__init__.py new file mode 100644 index 0000000..395db3c --- /dev/null +++ b/olsync/olsync/__init__.py @@ -0,0 +1,3 @@ +"""Overleaf Two-Way Sync Tool""" + +__version__ = '1.2.0' diff --git a/olsync/olbrowserlogin.py b/olsync/olsync/olbrowserlogin.py similarity index 59% rename from olsync/olbrowserlogin.py rename to olsync/olsync/olbrowserlogin.py index f5a8e9a..9cc539c 100644 --- a/olsync/olbrowserlogin.py +++ b/olsync/olsync/olbrowserlogin.py @@ -9,14 +9,20 @@ # Version: 1.2.0 ################################################## -from PySide6.QtCore import * -from PySide6.QtWidgets import * -from PySide6.QtWebEngineWidgets import * -from PySide6.QtWebEngineCore import QWebEngineProfile, QWebEngineSettings, QWebEnginePage +import requests as reqs +from PySide6.QtCore import QCoreApplication, QUrl +from PySide6.QtWebEngineCore import (QWebEnginePage, QWebEngineProfile, + QWebEngineSettings) +from PySide6.QtWebEngineWidgets import QWebEngineView +from PySide6.QtWidgets import QApplication, QMainWindow # Where to get the CSRF Token and where to send the login request to LOGIN_URL = "https://www.overleaf.com/login" -PROJECT_URL = "https://www.overleaf.com/project" # The dashboard URL +PROJECT_URL = "https://www.overleaf.com/project" # The dashboard URL +SOCKET_URL = "https://www.overleaf.com/socket.io/socket.io.js" + +# JS snippet to get the first link 
+JAVASCRIPT_EXTRACT_PROJECT_URL = "document.getElementsByClassName('dash-cell-name')[1].firstChild.href" # JS snippet to extract the csrfToken JAVASCRIPT_CSRF_EXTRACTOR = "document.getElementsByName('ol-csrfToken')[0].content" # Name of the cookies we want to extract @@ -41,9 +47,11 @@ def __init__(self, *args, **kwargs): self.profile = QWebEngineProfile(self.webview) self.cookie_store = self.profile.cookieStore() self.cookie_store.cookieAdded.connect(self.handle_cookie_added) - self.profile.setPersistentCookiesPolicy(QWebEngineProfile.NoPersistentCookies) + self.profile.setPersistentCookiesPolicy( + QWebEngineProfile.NoPersistentCookies) - self.profile.settings().setAttribute(QWebEngineSettings.JavascriptEnabled, True) + self.profile.settings().setAttribute(QWebEngineSettings.JavascriptEnabled, + True) webpage = QWebEnginePage(self.profile, self) self.webview.setPage(webpage) @@ -54,15 +62,21 @@ def __init__(self, *args, **kwargs): self.resize(600, 700) def handle_load_finished(self): + def callback(result): - self._csrf = result - self._login_success = True - QCoreApplication.quit() + + def callback(result): + self._csrf = result + self._login_success = True + QCoreApplication.quit() + + self.webview.load(QUrl.fromUserInput(result)) + self.webview.loadFinished.connect(lambda x: self.webview.page( + ).runJavaScript(JAVASCRIPT_CSRF_EXTRACTOR, 0, callback)) if self.webview.url().toString() == PROJECT_URL: - self.webview.page().runJavaScript( - JAVASCRIPT_CSRF_EXTRACTOR, 0, callback - ) + self.webview.page().runJavaScript(JAVASCRIPT_EXTRACT_PROJECT_URL, 0, + callback) def handle_cookie_added(self, cookie): cookie_name = cookie.name().data().decode('utf-8') @@ -84,9 +98,7 @@ def login_success(self): def login(): from PySide6.QtCore import QLoggingCategory - QLoggingCategory.setFilterRules('''\ - qt.webenginecontext.info=false - ''') + QLoggingCategory.setFilterRules('qt.webenginecontext.info=false') app = QApplication([]) ol_browser_login_window = 
OlBrowserLoginWindow() @@ -96,4 +108,17 @@ def login(): if not ol_browser_login_window.login_success: return None - return {"cookie": ol_browser_login_window.cookies, "csrf": ol_browser_login_window.csrf} + dat = { + "cookie": ol_browser_login_window.cookies, + "csrf": ol_browser_login_window.csrf + } + + # requesting GCLB + # r = reqs.get(SOCKET_URL, cookies=dat["cookie"]) + # dat["cookie"]['GCLB'] = r.cookies['GCLB'] # type: ignore + + return dat + + +if __name__ == '__main__': + login() diff --git a/olsync/olclient.py b/olsync/olsync/olclient.py similarity index 55% rename from olsync/olclient.py rename to olsync/olsync/olclient.py index a993de1..a99cd3f 100644 --- a/olsync/olclient.py +++ b/olsync/olsync/olclient.py @@ -9,24 +9,39 @@ # Version: 1.2.0 ################################################## +import json +import time + import requests as reqs from bs4 import BeautifulSoup -import json -import uuid from socketIO_client import SocketIO -import time # Where to get the CSRF Token and where to send the login request to LOGIN_URL = "https://www.overleaf.com/login" -PROJECT_URL = "https://www.overleaf.com/project" # The dashboard URL +PROJECT_URL = "https://www.overleaf.com/project" # The dashboard URL # The URL to download all the files in zip format DOWNLOAD_URL = "https://www.overleaf.com/project/{}/download/zip" -UPLOAD_URL = "https://www.overleaf.com/project/{}/upload" # The URL to upload files -FOLDER_URL = "https://www.overleaf.com/project/{}/folder" # The URL to create folders -DELETE_URL = "https://www.overleaf.com/project/{}/doc/{}" # The URL to delete files -COMPILE_URL = "https://www.overleaf.com/project/{}/compile?enable_pdf_caching=true" # The URL to compile the project -BASE_URL = "https://www.overleaf.com" # The Overleaf Base URL -PATH_SEP = "/" # Use hardcoded path separator for both windows and posix system +# UPLOAD_URL = "https://www.overleaf.com/project/{}/upload" # The URL to upload files +UPLOAD_URL = 
"https://www.overleaf.com/project/{}/upload" +FOLDER_URL = "https://www.overleaf.com/project/{}/folder" # The URL to create folders +DELETE_URL = "https://www.overleaf.com/project/{}/{}/{}" # The URL to delete files +COMPILE_URL = "https://www.overleaf.com/project/{}/compile?enable_pdf_caching=true" # The URL to compile the project +BASE_URL = "https://www.overleaf.com" # The Overleaf Base URL +PATH_SEP = "/" # Use hardcoded path separator for both windows and posix system + + +def search_dic(name, dic): + """ Search `name' in dic['docs'] and dic['fileRefs'] + Return file_id and file_type + """ + for v in dic['docs']: + if v['name'] == name: + return v['_id'], 'doc' + for v in dic['fileRefs']: + if v['name'] == name: + return v['_id'], 'file' + return None, None + class OverleafClient(object): """ @@ -44,43 +59,8 @@ def filter_projects(json_content, more_attrs=None): yield p def __init__(self, cookie=None, csrf=None): - self._cookie = cookie # Store the cookie for authenticated requests - self._csrf = csrf # Store the CSRF token since it is needed for some requests - - def login(self, username, password): - """ - WARNING - DEPRECATED - Not working as Overleaf introduced captchas - Login to the Overleaf Service with a username and a password - Params: username, password - Returns: Dict of cookie and CSRF - """ - - get_login = reqs.get(LOGIN_URL) - self._csrf = BeautifulSoup(get_login.content, 'html.parser').find( - 'input', {'name': '_csrf'}).get('value') - login_json = { - "_csrf": self._csrf, - "email": username, - "password": password - } - post_login = reqs.post(LOGIN_URL, json=login_json, - cookies=get_login.cookies) - - # On a successful authentication the Overleaf API returns a new authenticated cookie. 
- # If the cookie is different than the cookie of the GET request the authentication was successful - if post_login.status_code == 200 and get_login.cookies["overleaf_session2"] != post_login.cookies[ - "overleaf_session2"]: - self._cookie = post_login.cookies - - # Enrich cookie with GCLB cookie from GET request above - self._cookie['GCLB'] = get_login.cookies['GCLB'] - - # CSRF changes after making the login request, new CSRF token will be on the projects page - projects_page = reqs.get(PROJECT_URL, cookies=self._cookie) - self._csrf = BeautifulSoup(projects_page.content, 'html.parser').find('meta', {'name': 'ol-csrfToken'}) \ - .get('content') - - return {"cookie": self._cookie, "csrf": self._csrf} + self._cookie = cookie + self._csrf = csrf def all_projects(self): """ @@ -88,9 +68,15 @@ def all_projects(self): Returns: List of project objects """ projects_page = reqs.get(PROJECT_URL, cookies=self._cookie) + json_content = json.loads( - BeautifulSoup(projects_page.content, 'html.parser').find('meta', {'name': 'ol-projects'}).get('content')) - return list(OverleafClient.filter_projects(json_content)) + BeautifulSoup( + projects_page.content, # type: ignore + 'html.parser').find('meta', { + 'name': 'ol-prefetchedProjectsBlob' + }).get('content')) # type: ignore + + return list(OverleafClient.filter_projects(json_content['projects'])) def get_project(self, project_name): """ @@ -101,8 +87,13 @@ def get_project(self, project_name): projects_page = reqs.get(PROJECT_URL, cookies=self._cookie) json_content = json.loads( - BeautifulSoup(projects_page.content, 'html.parser').find('meta', {'name': 'ol-projects'}).get('content')) - return next(OverleafClient.filter_projects(json_content, {"name": project_name}), None) + BeautifulSoup(projects_page.content, + 'html.parser').find('meta', { + 'name': 'ol-prefetchedProjectsBlob' + }).get('content')) + return next( + OverleafClient.filter_projects(json_content['projects'], + {"name": project_name}), None) def 
download_project(self, project_id): """ @@ -111,7 +102,8 @@ def download_project(self, project_id): Returns: bytes string (zip file) """ r = reqs.get(DOWNLOAD_URL.format(project_id), - stream=True, cookies=self._cookie) + stream=True, + cookies=self._cookie) return r.content def create_folder(self, project_id, parent_folder_id, folder_name): @@ -126,15 +118,12 @@ def create_folder(self, project_id, parent_folder_id, folder_name): Returns: folder id or None """ - params = { - "parent_folder_id": parent_folder_id, - "name": folder_name - } - headers = { - "X-Csrf-Token": self._csrf - } + params = {"parent_folder_id": parent_folder_id, "name": folder_name} + headers = {"X-Csrf-Token": self._csrf} r = reqs.post(FOLDER_URL.format(project_id), - cookies=self._cookie, headers=headers, json=params) + cookies=self._cookie, + headers=headers, + json=params) if r.ok: return json.loads(r.content) @@ -153,35 +142,35 @@ def get_project_infos(self, project_id): Returns: project details """ + project_infos = None # Callback function for the joinProject emitter - def set_project_infos(a, project_infos_dict, c, d): + def set_project_infos(project_infos_dict): # Set project_infos variable in outer scope nonlocal project_infos - project_infos = project_infos_dict + project_infos = project_infos_dict.get("project", {}) # Convert cookie from CookieJar to string - cookie = "GCLB={}; overleaf_session2={}" \ - .format( - self._cookie["GCLB"], - self._cookie["overleaf_session2"] - ) + cookie = "GCLB={}; overleaf_session2={}".format( + self._cookie["GCLB"], self._cookie["overleaf_session2"]) # Connect to Overleaf Socket.IO, send a time parameter and the cookies - socket_io = SocketIO( - BASE_URL, - params={'t': int(time.time())}, - headers={'Cookie': cookie} - ) + socket_io = SocketIO(BASE_URL, + params={ + 't': int(time.time()), + 'projectId': project_id + }, + headers={'Cookie': cookie}) # Wait until we connect to the socket socket_io.on('connect', lambda: None) 
socket_io.wait_for_callbacks() # Send the joinProject event and receive the project infos - socket_io.emit('joinProject', {'project_id': project_id}, set_project_infos) - socket_io.wait_for_callbacks() + socket_io.on('joinProjectResponse', set_project_infos) + while project_infos is None: + socket_io.wait(1) # Disconnect from the socket if still connected if socket_io.connected: @@ -205,10 +194,15 @@ def upload_file(self, project_id, project_infos, file_name, file_size, file): # Set the folder_id to the id of the root folder folder_id = project_infos['rootFolder'][0]['_id'] + only_file_name = file_name + # The file name contains path separators, check folders if PATH_SEP in file_name: - local_folders = file_name.split(PATH_SEP)[:-1] # Remove last item since this is the file name - current_overleaf_folder = project_infos['rootFolder'][0]['folders'] # Set the current remote folder + # Remove last item since this is the file name + items = file_name.split(PATH_SEP) + local_folders, only_file_name = items[:-1], items[-1] + # Set the current remote folder + current_overleaf_folder = project_infos['rootFolder'][0]['folders'] for local_folder in local_folders: exists_on_remote = False @@ -221,23 +215,30 @@ def upload_file(self, project_id, project_infos, file_name, file_size, file): break # Create the folder if it doesn't exist if not exists_on_remote: - new_folder = self.create_folder(project_id, folder_id, local_folder) + new_folder = self.create_folder(project_id, folder_id, + local_folder) current_overleaf_folder.append(new_folder) folder_id = new_folder['_id'] current_overleaf_folder = new_folder['folders'] - params = { - "folder_id": folder_id, - "_csrf": self._csrf, - "qquuid": str(uuid.uuid4()), - "qqfilename": file_name, - "qqtotalfilesize": file_size, + + # Upload the file to the predefined folder + params = {'folder_id': folder_id} + data = { + "relativePath": "null", + "name": only_file_name, } - files = { - "qqfile": file + files = {"qqfile": (file_name, 
file)} + headers = { + "X-CSRF-TOKEN": self._csrf, } # Upload the file to the predefined folder - r = reqs.post(UPLOAD_URL.format(project_id), cookies=self._cookie, params=params, files=files) + r = reqs.post(UPLOAD_URL.format(project_id), + cookies=self._cookie, + headers=headers, + params=params, + data=data, + files=files) return r.status_code == str(200) and json.loads(r.content)["success"] @@ -252,35 +253,42 @@ def delete_file(self, project_id, project_infos, file_name): Returns: True on success, False on fail """ - file = None - + file_type = file_id = None # The file name contains path separators, check folders if PATH_SEP in file_name: - local_folders = file_name.split(PATH_SEP)[:-1] # Remove last item since this is the file name - current_overleaf_folder = project_infos['rootFolder'][0]['folders'] # Set the current remote folder - - for local_folder in local_folders: + items = file_name.split(PATH_SEP) + dir_depth = len(items) - 1 + only_file_name = items[-1] + current_overleaf_folder = project_infos['rootFolder'][0]['folders'] + for i in range(dir_depth): + success = False for remote_folder in current_overleaf_folder: - if local_folder.lower() == remote_folder['name'].lower(): - file = next((v for v in remote_folder['docs'] if v['name'] == file_name.split(PATH_SEP)[-1]), - None) - current_overleaf_folder = remote_folder['folders'] + if items[i] == remote_folder['name']: + if i != dir_depth - 1: + current_overleaf_folder = remote_folder['folders'] + else: + file_id, file_type = search_dic( + only_file_name, remote_folder) + success = True break - # File is in root folder - else: - file = next((v for v in project_infos['rootFolder'][0]['docs'] if v['name'] == file_name), None) + if not success: + print("Local folder {} does not exist in remote!".format( + items[i])) + return False + else: # File is in root folder + remote_folder = project_infos['rootFolder'][0] + file_id, file_type = search_dic(file_name, remote_folder) # File not found! 
- if file is None: - return False + if file_id is None: return False - headers = { - "X-Csrf-Token": self._csrf - } + headers = {"X-Csrf-Token": self._csrf} - r = reqs.delete(DELETE_URL.format(project_id, file['_id']), cookies=self._cookie, headers=headers, json={}) + r = reqs.delete(DELETE_URL.format(project_id, file_type, file_id), + cookies=self._cookie, + headers=headers) - return r.status_code == str(204) + return r.status_code == '204' def download_pdf(self, project_id): """ @@ -291,9 +299,7 @@ def download_pdf(self, project_id): Returns: PDF file name and content on success """ - headers = { - "X-Csrf-Token": self._csrf - } + headers = {"X-Csrf-Token": self._csrf} body = { "check": "silent", @@ -303,7 +309,10 @@ def download_pdf(self, project_id): "stopOnFirstError": False } - r = reqs.post(COMPILE_URL.format(project_id), cookies=self._cookie, headers=headers, json=body) + r = reqs.post(COMPILE_URL.format(project_id), + cookies=self._cookie, + headers=headers, + json=body) if not r.ok: raise reqs.HTTPError() @@ -313,9 +322,12 @@ def download_pdf(self, project_id): if compile_result["status"] != "success": raise reqs.HTTPError() - pdf_file = next(v for v in compile_result['outputFiles'] if v['type'] == 'pdf') + pdf_file = next(v for v in compile_result['outputFiles'] + if v['type'] == 'pdf') - download_req = reqs.get(BASE_URL + pdf_file['url'], cookies=self._cookie, headers=headers) + download_req = reqs.get(BASE_URL + pdf_file['url'], + cookies=self._cookie, + headers=headers) if download_req.ok: return pdf_file['path'], download_req.content diff --git a/olsync/olsync.py b/olsync/olsync/olsync.py similarity index 53% rename from olsync/olsync.py rename to olsync/olsync/olsync.py index 56c5f9e..18c67e2 100644 --- a/olsync/olsync.py +++ b/olsync/olsync/olsync.py @@ -9,49 +9,82 @@ # Version: 1.2.0 ################################################## -import click +import fnmatch +import glob +import io import os -from yaspin import yaspin import pickle -import 
zipfile -import io -import dateutil.parser -import glob -import fnmatch import traceback +import zipfile from pathlib import Path +import click +import dateutil.parser +from yaspin import yaspin + try: # Import for pip installation / wheel - from olsync.olclient import OverleafClient import olsync.olbrowserlogin as olbrowserlogin + from olsync.olclient import OverleafClient except ImportError: # Import for development - from olclient import OverleafClient import olbrowserlogin + from olclient import OverleafClient @click.group(invoke_without_command=True) -@click.option('-l', '--local-only', 'local', is_flag=True, help="Sync local project files to Overleaf only.") -@click.option('-r', '--remote-only', 'remote', is_flag=True, - help="Sync remote project files from Overleaf to local file system only.") -@click.option('-n', '--name', 'project_name', default="", - help="Specify the Overleaf project name instead of the default name of the sync directory.") -@click.option('--store-path', 'cookie_path', default=".olauth", type=click.Path(exists=False), +@click.option('-l', + '--local-only', + 'local', + is_flag=True, + help="Sync local project files to Overleaf only.") +@click.option( + '-r', + '--remote-only', + 'remote', + is_flag=True, + help="Sync remote project files from Overleaf to local file system only.") +@click.option( + '-n', + '--name', + 'project_name', + default="", + help= + "Specify the Overleaf project name instead of the default name of the sync directory." 
+) +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), help="Relative path to load the persisted Overleaf cookie.") -@click.option('-p', '--path', 'sync_path', default=".", type=click.Path(exists=True), +@click.option('-p', + '--path', + 'sync_path', + default=".", + type=click.Path(exists=True), help="Path of the project to sync.") -@click.option('-i', '--olignore', 'olignore_path', default=".olignore", type=click.Path(exists=False), - help="Path to the .olignore file relative to sync path (ignored if syncing from remote to local). See " - "fnmatch / unix filename pattern matching for information on how to use it.") -@click.option('-v', '--verbose', 'verbose', is_flag=True, help="Enable extended error logging.") +@click.option( + '-i', + '--olignore', + 'olignore_path', + default=".olignore", + type=click.Path(exists=False), + help= + "Path to the .olignore file relative to sync path (ignored if syncing from remote to local). See " + "fnmatch / unix filename pattern matching for information on how to use it.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") @click.version_option(package_name='overleaf-sync') @click.pass_context -def main(ctx, local, remote, project_name, cookie_path, sync_path, olignore_path, verbose): +def main(ctx, local, remote, project_name, cookie_path, sync_path, olignore_path, + verbose): if ctx.invoked_subcommand is None: if not os.path.isfile(cookie_path): raise click.ClickException( - "Persisted Overleaf cookie not found. Please login or check store path.") + "Cookie not found. 
Please login or check store path.") with open(cookie_path, 'rb') as f: store = pickle.load(f) @@ -61,93 +94,123 @@ def main(ctx, local, remote, project_name, cookie_path, sync_path, olignore_path # Change the current directory to the specified sync path os.chdir(sync_path) - project_name = project_name or os.path.basename(os.getcwd()) + project_name = get_project_name(project_name) + print("Using project name:", project_name) project = execute_action( - lambda: overleaf_client.get_project(project_name), - "Querying project", - "Project queried successfully.", - "Project could not be queried.", + lambda: overleaf_client.get_project(project_name), "Querying project", + "Project queried successfully.", "Project could not be queried.", verbose) project_infos = execute_action( lambda: overleaf_client.get_project_infos(project["id"]), - "Querying project details", - "Project details queried successfully.", - "Project details could not be queried.", - verbose) + "Querying project details", "Project details queried successfully.", + "Project details could not be queried.", verbose) zip_file = execute_action( - lambda: zipfile.ZipFile(io.BytesIO( - overleaf_client.download_project(project["id"]))), - "Downloading project", - "Project downloaded successfully.", - "Project could not be downloaded.", - verbose) + lambda: zipfile.ZipFile( + io.BytesIO(overleaf_client.download_project(project["id"]))), + "Downloading project", "Project downloaded successfully.", + "Project could not be downloaded.", verbose) sync = not (local or remote) if remote or sync: sync_func( files_from=zip_file.namelist(), - deleted_files=[f for f in olignore_keep_list(olignore_path) if f not in zip_file.namelist() and not sync], - create_file_at_to=lambda name: write_file(name, zip_file.read(name)), + deleted_files=[ + f for f in olignore_keep_list(olignore_path) + if f not in zip_file.namelist() and not sync + ], + create_file_at_to=lambda name: write_file( + name, zip_file.read(name)), 
delete_file_at_to=lambda name: delete_file(name), create_file_at_from=lambda name: overleaf_client.upload_file( - project["id"], project_infos, name, os.path.getsize(name), open(name, 'rb')), + project["id"], project_infos, name, os.path.getsize(name), + open(name, 'rb')), from_exists_in_to=lambda name: os.path.isfile(name), - from_equal_to_to=lambda name: open(name, 'rb').read() == zip_file.read(name), - from_newer_than_to=lambda name: dateutil.parser.isoparse(project["lastUpdated"]).timestamp() > - os.path.getmtime(name), + from_equal_to_to=lambda name: open(name, 'rb').read( + ) == zip_file.read(name), + from_newer_than_to=lambda name: dateutil.parser.isoparse(project[ + "lastUpdated"]).timestamp() > os.path.getmtime(name), from_name="remote", to_name="local", verbose=verbose) if local or sync: sync_func( files_from=olignore_keep_list(olignore_path), - deleted_files=[f for f in zip_file.namelist() if f not in olignore_keep_list(olignore_path) and not sync], + deleted_files=[ + f for f in zip_file.namelist() + if f not in olignore_keep_list(olignore_path) and not sync + ], create_file_at_to=lambda name: overleaf_client.upload_file( - project["id"], project_infos, name, os.path.getsize(name), open(name, 'rb')), - delete_file_at_to=lambda name: overleaf_client.delete_file(project["id"], project_infos, name), - create_file_at_from=lambda name: write_file(name, zip_file.read(name)), + project["id"], project_infos, name, os.path.getsize(name), + open(name, 'rb')), + delete_file_at_to=lambda name: overleaf_client.delete_file( + project["id"], project_infos, name), + create_file_at_from=lambda name: write_file( + name, zip_file.read(name)), from_exists_in_to=lambda name: name in zip_file.namelist(), - from_equal_to_to=lambda name: open(name, 'rb').read() == zip_file.read(name), - from_newer_than_to=lambda name: os.path.getmtime(name) > dateutil.parser.isoparse( - project["lastUpdated"]).timestamp(), + from_equal_to_to=lambda name: open(name, 'rb').read( + ) == 
zip_file.read(name), + from_newer_than_to=lambda name: os.path.getmtime(name) > dateutil. + parser.isoparse(project["lastUpdated"]).timestamp(), from_name="local", to_name="remote", verbose=verbose) @main.command() -@click.option('--path', 'cookie_path', default=".olauth", type=click.Path(exists=False), +@click.option('--path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), help="Path to store the persisted Overleaf cookie.") -@click.option('-v', '--verbose', 'verbose', is_flag=True, help="Enable extended error logging.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") def login(cookie_path, verbose): if os.path.isfile(cookie_path) and not click.confirm( - 'Persisted Overleaf cookie already exist. Do you want to override it?'): + 'Cookie already exist. Do you want to override it?'): return click.clear() - execute_action(lambda: login_handler(cookie_path), "Login", - "Login successful. Cookie persisted as `" + click.format_filename( - cookie_path) + "`. You may now sync your project.", - "Login failed. Please try again.", verbose) + execute_action( + lambda: login_handler(cookie_path), "Login", + "Login successful. Cookie persisted as `" + + click.format_filename(cookie_path) + "`. You may now sync your project.", + "Login failed. 
Please try again.", verbose) @main.command(name='list') -@click.option('--store-path', 'cookie_path', default=".olauth", type=click.Path(exists=False), +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), help="Relative path to load the persisted Overleaf cookie.") -@click.option('-v', '--verbose', 'verbose', is_flag=True, help="Enable extended error logging.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") def list_projects(cookie_path, verbose): + def query_projects(): - for index, p in enumerate(sorted(overleaf_client.all_projects(), key=lambda x: x['lastUpdated'], reverse=True)): + for index, p in enumerate( + sorted(overleaf_client.all_projects(), + key=lambda x: x['lastUpdated'], + reverse=True)): if not index: click.echo("\n") - click.echo(f"{dateutil.parser.isoparse(p['lastUpdated']).strftime('%m/%d/%Y, %H:%M:%S')} - {p['name']}") + click.echo( + f"{dateutil.parser.isoparse(p['lastUpdated']).strftime('%m/%d/%Y, %H:%M:%S')} - {p['name']}" + ) return True if not os.path.isfile(cookie_path): raise click.ClickException( - "Persisted Overleaf cookie not found. Please login or check store path.") + "Persisted Overleaf cookie not found. Please login or check store path." + ) with open(cookie_path, 'rb') as f: store = pickle.load(f) @@ -161,21 +224,36 @@ def query_projects(): @main.command(name='download') -@click.option('-n', '--name', 'project_name', default="", - help="Specify the Overleaf project name instead of the default name of the sync directory.") -@click.option('--download-path', 'download_path', default=".", type=click.Path(exists=True)) -@click.option('--store-path', 'cookie_path', default=".olauth", type=click.Path(exists=False), +@click.option( + '-n', + '--name', + 'project_name', + default="", + help= + "Specify the Overleaf project name instead of the default name of the sync directory." 
+) +@click.option('--download-path', + 'download_path', + default=".", + type=click.Path(exists=True)) +@click.option('--store-path', + 'cookie_path', + default=".olauth", + type=click.Path(exists=False), help="Relative path to load the persisted Overleaf cookie.") -@click.option('-v', '--verbose', 'verbose', is_flag=True, help="Enable extended error logging.") +@click.option('-v', + '--verbose', + 'verbose', + is_flag=True, + help="Enable extended error logging.") def download_pdf(project_name, download_path, cookie_path, verbose): + def download_project_pdf(): nonlocal project_name project_name = project_name or os.path.basename(os.getcwd()) project = execute_action( - lambda: overleaf_client.get_project(project_name), - "Querying project", - "Project queried successfully.", - "Project could not be queried.", + lambda: overleaf_client.get_project(project_name), "Querying project", + "Project queried successfully.", "Project could not be queried.", verbose) file_name, content = overleaf_client.download_pdf(project["id"]) @@ -189,7 +267,8 @@ def download_project_pdf(): if not os.path.isfile(cookie_path): raise click.ClickException( - "Persisted Overleaf cookie not found. Please login or check store path.") + "Persisted Overleaf cookie not found. Please login or check store path." 
+ ) with open(cookie_path, 'rb') as f: store = pickle.load(f) @@ -207,6 +286,7 @@ def login_handler(path): store = olbrowserlogin.login() if store is None: return False + with open(path, 'wb+') as f: pickle.dump(store, f) return True @@ -236,9 +316,17 @@ def write_file(path, content): f.write(content) -def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, create_file_at_from, from_exists_in_to, - from_equal_to_to, from_newer_than_to, from_name, - to_name, verbose=False): +def sync_func(files_from, + deleted_files, + create_file_at_to, + delete_file_at_to, + create_file_at_from, + from_exists_in_to, + from_equal_to_to, + from_newer_than_to, + from_name, + to_name, + verbose=False): click.echo("\nSyncing files from [%s] to [%s]" % (from_name, to_name)) click.echo('=' * 40) @@ -255,7 +343,8 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c if not from_equal_to_to(name): if not from_newer_than_to(name) and not click.confirm( '\n-> Warning: last-edit time stamp of file <%s> from [%s] is older than [%s].\nContinue to ' - 'overwrite with an older version?' % (name, from_name, to_name)): + 'overwrite with an older version?' % + (name, from_name, to_name)): not_sync_list.append(name) continue @@ -268,7 +357,8 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c for name in deleted_files: delete_choice = click.prompt( '\n-> Warning: file <%s> does not exist on [%s] anymore (but it still exists on [%s]).' - '\nShould the file be [d]eleted, [r]estored or [i]gnored?' % (name, from_name, to_name), + '\nShould the file be [d]eleted, [r]estored or [i]gnored?' 
% + (name, from_name, to_name), default="i", type=click.Choice(['d', 'r', 'i'])) if delete_choice == "d": @@ -278,8 +368,7 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c elif delete_choice == "i": not_restored_list.append(name) - click.echo( - "\n[NEW] Following new file(s) created on [%s]" % to_name) + click.echo("\n[NEW] Following new file(s) created on [%s]" % to_name) for name in newly_add_list: click.echo("\t%s" % name) try: @@ -287,10 +376,11 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c except: if verbose: print(traceback.format_exc()) - raise click.ClickException("\n[ERROR] An error occurred while creating new file(s) on [%s]" % to_name) + raise click.ClickException( + "\n[ERROR] An error occurred while creating new file(s) on [%s]" % + to_name) - click.echo( - "\n[NEW] Following new file(s) created on [%s]" % from_name) + click.echo("\n[NEW] Following new file(s) created on [%s]" % from_name) for name in restore_list: click.echo("\t%s" % name) try: @@ -298,10 +388,11 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c except: if verbose: print(traceback.format_exc()) - raise click.ClickException("\n[ERROR] An error occurred while creating new file(s) on [%s]" % from_name) + raise click.ClickException( + "\n[ERROR] An error occurred while creating new file(s) on [%s]" % + from_name) - click.echo( - "\n[UPDATE] Following file(s) updated on [%s]" % to_name) + click.echo("\n[UPDATE] Following file(s) updated on [%s]" % to_name) for name in update_list: click.echo("\t%s" % name) try: @@ -309,10 +400,11 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c except: if verbose: print(traceback.format_exc()) - raise click.ClickException("\n[ERROR] An error occurred while updating file(s) on [%s]" % to_name) + raise click.ClickException( + "\n[ERROR] An error occurred while updating file(s) on [%s]" % + to_name) - click.echo( - 
"\n[DELETE] Following file(s) deleted on [%s]" % to_name) + click.echo("\n[DELETE] Following file(s) deleted on [%s]" % to_name) for name in delete_list: click.echo("\t%s" % name) try: @@ -320,20 +412,17 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c except: if verbose: print(traceback.format_exc()) - raise click.ClickException("\n[ERROR] An error occurred while creating new file(s) on [%s]" % to_name) - - click.echo( - "\n[SYNC] Following file(s) are up to date") - for name in synced_list: - click.echo("\t%s" % name) + raise click.ClickException( + "\n[ERROR] An error occurred while creating new file(s) on [%s]" % + to_name) - click.echo( - "\n[SKIP] Following file(s) on [%s] have not been synced to [%s]" % (from_name, to_name)) + click.echo("\n[SKIP] Following file(s) on [%s] have not been synced to [%s]" % + (from_name, to_name)) for name in not_sync_list: click.echo("\t%s" % name) - click.echo( - "\n[SKIP] Following file(s) on [%s] have not been synced to [%s]" % (to_name, from_name)) + click.echo("\n[SKIP] Following file(s) on [%s] have not been synced to [%s]" % + (to_name, from_name)) for name in not_restored_list: click.echo("\t%s" % name) @@ -342,7 +431,11 @@ def sync_func(files_from, deleted_files, create_file_at_to, delete_file_at_to, c click.echo("") -def execute_action(action, progress_message, success_message, fail_message, verbose_error_logging=False): +def execute_action(action, + progress_message, + success_message, + fail_message, + verbose_error_logging=False): with yaspin(text=progress_message, color="green") as spinner: try: success = action() @@ -369,21 +462,41 @@ def olignore_keep_list(olignore_path): # get list of files recursively (ignore .* files) files = glob.glob('**', recursive=True) - click.echo("="*40) + # click.echo("="*40) if not os.path.isfile(olignore_path): - click.echo("\nNotice: .olignore file does not exist, will sync all items.") + # click.echo("\nNotice: .olignore file does not exist, 
will sync all items.") keep_list = files else: - click.echo("\n.olignore: using %s to filter items" % olignore_path) + # click.echo("\n.olignore: using %s to filter items" % olignore_path) with open(olignore_path, 'r') as f: ignore_pattern = f.read().splitlines() - keep_list = [f for f in files if not any( - fnmatch.fnmatch(f, ignore) for ignore in ignore_pattern)] + keep_list = [ + f for f in files + if not any(fnmatch.fnmatch(f, ignore) for ignore in ignore_pattern) + ] - keep_list = [Path(item).as_posix() for item in keep_list if not os.path.isdir(item)] + keep_list = [ + Path(item).as_posix() for item in keep_list if not os.path.isdir(item) + ] return keep_list +def get_project_name(project_name): + """If the project_name is provided, save it to file ".olproject_name". + Otherwise, try to read it from ".olproject_name". If the project_name is still + empty, then use currrent folder name. + + """ + if project_name: + with open(".olproject_name", "w") as fw: + fw.write(project_name) + elif os.path.isfile(".olproject_name"): + with open(".olproject_name", 'r') as f: + project_name = f.read().rstrip() + project_name = project_name or os.path.basename(os.getcwd()) + return project_name + + if __name__ == "__main__": main() diff --git a/olsync/setup.py b/olsync/setup.py new file mode 100644 index 0000000..2e6bb1c --- /dev/null +++ b/olsync/setup.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# created on 2018-08-12 19:22 + +import setuptools + +setuptools.setup( + name="olsync", + version="0.0.1", + author="J. Zhao", + author_email="author@example.com", + description="A small example package", + packages=setuptools.find_packages(), +) diff --git a/requirements.txt b/requirements.txt index b5c7240..a9ecf9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -requests==2.* -beautifulsoup4==4.11.1 -yaspin==2.* -python-dateutil~=2.8.1 -click==8.* -socketIO-client==0.5.7.2 # Do not upgrade! 
-PySide6==6.* \ No newline at end of file +requests~=2.31.0 +beautifulsoup4~=4.11.1 +yaspin~=2.2.0 +python-dateutil~=2.8.2 +click==8.0.1 +socketIO-client==0.5.7.4 +PySide6==6.5.0 \ No newline at end of file