diff --git a/.gitignore b/.gitignore index dff3c38..3468fe2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ .htmlcov .DS_Store .idea +.vscode +setup.cfg diff --git a/Makefile b/Makefile index 2d08017..ed9a6fc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PLUGIN_VERSION=1.0.0 +PLUGIN_VERSION=1.0.1 PLUGIN_ID=box-com plugin: diff --git a/parameter-sets/box-set-id/parameter-set.json b/parameter-sets/box-set-id/parameter-set.json index 694c2c4..80cdb6a 100644 --- a/parameter-sets/box-set-id/parameter-set.json +++ b/parameter-sets/box-set-id/parameter-set.json @@ -13,6 +13,14 @@ "type": "PASSWORD", "description": "Generated on Box.com for use in DSS", "mandatory": true + }, + { + "name": "enterprise_id", + "label": "Company name", + "type": "STRING", + "description": "Only for private API", + "visibilityCondition": false, + "defaultValue": null } ] } \ No newline at end of file diff --git a/parameter-sets/oauth-login/parameter-set.json b/parameter-sets/oauth-login/parameter-set.json new file mode 100644 index 0000000..6df7f95 --- /dev/null +++ b/parameter-sets/oauth-login/parameter-set.json @@ -0,0 +1,33 @@ +{ + "meta": { + "label": "Box.com Single Sign On", + "description": "", + "icon": "icon-cloud" + }, + "defaultDefinableInline": true, + "defaultDefinableAtProjectLevel": true, + "pluginParams": [], + "params": [ + { + "name": "boxcom_oauth", + "type": "CREDENTIAL_REQUEST", + "label": "Box.com Single Sign On", + "credentialRequestSettings": { + "type": "OAUTH2", + "oauth2Flow": "authorization_code", + "oauth2Provider": "AZURE", + "authorizationEndpoint": "https://account.box.com/api/oauth2/authorize", + "tokenEndpoint": "https://api.box.com/oauth2/token", + "scope": "root_readwrite" + } + }, + { + "name": "enterprise_id", + "label": "Company name", + "type": "STRING", + "description": "Only for private API", + "visibilityCondition": false, + "defaultValue": null + } + ] +} \ No newline at end of file diff --git a/plugin.json b/plugin.json index f513bff..e495b6a 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "box-com", - "version": "1.0.0", + "version": "1.0.1", "meta": { "label": "Box.com", "description": "Read and write data from/to your Box.com account", diff --git a/python-fs-providers/box-com_box-com/fs-provider.json b/python-fs-providers/box-com_box-com/fs-provider.json index bde5795..6ed137d 100644 --- a/python-fs-providers/box-com_box-com/fs-provider.json +++ b/python-fs-providers/box-com_box-com/fs-provider.json @@ -5,11 +5,35 @@ "icon": "icon-cloud" }, "params": [ + { + "name": "auth_type", + "label": "Type of authentication", + "type": "SELECT", + "defaultValue": "token", + "selectChoices": [ + { + "value": "token", + "label": "Access token" + }, + { + "value": "oauth", + "label": "Box.com Single Sign On" + } + ] + }, { "name": "box_com_connection", "label": "Box.com connection", "type": "PRESET", - "parameterSetId": "box-set-id" + "parameterSetId": "box-set-id", + "visibilityCondition": "model.auth_type == 'token'" + }, + { + "name": "oauth_login", + "label": "Box.com Single Sign On", + "type": "PRESET", + "parameterSetId": "oauth-login", + "visibilityCondition": "model.auth_type == 'oauth'" }, { "name": "cache_enabled", diff --git a/python-fs-providers/box-com_box-com/fs-provider.py b/python-fs-providers/box-com_box-com/fs-provider.py index 8b27f17..d1f0f8a 100644 --- a/python-fs-providers/box-com_box-com/fs-provider.py +++ b/python-fs-providers/box-com_box-com/fs-provider.py @@ -1,11 +1,15 @@ from dataiku.fsprovider import FSProvider from boxsdk import OAuth2, Client - -import os, shutil, json, hashlib, logging +import os +import shutil +import hashlib +from safe_logger import SafeLogger from box_item import BoxItem from utils import get_full_path, get_rel_path, get_normalized_path +logger = SafeLogger("box-com plugin", forbiden_keys=["access_token", "boxcom_oauth"]) + class BoxComFSProvider(FSProvider): def __init__(self, root, config, client): @@ -17,8 +21,15 @@ def __init__(self, root, config, client): if len(root) > 0 and root[0] == '/': root = root[1:] self.root = root + logger.warning("config={}".format(logger.filter_secrets(config))) + logger.warning("client={}".format(logger.filter_secrets(client))) self.connection = client.get("box_com_connection") - self.access_token = self.connection['access_token'] + self.enterprise_id = self.connection.get("enterprise_id", None) + self.auth_type = config.get("auth_type", "token") + if self.auth_type == "oauth": + self.access_token = config.get("oauth_login")["boxcom_oauth"] + else: + self.access_token = self.connection['access_token'] self.cache_enabled = config.get("cache_enabled") if self.cache_enabled: cache_file_name = hashlib.sha1(self.access_token.encode('utf-8')).hexdigest() @@ -30,7 +41,14 @@ def __init__(self, root, config, client): access_token=self.access_token ) self.client = Client(auth) - self.user = self.client.user().get() + if self.enterprise_id is not None and self.enterprise_id != "": + session = self.client._session + api = API() + api.BASE_API_URL = 'https://{enterprise_id}.ent.box.com/2.0'.format(enterprise_id=self.enterprise_id) + session._api_config = api + self.client = Client(auth, session=session) + logger.warning("base api url updated to: {}".format(self.client.session._api_config.BASE_API_URL)) + self.box_item = BoxItem(cache_file_name, root, self.client) self.box_item.check_path_format(get_normalized_path(root)) @@ -65,16 +83,25 @@ def browse(self, path): full_path = get_full_path(self.root, path) item = self.box_item.get_by_path(get_rel_path(full_path)) if item.not_exists(): - return {'fullPath' : normalized_path, 'exists' : False} + return { + 'fullPath': normalized_path, + 'exists': False + } if item.is_folder(): - return {'fullPath' : normalized_path, 'exists' : True, 'directory' : True, 'children' : item.get_children(normalized_path), 'lastModified' : item.get_last_modified()} + return { + 'fullPath': normalized_path, + 'exists': True, + 'directory': True, + 'children': item.get_children(normalized_path), + 'lastModified': item.get_last_modified() + } else: return item.get_as_browse() - + def enumerate(self, path, first_non_empty): """ Enumerate files recursively from prefix. If first_non_empty, stop at the first non-empty file. - + If the prefix doesn't denote a file or folder, return None """ full_path = get_full_path(self.root, path) @@ -88,18 +115,18 @@ def enumerate(self, path, first_non_empty): if item.is_folder(): paths = self.list_recursive(normalized_path, item.id, first_non_empty) else: - paths.append({'path':normalized_path.split("/")[-1], 'size':item.size, 'lastModified':int(0) * 1000}) + paths.append({'path': normalized_path.split("/")[-1], 'size': item.size, 'lastModified': int(0) * 1000}) return paths def list_recursive(self, path, folder_id, first_non_empty): paths = [] if path == "/": path = "" - for child in self.client.folder(folder_id).get_items(fields = ['modified_at','name','type','size']): + for child in self.client.folder(folder_id).get_items(fields=['modified_at', 'name', 'type', 'size']): if child.type == self.box_item.BOX_FOLDER: paths.extend(self.list_recursive(path + '/' + child.name, child.id, first_non_empty)) else: - paths.append({'path':path + '/' + child.name, 'size':child.size}) + paths.append({'path': path + '/' + child.name, 'size': child.size}) if first_non_empty: return paths return paths @@ -109,7 +136,7 @@ def delete_recursive(self, path): Delete recursively from path. Return the number of deleted files (optional) """ full_path = get_full_path(self.root, path) - item = self.box_item.get_by_path(full_path, force_no_cache = True) + item = self.box_item.get_by_path(full_path, force_no_cache=True) if item.not_exists(): return 0 else: @@ -126,7 +153,7 @@ def move(self, from_path, to_path): from_base, from_item_name = os.path.split(full_from_path) to_base, to_item_name = os.path.split(full_to_path) - from_item = self.box_item.get_by_path(full_from_path, force_no_cache = True) + from_item = self.box_item.get_by_path(full_from_path, force_no_cache=True) if from_item.not_exists(): return False @@ -134,9 +161,9 @@ def move(self, from_path, to_path): from_item_id = from_item.get_id() from_item_is_folder = from_item.is_folder() - to_item = self.box_item.get_by_path(full_to_path, force_no_cache = True) + to_item = self.box_item.get_by_path(full_to_path, force_no_cache=True) if to_item.not_exists(): - to_item = self.box_item.get_by_path(to_base, force_no_cache = True) + to_item = self.box_item.get_by_path(to_base, force_no_cache=True) destination_folder = self.client.folder(to_item.get_id()) @@ -156,7 +183,7 @@ def read(self, path, stream, limit): full_path = get_full_path(self.root, path) byte_range = None - if limit is not None and limit is not "-1": + if limit is not None and limit != "-1": int_limit = int(limit) if int_limit > 0: byte_range = (0, int(limit) - 1) @@ -171,8 +198,17 @@ def write(self, path, stream): Write the stream to the object denoted by path into the stream """ full_path = get_full_path(self.root, path) - item = self.box_item.create_path(full_path, force_no_cache = True) + item = self.box_item.create_path(full_path, force_no_cache=True) if item.is_folder(): item.write_stream(stream) else: - raise Exception('Not a file name') \ No newline at end of file + raise Exception('Not a file name') + + +class API(object): + """Configuration object containing the URLs for the Box API.""" + BASE_API_URL = 'https://api.box.com/2.0' + UPLOAD_URL = 'https://upload.box.com/api/2.0' + OAUTH2_API_URL = 'https://api.box.com/oauth2' # 1) and my_child - def id_default_folder(self,name): + def id_default_folder(self, name): try: probe_folder = self.client.folder(self.id).create_subfolder(name) return probe_folder.id @@ -284,7 +298,7 @@ def id_default_folder(self,name): return None def check_path_format(self, path): - special_names = [".",".."] + special_names = [".", ".."] if not all(c in string.printable for c in path): raise Exception('The path contains non-printable char(s)') for element in path.split('/'): diff --git a/python-lib/cache_handler.py b/python-lib/cache_handler.py index 5c17321..47bfd95 100644 --- a/python-lib/cache_handler.py +++ b/python-lib/cache_handler.py @@ -1,9 +1,14 @@ -import os, json, uuid, time, errno, logging +import os +import json +import uuid +import errno +import logging from shutil import move logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, # avoid getting log from 3rd party module format='box-com plugin %(levelname)s - %(message)s') + class CacheHandler(): def __init__(self, cache_file_name): if cache_file_name is None: @@ -23,6 +28,7 @@ def load_cache(self): file_handle.close() except Exception as error: self.cache = {} + logger.error("Error loading cache {}".format(error)) def reset(self): if not self.cache_enabled: @@ -46,12 +52,12 @@ def write_onto_disk(self): except Exception as error: logger.error('Error while saving cache' + error) - def create_dir(self,filename): + def create_dir(self, filename): if not os.path.exists(os.path.dirname(filename)): try: os.makedirs(os.path.dirname(filename)) return 1 - except OSError as error: # Guard against race condition + except OSError as error: # Guard against race condition if error.errno != errno.EEXIST: raise logger.info("Error :" + error) @@ -61,10 +67,10 @@ def create_dir(self,filename): def add(self, path, item_id, item_type): if not self.cache_enabled: return - self.cache[path] = {"item_id":item_id, "item_type":item_type} + self.cache[path] = {"item_id": item_id, "item_type": item_type} self.write_onto_disk() - def query(self, path, force_no_cache = False): + def query(self, path, force_no_cache=False): if not self.cache_enabled or force_no_cache: return None, None if path in self.cache: diff --git a/python-lib/safe_logger.py b/python-lib/safe_logger.py new file mode 100644 index 0000000..d43196a --- /dev/null +++ b/python-lib/safe_logger.py @@ -0,0 +1,47 @@ +import logging +import copy + + +class SafeLogger(object): + def __init__(self, name, forbiden_keys=None): + self.name = name + self.logger = logging.getLogger(self.name) + logging.basicConfig( + level=logging.INFO, + format='{} %(levelname)s - %(message)s'.format(self.name) + ) + self.forbiden_keys = forbiden_keys + + def info(self, message): + self.logger.info(message) + + def debug(self, message): + self.logger.debug(message) + + def warning(self, message): + self.logger.warning(message) + + def error(self, message): + self.logger.error(message) + + def filter_secrets(self, dictionary): + ret = copy.deepcopy(dictionary) + ret = self.dig_secrets(ret) + return ret + + def dig_secrets(self, dictionary): + for key in dictionary: + if isinstance(dictionary[key], dict): + dictionary[key] = self.filter_secrets(dictionary[key]) + if key in self.forbiden_keys: + dictionary[key] = hash(dictionary[key]) + return dictionary + + +def hash(data): + data_type = type(data).__name__ + if data_type in ["str", "dict", "list", "unicode"]: + data_len = len(data) + else: + data_len = 0 + return "HASHED_SECRET:{}:{}".format(data_type, data_len) diff --git a/python-lib/utils.py b/python-lib/utils.py index 95c62cc..7165e70 100644 --- a/python-lib/utils.py +++ b/python-lib/utils.py @@ -2,15 +2,19 @@ def get_rel_path(path): if len(path) > 0 and path[0] == '/': path = path[1:] return path + + def get_normalized_path(path): if len(path) == 0 or path == '/': return '/' elts = path.split('/') elts = [e for e in elts if len(e) > 0] return '/' + '/'.join(elts) + + def get_full_path(root, path): normalized_path = get_normalized_path(path) if normalized_path == '/': return get_normalized_path(root) else: - return get_normalized_path(root) + normalized_path \ No newline at end of file + return get_normalized_path(root) + normalized_path diff --git a/python-runnables/clean-box-com-cache/runnable.py b/python-runnables/clean-box-com-cache/runnable.py index 1e37bbf..a9fc157 100644 --- a/python-runnables/clean-box-com-cache/runnable.py +++ b/python-runnables/clean-box-com-cache/runnable.py @@ -1,5 +1,7 @@ from dataiku.runnables import Runnable -import os, hashlib +import os +import hashlib + class CleanCache(Runnable): @@ -10,7 +12,7 @@ def __init__(self, project_key, config, plugin_config): self.connection = self.plugin_config.get("box_com_connection") self.access_token = self.connection['access_token'] self.cache_location = os.environ["DIP_HOME"] + '/caches/plugins/box-com/' + hashlib.sha1(self.access_token.encode('utf-8')).hexdigest() - + def get_progress_target(self): return None @@ -20,4 +22,3 @@ def run(self, progress_callback): return "Done!" else: return "Error: no cache found" - \ No newline at end of file diff --git a/python-runnables/get-box-com-sharing-email/runnable.py b/python-runnables/get-box-com-sharing-email/runnable.py index 3604929..4aa4f4f 100644 --- a/python-runnables/get-box-com-sharing-email/runnable.py +++ b/python-runnables/get-box-com-sharing-email/runnable.py @@ -1,16 +1,17 @@ from dataiku.runnables import Runnable from boxsdk import OAuth2, Client + class GetBoxSharingEmail(Runnable): def __init__(self, project_key, config, plugin_config): self.project_key = project_key self.config = config self.plugin_config = plugin_config - + self.connection = self.plugin_config.get("box_com_connection") self.access_token = self.connection['access_token'] - + auth = OAuth2( client_id="", client_secret="", @@ -18,10 +19,9 @@ def __init__(self, project_key, config, plugin_config): ) self.client = Client(auth) self.user = self.client.user().get() - + def get_progress_target(self): return None def run(self, progress_callback): return self.user.login - \ No newline at end of file