diff --git a/README.md b/README.md index 75ef31c1..e2d735cf 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,21 @@ execute: site along with the `-x` option. For example, `-x stanford`, if the course that you want to get is hosted on Stanford's site. + +*Alternative to providing username and password everytime:* + +On \*nix platforms, the use of a `~/.netrc` file is a good alternative to +specifying both your username (i.e., your email address) and password every +time on the command line. To use it, simply add a line like the one below to +a file named `.netrc` in your home directory with contents like: + + machine coursera-dl login password + +Then, simply invoke your command using the `-n` option like: + + edx-dl -n --list-courses + + # Docker container You can run this application via [Docker](https://docker.com) if you want. Just install docker and run diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 64486674..4080274b 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -14,6 +14,12 @@ import pickle import re import sys +import platform + +try: + import netrc +except ImportError: + netrc = None from functools import partial from multiprocessing.dummy import Pool as ThreadPool @@ -57,6 +63,14 @@ ) +class CredentialsError(BaseException): + """ + Class to be thrown if the credentials are not found. + """ + + pass + + OPENEDX_SITES = { 'edx': { 'url': 'https://courses.edx.org', @@ -210,6 +224,136 @@ def edx_get_subtitle(url, headers, return None +def get_config_paths(config_name): # pragma: no test + """ + [Code from https://github.com/coursera-dl/coursera-dl/...] + Return a list of config files paths to try in order, given config file + name and possibly a user-specified path. + + For Windows platforms, there are several paths that can be tried to + retrieve the netrc file. There is, however, no "standard way" of doing + things. + + A brief recap of the situation (all file paths are written in Unix + convention): + + 1. By default, Windows does not define a $HOME path. However, some + people might define one manually, and many command-line tools imported + from Unix will search the $HOME environment variable first. This + includes MSYSGit tools (bash, ssh, ...) and Emacs. + + 2. Windows defines two 'user paths': $USERPROFILE, and the + concatenation of the two variables $HOMEDRIVE and $HOMEPATH. Both of + these paths point by default to the same location, e.g. + C:\\Users\\Username + + 3. $USERPROFILE cannot be changed, however $HOMEDRIVE and $HOMEPATH + can be changed. They are originally intended to be the equivalent of + the $HOME path, but there are many known issues with them + + 4. As for the name of the file itself, most of the tools ported from + Unix will use the standard '.dotfile' scheme, but some of these will + instead use "_dotfile". Of the latter, the two notable exceptions are + vim, which will first try '_vimrc' before '.vimrc' (but it will try + both) and git, which will require the user to name its netrc file + '_netrc'. + + Relevant links : + http://markmail.org/message/i33ldu4xl5aterrr + http://markmail.org/message/wbzs4gmtvkbewgxi + http://stackoverflow.com/questions/6031214/ + + Because the whole thing is a mess, I suggest we tried various sensible + defaults until we succeed or have depleted all possibilities. + """ + + if platform.system() != 'Windows': + return [None] + + # Now, we only treat the case of Windows + env_vars = [["HOME"], + ["HOMEDRIVE", "HOMEPATH"], + ["USERPROFILE"], + ["SYSTEMDRIVE"]] + + env_dirs = [] + for var_list in env_vars: + + var_values = [_getenv_or_empty(var) for var in var_list] + + directory = ''.join(var_values) + if not directory: + logging.debug('Environment var(s) %s not defined, skipping', + var_list) + else: + env_dirs.append(directory) + + additional_dirs = ["C:", ""] + + all_dirs = env_dirs + additional_dirs + + leading_chars = [".", "_"] + + res = [''.join([directory, os.sep, lc, config_name]) + for directory in all_dirs + for lc in leading_chars] + + return res + + +def authenticate_through_netrc(path=None): + """ + [Base code from https://github.com/coursera-dl/coursera-dl/...] + Return the tuple user / password given a path for the .netrc file. + + Raises CredentialsError if no valid netrc file is found. + """ + errors = [] + netrc_machine = 'edx-dl' + paths = [path] if path else get_config_paths("netrc") + for path in paths: + try: + logging.debug('Trying netrc file %s', path) + auths = netrc.netrc(path).authenticators(netrc_machine) + except (IOError, netrc.NetrcParseError) as e: + errors.append(e) + else: + if auths is None: + errors.append('Didn\'t find any credentials for ' + + netrc_machine) + else: + return auths[0], auths[2] + + error_messages = '\n'.join(str(e) for e in errors) + raise CredentialsError( + 'Did not find valid netrc file:\n' + error_messages + + '\nPlease run this command: chmod og-rw ~/.netrc') + + +def get_credentials(username=None, password=None, netrc=None): + """ + Return valid username, password tuple. + + Raises CredentialsError if username or password is missing. + """ + if netrc: + path = None if netrc is True else netrc + return authenticate_through_netrc(path) + + if username: + # Query password, if not alredy passed by command line + # or not found in any netrc file. + if not password: + password = getpass.getpass(stream=sys.stderr) + + if not username or not password: + logging.error("You must supply username and password to log-in" + + ", or provide them in a netrc file") + exit(ExitCode.MISSING_CREDENTIALS) + + return username, password + + def edx_login(url, headers, username, password): """ Log in user into the openedx website. @@ -246,7 +390,7 @@ def parse_args(): # optional parser.add_argument('-u', '--username', - required=True, + default=None, action='store', help='your edX username (email)') @@ -256,6 +400,20 @@ def parse_args(): help='your edX password, ' 'beware: it might be visible to other users on your system') + # the netrc option can be passed a path to the netrc fileĀ or + # it can be used with noting in order to use the default + # netrc file location + parser.add_argument( + '-n', + '--netrc', + dest='netrc', + nargs='?', + action='store', + const=True, + default=False, + help='use netrc for reading passwords, uses default' + ' location if no path specified. Only for *nix systems.') + parser.add_argument('-f', '--format', dest='format', @@ -988,14 +1146,9 @@ def main(): change_openedx_site(args.platform) - # Query password, if not alredy passed by command line. - if not args.password: - args.password = getpass.getpass(stream=sys.stderr) - - if not args.username or not args.password: - logging.error("You must supply username and password to log-in") - exit(ExitCode.MISSING_CREDENTIALS) - + # Query password, if not alredy passed by command line or if no netrc file provided. + args.username, args.password = get_credentials(args.username, args.password, args.netrc) + # Prepare Headers headers = edx_get_headers() diff --git a/test/auth/netrc b/test/auth/netrc new file mode 100644 index 00000000..3032f98f --- /dev/null +++ b/test/auth/netrc @@ -0,0 +1 @@ +machine edx-dl login user@mail.com password secret \ No newline at end of file diff --git a/test/auth/not_netrc b/test/auth/not_netrc new file mode 100644 index 00000000..f0f877ce --- /dev/null +++ b/test/auth/not_netrc @@ -0,0 +1 @@ +fake \ No newline at end of file diff --git a/test_edx_dl.py b/test_edx_dl.py index 5e716fcb..ac76e264 100644 --- a/test_edx_dl.py +++ b/test_edx_dl.py @@ -1,11 +1,41 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import os + import pytest from edx_dl import edx_dl, parsing from edx_dl.common import Unit, Video, DEFAULT_FILE_FORMATS +NETRC = \ + os.path.join(os.path.dirname(__file__), + "test", "auth", "netrc") + +NOT_NETRC = \ + os.path.join(os.path.dirname(__file__), + "test", "auth", "not_netrc") + + +def test_authenticate_through_netrc_with_given_path(): + username, password = edx_dl.authenticate_through_netrc(NETRC) + assert username == 'user@mail.com' + assert password == 'secret' + + +def test_authenticate_through_netrc_raises_exception(): + pytest.raises( + edx_dl.CredentialsError, + edx_dl.authenticate_through_netrc, + NOT_NETRC) + + +def test_get_credentials_with_netrc(): + username, password = edx_dl.get_credentials(netrc=NETRC) + assert username == 'user@mail.com' + assert password == 'secret' + + def test_failed_login(): resp = edx_dl.edx_login( edx_dl.LOGIN_API, edx_dl.edx_get_headers(), "guest", "guest")