Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ execute:
site along with the `-x` option. For example, `-x stanford`, if the course
that you want to get is hosted on Stanford's site.


*Alternative to providing username and password every time:*

On \*nix platforms, the use of a `~/.netrc` file is a good alternative to
specifying both your username (i.e., your email address) and password every
time on the command line. To use it, simply add a line like the one below to
a file named `.netrc` in your home directory:

machine edx-dl login <user> password <pass>

Then, simply invoke your command using the `-n` option like:

edx-dl -n --list-courses


# Docker container

You can run this application via [Docker](https://docker.com) if you want. Just install docker and run
Expand Down
171 changes: 162 additions & 9 deletions edx_dl/edx_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
import pickle
import re
import sys
import platform

try:
import netrc
except ImportError:
netrc = None

from functools import partial
from multiprocessing.dummy import Pool as ThreadPool
Expand Down Expand Up @@ -57,6 +63,14 @@
)


class CredentialsError(Exception):
    """
    Raised when no usable credentials can be found.

    Derives from Exception (not BaseException) so that it behaves like an
    ordinary application error: generic ``except Exception`` handlers see
    it, and it is not lumped together with interpreter-level signals such
    as SystemExit or KeyboardInterrupt.
    """


OPENEDX_SITES = {
'edx': {
'url': 'https://courses.edx.org',
Expand Down Expand Up @@ -210,6 +224,136 @@ def edx_get_subtitle(url, headers,
return None


def get_config_paths(config_name):  # pragma: no test
    """
    [Code from https://github.com/coursera-dl/coursera-dl/...]
    Return a list of config file paths to try in order, given the config
    file name (without its leading '.' or '_').

    On every platform other than Windows the result is ``[None]``, which
    leaves the choice of the default location to the consumer of the list.

    For Windows platforms, there are several paths that can be tried to
    retrieve the netrc file. There is, however, no "standard way" of doing
    things.

    A brief recap of the situation (all file paths are written in Unix
    convention):

    1. By default, Windows does not define a $HOME path. However, some
    people might define one manually, and many command-line tools imported
    from Unix will search the $HOME environment variable first. This
    includes MSYSGit tools (bash, ssh, ...) and Emacs.

    2. Windows defines two 'user paths': $USERPROFILE, and the
    concatenation of the two variables $HOMEDRIVE and $HOMEPATH. Both of
    these paths point by default to the same location, e.g.
    C:\\Users\\Username

    3. $USERPROFILE cannot be changed, however $HOMEDRIVE and $HOMEPATH
    can be changed. They are originally intended to be the equivalent of
    the $HOME path, but there are many known issues with them

    4. As for the name of the file itself, most of the tools ported from
    Unix will use the standard '.dotfile' scheme, but some of these will
    instead use "_dotfile". Of the latter, the two notable exceptions are
    vim, which will first try '_vimrc' before '.vimrc' (but it will try
    both) and git, which will require the user to name its netrc file
    '_netrc'.

    Relevant links :
    http://markmail.org/message/i33ldu4xl5aterrr
    http://markmail.org/message/wbzs4gmtvkbewgxi
    http://stackoverflow.com/questions/6031214/

    Because the whole thing is a mess, I suggest we tried various sensible
    defaults until we succeed or have depleted all possibilities.
    """

    if platform.system() != 'Windows':
        return [None]

    # Now, we only treat the case of Windows.
    # Each inner list is a group of variables whose values are concatenated
    # to form one candidate directory.
    env_vars = [["HOME"],
                ["HOMEDRIVE", "HOMEPATH"],
                ["USERPROFILE"],
                ["SYSTEMDRIVE"]]

    env_dirs = []
    for var_list in env_vars:
        # Unset (or empty) variables contribute an empty string, so a group
        # with nothing defined yields '' and is skipped below.
        directory = ''.join(os.getenv(var) or '' for var in var_list)
        if directory:
            env_dirs.append(directory)
        else:
            logging.debug('Environment var(s) %s not defined, skipping',
                          var_list)

    # Last-resort locations tried after the environment-derived ones.
    additional_dirs = ["C:", ""]
    all_dirs = env_dirs + additional_dirs

    # Both the Unix-style '.name' and the Windows-style '_name' are tried.
    leading_chars = [".", "_"]

    return [''.join([directory, os.sep, lc, config_name])
            for directory in all_dirs
            for lc in leading_chars]


def authenticate_through_netrc(path=None):
    """
    [Base code from https://github.com/coursera-dl/coursera-dl/...]
    Return the tuple user / password given a path for the .netrc file.

    If `path` is falsy, the candidate locations returned by
    get_config_paths("netrc") are tried in order; the first file holding
    an entry for the machine 'edx-dl' wins.

    Raises CredentialsError if no valid netrc file is found, or if the
    netrc module could not be imported.
    """
    # The module-level import falls back to ``netrc = None``; report that
    # as a credentials problem instead of failing with AttributeError.
    if netrc is None:
        raise CredentialsError(
            'The netrc module is not available in this Python installation')

    errors = []
    netrc_machine = 'edx-dl'
    candidates = [path] if path else get_config_paths("netrc")
    for candidate in candidates:
        try:
            logging.debug('Trying netrc file %s', candidate)
            auths = netrc.netrc(candidate).authenticators(netrc_machine)
        except (IOError, netrc.NetrcParseError) as e:
            # Unreadable or malformed file: remember why, try the next one.
            errors.append(e)
        else:
            if auths is None:
                errors.append('Didn\'t find any credentials for ' +
                              netrc_machine)
            else:
                # authenticators() yields (login, account, password).
                return auths[0], auths[2]

    error_messages = '\n'.join(str(e) for e in errors)
    raise CredentialsError(
        'Did not find valid netrc file:\n' + error_messages +
        '\nPlease run this command: chmod og-rw ~/.netrc')


def get_credentials(username=None, password=None, netrc=None):
    """
    Return valid username, password tuple.

    `netrc` selects netrc-based authentication: ``True`` means "use the
    default netrc location(s)", any other truthy value is taken as the
    path of the netrc file. When it is falsy, `username` and `password`
    are used, and the password is prompted for on stderr if only the
    username was given.

    Raises CredentialsError (propagated from authenticate_through_netrc)
    if netrc authentication was requested but yielded no credentials.
    Exits the process with ExitCode.MISSING_CREDENTIALS if netrc was not
    requested and the username or password is still missing.
    """
    if netrc:
        path = None if netrc is True else netrc
        return authenticate_through_netrc(path)

    # Query password, if not already passed by command line.
    if username and not password:
        password = getpass.getpass(stream=sys.stderr)

    if not username or not password:
        logging.error("You must supply username and password to log-in" +
                      ", or provide them in a netrc file")
        # sys.exit rather than the bare exit(), which is only injected by
        # the site module and is meant for interactive sessions.
        sys.exit(ExitCode.MISSING_CREDENTIALS)

    return username, password


def edx_login(url, headers, username, password):
"""
Log in user into the openedx website.
Expand Down Expand Up @@ -246,7 +390,7 @@ def parse_args():
# optional
parser.add_argument('-u',
'--username',
required=True,
default=None,
action='store',
help='your edX username (email)')

Expand All @@ -256,6 +400,20 @@ def parse_args():
help='your edX password, '
'beware: it might be visible to other users on your system')

# the netrc option can be passed a path to the netrc file or
# it can be used with noting in order to use the default
# netrc file location
parser.add_argument(
'-n',
'--netrc',
dest='netrc',
nargs='?',
action='store',
const=True,
default=False,
help='use netrc for reading passwords, uses default'
' location if no path specified. Only for *nix systems.')

parser.add_argument('-f',
'--format',
dest='format',
Expand Down Expand Up @@ -988,14 +1146,9 @@ def main():

change_openedx_site(args.platform)

# Query password, if not alredy passed by command line.
if not args.password:
args.password = getpass.getpass(stream=sys.stderr)

if not args.username or not args.password:
logging.error("You must supply username and password to log-in")
exit(ExitCode.MISSING_CREDENTIALS)

# Query password, if not alredy passed by command line or if no netrc file provided.
args.username, args.password = get_credentials(args.username, args.password, args.netrc)

# Prepare Headers
headers = edx_get_headers()

Expand Down
1 change: 1 addition & 0 deletions test/auth/netrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
machine edx-dl login user@mail.com password secret
1 change: 1 addition & 0 deletions test/auth/not_netrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fake
30 changes: 30 additions & 0 deletions test_edx_dl.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import pytest
from edx_dl import edx_dl, parsing
from edx_dl.common import Unit, Video, DEFAULT_FILE_FORMATS


# Directory holding the authentication fixtures, located next to this file.
_AUTH_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "test", "auth")

# Fixture with a well-formed netrc entry.
NETRC = os.path.join(_AUTH_FIXTURES_DIR, "netrc")

# Fixture whose content is not valid netrc syntax.
NOT_NETRC = os.path.join(_AUTH_FIXTURES_DIR, "not_netrc")


def test_authenticate_through_netrc_with_given_path():
    """An explicit netrc path yields the login and password stored in it."""
    username, password = edx_dl.authenticate_through_netrc(NETRC)
    assert (username, password) == ('user@mail.com', 'secret')


def test_authenticate_through_netrc_raises_exception():
    """A file that is not valid netrc makes authentication fail loudly."""
    with pytest.raises(edx_dl.CredentialsError):
        edx_dl.authenticate_through_netrc(NOT_NETRC)


def test_get_credentials_with_netrc():
    """Passing a netrc path to get_credentials reads the pair from it."""
    username, password = edx_dl.get_credentials(netrc=NETRC)
    assert (username, password) == ('user@mail.com', 'secret')


def test_failed_login():
resp = edx_dl.edx_login(
edx_dl.LOGIN_API, edx_dl.edx_get_headers(), "guest", "guest")
Expand Down