diff --git a/README.md b/README.md index 75ef31c1..4cabf138 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,7 @@ These are the current supported sites: - [France Université Numérique](https://www.france-universite-numerique-mooc.fr/) - [GW Online SEAS](http://openedx.seas.gwu.edu/) - George Washington University - [GW Online Open](http://mooc.online.gwu.edu/) - George Washington University +- [Xuetangx (学堂在线)](http://www.xuetangx.com/) This is the full [list of sites powered by Open edX][sites]. Not all of them are supported at the moment, we welcome you to contribute support for them diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 64486674..218a74ac 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -14,13 +14,14 @@ import pickle import re import sys +import math from functools import partial from multiprocessing.dummy import Pool as ThreadPool from six.moves.http_cookiejar import CookieJar from six.moves.urllib.error import HTTPError, URLError -from six.moves.urllib.parse import urlencode +from six.moves.urllib.parse import urlencode, quote from six.moves.urllib.request import ( urlopen, build_opener, @@ -93,19 +94,25 @@ 'bits':{ 'url':'http://any-learn.bits-pilani.ac.in', 'courseware-selector': ('nav', {'aria-label': 'Course Navigation'}), + }, + 'xuetangx': { + 'url': 'http://www.xuetangx.com', + 'courseware-selector': None, } } -BASE_URL = OPENEDX_SITES['edx']['url'] +SITE_NAME = 'edx' +BASE_URL = OPENEDX_SITES[SITE_NAME]['url'] EDX_HOMEPAGE = BASE_URL + '/login_ajax' LOGIN_API = BASE_URL + '/login_ajax' DASHBOARD = BASE_URL + '/dashboard' -COURSEWARE_SEL = OPENEDX_SITES['edx']['courseware-selector'] +COURSEWARE_SEL = OPENEDX_SITES[SITE_NAME]['courseware-selector'] def change_openedx_site(site_name): """ Changes the openedx website for the given one via the key """ + global SITE_NAME global BASE_URL global EDX_HOMEPAGE global LOGIN_API @@ -117,11 +124,15 @@ def change_openedx_site(site_name): logging.error("OpenEdX platform should be one of: 
%s", ', '.join(sites)) sys.exit(ExitCode.UNKNOWN_PLATFORM) - BASE_URL = OPENEDX_SITES[site_name]['url'] + SITE_NAME = site_name + BASE_URL = OPENEDX_SITES[SITE_NAME]['url'] EDX_HOMEPAGE = BASE_URL + '/login_ajax' LOGIN_API = BASE_URL + '/login_ajax' - DASHBOARD = BASE_URL + '/dashboard' - COURSEWARE_SEL = OPENEDX_SITES[site_name]['courseware-selector'] + if site_name == 'xuetangx': + DASHBOARD = BASE_URL + '/api/web/courses/mycourses?format=json' + else: + DASHBOARD = BASE_URL + '/dashboard' + COURSEWARE_SEL = OPENEDX_SITES[SITE_NAME]['courseware-selector'] def _display_courses(courses): @@ -135,10 +146,67 @@ def _display_courses(courses): logging.info(' %s', course.url) +def get_courses_info_xuetangx(url, headers): + """ + Extracts the courses information from the dashboard. + + This function is re-implemented for http://www.xuetangx.com, because + Xuetangx uses a REST API, which is quite different from other OpenEdX sites. + """ + def fetch_and_parse(base_url, param): + """ + Fetches the JSON API, and returns the total count, and a list of dicts + for the results on the current page. + + :param base_url: the URL of the API. + :param param: query parameters, represented by a list of tuples. + :return: a (total, results) tuple; (0, []) on failure. + """ + url = base_url + '?' 
+ urlencode(param)
+        page = get_page_contents(url, headers)
+        try:
+            d = json.loads(page)
+            total = d['total']
+            results = d['results']
+        except (ValueError, KeyError):
+            total = 0
+            results = []
+        return total, results
+
+    logging.info('Extracting course information from JSON API.')
+
+    api_url = BASE_URL + '/api/web/courses/mycourses'
+    query_params = [
+        [('type', 'started'), ('format', 'json')],
+        [('type', 'ended'), ('format', 'json')]
+    ]
+    # use default page size, and fetch multiple times, in case there is a hard
+    # limit set by the API
+    page_size = 10
+
+    courses = []
+    page_extractor = get_page_extractor(url)
+
+    for param in query_params:
+        total, results = fetch_and_parse(api_url, param)
+        page_count = int(math.ceil(1.0 * total / page_size))
+        for i in range(page_count):
+            if i:
+                # page needs to be re-fetched unless it is the first one
+                new_param = param + [('offset', i * page_size)]
+                _, results = fetch_and_parse(api_url, new_param)
+            courses += page_extractor.extract_courses(results, BASE_URL)
+
+    return courses
+
+
 def get_courses_info(url, headers):
     """
     Extracts the courses information from the dashboard.
""" + if SITE_NAME == 'xuetangx': + return get_courses_info_xuetangx(url, headers) + logging.info('Extracting course information from dashboard.') page = get_page_contents(url, headers) @@ -310,6 +378,14 @@ def parse_args(): default=False, help='list available sections') + parser.add_argument('--quality', + dest='quality', + action='store', + choices={'high', 'standard'}, + default='high', + help='quality of video to download; works for xuetangx' + ' only') + parser.add_argument('--youtube-dl-options', dest='youtube_dl_options', action='store', @@ -437,6 +513,9 @@ def extract_units(url, headers, file_formats): page = get_page_contents(url, headers) page_extractor = get_page_extractor(url) + set_headers = getattr(page_extractor, 'set_headers', None) + if callable(set_headers): + set_headers(headers) units = page_extractor.extract_units_from_html(page, BASE_URL, file_formats) return units @@ -666,27 +745,45 @@ def _build_subtitles_downloads(video, target_dir, filename_prefix, headers): return downloads -def _build_url_downloads(urls, target_dir, filename_prefix): +def _build_url_downloads(urls, target_dir, filename_prefix, args, + is_video=False): """ Builds a dict {url: filename} for the given urls If it is a youtube url it uses the valid template for youtube-dl otherwise just takes the name of the file from the url """ + if SITE_NAME == 'xuetangx' and is_video and urls: + # take advantage of the fact that the URL of HQ videos are + # lexicographically larger on Xuetangx ('quality20' > 'quality10') + urls = [max(urls)] if args.quality == 'high' else [min(urls)] downloads = {url: - _build_filename_from_url(url, target_dir, filename_prefix) + _build_filename_from_url(url, target_dir, filename_prefix, + is_video=is_video) for url in urls} return downloads -def _build_filename_from_url(url, target_dir, filename_prefix): +def _build_filename_from_url(url, target_dir, filename_prefix, is_video=False, + video_counter=[0]): """ Builds the appropriate filename for the given 
args
     """
+    # video file names in Xuetangx do not make sense;
+    # use a counter as a workaround
+    if is_video:
+        video_counter[0] += 1
+
     if is_youtube_url(url):
         filename_template = filename_prefix + "-%(title)s-%(id)s.%(ext)s"
         filename = os.path.join(target_dir, filename_template)
     else:
-        original_filename = url.rsplit('/', 1)[1]
+        if SITE_NAME == 'xuetangx' and is_video:
+            original_filename = 'video_%05d.mp4' % video_counter[0]
+        else:
+            original_filename = url.rsplit('/', 1)[1]
+            # remove special characters that may cause problems under Windows
+            original_filename = ''.join(list(filter(
+                lambda c: c not in ';/?:@&=+$,', original_filename)))
         filename = os.path.join(target_dir,
                                 filename_prefix + '-' + original_filename)
 
@@ -697,6 +794,8 @@ def download_url(url, filename, headers, args):
     """
     Downloads the given url in filename.
     """
+    # resolve unicode issue
+    url = quote(url, safe=';/?:@&=+$,')
 
     if is_youtube_url(url):
         download_youtube_url(url, filename, headers, args)
@@ -779,13 +878,15 @@ def skip_or_download(downloads, headers, args, f=download_url):
 
 def download_video(video, args, target_dir, filename_prefix, headers):
     if args.prefer_cdn_videos or video.video_youtube_url is None:
         mp4_downloads = _build_url_downloads(video.mp4_urls, target_dir,
-                                             filename_prefix)
+                                             filename_prefix, args,
+                                             is_video=True)
         skip_or_download(mp4_downloads, headers, args)
     else:
         if video.video_youtube_url is not None:
             youtube_downloads = _build_url_downloads([video.video_youtube_url],
                                                      target_dir,
-                                                     filename_prefix)
+                                                     filename_prefix, args,
+                                                     is_video=True)
             skip_or_download(youtube_downloads, headers, args)
 
     # the behavior with subtitles is different, since the subtitles don't know
@@ -813,7 +914,7 @@ def download_unit(unit, args, target_dir, filename_prefix, headers):
         download_video(video, args, target_dir, new_prefix, headers)
 
     res_downloads = _build_url_downloads(unit.resources_urls, target_dir,
-                                         filename_prefix)
+                                         filename_prefix, args)
     skip_or_download(res_downloads, headers, args)
 
 
@@ 
-827,13 +928,19 @@ def download(args, selections, all_units, headers): # notice that we could iterate over all_units, but we prefer to do it over # sections/subsections to add correct prefixes and show nicer information. + # courses on Xuetangx may contain chinese characters + preserve_non_ascii = (SITE_NAME == 'xuetangx') + for selected_course, selected_sections in selections.items(): - coursename = directory_name(selected_course.name) + coursename = directory_name(selected_course.name, + minimal_change=preserve_non_ascii) for selected_section in selected_sections: section_dirname = "%02d-%s" % (selected_section.position, selected_section.name) target_dir = os.path.join(args.output_dir, coursename, - clean_filename(section_dirname)) + clean_filename(section_dirname, + minimal_change= + preserve_non_ascii)) mkdir_p(target_dir) counter = 0 for subsection in selected_section.subsections: diff --git a/edx_dl/parsing.py b/edx_dl/parsing.py index 5e50d354..e8980376 100644 --- a/edx_dl/parsing.py +++ b/edx_dl/parsing.py @@ -5,6 +5,7 @@ """ import re import json +import logging from datetime import timedelta, datetime @@ -12,6 +13,7 @@ from bs4 import BeautifulSoup as BeautifulSoup_ from .common import Course, Section, SubSection, Unit, Video +from .utils import get_page_contents, remove_blanks # Force use of bs4 with html.parser @@ -188,7 +190,9 @@ def extract_resources_urls(self, text, BASE_URL, file_formats): youtube_links = re_youtube_links.findall(text) resources_urls += youtube_links - return resources_urls + # there may be some surplus blank characters extracted from the HTML; + # remove them + return list(map(remove_blanks, resources_urls)) def extract_sections_from_html(self, page, BASE_URL): """ @@ -408,6 +412,75 @@ def _make_subsections(section_soup): return sections +class XuetangxPageExtractor(ClassicEdXPageExtractor): + + def __init__(self): + self.headers = None + + def set_headers(self, headers): + """Sets the headers necessary for accessing the video URL 
API""" + self.headers = headers + self.base_url = None + + def extract_courses(self, results, BASE_URL): + """ + Extract courses from a list of dicts. + """ + courses = [] + + for result in results: + try: + course_id = result['id'] + course_name = result['name'] + course_url = BASE_URL + result['info_link'] + # Xuetangx allows accessing materials for all archived courses, + # so it's safe to mark all courses as 'Started'. + course_state = 'Started' + except KeyError: + continue + courses.append(Course(id=course_id, + name=course_name, + url=course_url, + state=course_state)) + + return courses + + def extract_units_from_html(self, page, BASE_URL, file_formats): + self.base_url = BASE_URL + return ClassicEdXPageExtractor.extract_units_from_html(self, page, + BASE_URL, + file_formats) + + def extract_mp4_urls(self, text): + """ + Looks for available links to the mp4 version of the videos + """ + # Xuetangx does not provide the video URL directly in the page; + # instead, a video id can be found in the page and translated into + # actual URL through a "video2source" API. 
+        m = re.search(r"(?<=data-ccsource=').+(?=')", text)
+        if not m:
+            return []
+
+        video_id = m.group(0)
+        if not self.base_url:
+            logging.debug('Base URL unset; please set self.base_url before '
+                          'calling extract_mp4_urls')
+            return []
+        video_src_url = self.base_url + '/videoid2source/' + video_id
+        video_src_json = get_page_contents(video_src_url, self.headers)
+        try:
+            sources = json.loads(video_src_json)['sources']
+        except (ValueError, KeyError):
+            return []
+
+        mp4_urls = []
+        for quality in sources:
+            if sources[quality]:
+                mp4_urls.append(sources[quality][0])
+        return mp4_urls
+
+
 def get_page_extractor(url):
     """
     factory method for page extractors
@@ -423,6 +496,8 @@ def get_page_extractor(url):
            url.startswith('https://www.fun-mooc.fr')
            ):
         return CurrentEdXPageExtractor()
+    elif 'xuetangx.com' in url:
+        return XuetangxPageExtractor()
     else:
         return ClassicEdXPageExtractor()
diff --git a/edx_dl/utils.py b/edx_dl/utils.py
index 0ec44718..6a61f601 100644
--- a/edx_dl/utils.py
+++ b/edx_dl/utils.py
@@ -42,11 +42,11 @@ def execute_command(cmd, args):
         raise e
 
 
-def directory_name(initial_name):
+def directory_name(initial_name, minimal_change=False):
     """
     Transform the name of a directory into an ascii version
     """
-    result = clean_filename(initial_name)
+    result = clean_filename(initial_name, minimal_change=minimal_change)
     return result if result != "" else "course_folder"
 
 
@@ -139,3 +139,8 @@ def clean_filename(s, minimal_change=False):
     s = s.strip().replace(' ', '_')
     valid_chars = '-_.()%s%s' % (string.ascii_letters, string.digits)
     return ''.join(c for c in s if c in valid_chars)
+
+
+def remove_blanks(s):
+    """Remove all blank characters from a string."""
+    return ''.join(list(filter(lambda c: not c.isspace(), s)))
\ No newline at end of file