@@ -43,28 +43,32 @@ def detect(self, source, ref=None, extra_args=None):
4343 if not parsed_url .netloc :
4444 return None
4545
46- url_parts_1 = parsed_url .path .split ("/history/" )
47- url_parts_2 = url_parts_1 [0 ].split ("/" )
48- if url_parts_2 [- 2 ] == "dataset" :
49- self .dataset_id = url_parts_2 [- 1 ]
50- else :
46+ if "/dataset/" not in parsed_url .path :
47+ # Not actually a dataset
5148 return None
5249
53- api_url_path = "/api/3/action/"
50+ # CKAN may be under a URL prefix, and we should accommodate that
51+ url_prefix , dataset_url = parsed_url .path .split ("/dataset/" )
52+
53+ dataset_url_parts = dataset_url .split ("/" )
54+ self .dataset_id = dataset_url_parts [0 ]
55+
5456 api_url = parsed_url ._replace (
55- path = "/" . join ( url_parts_2 [: - 2 ]) + api_url_path , query = ""
57+ path = f" { url_prefix } /api/3/action/" , query = ""
5658 ).geturl ()
5759
5860 status_show_url = f"{ api_url } status_show"
5961 resp = self .urlopen (status_show_url )
6062 if resp .status_code == 200 :
6163
62- # handle the activites
64+ # Activity ID may be present either as a query parameter, activity_id
65+ # or as part of the URL, under `/history/<activity-id>`. If `/history/`
66+ # is present, that takes precedence over `activity_id`
6367 activity_id = None
64- if parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
68+ if "history" in dataset_url_parts :
69+ activity_id = dataset_url_parts [dataset_url_parts .index ("history" ) + 1 ]
70+ elif parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
6571 activity_id = parse_qs (parsed_url .query ).get ("activity_id" )[0 ]
66- if len (url_parts_1 ) == 2 :
67- activity_id = url_parts_1 [- 1 ]
6872
6973 self .version = self ._fetch_version (api_url )
7074 return {
0 commit comments