@@ -43,28 +43,32 @@ def detect(self, source, ref=None, extra_args=None):
43
43
if not parsed_url .netloc :
44
44
return None
45
45
46
- url_parts_1 = parsed_url .path .split ("/history/" )
47
- url_parts_2 = url_parts_1 [0 ].split ("/" )
48
- if url_parts_2 [- 2 ] == "dataset" :
49
- self .dataset_id = url_parts_2 [- 1 ]
50
- else :
46
+ if "/dataset/" not in parsed_url .path :
47
+ # Not actually a dataset
51
48
return None
52
49
53
- api_url_path = "/api/3/action/"
50
+ # CKAN may be under a URL prefix, and we should accommodate that
51
+ url_prefix , dataset_url = parsed_url .path .split ("/dataset/" )
52
+
53
+ dataset_url_parts = dataset_url .split ("/" )
54
+ self .dataset_id = dataset_url_parts [0 ]
55
+
54
56
api_url = parsed_url ._replace (
55
- path = "/" . join ( url_parts_2 [: - 2 ]) + api_url_path , query = ""
57
+ path = f" { url_prefix } /api/3/action/" , query = ""
56
58
).geturl ()
57
59
58
60
status_show_url = f"{ api_url } status_show"
59
61
resp = self .urlopen (status_show_url )
60
62
if resp .status_code == 200 :
61
63
62
- # handle the activites
64
+ # Activity ID may be present either as a query parameter, activity_id
65
+ # or as part of the URL, under `/history/<activity-id>`. If `/history/`
66
+ # is present, that takes precedence over `activity_id`
63
67
activity_id = None
64
- if parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
68
+ if "history" in dataset_url_parts :
69
+ activity_id = dataset_url_parts [dataset_url_parts .index ("history" ) + 1 ]
70
+ elif parse_qs (parsed_url .query ).get ("activity_id" ) is not None :
65
71
activity_id = parse_qs (parsed_url .query ).get ("activity_id" )[0 ]
66
- if len (url_parts_1 ) == 2 :
67
- activity_id = url_parts_1 [- 1 ]
68
72
69
73
self .version = self ._fetch_version (api_url )
70
74
return {
0 commit comments