6
6
"""
7
7
import enum
8
8
import os
9
- from typing import Any , Dict , List , Optional , Tuple
9
+ import re
10
+ from typing import Any , Dict , List , Optional , Pattern , Tuple
10
11
11
12
import jsonschema
12
13
import yaml
44
45
REPO_TYPE_GITLAB : str = "gitlab"
45
46
_REPO_TYPES : List [str ] = [REPO_TYPE_GITHUB , REPO_TYPE_GITLAB ]
46
47
48
+ _GITHUB_REF_RE : Pattern [str ] = re .compile (r"/([^/]+)/data-manager/" )
49
+
47
50
48
51
class TextEncoding (enum .Enum ):
49
52
"""A general text encoding format, used initially for Job text fields."""
@@ -93,7 +96,7 @@ def get_supported_repository_types() -> List[str]:
93
96
def _get_github_job_doc_url (
94
97
manifest_url : str , collection : str , job_id : str , doc_url : Optional [str ]
95
98
) -> str :
96
- """Returns the path to the doc for a GitHub public reference,
99
+ """Returns the path to the 'pretty' doc for a GitHub public reference,
97
100
based on the manifest URL, collection and Job ID.
98
101
"""
99
102
manifest_directory_url , _ = os .path .split (manifest_url )
@@ -110,6 +113,21 @@ def _get_github_job_doc_url(
110
113
# How did we get here?
111
114
assert False
112
115
116
+ # The doc-url here is to the 'raw' file.
117
+ # Adjust it so that it refers to the 'pretty' file.
118
+ #
119
+ # We replace:
120
+ #
121
+ # - raw.githubusercontent.com with github.com
122
+ # - The field prior to 'data-manager' (the branch/tag) with '/blob/(field)'
123
+ doc_url = doc_url .replace ("raw.githubusercontent.com" , "github.com" , 1 )
124
+ ref_match = _GITHUB_REF_RE .search (doc_url )
125
+ assert ref_match
126
+ original_ref = ref_match [0 ]
127
+ ref = ref_match [1 ]
128
+ new_ref : str = f"/blob/{ ref } /data-manager/"
129
+ doc_url = doc_url .replace (original_ref , new_ref , 1 )
130
+
113
131
return doc_url
114
132
115
133
0 commit comments