Skip to content
This repository was archived by the owner on Jun 24, 2022. It is now read-only.

Commit 0b457b8

Browse files
committed
Adds Makefile and logging
1 parent d77a8ac commit 0b457b8

File tree

2 files changed: 36 additions and 3 deletions.

Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
+PYTHON = python
+
+build:
+	$(PYTHON) scripts/extractor.py
+
+clean:
+	rm -rf schemas

scripts/extractor.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import os
22
import re
3+
import sys
34
import json
45
import shutil
6+
import logging
57
import requests
68

79
from typing import Optional, Dict
@@ -18,6 +20,12 @@
1820
'extract_schemas', 'CLOUDFLARE_DOCS_URL', 'CLOUDFLARE_SCHEMAS_URL',
1921
]
2022

23+
log = logging.getLogger(__name__)
24+
log.setLevel(logging.INFO)
25+
handler = logging.StreamHandler(sys.stdout)
26+
handler.setFormatter(logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s'))
27+
log.addHandler(handler)
28+
2129
CLOUDFLARE_DOCS_URL = os.environ.get('NX_CLOUDFLARE_DOCS_URL', 'https://api.cloudflare.com/')
2230
CLOUDFLARE_SCHEMAS_URL = os.environ.get('NX_CLOUDFLARE_SCHEMAS_URL', f'{CLOUDFLARE_DOCS_URL}schemas/v4/')
2331

@@ -31,23 +39,30 @@ def _fetch_text(
3139
cache_path: Optional[str] = None,
3240
invalidate: bool = False
3341
) -> str:
42+
log.info('Fetching %s', url)
3443
if cache_path and not invalidate:
3544
if os.path.exists(cache_path):
45+
log.info('Reading cache from %s', cache_path)
3646
with open(cache_path) as cache_file:
3747
return cache_file.read()
3848
plain_text = requests.get(url).text
3949
if cache_path:
50+
log.info('Writing cache to %s', cache_path)
4051
with open(cache_path, 'w') as cache_file:
4152
cache_file.write(plain_text)
4253
return plain_text
4354

4455

4556
def _process_docs(text: str, *, base_url: str) -> Dict:
57+
log.info('Processing HTML docs...')
4658
tree = etree.fromstring(text, parser=etree.HTMLParser(huge_tree=True, remove_comments=True))
47-
return {
59+
result = {
4860
'sections': tree.xpath('//article[contains(@class, "api-section")]/header/h2/text()'),
4961
'app_url': urljoin(base_url, tree.xpath('//script[contains(@src, "apidocs-static/app-")]/@src')[0]),
5062
}
63+
log.info('Sections found: %d', len(result['sections']))
64+
log.info('App bundle URL: %s', result['app_url'])
65+
return result
5166

5267

5368
class _PyVisitor(NodeVisitor):
@@ -86,6 +101,7 @@ def visit_dict(self, obj):
86101

87102

88103
def _process_app(code: str) -> Dict:
104+
log.info('Processing app bundle...')
89105
result = {}
90106
visitor, obj_pos = _PyVisitor(), 0
91107
while section := RE_APP_SECTION.search(code, pos=obj_pos):
@@ -99,6 +115,7 @@ def _process_app(code: str) -> Dict:
99115
.replace('/', '.')
100116
.replace('-', '_')
101117
)
118+
log.debug('Found %s', py_obj['id'])
102119
result[py_id] = py_obj
103120
return result
104121

@@ -112,6 +129,7 @@ def extract_schemas(
112129
app_url: Optional[str] = None,
113130
base_url: Optional[str] = None,
114131
) -> Dict:
132+
log.info('Extracting schemas...')
115133
docs_info = {}
116134
if not app_url:
117135
base_url = base_url or CLOUDFLARE_DOCS_URL
@@ -120,6 +138,7 @@ def extract_schemas(
120138

121139
api_schemas = _process_app(_fetch_text(app_url))
122140
if verify and docs_info:
141+
log.info('Checking integrity...')
123142
# TODO: Do we have additional ways to verify the integrity?
124143
assert len(docs_info['sections']) == len(api_schemas)
125144

@@ -132,11 +151,14 @@ def extract_schemas(
132151
if output_path:
133152
output_path = Path(output_path)
134153
if remove_existing and output_path.exists():
154+
log.info('Deleting directory %s (--remove-existing)', output_path)
135155
shutil.rmtree(output_path)
136156

157+
log.info('Serializing schemas to %s', output_path)
137158
for _, schema in api_schemas.items():
138159
file_path = output_path / schema['id'].replace(CLOUDFLARE_SCHEMAS_URL, '')
139160
os.makedirs(file_path.parent, exist_ok=True)
161+
log.debug('Writing %s', file_path)
140162
with open(file_path, 'w') as json_file:
141163
json.dump({
142164
**schema,
@@ -148,6 +170,10 @@ def extract_schemas(
148170

149171

150172
if __name__ == '__main__':
151-
all_schemas = extract_schemas(output_path='../schemas/', remove_existing=True)
152-
with open('../schemas/schemas.json', 'w') as schemas_file:
173+
schemas_dir = Path(__file__).resolve().parent.parent / 'schemas'
174+
all_schemas = extract_schemas(output_path=schemas_dir, remove_existing=True)
175+
176+
schemas_path = schemas_dir / 'schemas.json'
177+
log.info('Serializing registry to %s', schemas_path)
178+
with open(schemas_path, 'w') as schemas_file:
153179
json.dump(all_schemas, schemas_file, indent=4)

0 commit comments

Comments
 (0)