Skip to content

Commit b209e4e

Browse files
committed
Merge commit '7a93ea' into develop
2 parents 673f898 + 7a93ea0 commit b209e4e

File tree

7 files changed

+283
-12
lines changed

7 files changed

+283
-12
lines changed

dataprep/connector/connector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,9 @@ async def _query_imp( # pylint: disable=too-many-locals,too-many-branches,too-m
274274
# The API returns empty for this page, maybe we've reached the end
275275
break
276276

277-
last_id = int(df.iloc[-1, df.columns.get_loc(pagdef.seek_id)]) - 1 # type: ignore
277+
cid = df.columns.get_loc(pagdef.seek_id)
278+
last_id = int(df.iloc[-1, cid]) - 1 # type: ignore
279+
278280
dfs.append(df)
279281
elif isinstance(pagdef, TokenPaginationDef):
280282
next_token = None
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""ConfigGenerator"""
2+
from .generator import ConfigGenerator
3+
from .ui import ConfigGeneratorUI
4+
5+
__all__ = ["ConfigGenerator", "ConfigGeneratorUI"]
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""This module implements the generation of connector configuration files."""
2+
3+
from dataprep.connector.schema.base import BaseDef
4+
from pathlib import Path
5+
from typing import Any, Dict, Optional, Union
6+
from urllib.parse import parse_qs, urlparse, urlunparse
7+
8+
import requests
9+
10+
from ..schema import (
11+
AuthorizationDef,
12+
ConfigDef,
13+
PaginationDef,
14+
)
15+
from .state import ConfigState
16+
from .table import gen_schema_from_path, search_table_path
17+
18+
# class Example(TypedDict):
19+
# url: str
20+
# method: str
21+
# params: Dict[str, str]
22+
# authorization: Tuple[Dict[str, Any], Dict[str, Any]]
23+
# pagination: Dict[str, Any]
24+
25+
26+
class ConfigGenerator:
    """Config Generator.

    Accumulates a connector configuration from request examples: every call
    to :meth:`add_example` produces a config for that example and merges it
    into the state held in ``self.config``.

    Parameters
    ----------
    config
        Initialize the config generator with existing config file.

    """

    config: ConfigState
    storage: Dict[str, Any]  # for auth usage

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        if config is None:
            self.config = ConfigState(None)
        else:
            self.config = ConfigState(ConfigDef(**config))
        self.storage = {}

    def add_example(  # pylint: disable=too-many-locals
        self, example: Dict[str, Any]
    ) -> None:
        """Add an example to the generator. The example
        should be in the dictionary format.

        class Example(TypedDict):
            url: str
            method: str
            params: Dict[str, str]
            # 0 for def and 1 for params
            authorization: Optional[Tuple[Dict[str, Any], Dict[str, Any]]]
            pagination: Optional[Dict[str, Any]]

        The ``example`` dictionary itself is never modified.

        Parameters
        ----------
        example
            The request example.

        Raises
        ------
        ValueError
            If the method is not one of POST/GET/PUT, or a parameter appears
            both in the url and in ``params`` with different values.
        NotImplementedError
            If the method is allowed but is not GET.
        """
        url = example["url"]
        method = example["method"]
        if method not in {"POST", "GET", "PUT"}:
            raise ValueError(f"{method} not allowed.")
        if method != "GET":
            raise NotImplementedError(f"{method} not implemented.")

        # Work on a copy: query-string parameters are merged in below and the
        # caller's dictionary must not be changed as a side effect.
        params = dict(example.get("params") or {})

        # Move url params to params
        parsed = urlparse(url)

        # parse_qs maps each key to a list of values; only the first is kept.
        for key, (val, *_) in parse_qs(parsed.query).items():
            if key in params and params[key] != val:
                raise ValueError(
                    f"{key} appears in both url and params, but have different values."
                )
            params[key] = val

        # Rebuild the url without its query string.
        url = urlunparse(parsed._replace(query=""))
        req = {
            "method": method,
            "url": url,
            "headers": {},
            "params": params,
        }

        # Parse authorization and build authorization into request
        authdef: Optional[AuthorizationDef] = None
        authparams: Optional[Dict[str, Any]] = None
        if example.get("authorization") is not None:
            authorization, authparams = example["authorization"]
            authdef = AuthUnion(val=authorization).val

        if authdef is not None and authparams is not None:
            authdef.build(req, authparams, self.storage)

        # Send out request and construct config
        config = _create_config(req)

        # Add pagination information into the config
        pagination = example.get("pagination")
        if pagination is not None:
            pagdef = PageUnion(val=pagination).val
            config.request.pagination = pagdef

        self.config += config

    def to_string(self) -> str:
        """Output the string format of the current config."""
        return str(self.config)

    def save(self, path: Union[str, Path]) -> None:
        """Save the current config to a file.

        Parameters
        ----------
        path
            The path to the saved file, with the file extension.
        """
        path = Path(path)

        # Explicit encoding so the output does not depend on the platform locale.
        with open(path, "w", encoding="utf-8") as f:
            f.write(self.to_string())
130+
131+
132+
def _create_config(req: Dict[str, Any]) -> ConfigDef:
    """Issue the example request and derive a ``ConfigDef`` from its JSON reply.

    ``req`` must provide the keys ``method``, ``url``, ``params`` and
    ``headers``. The table path is located with ``search_table_path`` and the
    schema is generated from that path with ``gen_schema_from_path``.

    Raises
    ------
    RuntimeError
        If the endpoint answers with a status code other than 200.
    """
    method, url, query = req["method"], req["url"], req["params"]

    resp = requests.request(method.lower(), url, params=query, headers=req["headers"])
    if resp.status_code != 200:
        raise RuntimeError(
            f"Request to HTTP endpoint not successful: {resp.status_code}: {resp.text}"
        )

    payload = resp.json()
    table_path = search_table_path(payload)

    request_section: Dict[str, Any] = {
        "url": url,
        "method": method,
        # Only the parameter names are recorded; each one maps to False.
        "params": dict.fromkeys(query, False),
    }
    response_section: Dict[str, Any] = {
        "ctype": "application/json",
        "orient": "records",
        "tablePath": table_path,
        "schema": gen_schema_from_path(table_path, payload),
    }

    return ConfigDef(version=1, request=request_section, response=response_section)
161+
162+
163+
class AuthUnion(BaseDef):
    """Single-field wrapper model around ``AuthorizationDef``.

    ``ConfigGenerator.add_example`` builds it as ``AuthUnion(val=...)`` from
    the raw authorization dictionary and reads the result back from ``val``.
    NOTE(review): presumably this relies on ``BaseDef`` validation to turn the
    raw dictionary into the matching authorization definition -- confirm.
    """

    val: AuthorizationDef
165+
166+
167+
class PageUnion(BaseDef):
    """Single-field wrapper model around ``PaginationDef``.

    ``ConfigGenerator.add_example`` builds it as ``PageUnion(val=...)`` from
    the raw pagination dictionary and reads the result back from ``val``.
    NOTE(review): presumably this relies on ``BaseDef`` validation to turn the
    raw dictionary into the matching pagination definition -- confirm.
    """

    val: PaginationDef
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""Defines ConfigState."""
2+
3+
from typing import Optional
4+
5+
from dataprep.connector.schema.defs import ConfigDef
6+
7+
8+
class ConfigState:
    """Container for an optional ``ConfigDef`` that grows by merging.

    ``state + config`` returns a new ``ConfigState``; the left operand is not
    modified.
    """

    config: Optional[ConfigDef] = None

    def __init__(self, config: Optional[ConfigDef]) -> None:
        self.config = config

    def __add__(self, rhs: ConfigDef) -> "ConfigState":
        current = self.config
        # With nothing stored yet the right-hand side becomes the state as is;
        # otherwise it is merged into the stored config.
        combined = rhs if current is None else current.merge(rhs)
        return ConfigState(combined)

    def __str__(self) -> str:
        return str(self.config)

    def __repr__(self) -> str:
        return str(self)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""Table parsing utilities."""
2+
3+
from collections import defaultdict
4+
from operator import itemgetter
5+
from typing import Any, Dict, Set, Tuple
6+
7+
from jsonpath_ng import parse as jparse
8+
9+
from ..schema import SchemaFieldDef
10+
11+
12+
def search_table_path(val: Any) -> str:
    """Search table path in a json dict.

    Returns the JSONPath (ending in ``[*]``) of the table node with the most
    rows. When several tables have the same number of rows, the one whose
    path sorts first is returned, so the result is stable between runs.

    A payload that is itself a list of records is reported as ``$[*]``.

    Raises
    ------
    ValueError
        If no table node can be found in ``val``.
    """

    if isinstance(val, dict):
        paths = _search_table_path("$", val)
    elif is_table_node(val):
        # Top-level JSON array of records: the root itself is the table.
        paths = {("$[*]", len(val))}
    else:
        paths = set()

    if not paths:
        raise ValueError("No tables found.")

    # max() returns the first maximal item it encounters, so sorting first
    # makes the tie-break independent of set iteration order.
    return max(sorted(paths), key=itemgetter(1))[0]


def _search_table_path(base: str, val: Dict[str, Any]) -> Set[Tuple[str, int]]:
    """Collect ``(path, row_count)`` for each table node found via nested dicts.

    Only dictionaries are descended into; the contents of lists that are not
    table nodes are not searched.
    """
    table_paths: Set[Tuple[str, int]] = set()
    for key, value in val.items():
        cur = f"{base}.{key}"
        if is_table_node(value):
            table_paths.add((f"{cur}[*]", len(value)))
        elif isinstance(value, dict):
            table_paths.update(_search_table_path(cur, value))

    return table_paths


def is_table_node(node: Any) -> bool:
    """Detect if a node is a table node.

    A table node is a list whose elements are all dictionaries with string
    keys. An empty list qualifies.
    """

    if not isinstance(node, list):
        return False

    # Rows are deliberately not required to share one key set: different rows
    # might carry different keys.
    return all(
        isinstance(row, dict) and all(isinstance(key, str) for key in row)
        for row in node
    )
53+
54+
55+
def gen_schema_from_path(path: str, val: Dict[str, Any]) -> Dict[str, SchemaFieldDef]:
    """Generate the table schema from a path to the table.

    Every row matched by ``path`` in ``val`` is scanned. The first time a key
    is seen it gets a field definition targeting ``$.<key>``, typed from the
    Python type of that first value through ``_TYPE_MAPPING``. Later
    occurrences of the same key are ignored.
    """

    schema = {}

    for match in jparse(path).find(val):
        for name, cell in match.value.items():
            if name not in schema:
                schema[name] = SchemaFieldDef(
                    target=f"$.{name}",
                    type=_TYPE_MAPPING[type(cell)],
                    description="auto generated",
                )

    return schema
72+
73+
74+
_TYPE_MAPPING = defaultdict(
75+
lambda: "object", {int: "int", str: "string", float: "float", bool: "boolean",}
76+
)

dataprep/connector/implicit_database.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,7 @@ def from_json(self, data: str) -> Dict[str, List[Any]]:
6363
if respdef.orient == "records": # pylint: disable=no-member
6464
data_rows = [match.value for match in table_expr.find(data)]
6565

66-
for (
67-
column_name,
68-
column_def,
69-
) in respdef.schema_.items(): # pylint: disable=no-member
66+
for (column_name, column_def,) in respdef.schema_.items():
7067
column_target = column_def.target
7168
column_type = column_def.type
7269

@@ -107,13 +104,10 @@ def from_xml(self, data: str) -> Dict[str, List[Any]]:
107104
data = data.replace('<?xml version="1.0" encoding="UTF-8"?>', "")
108105

109106
root = etree.parse(StringIO(data))
110-
data_rows = root.xpath(respdef.table_path) # pylint: disable=no-member
107+
data_rows = root.xpath(respdef.table_path)
111108

112-
if respdef.orient == "records": # pylint: disable=no-member
113-
for (
114-
column_name,
115-
column_def,
116-
) in respdef.schema_.items(): # pylint: disable=no-member
109+
if respdef.orient == "records":
110+
for (column_name, column_def,) in respdef.schema_.items():
117111
column_target = column_def.target
118112
column_type = column_def.type
119113

dataprep/connector/schema/defs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def merge(self, rhs: BaseDefT) -> BaseDefT:
211211
"string": None,
212212
"float": "string",
213213
"int": "float",
214-
"bool": "string",
214+
"boolean": "string",
215215
}
216216

217217

0 commit comments

Comments
 (0)