
Commit bcd9733

Fix parsing storage with nested array fields (#207)

droserasprout and m-kus authored

* Fix nested sets in tzkt
* Lint
* Cleanup
* changelog
* Refactor storage processing
* Linter fixes
* Fix tests
* Schemas, scripts
* Fix jsonschema preprocessing
* Cleanup, fix tests
* naming
* Naming

Co-authored-by: Michael Zaikin <[email protected]>

1 parent 90d3355 commit bcd9733

File tree

28 files changed: +2400 -73 lines changed


CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -4,10 +4,15 @@ Please use [this](https://docs.gitlab.com/ee/development/changelog.html) documen
 
 ## [unreleased]
 
+### Added
+
+* cli: Added `--keep-schemas` flag to `init` command to preserve JSONSchemas along with generated types.
+
 ### Fixed
 
 * demos: Tezos Domains and Homebase DAO demos were updated from edo2net to mainnet contracts.
 * hasura: Fixed missing relations for models with `ManyToManyField` fields.
+* tzkt: Fixed parsing storage with nested structures.
 
 ### Performance
 

scripts/init_tests.sh

Lines changed: 5 additions & 2 deletions
@@ -1,10 +1,13 @@
 #!/bin/bash
 cd tests/test_dipdup
-for name in "asdf" "qwer" "hjkl" "zxcv" "rewq" "hen_subjkt" "kolibri_ovens"
+for name in "asdf" "qwer" "hjkl" "zxcv" "rewq" "hen_subjkt" "kolibri_ovens" "yupana"
 do
-    dipdup -c $name.yml init
+    dipdup -c $name.yml init --keep-schemas
     mkdir -p types/$name/
+    mkdir -p schemas/$name/
     mv $name/types/$name/storage.py types/$name/storage.py
+    touch types/$name/__init__.py
+    mv $name/schemas/$name/storage.json schemas/$name/storage.json
     mv $name/types/$name/parameter/set_delegate.py types/$name/set_delegate.py || true
     rm -r $name
 done

src/dipdup/cli.py

Lines changed: 3 additions & 2 deletions
@@ -201,12 +201,13 @@ async def run(
 
 @cli.command(help='Generate missing callbacks and types')
 @click.option('--overwrite-types', is_flag=True, help='Regenerate existing types')
+@click.option('--keep-schemas', is_flag=True, help='Do not remove JSONSchemas after generating types')
 @click.pass_context
 @cli_wrapper
-async def init(ctx, overwrite_types: bool):
+async def init(ctx, overwrite_types: bool, keep_schemas: bool) -> None:
     config: DipDupConfig = ctx.obj.config
     dipdup = DipDup(config)
-    await dipdup.init(overwrite_types)
+    await dipdup.init(overwrite_types, keep_schemas)
 
 
 @cli.command(help='Migrate project to the new spec version')
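For reference, a standalone sketch (not part of this commit) of the click pattern used above; since `is_flag` options default to False, a plain `dipdup init` keeps the old behavior of removing JSONSchemas after type generation:

import click


@click.command(help='Generate missing callbacks and types')
@click.option('--keep-schemas', is_flag=True, help='Do not remove JSONSchemas after generating types')
def init(keep_schemas: bool) -> None:
    # is_flag options are False unless passed explicitly on the command line
    click.echo(f'keep_schemas={keep_schemas}')


if __name__ == '__main__':
    init()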

src/dipdup/codegen.py

Lines changed: 5 additions & 2 deletions
@@ -66,6 +66,8 @@ def preprocess_storage_jsonschema(schema: Dict[str, Any]) -> Dict[str, Any]:
     We resolve bigmaps from diffs so no need to include int in type signature."""
     if not isinstance(schema, dict):
         return schema
+    if 'oneOf' in schema:
+        schema['oneOf'] = [preprocess_storage_jsonschema(sub_schema) for sub_schema in schema['oneOf']]
     if 'properties' in schema:
         return {
             **schema,
@@ -104,14 +106,15 @@ def __init__(self, config: DipDupConfig, datasources: Dict[DatasourceConfigT, Da
         self._datasources = datasources
         self._schemas: Dict[TzktDatasourceConfig, Dict[str, Dict[str, Any]]] = {}
 
-    async def init(self, overwrite_types: bool = False) -> None:
+    async def init(self, overwrite_types: bool = False, keep_schemas: bool = False) -> None:
         self._logger.info('Initializing project')
         await self.create_package()
         await self.fetch_schemas()
         await self.generate_types(overwrite_types)
         await self.generate_hooks()
         await self.generate_handlers()
-        await self.cleanup()
+        if not keep_schemas:
+            await self.cleanup()
         await self.verify_package()
 
     async def docker_init(self, image: str, tag: str, env_file: str) -> None:
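Union types in contract storage surface as `oneOf` in the generated JSONSchemas, and before this commit only the top level of the storage schema was preprocessed. A hedged sketch of what the new branch enables, with a made-up schema (only `preprocess_storage_jsonschema` itself comes from the diff):

from typing import Any, Dict

from dipdup.codegen import preprocess_storage_jsonschema

# Hypothetical storage schema: the top level is a union, and one variant
# nests another union. The new `oneOf` branch recurses into every variant,
# so nested schemas get the same preprocessing as top-level ones.
schema: Dict[str, Any] = {
    'oneOf': [
        {'type': 'integer'},
        {
            'type': 'object',
            'properties': {
                'ledger': {'oneOf': [{'type': 'integer'}, {'type': 'object'}]},
            },
        },
    ],
}

processed = preprocess_storage_jsonschema(schema)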

src/dipdup/datasources/tzkt/models.py

Lines changed: 66 additions & 54 deletions
@@ -1,3 +1,4 @@
+import typing
 from contextlib import suppress
 from functools import lru_cache
 from itertools import groupby
@@ -6,11 +7,11 @@
 from typing import Iterable
 from typing import List
 from typing import Optional
+from typing import Tuple
 from typing import Type
 from typing import Union
 
 from pydantic.error_wrappers import ValidationError
-from pydantic.fields import FieldInfo
 from typing_extensions import get_args
 from typing_extensions import get_origin
 
@@ -21,74 +22,83 @@
 IntrospectionError = (KeyError, IndexError, AttributeError)
 
 
-def _extract_root_type(storage_type: Type) -> Type:
+def extract_root_outer_type(storage_type: Type) -> Type:
     """Extract Pydantic __root__ type"""
-    return storage_type.__fields__['__root__'].type_
+    root_field = storage_type.__fields__['__root__']
+    if root_field.allow_none:
+        return typing.Optional[root_field.type_]  # type: ignore
+    else:
+        return root_field.outer_type_
 
 
 @lru_cache(None)
-def _is_array(storage_type: Type) -> bool:
+def is_array_type(storage_type: Type) -> bool:
     """TzKT can return bigmaps as objects or as arrays of key-value objects. Guess it from storage type."""
     # NOTE: List[...]
     if get_origin(storage_type) == list:
         return True
 
-    # NOTE: Neither a list nor a Pydantic model, can't be an array
-    fields: Optional[Dict[str, FieldInfo]] = getattr(storage_type, '__fields__', None)
-    if fields is None:
-        return False
-
-    # NOTE: An item of a TzKT array
-    if 'key' in fields and 'value' in fields:
-        return True
-
-    # NOTE: Pydantic model with __root__ field, dive into it
+    # NOTE: Pydantic model with __root__ field subclassing List
    with suppress(*IntrospectionError):
-        root_type = _extract_root_type(storage_type)
-        return _is_array(root_type)  # type: ignore
+        root_type = extract_root_outer_type(storage_type)
+        return is_array_type(root_type)  # type: ignore
 
     # NOTE: Something else
     return False
 
 
 @lru_cache(None)
-def _extract_list_types(storage_type: Type[Any]) -> Iterable[Type[Any]]:
-    """Extract list item types from field type"""
-    # NOTE: Pydantic model with __root__ field
-    with suppress(*IntrospectionError):
-        return (_extract_root_type(storage_type),)
+def get_list_elt_type(list_type: Type[Any]) -> Type[Any]:
+    """Extract list item type from list type"""
+    # NOTE: Regular list
+    if get_origin(list_type) == list:
+        return get_args(list_type)[0]
 
-    # NOTE: Python list, return all args unpacking unions
-    with suppress(*IntrospectionError):
-        item_type = get_args(storage_type)[0]
-        if get_origin(item_type) == Union:
-            return get_args(item_type)
-        return (item_type,)
-
-    # NOTE: Something else
-    return ()
+    # NOTE: Pydantic model with __root__ field subclassing List
+    root_type = extract_root_outer_type(list_type)
+    return get_list_elt_type(root_type)  # type: ignore
 
 
 @lru_cache(None)
-def _extract_dict_types(storage_type: Type[Any], key: str) -> Iterable[Type[Any]]:
+def get_dict_value_type(dict_type: Type[Any], key: Optional[str] = None) -> Type[Any]:
     """Extract dict value types from field type"""
     # NOTE: Regular dict
-    if get_origin(storage_type) == dict:
-        return (get_args(storage_type)[1],)
+    if get_origin(dict_type) == dict:
+        return get_args(dict_type)[1]
+
+    # NOTE: Pydantic model with __root__ field subclassing Dict
+    with suppress(*IntrospectionError):
+        root_type = extract_root_outer_type(dict_type)
+        return get_dict_value_type(root_type, key)  # type: ignore
 
-    # NOTE: Unpack union args
-    if get_origin(storage_type) == Union:
-        return get_args(storage_type)
+    if key is None:
+        raise KeyError('Key name or alias is required for object introspection')
 
     # NOTE: Pydantic model, find the corresponding field and return its type
+    fields = dict_type.__fields__
+    for field in fields.values():
+        if key in (field.name, field.alias):
+            # NOTE: Pydantic does not preserve outer_type_ for Optional
+            if field.allow_none:
+                return typing.Optional[field.type_]  # type: ignore
+            else:
+                return field.outer_type_
+
+    # NOTE: Typically raised when we try the wrong Union branch
+    raise KeyError('Key not found')
+
+
+@lru_cache(None)
+def unwrap_union_type(union_type: Type) -> Tuple[bool, Tuple[Type, ...]]:
+    """Check if the type is either Optional or Union and return arg types if so"""
+    if get_origin(union_type) == Union:
+        return True, get_args(union_type)
 
     with suppress(*IntrospectionError):
-        fields = storage_type.__fields__
-        for field in fields.values():
-            if key in (field.name, field.alias):
-                return (field.type_,)
+        root_type = extract_root_outer_type(union_type)
+        return unwrap_union_type(root_type)  # type: ignore
 
-    # NOTE: Something else
-    return ()
+    return False, ()
@@ -124,30 +134,32 @@ def _apply_bigmap_diffs(
     return dict_storage
 
 
-def _process_storage(
-    storage: Any,
-    storage_type: Type[StorageType],
-    bigmap_diffs: Dict[int, Iterable[Dict[str, Any]]],
-) -> Any:
+def _process_storage(storage: Any, storage_type: Type[Any], bigmap_diffs: Dict[int, Iterable[Dict[str, Any]]]) -> Any:
     """Replace bigmap pointers with actual data from diffs"""
+    # NOTE: Check if Union or Optional (== Union[Any, NoneType])
+    is_union, arg_types = unwrap_union_type(storage_type)  # type: ignore
+    if is_union:
+        # NOTE: We have no choice but to try every possible branch until the first success
+        for arg_type in arg_types:
+            with suppress(*IntrospectionError):
+                return _process_storage(storage, arg_type, bigmap_diffs)
+
     # NOTE: Bigmap pointer, apply diffs
     if isinstance(storage, int) and type(storage) != storage_type:
-        is_array = _is_array(storage_type)
+        is_array = is_array_type(storage_type)  # type: ignore
         storage = _apply_bigmap_diffs(storage, bigmap_diffs, is_array)
 
     # NOTE: List, process recursively
     elif isinstance(storage, list):
+        elt_type = get_list_elt_type(storage_type)  # type: ignore
         for i, _ in enumerate(storage):
-            for item_type in _extract_list_types(storage_type):
-                with suppress(*IntrospectionError):
-                    storage[i] = _process_storage(storage[i], item_type, bigmap_diffs)
+            storage[i] = _process_storage(storage[i], elt_type, bigmap_diffs)
 
     # NOTE: Dict, process recursively
     elif isinstance(storage, dict):
         for key, value in storage.items():
-            for value_type in _extract_dict_types(storage_type, key):
-                with suppress(*IntrospectionError):
-                    storage[key] = _process_storage(value, value_type, bigmap_diffs)
+            value_type = get_dict_value_type(storage_type, key)  # type: ignore
+            storage[key] = _process_storage(value, value_type, bigmap_diffs)
 
     else:
         pass
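The heart of the fix is switching from `type_` to `outer_type_` when unwrapping Pydantic `__root__` models. A minimal sketch, assuming pydantic v1 (whose `__fields__`, `type_`, `outer_type_`, and `allow_none` attributes the diff relies on) and made-up model names:

from typing import List

from pydantic import BaseModel


class KV(BaseModel):
    key: str
    value: str


class Storage(BaseModel):
    # Nested array storage: TzKT returns it as a list of key/value objects
    __root__: List[KV]


root_field = Storage.__fields__['__root__']

# The old _extract_root_type returned `type_`, which pydantic resolves down
# to the element type, silently dropping the List[...] wrapper:
print(root_field.type_)        # <class 'KV'>

# extract_root_outer_type returns `outer_type_` instead, so is_array_type
# and get_list_elt_type can see that the storage really is an array:
print(root_field.outer_type_)  # typing.List[KV]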

src/dipdup/dipdup.py

Lines changed: 2 additions & 2 deletions
@@ -323,15 +323,15 @@ def schema(self) -> Schema:
             raise DipDupException('Schema is not initialized')
         return self._schema
 
-    async def init(self, overwrite_types: bool = True) -> None:
+    async def init(self, overwrite_types: bool = False, keep_schemas: bool = False) -> None:
         """Create new or update existing dipdup project"""
         await self._create_datasources()
 
         async with AsyncExitStack() as stack:
             for datasource in self._datasources.values():
                 await stack.enter_async_context(datasource)
 
-            await self._codegen.init(overwrite_types)
+            await self._codegen.init(overwrite_types, keep_schemas)
 
     async def docker_init(self, image: str, tag: str, env_file: str) -> None:
         await self._codegen.docker_init(image, tag, env_file)

src/dipdup/exceptions.py

Lines changed: 1 addition & 1 deletion
@@ -270,7 +270,7 @@ def _help(self) -> str:
         Failed to validate datasource message against generated type class.
 
         Expected type:
-        `{self.type_cls.__class__.__qualname__}`
+        `{self.type_cls.__name__}`
 
         Invalid data:
         {self.data}
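This one-line change fixes a subtle introspection bug: `type_cls` is already a class, so `type_cls.__class__` is its metaclass, and `__qualname__` printed the metaclass name instead of the generated type's. A quick illustration (pydantic v1 assumed; the model name is made up):

from pydantic import BaseModel


class Storage(BaseModel):
    pass


# `Storage` is itself a class, so `.__class__` is pydantic's metaclass:
print(Storage.__class__.__qualname__)  # ModelMetaclass
# `.__name__` yields the name the error message actually needs:
print(Storage.__name__)                # Storage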

tests/test_dipdup/asdf.yml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ contracts:
 datasources:
   tzkt:
     kind: tzkt
-    url: https://api.tzkt.io
+    url: https://api.hangzhou2net.tzkt.io
 
 indexes:
   asdf:

tests/test_dipdup/hjkl.yml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ contracts:
 datasources:
   tzkt:
     kind: tzkt
-    url: https://api.tzkt.io
+    url: https://api.hangzhou2net.tzkt.io
 
 indexes:
   hjkl:

tests/test_dipdup/qwer.yml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ contracts:
 datasources:
   tzkt:
     kind: tzkt
-    url: https://api.tzkt.io
+    url: https://api.hangzhou2net.tzkt.io
 
 indexes:
   qwer:
