Skip to content

Commit 983c69a

Browse files
first attempt
1 parent 6b72909 commit 983c69a

File tree

2 files changed

+56
-62
lines changed

2 files changed

+56
-62
lines changed

frictionless/resources/table.py

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from ..indexer import Indexer
1515
from ..platform import platform
1616
from ..resource import Resource
17+
from ..schema.fields_info import FieldsInfo
1718
from ..system import system
1819
from ..table import Header, Lookup, Row, Table
1920
from ..transformer import Transformer
@@ -265,24 +266,7 @@ def __open_lookup(self):
265266
self.__lookup[source_name][source_key].add(cells)
266267

267268
def __open_row_stream(self):
268-
# TODO: we need to rework this field_info / row code
269-
# During row streaming we create a field info structure
270-
# This structure is optimized and detached version of schema.fields
271-
# We create all data structures in-advance to share them between rows
272-
273-
# Create field info
274-
field_number = 0
275-
field_info: Dict[str, Any] = {"names": [], "objects": [], "mapping": {}}
276-
for field in self.schema.fields:
277-
field_number += 1
278-
field_info["names"].append(field.name)
279-
field_info["objects"].append(field.to_copy())
280-
field_info["mapping"][field.name] = (
281-
field,
282-
field_number,
283-
field.create_cell_reader(),
284-
field.create_cell_writer(),
285-
)
269+
field_info = FieldsInfo(self.schema.fields)
286270

287271
# Create state
288272
memory_unique: Dict[str, Any] = {}
@@ -405,13 +389,13 @@ def row_stream():
405389
self.__row_stream = row_stream()
406390

407391
def remove_missing_required_label_from_field_info(
408-
self, field: Field, field_info: Dict[str, Any]
392+
self, field: Field, fields_info: FieldsInfo
409393
):
410394
is_case_sensitive = self.dialect.header_case
411395
if self.label_is_missing(
412-
field.name, field_info["names"], self.labels, is_case_sensitive
396+
field.name, fields_info.ls(), self.labels, is_case_sensitive
413397
):
414-
self.remove_field_from_field_info(field.name, field_info)
398+
fields_info.rm(field.name)
415399

416400
@staticmethod
417401
def label_is_missing(
@@ -432,13 +416,6 @@ def label_is_missing(
432416

433417
return field_name not in table_labels and field_name in expected_field_names
434418

435-
@staticmethod
436-
def remove_field_from_field_info(field_name: str, field_info: Dict[str, Any]):
437-
field_index = field_info["names"].index(field_name)
438-
del field_info["names"][field_index]
439-
del field_info["objects"][field_index]
440-
del field_info["mapping"][field_name]
441-
442419
def primary_key_cells(self, row: Row, case_sensitive: bool) -> Tuple[Any, ...]:
443420
"""Create a tuple containing all cells from a given row associated to primary
444421
keys"""

frictionless/table/row.py

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from .. import errors, helpers
88
from ..platform import platform
9+
from ..schema.fields_info import FieldsInfo
910

1011
# NOTE:
1112
# Currently dict.update/setdefault/pop/popitem/clear are not disabled (can be confusing)
@@ -36,11 +37,11 @@ def __init__(
3637
self,
3738
cells: List[Any],
3839
*,
39-
field_info: Dict[str, Any],
40+
field_info: FieldsInfo,
4041
row_number: int,
4142
):
4243
self.__cells = cells
43-
self.__field_info = field_info
44+
self.__fields_info = field_info
4445
self.__row_number = row_number
4546
self.__processed: bool = False
4647
self.__blank_cells: Dict[str, Any] = {}
@@ -61,7 +62,7 @@ def __repr__(self):
6162

6263
def __setitem__(self, key: str, value: Any):
6364
try:
64-
_, field_number, _, _ = self.__field_info["mapping"][key]
65+
field_number = self.__fields_info.get(key).field_number
6566
except KeyError:
6667
raise KeyError(f"Row does not have a field {key}")
6768
if len(self.__cells) < field_number:
@@ -73,38 +74,38 @@ def __missing__(self, key: str):
7374
return self.__process(key)
7475

7576
def __iter__(self):
76-
return iter(self.__field_info["names"])
77+
return iter(self.__fields_info.ls())
7778

7879
def __len__(self):
79-
return len(self.__field_info["names"])
80+
return len(self.__fields_info.ls())
8081

8182
def __contains__(self, key: object):
82-
return key in self.__field_info["mapping"]
83+
return key in self.__fields_info.ls()
8384

8485
def __reversed__(self):
85-
return reversed(self.__field_info["names"])
86+
return reversed(self.__fields_info.ls())
8687

8788
def keys(self):
88-
return iter(self.__field_info["names"])
89+
return iter(self.__fields_info.ls())
8990

9091
def values(self): # type: ignore
91-
for name in self.__field_info["names"]:
92+
for name in self.__fields_info.ls():
9293
yield self[name]
9394

9495
def items(self): # type: ignore
95-
for name in self.__field_info["names"]:
96+
for name in self.__fields_info.ls():
9697
yield (name, self[name])
9798

9899
def get(self, key: str, default: Optional[Any] = None):
99-
if key not in self.__field_info["names"]:
100+
if key not in self.__fields_info.ls():
100101
return default
101102
return self[key]
102103

103104
@cached_property
104105
def cells(self):
105106
"""
106-
Returns:
107-
Field[]: table schema fields
107+
Returns:
108+
Any[]: row cells
108109
"""
109110
return self.__cells
110111

@@ -114,23 +115,23 @@ def fields(self):
114115
Returns:
115116
Field[]: table schema fields
116117
"""
117-
return self.__field_info["objects"]
118+
return self.__fields_info.get_copies()
118119

119120
@cached_property
120121
def field_names(self) -> List[str]:
121122
"""
122123
Returns:
123124
str[]: field names
124125
"""
125-
return self.__field_info["names"]
126+
return self.__fields_info.ls()
126127

127128
@cached_property
128129
def field_numbers(self):
129130
"""
130131
Returns:
131132
str[]: field numbers
132133
"""
133-
return list(range(1, len(self.__field_info["names"]) + 1))
134+
return list(range(1, len(self.__fields_info.ls()) + 1))
134135

135136
@cached_property
136137
def row_number(self) -> int:
@@ -201,14 +202,18 @@ def to_list(self, *, json: bool = False, types: Optional[List[str]] = None):
201202

202203
# Prepare
203204
self.__process()
204-
result = [self[name] for name in self.__field_info["names"]]
205+
result = [self[name] for name in self.__fields_info.ls()]
205206
if types is None and json:
206207
types = platform.frictionless_formats.JsonParser.supported_types
207208

208209
# Convert
209210
if types is not None:
210-
for index, field_mapping in enumerate(self.__field_info["mapping"].values()):
211-
field, _, _, cell_writer = field_mapping
211+
field_names = self.__fields_info.ls()
212+
for index, field_name in enumerate(field_names):
213+
field_info = self.__fields_info.get(field_name)
214+
field = field_info.field
215+
cell_writer = field_info.cell_writer
216+
212217
# Here we can optimize performance if we use a types mapping
213218
if field.type in types:
214219
continue
@@ -223,7 +228,11 @@ def to_list(self, *, json: bool = False, types: Optional[List[str]] = None):
223228
return result
224229

225230
def to_dict(
226-
self, *, csv: bool = False, json: bool = False, types: Optional[List[str]] = None
231+
self,
232+
*,
233+
csv: bool = False,
234+
json: bool = False,
235+
types: Optional[List[str]] = None,
227236
) -> Dict[str, Any]:
228237
"""
229238
Parameters:
@@ -235,16 +244,20 @@ def to_dict(
235244

236245
# Prepare
237246
self.__process()
238-
result = {name: self[name] for name in self.__field_info["names"]}
247+
result = {name: self[name] for name in self.__fields_info.ls()}
239248
if types is None and json:
240249
types = platform.frictionless_formats.JsonParser.supported_types
241250
if types is None and csv:
242251
types = platform.frictionless_formats.CsvParser.supported_types
243252

244253
# Convert
245254
if types is not None:
246-
for field_mapping in self.__field_info["mapping"].values():
247-
field, _, _, cell_writer = field_mapping
255+
field_names = self.__fields_info.ls()
256+
for field_name in field_names:
257+
field_info = self.__fields_info.get(field_name)
258+
field = field_info.field
259+
cell_writer = field_info.cell_writer
260+
248261
# Here we can optimize performance if we use a types mapping
249262
if field.type not in types:
250263
cell = result[field.name]
@@ -268,26 +281,30 @@ def __process(self, key: Optional[str] = None):
268281
# Prepare context
269282
cells = self.__cells
270283
to_str = lambda v: str(v) if v is not None else "" # type: ignore
271-
fields = self.__field_info["objects"]
272-
field_mapping = self.__field_info["mapping"]
273-
iterator = zip_longest(field_mapping.values(), cells)
284+
fields = self.__fields_info.get_copies()
285+
names = self.__fields_info.ls()
286+
field_infos = [self.__fields_info.get(name) for name in names]
287+
iterator = zip_longest(field_infos, cells)
274288
is_empty = not bool(super().__len__())
289+
275290
if key:
276291
try:
277-
field, field_number, cell_reader, cell_writer = self.__field_info[
278-
"mapping"
279-
][key]
280-
except KeyError:
292+
field_info = self.__fields_info.get(key)
293+
field_number = field_info.field_number
294+
except ValueError:
281295
raise KeyError(f"Row does not have a field {key}")
282296
cell = cells[field_number - 1] if len(cells) >= field_number else None
283-
iterator = zip([(field, field_number, cell_reader, cell_writer)], [cell])
297+
iterator = zip([field_info], [cell])
284298

285299
# Iterate cells
286-
for field_mapping, source in iterator:
300+
for field_info, source in iterator:
287301
# Prepare context
288-
if field_mapping is None:
302+
if field_info is None:
289303
break
290-
field, field_number, cell_reader, _ = field_mapping
304+
field = field_info.field
305+
field_number = field_info.field_number
306+
cell_reader = field_info.cell_reader
307+
291308
if not is_empty and super().__contains__(field.name):
292309
continue
293310

0 commit comments

Comments
 (0)