@@ -76,14 +76,18 @@ def get_tables(tables_dir):
76
76
77
77
78
78
79
# Matches one numeric literal: an optional sign, then either a
# thousands-separated integer part ("1,234,567") or a plain run of digits,
# followed by an optional fractional part and an optional exponent.  The
# leading group of a comma-separated number may hold one to three digits, so
# "123,456" is read as a single value rather than "123" followed by "456".
float_value_re = re.compile(
    r"([+-]?\s*((\d{1,3}(,\d{3})+|\d+)(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
)
# Matches a single letter: a word character that is neither a digit nor "_".
# Raw string so that "\W" and "\d" reach the regex engine unchanged.
letters_re = re.compile(r"[^\W\d_]", re.UNICODE)

# A float value optionally followed by "± <float>" (e.g. a standard
# deviation).  Group 1 is always the leading value.
metric_value_re = re.compile(
    float_value_re.pattern + r"(\s*±\s*" + float_value_re.pattern + r")?"
)
# Matches any run of whitespace.
whitespace_re = re.compile(r"\s+")
81
85
82
86
83
87
def normalize_float_value(s):
    """Return the first numeric value found in *s* with separators removed.

    Only group 1 of the first ``metric_value_re`` match is used, so any
    trailing "± ..." part is ignored.  Whitespace and "," characters are
    stripped from that value.  Returns ``'-'`` when *s* has no match.
    """
    found = metric_value_re.search(s)
    if not found:
        return '-'
    leading_value = found.group(1)
    return whitespace_re.sub("", leading_value).replace(",", "")
88
92
89
93
@@ -103,7 +107,7 @@ def fuzzy_match(metric, metric_value, target_value):
103
107
return False
104
108
metric_value = Decimal (metric_value )
105
109
106
- for match in float_value_re .findall (target_value ):
110
+ for match in metric_value_re .findall (target_value ):
107
111
value = whitespace_re .sub ("" , match [0 ])
108
112
value = Decimal (value )
109
113
@@ -200,8 +204,10 @@ def normalize_metric(value):
200
204
201
205
202
206
def normalize_cell(cell):
    """Return the numeric values found in *cell* as a list of ``Decimal``.

    A cell containing more than two letters (per ``letters_re``) yields an
    empty list.  Otherwise the first group of every ``metric_value_re`` match
    is passed through ``normalize_float_value`` and converted to ``Decimal``.
    """
    letter_count = len(letters_re.findall(cell))
    if letter_count > 2:
        return []
    # findall() returns one tuple of groups per match; index 0 is the
    # leading value, without any "± ..." part.
    return [
        Decimal(normalize_float_value(found[0]))
        for found in metric_value_re.findall(cell)
    ]
207
213
0 commit comments