@@ -27,7 +27,7 @@ class OpenCSVFile:
2727 active : int
2828 lock : asyncio .Lock
2929 write_back_key : bool = True
30- write_back_label : bool = False
30+ write_back_tag : bool = False
3131
3232 async def inc (self ):
3333 async with self .lock :
@@ -40,15 +40,15 @@ async def dec(self):
4040
4141
4242CSV_SOURCE_CONFIG_DEFAULT_KEY = "key"
43- CSV_SOURCE_CONFIG_DEFAULT_LABEL = "unlabeled "
44- CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN = "label "
43+ CSV_SOURCE_CONFIG_DEFAULT_tag = "untagged "
44+ CSV_SOURCE_CONFIG_DEFAULT_tag_COLUMN = "tag "
4545
4646
4747@config
4848class CSVSourceConfig (FileSourceConfig ):
4949 key : str = CSV_SOURCE_CONFIG_DEFAULT_KEY
50- label : str = CSV_SOURCE_CONFIG_DEFAULT_LABEL
51- labelcol : str = CSV_SOURCE_CONFIG_DEFAULT_LABEL_COLUMN
50+ tag : str = CSV_SOURCE_CONFIG_DEFAULT_tag
51+ tagcol : str = CSV_SOURCE_CONFIG_DEFAULT_tag_COLUMN
5252
5353
5454# CSVSource is a bit of a mess
@@ -91,24 +91,24 @@ async def read_csv(self, fd, open_file):
9191 # Record which headers were present when the file was opened
9292 if not self .config .key in dict_reader .fieldnames :
9393 open_file .write_back_key = False
94- if self .config .labelcol in dict_reader .fieldnames :
95- open_file .write_back_label = True
96- # Store all the repos by their label in write_out
94+ if self .config .tagcol in dict_reader .fieldnames :
95+ open_file .write_back_tag = True
96+ # Store all the repos by their tag in write_out
9797 open_file .write_out = {}
98- # If there is no key track row index to be used as key by label
98+ # If a row has no key column, track a per-tag row index to use as its key
9999 index = {}
100100 for row in dict_reader :
101- # Grab label from row
102- label = row .get (self .config .labelcol , self .config .label )
103- if self .config .labelcol in row :
104- del row [self .config .labelcol ]
105- index .setdefault (label , 0 )
101+ # Grab tag from row
102+ tag = row .get (self .config .tagcol , self .config .tag )
103+ if self .config .tagcol in row :
104+ del row [self .config .tagcol ]
105+ index .setdefault (tag , 0 )
106106 # Grab key from row
107- key = row .get (self .config .key , str (index [label ]))
107+ key = row .get (self .config .key , str (index [tag ]))
108108 if self .config .key in row :
109109 del row [self .config .key ]
110110 else :
111- index [label ] += 1
111+ index [tag ] += 1
112112 # Repo data we are going to parse from this row (must include
113113 # features).
114114 repo_data = {}
@@ -159,18 +159,18 @@ async def read_csv(self, fd, open_file):
159159 }
160160 repo_data .update ({"prediction" : predictions })
161161 # If there was no data in the row, skip it
162- if not repo_data and key == str (index [label ] - 1 ):
162+ if not repo_data and key == str (index [tag ] - 1 ):
163163 continue
164164 # Add the repo to our internal memory representation
165- open_file .write_out .setdefault (label , {})
166- open_file .write_out [label ][key ] = Repo (key , data = repo_data )
165+ open_file .write_out .setdefault (tag , {})
166+ open_file .write_out [tag ][key ] = Repo (key , data = repo_data )
167167
168168 async def load_fd (self , fd ):
169169 """
170170 Parses a CSV stream into Repo instances
171171 """
172172 async with self ._open_csv (fd ) as open_file :
173- self .mem = open_file .write_out .get (self .config .label , {})
173+ self .mem = open_file .write_out .get (self .config .tag , {})
174174 self .logger .debug ("%r loaded %d records" , self , len (self .mem ))
175175
176176 async def dump_fd (self , fd ):
@@ -179,20 +179,20 @@ async def dump_fd(self, fd):
179179 """
180180 async with self .OPEN_CSV_FILES_LOCK :
181181 open_file = self .OPEN_CSV_FILES [self .config .filename ]
182- open_file .write_out .setdefault (self .config .label , {})
183- open_file .write_out [self .config .label ].update (self .mem )
182+ open_file .write_out .setdefault (self .config .tag , {})
183+ open_file .write_out [self .config .tag ].update (self .mem )
184184 # Bail if not last open source for this file
185185 if not (await open_file .dec ()):
186186 return
187187 # Add our headers
188188 fieldnames = (
189189 [] if not open_file .write_back_key else [self .config .key ]
190190 )
191- fieldnames .append (self .config .labelcol )
191+ fieldnames .append (self .config .tagcol )
192192 # Get all the feature names
193193 feature_fieldnames = set ()
194194 prediction_fieldnames = set ()
195- for label , repos in open_file .write_out .items ():
195+ for tag , repos in open_file .write_out .items ():
196196 for repo in repos .values ():
197197 feature_fieldnames |= set (repo .data .features .keys ())
198198 prediction_fieldnames |= set (repo .data .prediction .keys ())
@@ -209,12 +209,12 @@ async def dump_fd(self, fd):
209209 # Write out the file
210210 writer = csv .DictWriter (fd , fieldnames = fieldnames )
211211 writer .writeheader ()
212- for label , repos in open_file .write_out .items ():
212+ for tag , repos in open_file .write_out .items ():
213213 for repo in repos .values ():
214214 repo_data = repo .dict ()
215215 row = {name : "" for name in fieldnames }
216- # Always write the label
217- row [self .config .labelcol ] = label
216+ # Always write the tag
217+ row [self .config .tagcol ] = tag
218218 # Write the key if it existed
219219 if open_file .write_back_key :
220220 row [self .config .key ] = repo .key
0 commit comments