@@ -176,19 +176,19 @@ def __post_init__(self):
176176
177177
178178@dataclass
179- class PermutableAttributeValue :
180- """Value to be used for the attribute."""
179+ class SampledAttributeValue :
180+ """Value to be sampled for the attribute."""
181181
182182 id : str
183183 """ID to be used when referencing the attribute value during synthesis."""
184184
185- value : str
186- """Value to be used for the attribute.
187- Referenced as {attribute_id.value }"""
185+ name : str
186+ """Plaintext name of the attribute value .
187+ Referenced as {attribute_id}"""
188188
189189 description : str
190190 """Description of the attribute value.
191- Referenced as {attribute_id.value. description}"""
191+ Referenced as {attribute_id.description}"""
192192
193193 sample_rate : Optional [float ] = None
194194 """Sample rate for the attribute value. If not specified, will assume uniform
@@ -197,34 +197,34 @@ class PermutableAttributeValue:
197197 def __post_init__ (self ):
198198 """Verifies/populates params."""
199199 if not self .id :
200- raise ValueError ("PermutableAttributeValue .id cannot be empty." )
201- if not self .value :
202- raise ValueError ("PermutableAttributeValue.value cannot be empty." )
200+ raise ValueError ("SampledAttributeValue .id cannot be empty." )
201+ if not self .name :
202+ raise ValueError ("SampledAttributeValue.name cannot be empty." )
203203 if not self .description :
204- raise ValueError ("PermutableAttributeValue .description cannot be empty." )
204+ raise ValueError ("SampledAttributeValue .description cannot be empty." )
205205 if self .sample_rate is not None and (
206206 self .sample_rate < 0 or self .sample_rate > 1
207207 ):
208208 raise ValueError (
209- "PermutableAttributeValue .sample_rate must be between 0 and 1."
209+ "SampledAttributeValue .sample_rate must be between 0 and 1."
210210 )
211211
212212
213213@dataclass
214- class PermutableAttribute :
215- """Attributes to be varied across the dataset."""
214+ class SampledAttribute :
215+ """Attributes to be sampled across the dataset."""
216216
217217 id : str
218218 """ID to be used when referencing the attribute during synthesis."""
219219
220- attribute : str
221- """Plaintext name of the attribute. Referenced as {attribute_id }"""
220+ name : str
221+ """Plaintext name of the attribute. Referenced as {id.parent }"""
222222
223223 description : str
224- """Description of the attribute. Referenced as {attribute_id .description}"""
224+ """Description of the attribute. Referenced as {id.parent .description}"""
225225
226- possible_values : list [PermutableAttributeValue ]
227- """Type of the attribute."""
226+ possible_values : list [SampledAttributeValue ]
227+ """Values to be sampled for the attribute."""
228228
229229 def get_value_distribution (self ) -> dict [str , float ]:
230230 """Get the distribution of attribute values."""
@@ -236,13 +236,13 @@ def get_value_distribution(self) -> dict[str, float]:
236236 def __post_init__ (self ):
237237 """Verifies/populates params."""
238238 if not self .id :
239- raise ValueError ("PermutableAttribute .id cannot be empty." )
240- if not self .attribute :
241- raise ValueError ("PermutableAttribute.attribute cannot be empty." )
239+ raise ValueError ("SampledAttribute .id cannot be empty." )
240+ if not self .name :
241+ raise ValueError ("SampledAttribute.name cannot be empty." )
242242 if not self .description :
243- raise ValueError ("PermutableAttribute .description cannot be empty." )
243+ raise ValueError ("SampledAttribute .description cannot be empty." )
244244 if not self .possible_values :
245- raise ValueError ("PermutableAttribute .possible_values cannot be empty." )
245+ raise ValueError ("SampledAttribute .possible_values cannot be empty." )
246246
247247 value_ids = []
248248 sample_rates = []
@@ -252,9 +252,7 @@ def __post_init__(self):
252252
253253 value_ids_set = set (value_ids )
254254 if len (value_ids ) != len (value_ids_set ):
255- raise ValueError (
256- "PermutableAttribute.possible_values must have unique IDs."
257- )
255+ raise ValueError ("SampledAttribute.possible_values must have unique IDs." )
258256
259257 # Normalize sample rates
260258 normalized_sample_rates = []
@@ -267,7 +265,7 @@ def __post_init__(self):
267265 undefined_sample_rate_count += 1
268266
269267 if defined_sample_rate > 1.0 :
270- raise ValueError ("PermutableAttribute .possible_values must sum to 1.0." )
268+ raise ValueError ("SampledAttribute .possible_values must sum to 1.0." )
271269
272270 # Assign remaining sample rate to undefined sample rates
273271 remaining_sample_rate = 1.0 - defined_sample_rate
@@ -517,7 +515,7 @@ class GeneralSynthesisParams(BaseParams):
517515 Examples will be enumerated during sampling, and attributes can be referenced as
518516 attributes when generating new attributes."""
519517
520- permutable_attributes : Optional [list [PermutableAttribute ]] = None
518+ sampled_attributes : Optional [list [SampledAttribute ]] = None
521519 """Attributes to be varied across the dataset.
522520
523521 Attributes each have a set of possible values which will be randomly sampled
@@ -636,18 +634,18 @@ def _check_example_source_attribute_ids(self, all_attribute_ids: set[str]) -> No
636634 for new_key in example_keys :
637635 self ._check_attribute_ids (all_attribute_ids , new_key )
638636
639- def _check_permutable_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
640- """Check attribute IDs from permutable attributes for uniqueness."""
641- if self .permutable_attributes is None :
637+ def _check_sampled_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
638+ """Check attribute IDs from sampled attributes for uniqueness."""
639+ if self .sampled_attributes is None :
642640 return
643641
644- if len (self .permutable_attributes ) == 0 :
642+ if len (self .sampled_attributes ) == 0 :
645643 raise ValueError (
646- "GeneralSynthesisParams.permutable_attributes cannot be empty."
644+ "GeneralSynthesisParams.sampled_attributes cannot be empty."
647645 )
648646
649- for permutable_attribute in self .permutable_attributes :
650- attribute_id = permutable_attribute .id
647+ for sampled_attribute in self .sampled_attributes :
648+ attribute_id = sampled_attribute .id
651649 self ._check_attribute_ids (all_attribute_ids , attribute_id )
652650
653651 def _check_generated_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
@@ -716,7 +714,7 @@ def __post_init__(self):
716714 self ._check_dataset_source_attribute_ids (all_attribute_ids )
717715 self ._check_document_source_attribute_ids (all_attribute_ids )
718716 self ._check_example_source_attribute_ids (all_attribute_ids )
719- self ._check_permutable_attribute_ids (all_attribute_ids )
717+ self ._check_sampled_attribute_ids (all_attribute_ids )
720718 self ._check_generated_attribute_ids (all_attribute_ids )
721719 self ._check_transformed_attribute_ids (all_attribute_ids )
722720 self ._check_passthrough_attribute_ids ()
0 commit comments