@@ -103,7 +103,7 @@ def __init__(self, name, colType=None, minValue=0, maxValue=None, step=1, prefix
103103 if colType is None : # default to integer field if none specified
104104 colType = IntegerType ()
105105
106- assert isinstance (colType , DataType ), "colType `{}` is not instance of DataType" . format ( colType )
106+ assert isinstance (colType , DataType ), f "colType `{ colType } ` is not instance of DataType"
107107
108108 self ._initialBuildPlan = [] # the build plan for the column - descriptive only
109109 self .executionHistory = [] # the execution history for the column
@@ -375,19 +375,19 @@ def _setupTemporaryColumns(self):
375375 ensure (self ['numColumns' ] is None or self ['numColumns' ] <= 1 ,
376376 "weighted columns not supported for multi-column or multi-feature values" )
377377 if self .random :
378- temp_name = "_rnd_{}" . format ( self .name )
378+ temp_name = f "_rnd_{ self .name } "
379379 self .dependencies .append (temp_name )
380- desc = "adding temporary column {} required by {}" . format ( temp_name , self .name )
380+ desc = f "adding temporary column { temp_name } required by { self .name } "
381381 self ._initialBuildPlan .append (desc )
382382 sql_random_generator = self ._getUniformRandomSQLExpression (self .name )
383383 self .temporaryColumns .append ((temp_name , DoubleType (), {'expr' : sql_random_generator , 'omit' : True ,
384384 'description' : desc }))
385385 self ._weightedBaseColumn = temp_name
386386 else :
387387 # create temporary expression mapping values to range of weights
388- temp_name = "_scaled_{}" . format ( self .name )
388+ temp_name = f "_scaled_{ self .name } "
389389 self .dependencies .append (temp_name )
390- desc = "adding temporary column {} required by {}" . format ( temp_name , self .name )
390+ desc = f "adding temporary column { temp_name } required by { self .name } "
391391 self ._initialBuildPlan .append (desc )
392392
393393 # use a base expression based on mapping base column to size of data
@@ -511,10 +511,10 @@ def _getUniformRandomExpression(self, col_name):
511511 """
512512 assert col_name is not None , "`col_name` must not be None"
513513 if self ._randomSeedMethod == RANDOM_SEED_FIXED and self ._randomSeed != RANDOM_SEED_RANDOM :
514- return expr ("rand({})" . format ( self ._randomSeed ) )
514+ return expr (f "rand({ self ._randomSeed } )" )
515515 elif self ._randomSeedMethod == RANDOM_SEED_HASH_FIELD_NAME :
516516 assert self .name is not None , " `self.name` must not be none"
517- return expr ("rand(hash('{}'))" . format ( self . name ) )
517+ return expr (f "rand(hash('{ self . name } '))" )
518518 else :
519519 return rand ()
520520
@@ -530,8 +530,7 @@ def _getRandomExpressionForDistribution(self, col_name, col_distribution):
530530 assert isinstance (col_distribution , DataDistribution ), \
531531 "`distribution` object must be an instance of data distribution"
532532
533- self .executionHistory .append (".. random number generation via distribution `{}`"
534- .format (str (col_distribution )))
533+ self .executionHistory .append (f".. random number generation via distribution `{ col_distribution } `" )
535534
536535 return col_distribution .generateNormalizedDistributionSample ()
537536
@@ -543,10 +542,10 @@ def _getUniformRandomSQLExpression(self, col_name):
543542 assert col_name is not None , " `col_name` must not be None"
544543 if self ._randomSeedMethod == RANDOM_SEED_FIXED and self ._randomSeed != RANDOM_SEED_RANDOM :
545544 assert self ._randomSeed is not None , "`randomSeed` must not be None"
546- return "rand({})" . format ( self ._randomSeed )
545+ return f "rand({ self ._randomSeed } )"
547546 elif self ._randomSeedMethod == RANDOM_SEED_HASH_FIELD_NAME :
548547 assert self .name is not None , "`self.name` must not be none"
549- return "rand(hash('{}'))" . format ( self . name )
548+ return f "rand(hash('{ self . name } '))"
550549 else :
551550 return "rand()"
552551
@@ -597,7 +596,7 @@ def _getScaledIntSQLExpression(self, col_name, scale, base_columns, base_datatyp
597596 result = f"cast( ( floor(({ column_set } % { scale } ) + { scale } ) % { scale } ) as double) "
598597
599598 if normalize :
600- result = "({} / {})" . format ( result , ( scale * 1.0 ) - 1.0 )
599+ result = f "({ result } / { ( scale * 1.0 ) - 1.0 } )"
601600
602601 self .logger .debug ("computing scaled field [%s] as expression [%s]" , col_name , result )
603602 return result
@@ -613,7 +612,7 @@ def getNames(self):
613612 struct_type = self ._csOptions .getOrElse ('structType' , None )
614613
615614 if num_columns > 1 and struct_type is None :
616- return ["{0 }_{1}" . format ( self . name , x ) for x in range (0 , num_columns )]
615+ return [f" { self . name } _{ x } " for x in range (0 , num_columns )]
617616 else :
618617 return [self .name ]
619618
@@ -623,7 +622,7 @@ def getNamesAndTypes(self):
623622 struct_type = self ._csOptions .getOrElse ('structType' , None )
624623
625624 if num_columns > 1 and struct_type is None :
626- return [("{0 }_{1}" . format ( self . name , x ) , self .datatype ) for x in range (0 , num_columns )]
625+ return [(f" { self . name } _{ x } " , self .datatype ) for x in range (0 , num_columns )]
627626 else :
628627 return [(self .name , self .datatype )]
629628
@@ -786,26 +785,22 @@ def _checkProps(self, column_props):
786785 raise ValueError ("Effective range greater than range of type" )
787786
788787 for k in column_props .keys ():
789- ensure (k in ColumnSpecOptions ._ALLOWED_PROPERTIES , 'invalid column option {0}' . format ( k ) )
788+ ensure (k in ColumnSpecOptions ._ALLOWED_PROPERTIES , f 'invalid column option { k } ' )
790789
791790 for arg in ColumnSpecOptions ._REQUIRED_PROPERTIES :
792- ensure (arg in column_props .keys () and column_props [arg ] is not None ,
793- 'missing column option {0}' .format (arg ))
791+ ensure (column_props .get (arg ) is not None , f'missing column option { arg } ' )
794792
795793 for arg in ColumnSpecOptions ._FORBIDDEN_PROPERTIES :
796- ensure (arg not in column_props .keys (),
797- 'forbidden column option {0}' .format (arg ))
794+ ensure (arg not in column_props , f'forbidden column option { arg } ' )
798795
799796 # check weights and values
800- if 'weights' in column_props . keys () :
801- ensure ('values' in column_props . keys () ,
802- "weights are only allowed for columns with values - column '{}' " . format ( column_props ['name' ]) )
797+ if 'weights' in column_props :
798+ ensure ('values' in column_props ,
799+ f "weights are only allowed for columns with values - column '{ column_props ['name' ]} ' " )
803800 ensure (column_props ['values' ] is not None and len (column_props ['values' ]) > 0 ,
804- "weights must be associated with non-empty list of values - column '{}' " .format (
805- column_props ['name' ]))
801+ f"weights must be associated with non-empty list of values - column '{ column_props ['name' ]} ' " )
806802 ensure (len (column_props ['values' ]) == len (column_props ['weights' ]),
807- "length of list of weights must be equal to length of list of values - column '{}' " .format (
808- column_props ['name' ]))
803+ f"length of list of weights must be equal to length of list of values - column '{ column_props ['name' ]} ' " )
809804
810805 def getPlanEntry (self ):
811806 """ Get execution plan entry for object
@@ -816,7 +811,7 @@ def getPlanEntry(self):
816811 if desc is not None :
817812 return " |-- " + desc
818813 else :
819- return " |-- building column generator for column {}" . format ( self .name )
814+ return f " |-- building column generator for column { self .name } "
820815
821816 def _makeWeightedColumnValuesExpression (self , values , weights , seed_column_name ):
822817 """make SQL expression to compute the weighted values expression
@@ -872,17 +867,17 @@ def _getSeedExpression(self, base_column):
872867 assert len (base_column ) > 0 , "`baseColumn` must be list of column names"
873868 if len (base_column ) == 1 :
874869 if self ._baseColumnComputeMethod == HASH_COMPUTE_METHOD :
875- return expr ("hash({})" . format ( base_column [0 ]) )
870+ return expr (f "hash({ base_column [0 ]} )" )
876871 else :
877872 return col (base_column [0 ])
878873 elif self ._baseColumnComputeMethod == VALUES_COMPUTE_METHOD :
879- base_values = ["string(ifnull(`{}`, 'null'))" . format ( x ) for x in base_column ]
880- return expr ("array({})" . format ( "," .join (base_values )) )
874+ base_values = [f "string(ifnull(`{ x } `, 'null'))" for x in base_column ]
875+ return expr (f "array({ ',' .join (base_values )} )" )
881876 else :
882- return expr ("hash({})" . format ( "," .join (base_column )) )
877+ return expr (f "hash({ ',' .join (base_column )} )" )
883878 else :
884879 if self ._baseColumnComputeMethod == HASH_COMPUTE_METHOD :
885- return expr ("hash({})" . format ( base_column ) )
880+ return expr (f "hash({ base_column } )" )
886881 else :
887882 return col (base_column )
888883
@@ -1002,7 +997,7 @@ def _makeSingleGenerationExpression(self, index=None, use_pandas_optimizations=F
1002997 self .executionHistory .append (f".. using SQL expression `{ self .expr } ` as base" )
1003998 self .executionHistory .append (f".. casting to `{ self .datatype } `" )
1004999 elif self ._dataRange is not None and self ._dataRange .isFullyPopulated ():
1005- self .executionHistory .append (".. computing ranged value: {}" . format ( self ._dataRange ) )
1000+ self .executionHistory .append (f ".. computing ranged value: { self ._dataRange } " )
10061001 new_def = self ._computeRangedColumn (base_column = self .baseColumn , datarange = self ._dataRange ,
10071002 is_random = col_is_rand )
10081003 elif type (self .datatype ) is DateType :
@@ -1011,7 +1006,7 @@ def _makeSingleGenerationExpression(self, index=None, use_pandas_optimizations=F
10111006 # record execution history
10121007 self .executionHistory .append (".. using random date expression" )
10131008 sql_random_generator = self ._getUniformRandomSQLExpression (self .name )
1014- new_def = expr ("date_sub(current_date, rounding({}*1024))" . format ( sql_random_generator ) ).astype (
1009+ new_def = expr (f "date_sub(current_date, rounding({ sql_random_generator } *1024))" ).astype (
10151010 self .datatype )
10161011 else :
10171012 if self ._baseColumnComputeMethod == VALUES_COMPUTE_METHOD :
@@ -1051,7 +1046,7 @@ def _applyTextFormatExpression(self, new_def, sformat):
10511046 # note :
10521047 # while it seems like this could use a shared instance, this does not work if initialized
10531048 # in a class method
1054- self .executionHistory .append (".. applying column format `{}`" . format ( sformat ) )
1049+ self .executionHistory .append (f ".. applying column format `{ sformat } `" )
10551050 new_def = format_string (sformat , new_def )
10561051 return new_def
10571052
@@ -1083,13 +1078,11 @@ def _applyTextGenerationExpression(self, new_def, use_pandas_optimizations):
10831078 # in a class method
10841079 tg = self .textGenerator
10851080 if use_pandas_optimizations :
1086- self .executionHistory .append (".. text generation via pandas scalar udf `{}`"
1087- .format (str (tg )))
1081+ self .executionHistory .append (f".. text generation via pandas scalar udf `{ tg } `" )
10881082 u_value_from_generator = pandas_udf (tg .pandasGenerateText ,
10891083 returnType = StringType ()).asNondeterministic ()
10901084 else :
1091- self .executionHistory .append (".. text generation via udf `{}`"
1092- .format (str (tg )))
1085+ self .executionHistory .append (f".. text generation via udf `{ tg } `" )
10931086 u_value_from_generator = udf (tg .classicGenerateText ,
10941087 StringType ()).asNondeterministic ()
10951088 new_def = u_value_from_generator (new_def )
@@ -1102,7 +1095,7 @@ def _applyFinalCastExpression(self, col_type, new_def):
11021095 :param new_def: column definition being created
11031096 :returns: new column definition
11041097 """
1105- self .executionHistory .append (".. casting column [{}] to `{}`" . format ( self . name , col_type ) )
1098+ self .executionHistory .append (f ".. casting column [{ self . name } ] to `{ col_type } `" )
11061099
11071100 # cast the result to the appropriate type. For dates, cast first to timestamp, then to date
11081101 if type (col_type ) is DateType :
@@ -1119,7 +1112,7 @@ def _applyComputePercentNullsExpression(self, newDef, probabilityNulls):
11191112 :param probabilityNulls: Probability of nulls to be generated for particular column. Values can be 0.0 - 1.0
11201113 :returns: new column definition with probability of nulls applied
11211114 """
1122- assert self .nullable , "Column `{}` must be nullable for `percent_nulls` option" . format ( self . name )
1115+ assert self .nullable , f "Column `{ self . name } ` must be nullable for `percent_nulls` option"
11231116 self .executionHistory .append (".. applying null generator - `when rnd > prob then value - else null`" )
11241117
11251118 assert probabilityNulls is not None , "option 'percent_nulls' must not be null value or None"
@@ -1140,7 +1133,7 @@ def _computeImpliedRangeIfNeeded(self, col_type):
11401133 self ._dataRange = NRange (0 , len (self .values ) - 1 , 1 )
11411134 elif type (col_type ) is BooleanType :
11421135 self ._dataRange = NRange (0 , 1 , 1 )
1143- self .executionHistory .append (".. using adjusted effective range: {}" . format ( self ._dataRange ) )
1136+ self .executionHistory .append (f ".. using adjusted effective range: { self ._dataRange } " )
11441137
11451138 def makeGenerationExpressions (self ):
11461139 """ Generate structured column if multiple columns or features are specified
@@ -1171,7 +1164,7 @@ def makeGenerationExpressions(self):
11711164 exec_step_history += f"`{ self .baseColumn } `, method: `{ self ._baseColumnComputeMethod } `"
11721165 self .executionHistory .append (exec_step_history )
11731166 else :
1174- self .executionHistory .append ("generating multiple columns {0 } - `{1}`" . format ( num_columns , self ['name' ]) )
1167+ self .executionHistory .append (f "generating multiple columns { num_columns } - `{ self ['name' ]} `" )
11751168 retval = [self ._makeSingleGenerationExpression (x ) for x in range (num_columns )]
11761169
11771170 if struct_type == 'array' :
0 commit comments