@@ -220,46 +220,43 @@ def get_recipe(recipe: str, subdir: str = None) -> Any:
 
 def qconfig_init(recipe: str = None, args: Any = None) -> dict:
     """Three possible ways to create qcfg:
-    1. create a default qcfg
-    2. load from a json
-    3. parse the args
-    NOTE: Content from higher number, e.g. arg parser, will override thier counterpart from lower
-    numbers, e.g. json.
+    1. create a default qcfg
+    2. load from a json
+    3. parse the args
+    NOTE: Content from a higher number, e.g. the arg parser, will override its counterpart from
+    a lower number, e.g. the json.
 
     Args:
-        recipe: str. Recipe filename (json) that contains settings, if specified and exists. Will search
-            cwd and fms_mo/recipes folder. ok to omit '.json' extension.
-        args: argparser object that may contain relavant parameters.
-
-    Important items in the config dictionary:
-        nbits_[w|a]_alt: "_alt" stands for "alternative" -> the default prec for those "skipped" layers
-            e.g. usually the 1st/last layers are "skipped" and will NOT be swapped to
-            QLinear. But, if "nbits_x_alt = 8", they will.
-        qmodel_calibration[_new]: set to non-zero will trigger calibration. "_new" means calibration
-            will happen during the first N calls of fwd path, better for long
-            training or fine-tuning that you don't mind losing the first N iters
-
-        qlayer_name_pattern: allows partial or regex name matching, the layers satisfy the criteria will
-            be skipped. NOTE: tracing will be bypassed entirely if this arg is used
-        qskip_layer_name: user can specify exact name to skip
-        qspecial_layers: special case handling. user can specify any quant params for any given layer,
-            e.g. {'1st.conv':{'nbits_w':8,'qw_mode':'pact+sym'}, '2nd.layers':{...} }
-
-        extend_act_range: symmetric act quantizers (maxsym, pactsym+, ...) to use full range, e.g.,
-            [-128, 127] instead [-127,127], TODO: should default to True?
-
-        ptq_nbatch: total number of batches of data that will be fetched from loader for PTQ tuning
-        ptq_batchsize: data used in PTQ tuning usually is fetched from loader directly, i.e. batchsize
-            is the unchanged from dataloader.batch_size. although it could be different if
-            needed, e.g. PTQ may allow larger bs due to only partial model tuning. But fine-
-            grain shuffling will be needed in that case.
-        ptq_nouterloop: number of optimization "steps" in the PTQ outer loop. 1 outer loop uses 1 cached
-            data batch. when Nouter >= Nbatch, data will be re-used
-        ptq_ninnerloop: number of "inner loop" for PTQ optimization. When 1 batch of data is fetched,
-            run (loss->loss.back->optim.step) this many times before fetching the next batch
-            NOTE: usually doesn't make big differences, hence, default to 1
-        ptq_coslr: can be "", "W" or "A" or "WA", indicating which (or both) optimizer will use cosLR,
-            otherwise use constantLR as default
+        recipe: str. Recipe filename (json) that contains settings, if specified and exists.
+            Will search cwd and fms_mo/recipes folder. ok to omit '.json' extension.
+        args: argparser object that may contain relevant parameters.
+
+    Important items in the config dictionary:
+        nbits_[w|a]_alt: "_alt" stands for "alternative" -> the default prec for those "skipped"
+            layers, e.g. usually the 1st/last layers are "skipped" and will NOT be swapped to
+            QLinear. But, if "nbits_x_alt = 8", they will.
+        qmodel_calibration[_new]: setting to non-zero will trigger calibration. "_new" means
+            calibration will happen during the first N calls of the fwd path, better for long
+            training or fine-tuning where you don't mind losing the first N iters
+        qlayer_name_pattern: allows partial or regex name matching; the layers that satisfy the
+            criteria will be skipped. NOTE: tracing will be bypassed entirely if this arg is used
+        qskip_layer_name: user can specify exact names to skip
+        qspecial_layers: special case handling. user can specify any quant params for any
+            given layer, e.g. {'1st.conv':{'nbits_w':8,'qw_mode':'pact+sym'}, '2nd.layers':{...} }
+        extend_act_range: symmetric act quantizers (maxsym, pactsym+, ...) use the full range, e.g.,
+            [-128, 127] instead of [-127, 127]. TODO: should default to True?
+        ptq_nbatch: total number of batches of data that will be fetched from the loader for PTQ tuning
+        ptq_batchsize: data used in PTQ tuning is usually fetched from the loader directly,
+            i.e. batchsize is unchanged from dataloader.batch_size, although it could be
+            different if needed, e.g. PTQ may allow a larger bs due to only partial model tuning,
+            but fine-grained shuffling will be needed in that case.
+        ptq_nouterloop: number of optimization "steps" in the PTQ outer loop. 1 outer loop uses
+            1 cached data batch. When Nouter >= Nbatch, data will be re-used
+        ptq_ninnerloop: number of "inner loops" for PTQ optimization. When 1 batch of data is
+            fetched, run (loss->loss.back->optim.step) this many times before fetching the next
+            batch. NOTE: usually doesn't make a big difference, hence, default to 1
+        ptq_coslr: can be "", "W", "A", or "WA", indicating which (or both) optimizers will use
+            cosLR, otherwise constantLR is used as default
     """
 
     qcfg = {}
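
For a concrete sense of the precedence described in this docstring, a minimal usage sketch follows. The top-level import path, the recipe name, and the layer names are assumptions for illustration, not taken from the source.

from fms_mo import qconfig_init  # import path assumed; adjust to your install

# defaults < recipe (json) < args: later sources override their counterparts
qcfg = qconfig_init(recipe="qat_int8")                      # hypothetical recipe name
# qcfg = qconfig_init(recipe="qat_int8", args=parsed_args)  # args would win over the json

# per-layer knobs mentioned above (layer names are illustrative)
qcfg["nbits_w_alt"] = 8                         # quantize the "skipped" layers at 8 bits
qcfg["qskip_layer_name"] = ["classifier.head"]  # skip this exact layer
qcfg["qspecial_layers"] = {"1st.conv": {"nbits_w": 8, "qw_mode": "pact+sym"}}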
@@ -379,7 +376,7 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
         temp_cfg = get_recipe(recipe)
         if temp_cfg:
             qcfg.update(temp_cfg)
-            logger.info("Updated config w/ recipe values")
+            logger.info("Updated config with recipe values")
         else:
             raise ValueError(f"Config recipe {recipe} was not found.")
 
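
As a small illustration of the recipe path above: the file name and its contents are hypothetical; per the docstring, the name is searched in the cwd and the fms_mo/recipes folder, and the '.json' extension may be omitted.

# hypothetical recipe file "my_recipe.json" placed in cwd or fms_mo/recipes/:
#   {"nbits_w": 4, "nbits_w_alt": 8, "qmodel_calibration": 16}
qcfg = qconfig_init(recipe="my_recipe")              # extension omitted
qcfg = qconfig_init(recipe="my_recipe", args=args)   # args override recipe values
# an unresolvable name raises ValueError("Config recipe ... was not found.")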
@@ -406,7 +403,7 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
 def has_non_serializable_object(anything: Any) -> bool:
     """
     Generalized recursive function looking for any non-serializable Python object
-    Only types that are JSON serializable are None, primatives, tuples, lists, and dicts.
+    Only types that are JSON serializable are None, primitives, tuples, lists, and dicts.
     Any other types must be converted into one of the types above.
     """
     if isinstance(anything, (list, tuple)):
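
A self-contained sketch of the recursive check this docstring describes (not necessarily the repo's exact body): lists, tuples, and dicts are walked recursively, and anything outside None plus the primitive types is flagged.

from typing import Any

def _has_non_serializable(anything: Any) -> bool:  # hypothetical stand-in name
    """Return True if `anything` holds something json.dump could not serialize."""
    if isinstance(anything, (list, tuple)):
        return any(_has_non_serializable(item) for item in anything)
    if isinstance(anything, dict):
        return any(_has_non_serializable(k) or _has_non_serializable(v)
                   for k, v in anything.items())
    # None and primitives are JSON serializable; tensors, modules, etc. are not
    return not (anything is None or isinstance(anything, (bool, int, float, str)))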
@@ -468,7 +465,7 @@ def remove_unwanted_from_config(config: dict, minimal: bool = True):
         "checkQerr_frequency",
         "newlySwappedModules",
         "force_calib_once",
-        # if we keep the follwing LUTs, it will save the entire model
+        # if we keep the following LUTs, it will save the entire model
         "LUTmodule_name",
         "qkvsync_my_1st_sibling",
         "graph_in_out",
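
A hedged sketch of where this cleanup could sit in a save path; whether the helper prunes in place or returns a pruned copy is not visible in this hunk (in-place is assumed here), and the output file name is illustrative.

import json

remove_unwanted_from_config(qcfg, minimal=True)  # assumed to prune qcfg in place
# has_non_serializable_object(qcfg) can be used to double-check what remains
with open("qcfg.json", "w") as f:                # illustrative file name
    json.dump(qcfg, f, indent=2)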