Skip to content

Commit d96a12d

Browse files
committed
Merge tag 'v2.2.0-rc.1' into v2.2.0-rc1
Signed-off-by: Abhishek <[email protected]>
2 parents 1e82e02 + 7df3416 commit d96a12d

File tree

59 files changed

+1857
-833
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+1857
-833
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ callbacks=cb_,
638638
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
639639

640640
# Argument names that match this expression will be ignored.
641-
ignored-argument-names=_.*|^ignored_|^unused_
641+
ignored-argument-names=_.*|^ignored_|^unused_|kwargs
642642

643643
# Tells whether we should check for unused import in __init__ files.
644644
init-import=no

architecture_records/003-generic-tracker-framework.md

Lines changed: 1 addition & 1 deletion

architecture_records/004-datapreprocessor.md

Lines changed: 422 additions & 0 deletions

tests/acceleration/test_acceleration_framework.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,13 @@
5454
from tuning.utils.import_utils import is_fms_accelerate_available
5555

5656
# for some reason the CI will raise an import error if we try to import
57-
# these from tests.data
57+
# these from tests.artifacts.testdata
5858
TWITTER_COMPLAINTS_JSON_FORMAT = os.path.join(
59-
os.path.dirname(__file__), "../data/twitter_complaints_json.json"
59+
os.path.dirname(__file__), "../artifacts/testdata/twitter_complaints_json.json"
6060
)
6161
TWITTER_COMPLAINTS_TOKENIZED = os.path.join(
6262
os.path.dirname(__file__),
63-
"../data/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json",
63+
"../artifacts/testdata/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json",
6464
)
6565

6666
# pylint: disable=import-error
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright The FMS HF Tuning Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Paths to predefined data config YAML files for use in individual unit tests.
16+
"""
17+
# Standard
18+
import os
19+
20+
### Constants holding the paths of the predefined data config files
21+
PREDEFINED_DATA_CONFIGS = os.path.join(os.path.dirname(__file__))
22+
APPLY_CUSTOM_TEMPLATE_YAML = os.path.join(
23+
PREDEFINED_DATA_CONFIGS, "apply_custom_template.yaml"
24+
)
25+
PRETOKENIZE_JSON_DATA_YAML = os.path.join(
26+
PREDEFINED_DATA_CONFIGS, "pretokenized_json_data.yaml"
27+
)
28+
TOKENIZE_AND_APPLY_INPUT_MASKING_YAML = os.path.join(
29+
PREDEFINED_DATA_CONFIGS, "tokenize_and_apply_input_masking.yaml"
30+
)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
dataprocessor:
2+
type: default
3+
datasets:
4+
- name: apply_custom_data_template
5+
data_paths:
6+
- "FILE_PATH"
7+
data_handlers:
8+
- name: apply_custom_data_formatting_template
9+
arguments:
10+
remove_columns: all
11+
batched: false
12+
fn_kwargs:
13+
dataset_text_field: "dataset_text_field"
14+
dataset_template: "dataset_template"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
dataprocessor:
2+
type: default
3+
datasets:
4+
- name: pretokenized_dataset
5+
data_paths:
6+
- "FILE_PATH"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
dataprocessor:
2+
type: default
3+
datasets:
4+
- name: text_dataset_input_output_masking
5+
data_paths:
6+
- "FILE_PATH"
7+
data_handlers:
8+
- name: tokenize_and_apply_input_masking
9+
arguments:
10+
remove_columns: all
11+
batched: false
12+
fn_kwargs:
13+
input_field: "INPUT"
14+
output_field: "OUTPUT"

0 commit comments

Comments
 (0)