Skip to content

Commit 1ab11e7

Browse files
committed
Added unit tests for chat template based training.
Signed-off-by: Dushyant Behl <dushyantbehl@in.ibm.com>
1 parent 613527d commit 1ab11e7

File tree

11 files changed

+291670
-16
lines changed

11 files changed

+291670
-16
lines changed

tests/artifacts/testdata/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@
6060
TWITTER_COMPLAINTS_TOKENIZED_PARQUET = os.path.join(
6161
PARQUET_DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.parquet"
6262
)
63+
CHAT_DATA_SINGLE_TURN = os.path.join(JSON_DATA_DIR, "single_turn_chat.jsonl")
64+
CHAT_DATA_MULTI_TURN = os.path.join(JSON_DATA_DIR, "multi_turn_chat.jsonl")
6365
EMPTY_DATA = os.path.join(JSON_DATA_DIR, "empty_data.json")
6466
MALFORMATTED_DATA = os.path.join(JSON_DATA_DIR, "malformatted_data.json")
67+
68+
# Other constants
69+
CUSTOM_TOKENIZER_TINYLLAMA = os.path.join(
70+
DATA_DIR, "tinyllama_tokenizer_special_tokens"
71+
)
6572
MODEL_NAME = "Maykeye/TinyLLama-v0"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Using the word \"grace\", come up with a word that rhymes and has the same number of syllables\n<nopace>", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"grace\" and has the same number of syllables:\n1\\. Space", "role": "assistant"}, {"content": "Using the word \"grace\", come up with a word that rhymes and has the same number of syllables\n<nopace>", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"grace\" and has the same number of syllables:\n1\\. Space", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
2+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Tell me one word that rhymes and has the opposite meaning of \"open\".", "role": "user"}, {"content": "Sure! Here's a word that rhymes with \"open\" and has the opposite meaning:\n1\\. Shut", "role": "assistant"}, {"content": "Tell me one word that rhymes and has the opposite meaning of \"open\".", "role": "user"}, {"content": "Sure! Here's a word that rhymes with \"open\" and has the opposite meaning:\n1\\. Shut", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
3+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "For the word \"dream\", give an example of a word that rhymes with it and its synonym.", "role": "user"}, {"content": "Here's an example for \"dream\" that includes a word that rhymes with it and a synonym:\n1\\. Word that rhymes with \"dream\": \"beam\"\nSynonym: \"ideal\"", "role": "assistant"}, {"content": "For the word \"dream\", give an example of a word that rhymes with it and its synonym.", "role": "user"}, {"content": "Here's an example for \"dream\" that includes a word that rhymes with it and a synonym:\n1\\. Word that rhymes with \"dream\": \"beam\"\nSynonym: \"ideal\"", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
4+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Can you think of a word that rhymes with tree?", "role": "user"}, {"content": "Of course, I can help! Here's one word that rhymes with \"tree\":\n1\\. Three", "role": "assistant"}, {"content": "Can you think of a word that rhymes with tree?", "role": "user"}, {"content": "Of course, I can help! Here's one word that rhymes with \"tree\":\n1\\. Three", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
5+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Look up a word that rhymes with exist", "role": "user"}, {"content": "I found a word that rhymes with \"exist\":\n1\\. Mist", "role": "assistant"}, {"content": "Look up a word that rhymes with exist", "role": "user"}, {"content": "I found a word that rhymes with \"exist\":\n1\\. Mist", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
6+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Provide five words that rhyme with \"green\".", "role": "user"}, {"content": "Here are five words that rhyme with \"green\":\n1\\. Bean\n2\\. Clean\n3\\. Keen\n4\\. Mean\n5\\. Seen", "role": "assistant"}, {"content": "Provide five words that rhyme with \"green\".", "role": "user"}, {"content": "Here are five words that rhyme with \"green\":\n1\\. Bean\n2\\. Clean\n3\\. Keen\n4\\. Mean\n5\\. Seen", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
7+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "What are three words that rhyme with \"peace\"?", "role": "user"}, {"content": "Here are three words that rhyme with \"peace\":\n1\\. Breeze\n2\\. Pieces\n3\\. Increase", "role": "assistant"}, {"content": "What are three words that rhyme with \"peace\"?", "role": "user"}, {"content": "Here are three words that rhyme with \"peace\":\n1\\. Breeze\n2\\. Pieces\n3\\. Increase", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
8+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Can you name a word that rhymes with high and starts with the letter \"t\"?", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"high\" and starts with \"t\":\n1\\. Sky", "role": "assistant"}, {"content": "Can you name a word that rhymes with high and starts with the letter \"t\"?", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"high\" and starts with \"t\":\n1\\. Sky", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
9+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Provide two rhyming words for the word \"love\".\n<no above>", "role": "user"}, {"content": "Here are two rhyming words for \"love\":\n1\\. Dove\n2\\. Glove", "role": "assistant"}, {"content": "Provide two rhyming words for the word \"love\".\n<no above>", "role": "user"}, {"content": "Here are two rhyming words for \"love\":\n1\\. Dove\n2\\. Glove", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
10+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Identify the word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\".", "role": "user"}, {"content": "The word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\" is:\n1\\. \"Shoe\".", "role": "assistant"}, {"content": "Identify the word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\".", "role": "user"}, {"content": "The word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\" is:\n1\\. \"Shoe\".", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 2}"}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Using the word \"grace\", come up with a word that rhymes and has the same number of syllables\n<nopace>", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"grace\" and has the same number of syllables:\n1\\. Space", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
2+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Tell me one word that rhymes and has the opposite meaning of \"open\".", "role": "user"}, {"content": "Sure! Here's a word that rhymes with \"open\" and has the opposite meaning:\n1\\. Shut", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
3+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "For the word \"dream\", give an example of a word that rhymes with it and its synonym.", "role": "user"}, {"content": "Here's an example for \"dream\" that includes a word that rhymes with it and a synonym:\n1\\. Word that rhymes with \"dream\": \"beam\"\nSynonym: \"ideal\"", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
4+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Can you think of a word that rhymes with tree?", "role": "user"}, {"content": "Of course, I can help! Here's one word that rhymes with \"tree\":\n1\\. Three", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
5+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Look up a word that rhymes with exist", "role": "user"}, {"content": "I found a word that rhymes with \"exist\":\n1\\. Mist", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
6+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Provide five words that rhyme with \"green\".", "role": "user"}, {"content": "Here are five words that rhyme with \"green\":\n1\\. Bean\n2\\. Clean\n3\\. Keen\n4\\. Mean\n5\\. Seen", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
7+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "What are three words that rhyme with \"peace\"?", "role": "user"}, {"content": "Here are three words that rhyme with \"peace\":\n1\\. Breeze\n2\\. Pieces\n3\\. Increase", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
8+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Can you name a word that rhymes with high and starts with the letter \"t\"?", "role": "user"}, {"content": "Certainly! Here's a word that rhymes with \"high\" and starts with \"t\":\n1\\. Sky", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
9+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Provide two rhyming words for the word \"love\".\n<no above>", "role": "user"}, {"content": "Here are two rhyming words for \"love\":\n1\\. Dove\n2\\. Glove", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
10+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Identify the word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\".", "role": "user"}, {"content": "The word that rhymes with \"love\" and has the same vowel sounds in the same order as the word \"busy\" is:\n1\\. \"Shoe\".", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
11+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Find a word that rhymes with \"dream\" and means 'a blur'.", "role": "user"}, {"content": "Sorry, I can't find a rhyming word that also means 'a blur'. However, here's a word that rhymes with \"dream\" and has a similar meaning to 'blur':\n1\\. \"Stream\" (as in, a blurry or fast-moving stream)", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
12+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "What is a word that rhymes with \"book,\" and how does it contrast in meaning with the original word?", "role": "user"}, {"content": "For the word \"book,\" a word that rhymes with it is \"crook.\" While a book is a collection of written work, a crook is a person who is dishonest or fraudulent.", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
13+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Generate an example of a word that rhymes with \"peace\" and describe its antonym.", "role": "user"}, {"content": "A word that rhymes with \"peace\" is \"grease.\" Its antonym is \"cleanliness\" or \"purity,\" a state free from contamination or impurities.", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
14+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Identify two words that rhyme with \"laughter\" and find out their synonyms.", "role": "user"}, {"content": "Two words that rhyme with \"laughter\" are \"after\" and \"together.\" Some synonyms for \"after\" are \"subsequently\" or \"subsequently to,\" and synonyms for \"together\" include \"united,\" \"joined,\" or \"combined.\"", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
15+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Can you provide a sentence using a word that rhymes with \"sing\" and demonstrate its past participle in the same sentence?", "role": "user"}, {"content": "Certainly, here is a sentence: \"Yesterday, the choir members had sung in the cathedral till twilight.\" In this sentence, \"sung\" is the past participle form of the base form \"sing.\"", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
16+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Identify a word that rhymes with \"butter\"", "role": "user"}, {"content": "I found a word that rhymes with \"butter\":\n1\\. Summer", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
17+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "What is a rhyming word for \"cry\"?", "role": "user"}, {"content": "Here is a rhyming word for \"cry\":\n1\\. Sky", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
18+
{"messages": [{"content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.", "role": "system"}, {"content": "Provide a word that rhymes with \"grace\" and means 'a person who excels in elegance and poise.'", "role": "user"}, {"content": "A word that rhymes with \"grace\" and has a similar meaning to 'a person who excels in elegance and poise' is \"pace.\" A \"pacer\" exhibits elegance through their smooth, rhythmic movements.", "role": "assistant"}], "group": "lab_extension", "dataset": "base/full-extension", "metadata": "{\"num_turns\": 1}"}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"<|assistant|>": 32001,
3+
"<|system|>": 32002,
4+
"<|user|>": 32000
5+
}

0 commit comments

Comments
 (0)