@@ -226,14 +226,21 @@ print(dataset['text'][0])
226226``` python
227227dataset = gemma_template.load_dataset(
228228 " your_huggingface_dataset" ,
229- output_format = ' gpt' , # enum: text, gpt, alpaca
230- instruction_template = INSTRUCTION_TEMPLATE , # Template for instruction the user prompt.
231- structure_template = STRUCTURE_TEMPLATE , # Template for structuring the user prompt.
232- max_hidden_ratio = 10 , # Percentage of documents that need to be word masked. Min: 0, Max: 1. Default: 0.
229+ # enum: text, gpt, alpaca
230+ output_format = ' gpt' ,
231+ # Template for the instruction part of the user prompt.
232+ instruction_template = INSTRUCTION_TEMPLATE ,
233+ # Template for structuring the user prompt.
234+ structure_template = STRUCTURE_TEMPLATE ,
235+ # Fraction of documents that need to be word-masked (a ratio, not a percent).
236+ # Min: 0, Max: 1. Default: 0.
237+ max_hidden_ratio = .1 ,
233238 # Replace 10% of words in the input document with '_____'.
234239 # Use int to extract the correct number of words. The `max_hidden_ratio` parameter must be greater than 0.
235240 max_hidden_words = .1 ,
236- min_chars_length = 2 , # Minimum character of a word, used to create unigrams, bigrams, and trigrams. Default is 2.
237- max_chars_length = 8 # Maximum character of a word, used to create unigrams, bigrams and trigrams. Default is 0.
241+ # Minimum number of characters in a word, used to create unigrams, bigrams, and trigrams. Default is 2.
242+ min_chars_length = 2 ,
243+ # Maximum number of characters in a word, used to create unigrams, bigrams, and trigrams. Default is 0.
244+ max_chars_length = 8 ,
238245)
239246```
0 commit comments