diff --git a/README.md b/README.md
index 917a44cac..45a578572 100644
--- a/README.md
+++ b/README.md
@@ -288,6 +288,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 ### Models
 1. **[ALBERT](https://huggingface.co/docs/transformers/model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://huggingface.co/papers/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
+1. **[Arcee](https://huggingface.co/docs/transformers/model_doc/arcee)** (from Arcee AI) released with the blog post [Announcing Arcee Foundation Models](https://www.arcee.ai/blog/announcing-the-arcee-foundation-model-family) by Fernando Fernandes, Varun Singh, Charles Goddard, Lucas Atkins, Mark McQuade, Maziyar Panahi, Conner Stewart, Colin Kealty, Raghav Ravishankar, Lucas Krauss, Anneketh Vij, Pranav Veldurthi, Abhishek Thakur, Julien Simon, Scott Zembsch, Benjamin Langer, Aleksiej Cecocho, Maitri Patel.
 1. **[Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer)** (from MIT) released with the paper [AST: Audio Spectrogram Transformer](https://huggingface.co/papers/2104.01778) by Yuan Gong, Yu-An Chung, James Glass.
 1. **[BART](https://huggingface.co/docs/transformers/model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://huggingface.co/papers/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
 1. **[BEiT](https://huggingface.co/docs/transformers/model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://huggingface.co/papers/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet
index 6111a58c3..d0459db74 100644
--- a/docs/snippets/6_supported-models.snippet
+++ b/docs/snippets/6_supported-models.snippet
@@ -2,6 +2,7 @@
 ### Models
 1. **[ALBERT](https://huggingface.co/docs/transformers/model_doc/albert)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://huggingface.co/papers/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
+1. **[Arcee](https://huggingface.co/docs/transformers/model_doc/arcee)** (from Arcee AI) released with the blog post [Announcing Arcee Foundation Models](https://www.arcee.ai/blog/announcing-the-arcee-foundation-model-family) by Fernando Fernandes, Varun Singh, Charles Goddard, Lucas Atkins, Mark McQuade, Maziyar Panahi, Conner Stewart, Colin Kealty, Raghav Ravishankar, Lucas Krauss, Anneketh Vij, Pranav Veldurthi, Abhishek Thakur, Julien Simon, Scott Zembsch, Benjamin Langer, Aleksiej Cecocho, Maitri Patel.
 1. **[Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer)** (from MIT) released with the paper [AST: Audio Spectrogram Transformer](https://huggingface.co/papers/2104.01778) by Yuan Gong, Yu-An Chung, James Glass.
 1. **[BART](https://huggingface.co/docs/transformers/model_doc/bart)** (from Facebook) released with the paper [BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension](https://huggingface.co/papers/1910.13461) by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer.
 1. **[BEiT](https://huggingface.co/docs/transformers/model_doc/beit)** (from Microsoft) released with the paper [BEiT: BERT Pre-Training of Image Transformers](https://huggingface.co/papers/2106.08254) by Hangbo Bao, Li Dong, Furu Wei.
diff --git a/src/configs.js b/src/configs.js
index f47b06a73..e32594fe6 100644
--- a/src/configs.js
+++ b/src/configs.js
@@ -111,6 +111,7 @@ function getNormalizedConfig(config) {
             mapping['hidden_size'] = 'hidden_size';
             break;
         case 'llama':
+        case 'arcee':
        case 'lfm2':
         case 'smollm3':
         case 'olmo':
diff --git a/src/models.js b/src/models.js
index c32ba1623..98e6b796f 100644
--- a/src/models.js
+++ b/src/models.js
@@ -4594,6 +4594,13 @@ export class LlamaModel extends LlamaPreTrainedModel { }
 export class LlamaForCausalLM extends LlamaPreTrainedModel { }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// Arcee models
+export class ArceePreTrainedModel extends PreTrainedModel { }
+export class ArceeModel extends ArceePreTrainedModel { }
+export class ArceeForCausalLM extends ArceePreTrainedModel { }
+//////////////////////////////////////////////////
+
 //////////////////////////////////////////////////
 // LFM2 models
 export class Lfm2PreTrainedModel extends PreTrainedModel { }
@@ -7820,6 +7827,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
     ['codegen', ['CodeGenModel', CodeGenModel]],
     ['llama', ['LlamaModel', LlamaModel]],
+    ['arcee', ['ArceeModel', ArceeModel]],
     ['lfm2', ['Lfm2Model', Lfm2Model]],
     ['smollm3', ['SmolLM3Model', SmolLM3Model]],
     ['exaone', ['ExaoneModel', ExaoneModel]],
@@ -7927,6 +7935,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
     ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
     ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
+    ['arcee', ['ArceeForCausalLM', ArceeForCausalLM]],
     ['lfm2', ['Lfm2ForCausalLM', Lfm2ForCausalLM]],
     ['smollm3', ['SmolLM3ForCausalLM', SmolLM3ForCausalLM]],
     ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
diff --git a/tests/models/arcee/test_modeling_arcee.js b/tests/models/arcee/test_modeling_arcee.js
new file mode 100644
index 000000000..a852aeb49
--- /dev/null
+++ b/tests/models/arcee/test_modeling_arcee.js
@@ -0,0 +1,51 @@
+import { PreTrainedTokenizer, ArceeForCausalLM } from "../../../src/transformers.js";
+
+import { MAX_MODEL_LOAD_TIME, MAX_TEST_EXECUTION_TIME, MAX_MODEL_DISPOSE_TIME, DEFAULT_MODEL_OPTIONS } from "../../init.js";
+
+export default () => {
+  describe("ArceeForCausalLM", () => {
+    const model_id = "onnx-internal-testing/tiny-random-ArceeForCausalLM";
+    /** @type {ArceeForCausalLM} */
+    let model;
+    /** @type {PreTrainedTokenizer} */
+    let tokenizer;
+    beforeAll(async () => {
+      model = await ArceeForCausalLM.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS);
+      tokenizer = await PreTrainedTokenizer.from_pretrained(model_id);
+      tokenizer.padding_side = "left";
+    }, MAX_MODEL_LOAD_TIME);
+
+    it(
+      "batch_size=1",
+      async () => {
+        const inputs = tokenizer("hello");
+        const outputs = await model.generate({
+          ...inputs,
+          max_length: 10,
+        });
+        expect(outputs.tolist()).toEqual([[1n, 22172n, 1316n, 11038n, 25378n, 11619n, 7959n, 15231n, 15231n, 23659n]]);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "batch_size>1",
+      async () => {
+        const inputs = tokenizer(["hello", "hello world"], { padding: true });
+        const outputs = await model.generate({
+          ...inputs,
+          max_length: 10,
+        });
+        expect(outputs.tolist()).toEqual([
+          [2n, 1n, 22172n, 5706n, 3803n, 11619n, 28763n, 4015n, 18904n, 7959n],
+          [1n, 22172n, 3186n, 1316n, 11038n, 22918n, 9469n, 25671n, 22918n, 2687n],
+        ]);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+};
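For context, the `arcee` entries added to `MODEL_MAPPING_NAMES_DECODER_ONLY` and `MODEL_FOR_CAUSAL_LM_MAPPING_NAMES` above also make the architecture reachable through the auto classes, not only the `ArceeForCausalLM` class the test imports directly. Below is a minimal usage sketch; it reuses the tiny random-weight test checkpoint from this PR, so the generated text is meaningless, and a real ONNX-converted Arcee checkpoint would be substituted in practice:

```js
import { AutoTokenizer, AutoModelForCausalLM } from "@huggingface/transformers";

// Tiny random-weight checkpoint used by the test above (illustration only).
const model_id = "onnx-internal-testing/tiny-random-ArceeForCausalLM";

const tokenizer = await AutoTokenizer.from_pretrained(model_id);
// Resolves to ArceeForCausalLM via the new 'arcee' causal-LM mapping.
const model = await AutoModelForCausalLM.from_pretrained(model_id);

// Tokenize, generate, and decode, mirroring the batch_size=1 test case.
const inputs = tokenizer("hello");
const outputs = await model.generate({ ...inputs, max_length: 10 });
console.log(tokenizer.batch_decode(outputs, { skip_special_tokens: true }));
```

Note the design choice in `src/configs.js`: `case 'arcee':` shares the existing `llama` branch of `getNormalizedConfig`, so Arcee reuses the llama-style config normalization (e.g. `hidden_size`) and no new config-handling code is needed.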