diff --git a/README.md b/README.md index 890dff000..ac9164e50 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. +1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3)** (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. 1. **SNAC** (from Papla Media, ETH Zurich) released with the paper [SNAC: Multi-Scale Neural Audio Codec](https://huggingface.co/papers/2410.14411) by Hubert Siuzdak, Florian Grötschla, Luca A. Lanzendörfer. 1. 
**[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://huggingface.co/papers/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 9d31a60e0..5c3cd5fa6 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -130,6 +130,7 @@ 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. +1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3)** (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. 
**[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. 1. **SNAC** (from Papla Media, ETH Zurich) released with the paper [SNAC: Multi-Scale Neural Audio Codec](https://huggingface.co/papers/2410.14411) by Hubert Siuzdak, Florian Grötschla, Luca A. Lanzendörfer. 1. **[SpeechT5](https://huggingface.co/docs/transformers/model_doc/speecht5)** (from Microsoft Research) released with the paper [SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing](https://huggingface.co/papers/2110.07205) by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. diff --git a/src/configs.js b/src/configs.js index 598f5fa9d..75bcfb3c5 100644 --- a/src/configs.js +++ b/src/configs.js @@ -109,6 +109,7 @@ function getNormalizedConfig(config) { mapping['hidden_size'] = 'hidden_size'; break; case 'llama': + case 'smollm3': case 'olmo': case 'olmo2': case 'mobilellm': diff --git a/src/models.js b/src/models.js index 5a62b4973..1e6b158b5 100644 --- a/src/models.js +++ b/src/models.js @@ -4586,6 +4586,13 @@ export class LlamaModel extends LlamaPreTrainedModel { } export class LlamaForCausalLM extends LlamaPreTrainedModel { } ////////////////////////////////////////////////// +////////////////////////////////////////////////// +// SmolLM3 models +export class SmolLM3PreTrainedModel extends PreTrainedModel { } +export class SmolLM3Model extends SmolLM3PreTrainedModel { } +export class SmolLM3ForCausalLM extends SmolLM3PreTrainedModel { } +////////////////////////////////////////////////// + ////////////////////////////////////////////////// // Helium models export class 
HeliumPreTrainedModel extends PreTrainedModel { } @@ -7796,6 +7803,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]], ['codegen', ['CodeGenModel', CodeGenModel]], ['llama', ['LlamaModel', LlamaModel]], + ['smollm3', ['SmolLM3Model', SmolLM3Model]], ['exaone', ['ExaoneModel', ExaoneModel]], ['olmo', ['OlmoModel', OlmoModel]], ['olmo2', ['Olmo2Model', Olmo2Model]], @@ -7900,6 +7908,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]], ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]], ['llama', ['LlamaForCausalLM', LlamaForCausalLM]], + ['smollm3', ['SmolLM3ForCausalLM', SmolLM3ForCausalLM]], ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]], ['olmo', ['OlmoForCausalLM', OlmoForCausalLM]], ['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],