Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
aa032aa
Rename config and remove dropout
vrdn-23 Jul 14, 2025
32ea2be
Remove NER and id2label
vrdn-23 Jul 14, 2025
e611686
Load debertav2 classification head but not implement trait/forward
vrdn-23 Jul 15, 2025
7cd8dd8
[WIP] loaded weights but failing during warmup
vrdn-23 Jul 15, 2025
940b239
IT WORKS!! but not for batches :(
vrdn-23 Jul 15, 2025
8c01371
fix merge conflicts
vrdn-23 Oct 30, 2025
537f83a
interim review changes
vrdn-23 Oct 30, 2025
f38caea
interim changes again
vrdn-23 Oct 30, 2025
64b4b3a
fix linting
vrdn-23 Oct 30, 2025
46eab77
refactor our xsoftmax
vrdn-23 Oct 30, 2025
9abd6eb
linting
vrdn-23 Oct 30, 2025
0e707ea
Merge remote-tracking branch 'origin/main' into vidamoda/deberta-v2
vrdn-23 Oct 30, 2025
94ccfcd
solve compatibility with fp16
vrdn-23 Oct 30, 2025
1448272
Merge remote-tracking branch 'origin/main' into vidamoda/deberta-v2
vrdn-23 Dec 3, 2025
4523193
Add cuda configuration
vrdn-23 Dec 3, 2025
fab91a2
Merge remote-tracking branch 'origin/main' into vidamoda/deberta-v2
vrdn-23 Dec 10, 2025
c1d910f
Move base scale tensor to load
vrdn-23 Dec 10, 2025
8a920fd
Remove `deberta_hidden_act_deserializer` to use `HiddenAct::Gelu`
alvarobartt Feb 5, 2026
a891a7e
Merge branch 'main' into vidamoda/deberta-v2
alvarobartt Feb 5, 2026
c59e9ac
Align DebertaV2 with former impls (WIP)
alvarobartt Feb 5, 2026
290e564
Add `test_debertav2.rs` with `mixedbread-ai/mxbai-rerank-xsmall-v1`
alvarobartt Feb 5, 2026
135b169
Drop `GeluExact` from `HiddenAct` (more information in #753)
alvarobartt Feb 5, 2026
b039b91
Remove `serde(rename(deserialize = ...))` for `DebertaV2`
alvarobartt Feb 5, 2026
cfed784
Add missing traits `Context`, `IndexOp` and `Module`
alvarobartt Feb 5, 2026
27281cd
Remove comments + use `DType` over `candle::DType`
alvarobartt Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backends/candle/src/layers/linear.rs
Original file line number Diff line number Diff line change
Expand Up
@@ -7,6 +7,7 @@ use serde::Deserialize;
pub enum HiddenAct {
#[serde(alias = "gelu_pytorch_tanh")]
Gelu,
GeluExact,
Relu,
Silu,
Swiglu,
Expand All
@@ -16,6 +17,7 @@ impl HiddenAct {
pub fn forward(&self, x: &Tensor) -> Result<Tensor> {
match self {
Self::Gelu => x.gelu(),
Self::GeluExact => x.gelu_erf(),
Self::Relu => x.relu(),
Self::Silu => x.silu(),
Self::Swiglu => candle_nn::ops::swiglu(x),
Expand Down
Expand Up
@@ -85,6 +87,7 @@ impl Linear {
if let Some(act) = &self.act {
match act {
HiddenAct::Gelu => x.gelu(),
HiddenAct::GeluExact => x.gelu_erf(),
HiddenAct::Relu => x.relu(),
HiddenAct::Silu => x.silu(),
HiddenAct::Swiglu => candle_nn::ops::swiglu(&x),
Expand Down
15 changes: 11 additions & 4 deletions backends/candle/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up
@@ -22,10 +22,11 @@ use crate::compute_cap::{
compatible_compute_cap, get_compile_compute_cap, get_runtime_compute_cap,
};
use crate::models::{
BertConfig, BertModel, Dense, DenseConfig, DenseLayer, DistilBertConfig, DistilBertModel,
GTEConfig, GTEModel, Gemma3Config, Gemma3Model, JinaBertModel, JinaCodeBertModel, MPNetConfig,
MPNetModel, MistralConfig, Model, ModernBertConfig, ModernBertModel, NomicBertModel,
NomicConfig, Qwen2Config, Qwen3Config, Qwen3Model,
BertConfig, BertModel, DebertaV2Config, DebertaV2Model, Dense, DenseConfig, DenseLayer,
DistilBertConfig, DistilBertModel, GTEConfig, GTEModel, Gemma3Config, Gemma3Model,
JinaBertModel, JinaCodeBertModel, MPNetConfig, MPNetModel, MistralConfig, Model,
ModernBertConfig, ModernBertModel, NomicBertModel, NomicConfig, Qwen2Config, Qwen3Config,
Qwen3Model,
};
#[cfg(feature = "cuda")]
use crate::models::{
Expand Down
Expand Up
@@ -92,6 +93,8 @@ impl<'de> Deserialize<'de> for BertConfigWrapper {
#[serde(tag = "model_type", rename_all = "kebab-case")]
enum Config {
Bert(BertConfigWrapper),
#[serde(rename(deserialize = "deberta-v2"))]
DebertaV2(DebertaV2Config),
Camembert(BertConfig),
#[serde(rename(deserialize = "distilbert"))]
DistilBert(DistilBertConfig),
Expand Down
Expand Up
@@ -250,6 +253,10 @@ impl CandleBackend {
Ok(Box::new(BertModel::load(vb, &config, model_type).s()?))
}
},
(Config::DebertaV2(config), Device::Cpu | Device::Metal(_)) => {
tracing::info!("Starting DebertaV2 model on {:?}", device);
Ok(Box::new(DebertaV2Model::load(vb, &config, model_type).s()?))
}
(
Config::Camembert(config) | Config::Roberta(config) | Config::XlmRoberta(config),
Device::Cpu | Device::Metal(_),
Expand Down
Loading