Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/data_designer/cli/commands/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def display_models(model_repo: ModelRepository) -> None:
# Display as table
table = Table(title="Model Configurations", border_style=NordColor.NORD8.value)
table.add_column("Alias", style=NordColor.NORD14.value, no_wrap=True)
table.add_column("Model ID", style=NordColor.NORD4.value)
table.add_column("Model", style=NordColor.NORD4.value)
table.add_column("Provider", style=NordColor.NORD9.value, no_wrap=True)
table.add_column("Inference Parameters", style=NordColor.NORD15.value)

Expand Down
4 changes: 2 additions & 2 deletions src/data_designer/cli/forms/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ def create_form(self, initial_data: dict[str, Any] | None = None) -> Form:
fields.append(
TextField(
"model",
"Model ID",
"Model",
default=initial_data.get("model") if initial_data else None,
required=True,
validator=lambda x: (False, "Model ID is required") if not x else (True, None),
validator=lambda x: (False, "Model is required") if not x else (True, None),
)
)

Expand Down
47 changes: 11 additions & 36 deletions src/data_designer/config/default_model_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,57 +27,32 @@
logger = logging.getLogger(__name__)


def get_default_text_alias_inference_parameters() -> ChatCompletionInferenceParams:
return ChatCompletionInferenceParams(
temperature=0.85,
top_p=0.95,
)


def get_default_reasoning_alias_inference_parameters() -> ChatCompletionInferenceParams:
return ChatCompletionInferenceParams(
temperature=0.35,
top_p=0.95,
)


def get_default_vision_alias_inference_parameters() -> ChatCompletionInferenceParams:
return ChatCompletionInferenceParams(
temperature=0.85,
top_p=0.95,
)


def get_default_embedding_alias_inference_parameters(provider: str) -> EmbeddingInferenceParams:
args = dict(encoding_format="float")
if provider == "nvidia":
args["extra_body"] = {"input_type": "query"}
return EmbeddingInferenceParams(**args)


def get_default_inference_parameters(
model_alias: Literal["text", "reasoning", "vision", "embedding"], provider: str
model_alias: Literal["text", "reasoning", "vision", "embedding"],
inference_parameters: dict[str, Any],
) -> InferenceParamsT:
if model_alias == "reasoning":
return get_default_reasoning_alias_inference_parameters()
return ChatCompletionInferenceParams(**inference_parameters)
elif model_alias == "vision":
return get_default_vision_alias_inference_parameters()
return ChatCompletionInferenceParams(**inference_parameters)
elif model_alias == "embedding":
return get_default_embedding_alias_inference_parameters(provider)
return EmbeddingInferenceParams(**inference_parameters)
else:
return get_default_text_alias_inference_parameters()
return ChatCompletionInferenceParams(**inference_parameters)


def get_builtin_model_configs() -> list[ModelConfig]:
model_configs = []
for provider, model_alias_map in PREDEFINED_PROVIDERS_MODEL_MAP.items():
for model_alias, model_id in model_alias_map.items():
for model_alias, settings in model_alias_map.items():
model_configs.append(
ModelConfig(
alias=f"{provider}-{model_alias}",
model=model_id,
model=settings["model"],
provider=provider,
inference_parameters=get_default_inference_parameters(model_alias, provider),
inference_parameters=get_default_inference_parameters(
model_alias, settings["inference_parameters"]
),
)
)
return model_configs
Expand Down
26 changes: 18 additions & 8 deletions src/data_designer/config/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,27 @@ class NordColor(Enum):
},
]


DEFAULT_TEXT_INFERENCE_PARAMS = {"temperature": 0.85, "top_p": 0.95}
DEFAULT_REASONING_INFERENCE_PARAMS = {"temperature": 0.35, "top_p": 0.95}
DEFAULT_VISION_INFERENCE_PARAMS = {"temperature": 0.85, "top_p": 0.95}
DEFAULT_EMBEDDING_INFERENCE_PARAMS = {"encoding_format": "float"}


PREDEFINED_PROVIDERS_MODEL_MAP = {
NVIDIA_PROVIDER_NAME: {
"text": "nvidia/nemotron-3-nano-30b-a3b",
"reasoning": "openai/gpt-oss-20b",
"vision": "nvidia/nemotron-nano-12b-v2-vl",
"embedding": "nvidia/llama-3.2-nv-embedqa-1b-v2",
"text": {"model": "nvidia/nemotron-3-nano-30b-a3b", "inference_parameters": {"temperature": 1.0, "top_p": 1.0}},
"reasoning": {"model": "openai/gpt-oss-20b", "inference_parameters": DEFAULT_REASONING_INFERENCE_PARAMS},
"vision": {"model": "nvidia/nemotron-nano-12b-v2-vl", "inference_parameters": DEFAULT_VISION_INFERENCE_PARAMS},
"embedding": {
"model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
"inference_parameters": DEFAULT_EMBEDDING_INFERENCE_PARAMS | {"extra_body": {"input_type": "query"}},
},
Comment on lines +311 to +317
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main update is to keep each default model next to its default inference settings.

},
OPENAI_PROVIDER_NAME: {
"text": "gpt-4.1",
"reasoning": "gpt-5",
"vision": "gpt-5",
"embedding": "text-embedding-3-large",
"text": {"model": "gpt-4.1", "inference_parameters": DEFAULT_TEXT_INFERENCE_PARAMS},
"reasoning": {"model": "gpt-5", "inference_parameters": DEFAULT_REASONING_INFERENCE_PARAMS},
"vision": {"model": "gpt-5", "inference_parameters": DEFAULT_VISION_INFERENCE_PARAMS},
"embedding": {"model": "text-embedding-3-large", "inference_parameters": DEFAULT_EMBEDDING_INFERENCE_PARAMS},
},
}
10 changes: 5 additions & 5 deletions tests/cli/forms/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,19 @@ def test_alias_field_accepts_any_alias_when_no_existing() -> None:
assert alias_field.value == "my-model"


# Model ID validation tests
def test_model_id_field_rejects_empty_string() -> None:
# Model validation tests
def test_model_field_rejects_empty_string() -> None:
"""Test model field rejects empty strings."""
builder = ModelFormBuilder()
form = builder.create_form()
model_field = form.get_field("model")

with pytest.raises(ValidationError, match="Model ID is required"):
with pytest.raises(ValidationError, match="Model is required"):
model_field.value = ""


def test_model_id_field_accepts_any_non_empty_string() -> None:
"""Test model ID field accepts any non-empty string."""
def test_model_field_accepts_any_non_empty_string() -> None:
"""Test model field accepts any non-empty string."""
builder = ModelFormBuilder()
form = builder.create_form()
model_field = form.get_field("model")
Expand Down
18 changes: 13 additions & 5 deletions tests/config/test_default_model_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,31 @@


def test_get_default_inference_parameters():
assert get_default_inference_parameters("text", "nvidia") == ChatCompletionInferenceParams(
assert get_default_inference_parameters(
"text", {"temperature": 0.85, "top_p": 0.95}
) == ChatCompletionInferenceParams(
temperature=0.85,
top_p=0.95,
)
assert get_default_inference_parameters("reasoning", "nvidia") == ChatCompletionInferenceParams(
assert get_default_inference_parameters(
"reasoning", {"temperature": 0.35, "top_p": 0.95}
) == ChatCompletionInferenceParams(
temperature=0.35,
top_p=0.95,
)
assert get_default_inference_parameters("vision", "nvidia") == ChatCompletionInferenceParams(
assert get_default_inference_parameters(
"vision", {"temperature": 0.85, "top_p": 0.95}
) == ChatCompletionInferenceParams(
temperature=0.85,
top_p=0.95,
)
assert get_default_inference_parameters("embedding", "nvidia") == EmbeddingInferenceParams(
assert get_default_inference_parameters(
"embedding", {"encoding_format": "float", "extra_body": {"input_type": "query"}}
) == EmbeddingInferenceParams(
encoding_format="float",
extra_body={"input_type": "query"},
)
assert get_default_inference_parameters("embedding", "openai") == EmbeddingInferenceParams(
assert get_default_inference_parameters("embedding", {"encoding_format": "float"}) == EmbeddingInferenceParams(
encoding_format="float",
)

Expand Down