5 changes: 3 additions & 2 deletions autointent/_pipeline/_pipeline.py
@@ -25,7 +25,7 @@
     SearchSpaceValidationMode,
 )
 from autointent.metrics import DECISION_METRICS
-from autointent.nodes import InferenceNode, NodeOptimizer
+from autointent.nodes import InferenceNode, NodeOptimizer, OptimizationSearchSpaceConfig
 from autointent.utils import load_preset, load_search_space
 
 from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput
@@ -94,7 +94,8 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed
         """
         if not isinstance(search_space, list):
            search_space = load_search_space(search_space)
-        nodes = [NodeOptimizer(**node) for node in search_space]
+        validated_search_space = OptimizationSearchSpaceConfig(search_space).model_dump()  # type: ignore[arg-type]
+        nodes = [NodeOptimizer(**node) for node in validated_search_space]
         return cls(nodes=nodes, seed=seed)
 
     @classmethod
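With this change, Pipeline.from_search_space validates the whole search space before any NodeOptimizer is constructed. A minimal usage sketch (node fields are modeled on the validators later in this diff; the metric and module names are illustrative):

from autointent import Pipeline  # assumed public import path

search_space = [
    {
        "node_type": "decision",
        "target_metric": "decision_accuracy",  # illustrative metric name
        "search_space": [
            {"module_name": "threshold", "thresh": {"low": 0.1, "high": 0.9}, "n_trials": 10},
            {"module_name": "argmax"},
        ],
    },
]

# A typo such as "module_nam" now raises a pydantic ValidationError here,
# instead of surfacing later inside NodeOptimizer.
pipeline = Pipeline.from_search_space(search_space, seed=42)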
2 changes: 1 addition & 1 deletion autointent/_presets/light_extra.yaml
@@ -21,6 +21,6 @@ search_space:
         thresh:
           low: 0.1
           high: 0.9
-          n_trials: 10
+        n_trials: 10
       - module_name: argmax
     sampler: random
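For reference, this preset can be loaded by name: load_preset is imported in _pipeline.py above, and "light_extra" is one of the SearchSpacePresets literals defined in custom_types.py below. A short sketch (the returned value is assumed to be a plain search-space list):

from autointent.utils import load_preset

search_space = load_preset("light_extra")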
19 changes: 19 additions & 0 deletions autointent/custom_types.py
@@ -8,6 +8,7 @@
 from typing import Annotated, Literal, TypeAlias
 
 from annotated_types import Interval
+from pydantic import BaseModel, Field
 
 
 class LogLevel(Enum):
@@ -83,3 +84,21 @@ class Split:
 SearchSpaceValidationMode = Literal["raise", "warning", "filter"]
 
 SearchSpacePresets = Literal["light", "light_moderate", "light_extra", "heavy", "heavy_moderate", "heavy_extra"]
+
+
+class ParamSpaceInt(BaseModel):
+    """Param space for optimizing int parameters for Optuna."""
+
+    low: int = Field(..., description="Low boundary of the search space.")
+    high: int = Field(..., description="High boundary of the search space.")
+    step: int = Field(1, description="Step of the search space.")
+    log: bool = Field(False, description="Whether to use a logarithmic scale.")
+
+
+class ParamSpaceFloat(BaseModel):
+    """Param space for optimizing float parameters for Optuna."""
+
+    low: float = Field(..., description="Low boundary of the search space.")
+    high: float = Field(..., description="High boundary of the search space.")
+    step: float | None = Field(None, description="Step of the search space.")
+    log: bool = Field(False, description="Whether to use a logarithmic scale.")
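These two models mirror the keyword arguments of Optuna's suggest API, so a validated instance can be splatted straight into a trial. A minimal sketch ("k_neighbors" and the objective are stand-ins):

import optuna

from autointent.custom_types import ParamSpaceInt

space = ParamSpaceInt(low=1, high=64)  # step=1, log=False by default


def objective(trial: optuna.Trial) -> float:
    # model_dump() yields exactly the low/high/step/log kwargs of suggest_int
    k = trial.suggest_int("k_neighbors", **space.model_dump())
    return float(k)  # stand-in for a real validation metric


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)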
20 changes: 14 additions & 6 deletions autointent/modules/scoring/_sklearn/sklearn_scorer.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import numpy.typing as npt
@@ -26,6 +26,8 @@
     if hasattr(class_, "predict_proba")
 }
 
+AVAILABLE_CLASSIFIERS_NAMES = tuple(AVAILABLE_CLASSIFIERS.keys())
+
 
 class SklearnScorer(BaseScorer):
     """
@@ -43,9 +45,9 @@ class SklearnScorer(BaseScorer):
 
     def __init__(
         self,
-        clf_name: str,
+        clf_name: Literal[AVAILABLE_CLASSIFIERS_NAMES],  # type: ignore[valid-type]
         embedder_config: EmbedderConfig | str | dict[str, Any] | None = None,
-        **clf_args: Any,  # noqa: ANN401
+        **clf_args: dict[str, Any],
     ) -> None:
         """
         Initialize the SklearnScorer.
@@ -58,6 +60,9 @@ def __init__(
         self.clf_name = clf_name
 
         if AVAILABLE_CLASSIFIERS.get(self.clf_name):
+            if "clf_args" in clf_args:
+                # unwrap the nested "clf_args" key left behind by an incorrect save during inference
+                clf_args = clf_args["clf_args"]
             self._base_clf = AVAILABLE_CLASSIFIERS[self.clf_name](**clf_args)
         else:
             msg = f"Class {self.clf_name} does not exist in sklearn or does not have predict_proba method"
@@ -68,9 +73,9 @@
     def from_context(
         cls,
         context: Context,
-        clf_name: str,
+        clf_name: Literal[AVAILABLE_CLASSIFIERS_NAMES],  # type: ignore[valid-type]
         embedder_config: EmbedderConfig | str | None = None,
-        **clf_args: float | str | bool,
+        clf_args: dict[str, int | float | str | bool | list[Any]] | None = None,
     ) -> Self:
         """
         Create a SklearnScorer instance using a Context object.
@@ -84,10 +89,13 @@
         if embedder_config is None:
             embedder_config = context.resolve_embedder()
 
+        if clf_args is None:
+            clf_args = {}
+
         return cls(
             embedder_config=embedder_config,
             clf_name=clf_name,
-            **clf_args,
+            **clf_args,  # type: ignore[arg-type]
         )
 
     def fit(
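Taken together, clf_args now travels through from_context as a single dict instead of loose keyword arguments. A usage sketch (the import path is assumed from this file's location, and the classifier kwargs are illustrative):

from autointent.modules.scoring import SklearnScorer  # assumed import path

# clf_name must be a key of AVAILABLE_CLASSIFIERS; clf_args is forwarded to
# the sklearn constructor. A nested {"clf_args": {...}} wrapper produced by
# an incorrect inference-time save is unwrapped in __init__.
scorer = SklearnScorer(
    clf_name="LogisticRegression",
    clf_args={"C": 1.0, "max_iter": 200},
)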
33 changes: 16 additions & 17 deletions autointent/nodes/_optimization/_node_optimizer.py
@@ -10,29 +10,14 @@
 import optuna
 import torch
 from optuna.trial import Trial
-from pydantic import BaseModel, Field
 from typing_extensions import assert_never
 
 from autointent import Dataset
 from autointent.context import Context
-from autointent.custom_types import NodeType, SamplerType, SearchSpaceValidationMode
+from autointent.custom_types import NodeType, ParamSpaceFloat, ParamSpaceInt, SamplerType, SearchSpaceValidationMode
 from autointent.nodes.info import NODES_INFO
 
 
-class ParamSpaceInt(BaseModel):
-    low: int = Field(..., description="Low boundary of the search space.")
-    high: int = Field(..., description="High boundary of the search space.")
-    step: int = Field(1, description="Step of the search space.")
-    log: bool = Field(False, description="Whether to use a logarithmic scale.")
-
-
-class ParamSpaceFloat(BaseModel):
-    low: float = Field(..., description="Low boundary of the search space.")
-    high: float = Field(..., description="High boundary of the search space.")
-    step: float | None = Field(None, description="Step of the search space.")
-    log: bool = Field(False, description="Whether to use a logarithmic scale.")
-
-
 class NodeOptimizer:
     """Node optimizer class."""
 
@@ -148,7 +133,7 @@ def objective(
 
         return target_metric
 
-    def suggest(self, trial: Trial, search_space: dict[str, Any | list[Any]]) -> dict[str, Any]:
+    def suggest(self, trial: Trial, search_space: dict[str, Any | list[Any]]) -> dict[str, Any]:  # noqa: C901
         res: dict[str, Any] = {}
 
         def is_valid_param_space(
@@ -167,6 +152,20 @@ def is_valid_param_space(
                 res[param_name] = trial.suggest_int(param_name, **param_space)
             elif is_valid_param_space(param_space, ParamSpaceFloat):
                 res[param_name] = trial.suggest_float(param_name, **param_space)
+            elif isinstance(param_space, dict):
+                # nested param space, e.g. the sklearn_scorer clf_args dict
+                clf_args: dict[str, Any] = {}
+                for k, v in param_space.items():
+                    if isinstance(v, list):
+                        clf_args[k] = trial.suggest_categorical(f"{param_name}_{k}", choices=v)
+                    elif is_valid_param_space(v, ParamSpaceInt):
+                        clf_args[k] = trial.suggest_int(f"{param_name}_{k}", **v)
+                    elif is_valid_param_space(v, ParamSpaceFloat):
+                        clf_args[k] = trial.suggest_float(f"{param_name}_{k}", **v)
+                    else:
+                        msg = f"Unsupported type of param search space: {v}"
+                        raise TypeError(msg)
+                res["clf_args"] = clf_args
             else:
                 msg = f"Unsupported type of param search space: {param_space}"
                 raise TypeError(msg)
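A sketch of a scoring-module entry whose nested dict exercises each branch above: plain lists become categorical suggestions, and ParamSpaceInt/ParamSpaceFloat-shaped dicts become int/float suggestions (all values illustrative):

module_search_space = {
    "module_name": "sklearn",
    "clf_name": ["LogisticRegression", "RandomForestClassifier"],  # -> suggest_categorical
    "clf_args": {
        "C": {"low": 0.01, "high": 10.0, "log": True},        # ParamSpaceFloat -> suggest_float
        "max_iter": {"low": 100, "high": 500, "step": 100},   # ParamSpaceInt -> suggest_int
        "class_weight": [None, "balanced"],                   # list -> suggest_categorical
    },
}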
71 changes: 55 additions & 16 deletions autointent/nodes/schemes.py
@@ -1,14 +1,16 @@
 """Schemes."""
 
+import functools
 import inspect
+import operator
 from collections.abc import Iterator
+from types import NoneType, UnionType
 from typing import Annotated, Any, Literal, TypeAlias, Union, get_args, get_origin, get_type_hints
 
-from pydantic import BaseModel, Field, PositiveInt, RootModel
+from pydantic import BaseModel, ConfigDict, Field, PositiveInt, RootModel
 
-from autointent.custom_types import NodeType
+from autointent.custom_types import NodeType, ParamSpaceFloat, ParamSpaceInt
 from autointent.modules.abc import BaseModule
-from autointent.nodes._optimization._node_optimizer import ParamSpaceFloat, ParamSpaceInt
 from autointent.nodes.info import DecisionNodeInfo, EmbeddingNodeInfo, RegexNodeInfo, ScoringNodeInfo
 
 
@@ -26,17 +28,26 @@ def type_matches(target: type, tp: type) -> bool:
     """
     Recursively check if the target type is present in the given type.
 
-    This function handles union types by unwrapping Annotated types where necessary.
+    This function handles union types and generic types (e.g. dict[...], matched by
+    their origin) after unwrapping Annotated types.
 
-    :param target: Target type
-    :param tp: Given type
-    :return: If the target type is present in the given type
+    :param target: Target type to check for.
+    :param tp: Given type, which may be a union, generic, or annotated type.
+    :return: True if the target type is present in the given type.
     """
     origin = get_origin(tp)
 
-    if origin is Union:  # float | list[float]
+    if origin is Union:
         return any(type_matches(target, arg) for arg in get_args(tp))
-    return unwrap_annotated(tp) is target
+
+    # Unwrap Annotated types, if any.
+    unwrapped = unwrap_annotated(tp)
+
+    # If the unwrapped type is a generic type, check its origin.
+    generic_origin = get_origin(unwrapped)
+    if generic_origin is not None:
+        return generic_origin is target
+
+    return unwrapped is target
 
 
 def get_optuna_class(param_type: type) -> type[ParamSpaceInt | ParamSpaceFloat] | None:
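Under the new rules, generics match by their origin and unions are still walked recursively; a few hypothetical checks (using typing.Union for clarity):

from typing import Optional, Union

from autointent.nodes.schemes import type_matches  # module-level helper above

assert type_matches(dict, dict[str, float])            # generic: origin is dict
assert type_matches(dict, Optional[dict[str, float]])  # union member matches
assert type_matches(float, Union[float, list[float]])  # plain union member
assert not type_matches(int, dict[str, float])         # origin is dict, not int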
@@ -56,7 +67,12 @@
     return None
 
 
-def generate_models_and_union_type_for_classes(
+def to_union(types: list[type]) -> type:
+    """Convert a list of types into a union type."""
+    return functools.reduce(operator.or_, types)
+
+
+def generate_models_and_union_type_for_classes(  # noqa: PLR0912, C901
     classes: list[type[BaseModule]],
 ) -> type[BaseModel]:
     """Dynamically generates Pydantic models for class constructors and creates a union type."""
@@ -70,6 +86,7 @@ def generate_models_and_union_type_for_classes(
         fields = {
             "module_name": (Literal[cls.name], Field(...)),
             "n_trials": (PositiveInt | None, Field(None, description="Number of trials")),
+            "model_config": (ConfigDict, ConfigDict(extra="forbid")),
         }
 
         for param_name, param in init_signature.parameters.items():
@@ -78,11 +95,33 @@

             param_type: TypeAlias = type_hints.get(param_name, Any)  # type: ignore[valid-type]  # noqa: PYI042
             field = Field(default=[param.default]) if param.default is not inspect.Parameter.empty else Field(...)
-            search_type = get_optuna_class(param_type)
-            if search_type is None:
-                fields[param_name] = (list[param_type], field)
+            if not type_matches(dict, param_type):
+                search_type = get_optuna_class(param_type)
+                if search_type is None:
+                    fields[param_name] = (list[param_type], field)
+                else:
+                    fields[param_name] = (list[param_type] | search_type, field)
             else:
-                fields[param_name] = (list[param_type] | search_type, field)
+                dict_key_type, dict_values_types = get_args(param_type)
+                is_optional = False
+                if dict_values_types is NoneType:  # if the dict parameter is optional
+                    is_optional = True
+                    dict_key_type, dict_values_types = get_args(dict_key_type)
+                if get_origin(dict_values_types) is UnionType:
+                    field_types: list[type[Any]] = []
+                    for value in get_args(dict_values_types):
+                        search_type = get_optuna_class(value)
+                        if search_type is not None:
+                            field_types.append(search_type)
+                        field_types.append(list[value])  # type: ignore[valid-type]
+                    field_type = to_union(field_types)
+                else:
+                    field_type = dict_values_types
+
+                if is_optional:
+                    fields[param_name] = (dict[dict_key_type, field_type] | None, field)  # type: ignore[valid-type]
+                else:
+                    fields[param_name] = (dict[dict_key_type, field_type], field)  # type: ignore[valid-type]
 
         model_name = f"{cls.__name__}InitModel"
         models[cls.__name__] = type(
@@ -157,7 +196,7 @@ class RegexNodeValidator(BaseModel):
     search_space: list[RegexpSearchSpaceType]
 
 
-SearchSpaceTypes: TypeAlias = EmbeddingNodeValidator | ScoringNodeValidator | DecisionNodeValidator | RegexNodeValidator
+SearchSpaceTypes: TypeAlias = ScoringNodeValidator | EmbeddingNodeValidator | DecisionNodeValidator | RegexNodeValidator
 
 
 class OptimizationSearchSpaceConfig(RootModel[list[SearchSpaceTypes]]):
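Because every generated *InitModel now carries extra="forbid", unknown keys in a search-space entry fail validation immediately. A minimal sketch of the same pattern using pydantic's create_model in place of the type(...) call above ("argmax" is taken from the preset earlier in this diff):

from typing import Literal

from pydantic import ConfigDict, Field, PositiveInt, ValidationError, create_model

ArgmaxInitModel = create_model(
    "ArgmaxInitModel",
    __config__=ConfigDict(extra="forbid"),
    module_name=(Literal["argmax"], Field(...)),
    n_trials=(PositiveInt | None, Field(None, description="Number of trials")),
)

ArgmaxInitModel(module_name="argmax")  # ok
try:
    ArgmaxInitModel(module_name="argmax", thresh=0.5)  # unknown key
except ValidationError as err:
    print(err)  # extra="forbid" rejects the entry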