diff --git a/lib/instructor.ex b/lib/instructor.ex
index 70178e9..0cb3ce0 100644
--- a/lib/instructor.ex
+++ b/lib/instructor.ex
@@ -130,24 +130,54 @@ defmodule Instructor do
     is_stream = Keyword.get(params, :stream, false)
     response_model = Keyword.fetch!(params, :response_model)
 
+    # Split an optional schema context off the response model. The more specific
+    # shapes come first: `{:partial, {:array, model}}` would otherwise be read as
+    # `{:partial, {model, context}}` with `model = :array`.
+    {response_model, schema_context} =
+      case response_model do
+        {:partial, {:array, {response_model, context}}} ->
+          {{:partial, {:array, response_model}}, context}
+
+        {:partial, {:array, response_model}} ->
+          {{:partial, {:array, response_model}}, %{}}
+
+        {:partial, {response_model, context}} ->
+          {{:partial, response_model}, context}
+
+        {:partial, response_model} ->
+          {{:partial, response_model}, %{}}
+
+        {:array, {response_model, context}} ->
+          {{:array, response_model}, context}
+
+        {:array, response_model} ->
+          {{:array, response_model}, %{}}
+
+        {response_model, context} ->
+          {response_model, context}
+
+        rm ->
+          {rm, %{}}
+      end
+
     case {response_model, is_stream} do
       {{:partial, {:array, response_model}}, true} ->
-        do_streaming_partial_array_chat_completion(response_model, params, config)
+        do_streaming_partial_array_chat_completion(response_model, params, config, schema_context)
 
       {{:partial, response_model}, true} ->
-        do_streaming_partial_chat_completion(response_model, params, config)
+        do_streaming_partial_chat_completion(response_model, params, config, schema_context)
 
       {{:array, response_model}, true} ->
-        do_streaming_array_chat_completion(response_model, params, config)
+        do_streaming_array_chat_completion(response_model, params, config, schema_context)
 
       {{:array, response_model}, false} ->
         params = Keyword.put(params, :stream, true)
 
-        do_streaming_array_chat_completion(response_model, params, config)
+        do_streaming_array_chat_completion(response_model, params, config, schema_context)
         |> Enum.to_list()
 
       {response_model, false} ->
-        do_chat_completion(response_model, params, config)
+        do_chat_completion(response_model, params, config, schema_context)
 
       {_, true} ->
         raise """
@@ -268,7 +298,12 @@ defmodule Instructor do
     changeset
   end
 
-  defp do_streaming_partial_array_chat_completion(response_model, params, config) do
+  defp do_streaming_partial_array_chat_completion(
+         response_model,
+         params,
+         config,
+         schema_context \\ %{}
+       ) do
     wrapped_model = %{
       value:
         Ecto.ParameterizedType.init(Ecto.Embedded, cardinality: :many, related: response_model)
@@ -277,7 +312,7 @@ defmodule Instructor do
     params = Keyword.put(params, :response_model, wrapped_model)
     validation_context = Keyword.get(params, :validation_context, %{})
     mode = Keyword.get(params, :mode, :tools)
-    params = params_for_mode(mode, wrapped_model, params)
+    params = params_for_mode(mode, wrapped_model, params, schema_context)
 
     model =
       if is_ecto_schema(response_model) do
@@ -337,7 +372,7 @@ defmodule Instructor do
     )
   end
 
-  defp do_streaming_partial_chat_completion(response_model, params, config) do
+  defp do_streaming_partial_chat_completion(response_model, params, config, schema_context \\ %{}) do
     wrapped_model = %{
       value:
         Ecto.ParameterizedType.init(Ecto.Embedded, cardinality: :one, related: response_model)
@@ -346,7 +381,7 @@ defmodule Instructor do
     params = Keyword.put(params, :response_model, wrapped_model)
     validation_context = Keyword.get(params, :validation_context, %{})
     mode = Keyword.get(params, :mode, :tools)
-    params = params_for_mode(mode, wrapped_model, params)
+    params = params_for_mode(mode, wrapped_model, params, schema_context)
 
     adapter(config).chat_completion(params, config)
     |> Instructor.JSONStreamParser.parse()
@@ -383,7 +418,7 @@ defmodule Instructor do
     )
   end
 
-  defp do_streaming_array_chat_completion(response_model, params, config) do
+  defp do_streaming_array_chat_completion(response_model, params, config, schema_context \\ %{}) do
     wrapped_model = %{
       value:
         Ecto.ParameterizedType.init(Ecto.Embedded, cardinality: :many, related: response_model)
@@ -392,7 +427,7 @@ defmodule Instructor do
     params = Keyword.put(params, :response_model, wrapped_model)
     validation_context = Keyword.get(params, :validation_context, %{})
     mode = Keyword.get(params, :mode, :tools)
-    params = params_for_mode(mode, wrapped_model, params)
+    params = params_for_mode(mode, wrapped_model, params, schema_context)
 
     adapter(config).chat_completion(params, config)
     |> Jaxon.Stream.from_enumerable()
@@ -417,11 +452,11 @@ defmodule Instructor do
     end)
   end
 
-  defp do_chat_completion(response_model, params, config) do
+  defp do_chat_completion(response_model, params, config, schema_context \\ %{}) do
     validation_context = Keyword.get(params, :validation_context, %{})
     max_retries = Keyword.get(params, :max_retries)
     mode = Keyword.get(params, :mode, :tools)
-    params = params_for_mode(mode, response_model, params)
+    params = params_for_mode(mode, response_model, params, schema_context)
 
     model =
       if is_ecto_schema(response_model) do
@@ -497,8 +532,8 @@ defmodule Instructor do
     end
   end
 
-  defp params_for_mode(mode, response_model, params) do
-    json_schema = JSONSchema.from_ecto_schema(response_model)
+  defp params_for_mode(mode, response_model, params, schema_context \\ %{}) do
+    json_schema = JSONSchema.from_ecto_schema(response_model, schema_context)
 
     params =
       params
diff --git a/lib/instructor/adapters/azure.ex b/lib/instructor/adapters/azure.ex
new file mode 100644
index 0000000..e7a4c0f
--- /dev/null
+++ b/lib/instructor/adapters/azure.ex
@@ -0,0 +1,256 @@
+defmodule Instructor.Adapters.Azure do
+  @moduledoc """
+  Adapter for chat completions served by Azure OpenAI deployments.
+
+  Configuration is read from `config :instructor, :azure` (or passed as the
+  second argument to `chat_completion/2`) and accepts:
+
+    * `:api_url` - resource endpoint, defaults to the `AZURE_API_URL` env var
+    * `:api_path` - request path, defaults to the chat completions path of the
+      deployment named after the `:model` param (`o3-mini` when unset)
+    * `:api_key` - a string or zero-arity function, defaults to the
+      `AZURE_API_KEY` env var
+    * `:auth_mode` - `:api_key_header` (default) sends an `api-key` header; any
+      other value sends the key as a bearer token
+    * `:http_options` - options merged into the `Req.post/2` call, defaults to
+      `[receive_timeout: 60_000]`
+  """
+  @behaviour Instructor.Adapter
+  @supported_modes [:tools, :json, :md_json, :json_schema]
+
+  @default_model "o3-mini"
+
+  alias Instructor.JSONSchema
+  alias Instructor.SSEStreamParser
+
+  @impl true
+  def chat_completion(params, user_config \\ nil) do
+    config = config(user_config, params)
+
+    # Peel off instructor only parameters
+    {_, params} = Keyword.pop(params, :response_model)
+    {_, params} = Keyword.pop(params, :validation_context)
+    {_, params} = Keyword.pop(params, :max_retries)
+    {mode, params} = Keyword.pop(params, :mode)
+    stream = Keyword.get(params, :stream, false)
+    params = Enum.into(params, %{})
+
+    if mode not in @supported_modes do
+      raise "Unsupported Azure mode #{mode}. Supported modes: #{inspect(@supported_modes)}"
+    end
+
+    params =
+      case params do
+        # OpenAI's json_schema mode doesn't support format or pattern attributes
+        %{response_format: %{json_schema: %{schema: _schema}}} ->
+          update_in(params, [:response_format, :json_schema, :schema], &normalize_json_schema/1)
+
+        _ ->
+          params
+      end
+
+    if stream do
+      do_streaming_chat_completion(mode, params, config)
+    else
+      do_chat_completion(mode, params, config)
+    end
+  end
+
+  defp normalize_json_schema(schema) do
+    JSONSchema.traverse_and_update(schema, fn
+      %{"type" => _} = x when is_map_key(x, "format") or is_map_key(x, "pattern") ->
+        {format, x} = Map.pop(x, "format")
+        {pattern, x} = Map.pop(x, "pattern")
+
+        Map.update(x, "description", "", fn description ->
+          "#{description} (format: #{format}, pattern: #{pattern})"
+        end)
+
+      x ->
+        x
+    end)
+  end
+
+  @impl true
+  def reask_messages(raw_response, params, _config) do
+    reask_messages_for_mode(params[:mode], raw_response)
+  end
+
+  defp reask_messages_for_mode(:tools, %{
+         "choices" => [
+           %{
+             "message" =>
+               %{
+                 "tool_calls" => [
+                   %{"id" => tool_call_id, "function" => %{"name" => name, "arguments" => args}} =
+                     function
+                 ]
+               } = message
+           }
+         ]
+       }) do
+    [
+      Map.put(message, "content", function |> Jason.encode!())
+      |> Map.new(fn {k, v} -> {String.to_atom(k), v} end),
+      %{
+        role: "tool",
+        tool_call_id: tool_call_id,
+        name: name,
+        content: args
+      }
+    ]
+  end
+
+  defp reask_messages_for_mode(_mode, _raw_response) do
+    []
+  end
+
+  defp do_streaming_chat_completion(mode, params, config) do
+    pid = self()
+    options = http_options(config)
+    ref = make_ref()
+
+    Stream.resource(
+      fn ->
+        Task.async(fn ->
+          options =
+            Keyword.merge(options, [
+              auth_header(config),
+              json: params,
+              into: fn {:data, data}, {req, resp} ->
+                send(pid, {ref, data})
+                {:cont, {req, resp}}
+              end
+            ])
+
+          Req.post(url(config), options)
+          send(pid, {ref, :done})
+        end)
+      end,
+      fn task ->
+        receive do
+          {^ref, :done} ->
+            {:halt, task}
+
+          {^ref, data} ->
+            {[data], task}
+        after
+          15_000 ->
+            raise "Timeout waiting for LLM call to receive streaming data"
+        end
+      end,
+      fn _ -> nil end
+    )
+    |> SSEStreamParser.parse()
+    |> Stream.map(fn chunk -> parse_stream_chunk_for_mode(mode, chunk) end)
+  end
+
+  defp do_chat_completion(mode, params, config) do
+    options = Keyword.merge(http_options(config), [auth_header(config), json: params])
+
+    with {:ok, %Req.Response{status: 200, body: body} = response} <-
+           Req.post(url(config), options),
+         {:ok, content} <- parse_response_for_mode(mode, body) do
+      {:ok, response, content}
+    else
+      {:ok, %Req.Response{status: status, body: body}} ->
+        {:error, "Unexpected HTTP response code: #{status}\n#{inspect(body)}"}
+
+      e ->
+        e
+    end
+  end
+
+  defp parse_response_for_mode(:tools, %{
+         "choices" => [
+           %{"message" => %{"tool_calls" => [%{"function" => %{"arguments" => args}}]}}
+         ]
+       }),
+       do: Jason.decode(args)
+
+  defp parse_response_for_mode(:md_json, %{"choices" => [%{"message" => %{"content" => content}}]}),
+    do: Jason.decode(content)
+
+  defp parse_response_for_mode(:json, %{"choices" => [%{"message" => %{"content" => content}}]}),
+    do: Jason.decode(content)
+
+  defp parse_response_for_mode(:json_schema, %{
+         "choices" => [%{"message" => %{"content" => content}}]
+       }),
+       do: Jason.decode(content)
+
+  defp parse_response_for_mode(mode, response) do
+    {:error, "Unsupported Azure mode #{mode} with response #{inspect(response)}"}
+  end
+
+  defp parse_stream_chunk_for_mode(:md_json, %{"choices" => [%{"delta" => %{"content" => chunk}}]}),
+    do: chunk
+
+  defp parse_stream_chunk_for_mode(:json, %{"choices" => [%{"delta" => %{"content" => chunk}}]}),
+    do: chunk
+
+  defp parse_stream_chunk_for_mode(:json_schema, %{
+         "choices" => [%{"delta" => %{"content" => chunk}}]
+       }),
+       do: chunk
+
+  defp parse_stream_chunk_for_mode(:tools, %{
+         "choices" => [
+           %{"delta" => %{"tool_calls" => [%{"function" => %{"arguments" => chunk}}]}}
+         ]
+       }),
+       do: chunk
+
+  defp parse_stream_chunk_for_mode(:tools, %{
+         "choices" => [
+           %{"delta" => delta}
+         ]
+       }) do
+    case delta do
+      nil -> ""
+      %{} -> ""
+      %{"content" => chunk} -> chunk
+    end
+  end
+
+  defp parse_stream_chunk_for_mode(_, %{"choices" => [%{"finish_reason" => "stop"}]}), do: ""
+
+  defp url(config), do: api_url(config) <> api_path(config)
+  defp api_url(config), do: Keyword.fetch!(config, :api_url)
+  defp api_path(config), do: Keyword.fetch!(config, :api_path)
+
+  defp api_key(config) do
+    case Keyword.fetch!(config, :api_key) do
+      string when is_binary(string) -> string
+      fun when is_function(fun, 0) -> fun.()
+    end
+  end
+
+  defp auth_header(config) do
+    case Keyword.fetch!(config, :auth_mode) do
+      # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
+      :api_key_header -> {:headers, %{"api-key" => api_key(config)}}
+      _ -> {:auth, {:bearer, api_key(config)}}
+    end
+  end
+
+  defp http_options(config), do: Keyword.fetch!(config, :http_options)
+
+  defp config(nil, params), do: config(Application.get_env(:instructor, :azure, []), params)
+
+  defp config(base_config, params) do
+    model = Keyword.get(params, :model, @default_model)
+
+    # Environment variables are only read for keys the caller left unset, so
+    # an explicitly configured `:api_url`/`:api_key` works without them.
+    base_config
+    |> Keyword.put_new_lazy(:api_url, fn -> System.fetch_env!("AZURE_API_URL") end)
+    |> Keyword.put_new(
+      :api_path,
+      "/openai/deployments/#{model}/chat/completions?api-version=2025-01-01-preview"
+    )
+    |> Keyword.put_new_lazy(:api_key, fn -> System.fetch_env!("AZURE_API_KEY") end)
+    |> Keyword.put_new(:auth_mode, :api_key_header)
+    |> Keyword.put_new(:http_options, receive_timeout: 60_000)
+  end
+end
diff --git a/lib/instructor/json_schema.ex b/lib/instructor/json_schema.ex
index d2419bb..5f2926b 100644
--- a/lib/instructor/json_schema.ex
+++ b/lib/instructor/json_schema.ex
@@ -9,11 +9,11 @@ defmodule Instructor.JSONSchema do
     Note: This will output a correct JSON Schema for the given Ecto schema, but
     it will not necessarily be optimal, nor support all Ecto types.
   """
-  def from_ecto_schema(ecto_schema) do
+  def from_ecto_schema(ecto_schema, schema_context \\ %{}) do
     do_deprecation_warning(ecto_schema)
 
     defs =
-      for schema <- bfs_from_ecto_schema([ecto_schema], %MapSet{}), into: %{} do
+      for schema <- bfs_from_ecto_schema([ecto_schema], %MapSet{}, schema_context), into: %{} do
         {schema.title, schema}
       end
 
@@ -125,9 +125,9 @@ defmodule Instructor.JSONSchema do
 
   defp fetch_old_ecto_schema_doc(_), do: nil
 
-  defp bfs_from_ecto_schema([], _seen_schemas), do: []
+  defp bfs_from_ecto_schema([], _seen_schemas, _schema_context), do: []
 
-  defp bfs_from_ecto_schema([ecto_schema | rest], seen_schemas)
+  defp bfs_from_ecto_schema([ecto_schema | rest], seen_schemas, schema_context)
        when is_ecto_schema(ecto_schema) do
     seen_schemas = MapSet.put(seen_schemas, ecto_schema)
 
@@ -135,7 +135,7 @@ defmodule Instructor.JSONSchema do
       ecto_schema.__schema__(:fields)
       |> Enum.map(fn field ->
         type = ecto_schema.__schema__(:type, field)
-        value = for_type(type)
+        value = for_type(type, schema_context)
         value = Map.merge(%{title: Atom.to_string(field)}, value)
 
         {field, value}
@@ -195,14 +195,14 @@ defmodule Instructor.JSONSchema do
       description: fetch_ecto_schema_doc(ecto_schema) || ""
     }
 
-    [schema | bfs_from_ecto_schema(rest, seen_schemas)]
+    [schema | bfs_from_ecto_schema(rest, seen_schemas, schema_context)]
   end
 
-  defp bfs_from_ecto_schema([ecto_types | rest], seen_schemas)
+  defp bfs_from_ecto_schema([ecto_types | rest], seen_schemas, schema_context)
        when is_ecto_types(ecto_types) do
     properties =
       for {field, type} <- ecto_types, into: %{} do
-        {field, for_type(type)}
+        {field, for_type(type, schema_context)}
       end
 
     required = Map.keys(properties) |> Enum.sort()
@@ -229,7 +229,7 @@ defmodule Instructor.JSONSchema do
       properties: properties
     }
 
-    [schema | bfs_from_ecto_schema(rest, seen_schemas)]
+    [schema | bfs_from_ecto_schema(rest, seen_schemas, schema_context)]
   end
 
   defp title_for(ecto_schema) when is_ecto_schema(ecto_schema) do
@@ -261,17 +261,17 @@ defmodule Instructor.JSONSchema do
 
   defp find_all_values(_, _pred), do: []
 
-  defp for_type(:any), do: %{}
-  defp for_type(:id), do: %{type: "integer", description: "Integer, e.g. 1"}
-  defp for_type(:binary_id), do: %{type: "string"}
-  defp for_type(:integer), do: %{type: "integer", description: "Integer, e.g. 1"}
-  defp for_type(:float), do: %{type: "number", description: "Float, e.g. 1.27", format: "float"}
-  defp for_type(:boolean), do: %{type: "boolean", description: "Boolean, e.g. true"}
-  defp for_type(:string), do: %{type: "string", description: "String, e.g. 'hello'"}
+  defp for_type(:any, _), do: %{}
+  defp for_type(:id, _), do: %{type: "integer", description: "Integer, e.g. 1"}
+  defp for_type(:binary_id, _), do: %{type: "string"}
+  defp for_type(:integer, _), do: %{type: "integer", description: "Integer, e.g. 1"}
+  defp for_type(:float, _), do: %{type: "number", description: "Float, e.g. 1.27", format: "float"}
+  defp for_type(:boolean, _), do: %{type: "boolean", description: "Boolean, e.g. true"}
+  defp for_type(:string, _), do: %{type: "string", description: "String, e.g. 'hello'"}
   # defp for_type(:binary), do: %{type: "unsupported"}
-  defp for_type({:array, type}), do: %{type: "array", items: for_type(type)}
+  defp for_type({:array, type}, ctx), do: %{type: "array", items: for_type(type, ctx)}
 
-  defp for_type(:map),
+  defp for_type(:map, _),
     do: %{
       type: "object",
       properties: %{},
@@ -279,7 +279,7 @@ defmodule Instructor.JSONSchema do
       description: "An object with arbitrary keys and values, e.g. { key: value }"
     }
 
-  defp for_type({:map, type}),
+  defp for_type({:map, type}, _),
     do: %{
       type: "object",
       properties: %{},
@@ -287,47 +287,47 @@ defmodule Instructor.JSONSchema do
       description: "An object with values of a type #{inspect(type)}, e.g. { key: value }"
     }
 
-  defp for_type(:decimal), do: %{type: "number", format: "float"}
+  defp for_type(:decimal, _), do: %{type: "number", format: "float"}
 
-  defp for_type(:date),
+  defp for_type(:date, _),
     do: %{type: "string", description: "ISO8601 Date, e.g. \"2024-07-20\"", format: "date"}
 
-  defp for_type(:time),
+  defp for_type(:time, _),
     do: %{
       type: "string",
       description: "ISO8601 Time, e.g. \"12:00:00\"",
       pattern: "^[0-9]{2}:?[0-9]{2}:?[0-9]{2}$"
     }
 
-  defp for_type(:time_usec),
+  defp for_type(:time_usec, _),
     do: %{
       type: "string",
       description: "ISO8601 Time with microseconds, e.g. \"12:00:00.000000\"",
       pattern: "^[0-9]{2}:?[0-9]{2}:?[0-9]{2}.[0-9]{6}$"
     }
 
-  defp for_type(:naive_datetime),
+  defp for_type(:naive_datetime, _),
     do: %{
       type: "string",
       description: "ISO8601 DateTime, e.g. \"2024-07-20T12:00:00\"",
       format: "date-time"
     }
 
-  defp for_type(:naive_datetime_usec),
+  defp for_type(:naive_datetime_usec, _),
     do: %{
       type: "string",
       description: "ISO8601 DateTime with microseconds, e.g. \"2024-07-20T12:00:00.000000\"",
       format: "date-time"
     }
 
-  defp for_type(:utc_datetime),
+  defp for_type(:utc_datetime, _),
     do: %{
       type: "string",
       description: "ISO8601 DateTime, e.g. \"2024-07-20T12:00:00Z\"",
       format: "date-time"
     }
 
-  defp for_type(:utc_datetime_usec),
+  defp for_type(:utc_datetime_usec, _),
     do: %{
       type: "string",
       description: "ISO8601 DateTime with microseconds, e.g. \"2024-07-20T12:00:00.000000Z\"",
@@ -335,7 +335,7 @@ defmodule Instructor.JSONSchema do
     }
 
   defp for_type(
-         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :many, related: related}}}
+         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :many, related: related}}}, _
        )
        when is_ecto_schema(related) do
     title = title_for(related)
@@ -348,7 +348,7 @@ defmodule Instructor.JSONSchema do
   end
 
   defp for_type(
-         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :many, related: related}}}
+         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :many, related: related}}}, _
        )
        when is_ecto_types(related) do
     properties =
@@ -369,14 +369,14 @@ defmodule Instructor.JSONSchema do
   end
 
   defp for_type(
-         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :one, related: related}}}
+         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :one, related: related}}}, _
        )
        when is_ecto_schema(related) do
     %{"$ref": "#/$defs/#{title_for(related)}"}
   end
 
   defp for_type(
-         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :one, related: related}}}
+         {:parameterized, {Ecto.Embedded, %Ecto.Embedded{cardinality: :one, related: related}}}, _
        )
        when is_ecto_types(related) do
     properties =
@@ -394,13 +394,35 @@ defmodule Instructor.JSONSchema do
     }
   end
 
-  defp for_type({:parameterized, {Ecto.Enum, %{mappings: mappings}}}) do
+  defp for_type({:parameterized, {Ecto.Enum, %{mappings: mappings}}}, _) do
     %{
       type: "string",
       enum: Keyword.keys(mappings)
     }
   end
 
+  defp for_type({:parameterized, {mod, opts}}, schema_context) when is_atom(mod) do
+    if function_exported?(mod, :to_json_schema, 2) do
+      mod.to_json_schema(opts, schema_context)
+    else
+      raise "Unsupported type: #{inspect(mod)}, please implement `to_json_schema/2` via `use Instructor.EctoType`"
+    end
+  end
+
+  defp for_type(mod, schema_context) do
+    cond do
+      function_exported?(mod, :to_json_schema, 1) ->
+        mod.to_json_schema(schema_context)
+
+      # Types written against the context-free callback keep working.
+      function_exported?(mod, :to_json_schema, 0) ->
+        mod.to_json_schema()
+
+      true ->
+        raise "Unsupported type: #{inspect(mod)}, please implement `to_json_schema/0` via `use Instructor.EctoType`"
+    end
+  end
+
   defp for_type(mod) do
     if function_exported?(mod, :to_json_schema, 0) do
       mod.to_json_schema()
diff --git a/lib/instructor/types/enum.ex b/lib/instructor/types/enum.ex
new file mode 100644
index 0000000..b31c201
--- /dev/null
+++ b/lib/instructor/types/enum.ex
@@ -0,0 +1,73 @@
+defmodule Instructor.Types.Enum do
+  @moduledoc """
+  A custom Ecto type for enumerated values with extended JSON Schema attributes.
+
+  This type allows you to define string enums with static or dynamic values.
+
+  ## Example with static values:
+      schema "tasks" do
+        field :status, Instructor.Types.Enum,
+          values: ["pending", "active", "completed"],
+          description: "Current status of the task"
+      end
+
+  ## Example with dynamic values:
+  `:values` may be a function capture of arity 0, or of arity 1 to receive the
+  schema context:
+
+      schema "projects" do
+        field :user_id, Instructor.Types.Enum,
+          values: &MyApp.Users.list_user_ids/0,
+          description: "ID of the user who owns this project"
+      end
+  """
+  use Ecto.ParameterizedType
+  use Instructor.EctoType
+
+  # Initialize the type with the given parameters
+  def init(opts), do: Enum.into(opts, %{})
+
+  # The underlying Ecto type
+  def type(_opts), do: :string
+
+  # Cast with options
+  def cast(value, _opts) when is_binary(value), do: {:ok, value}
+  def cast(_value, _opts), do: :error
+
+  # Load/dump with options
+  def load(_opts, value), do: {:ok, value}
+  def dump(_opts, value), do: {:ok, value}
+
+  # Ecto calls these as embed_as(format, params) and equal?(a, b, params)
+  def embed_as(_format, _opts), do: :self
+  def equal?(a, b, _opts), do: a == b
+
+  # Dump with options and dumper function (3-arity version for ParameterizedType)
+  def dump(value, _dumper, _opts), do: {:ok, value}
+
+  # Load with options and loader function (3-arity version for ParameterizedType)
+  def load(value, _loader, _opts), do: {:ok, value}
+
+  # JSON Schema generation
+  def to_json_schema(opts, context \\ %{}) do
+    values = get_values(opts[:values], context)
+
+    %{
+      "type" => "string",
+      "enum" => values
+    }
+    |> maybe_add("description", opts[:description], context)
+  end
+
+  defp get_values(values, context) when is_function(values, 1), do: values.(context)
+  defp get_values(values, _context) when is_function(values, 0), do: values.()
+  defp get_values(values, _context) when is_list(values), do: values
+  defp get_values(_, _context), do: []
+
+  defp maybe_add(map, _key, nil, _context), do: map
+
+  defp maybe_add(map, key, value, context) when is_function(value, 1),
+    do: Map.put(map, key, value.(context))
+
+  defp maybe_add(map, key, value, _context), do: Map.put(map, key, value)
+end
diff --git a/lib/instructor/types/float.ex b/lib/instructor/types/float.ex
new file mode 100644
index 0000000..c52a391
--- /dev/null
+++ b/lib/instructor/types/float.ex
@@ -0,0 +1,78 @@
+defmodule Instructor.Types.Float do
+  @moduledoc """
+  A custom Ecto type for floats with extended JSON Schema attributes.
+
+  This type extends the basic Ecto float type with additional JSON Schema properties
+  like description, minimum, maximum, multipleOf, etc.
+
+  ## Example
+      schema "products" do
+        field :quantity, Instructor.Types.Float,
+          description: "Available quantity",
+          minimum: 0,
+          maximum: 1000
+
+        field :unit_price, Instructor.Types.Float,
+          description: "Price per unit",
+          minimum: 0,
+          multipleOf: 0.01
+      end
+  """
+  use Ecto.ParameterizedType
+  use Instructor.EctoType
+
+  # Initialize the type with the given parameters
+  def init(opts), do: Enum.into(opts, %{})
+
+  # The underlying Ecto type
+  def type(_opts), do: :float
+
+  # Cast with options
+  def cast(value, _opts) when is_float(value), do: {:ok, value}
+
+  # JSON has a single number type, so whole values may arrive as integers
+  def cast(value, _opts) when is_integer(value), do: {:ok, value * 1.0}
+
+  def cast(value, _opts) when is_binary(value) do
+    case Float.parse(value) do
+      {float, ""} -> {:ok, float}
+      _ -> :error
+    end
+  end
+
+  def cast(_value, _opts), do: :error
+
+  # Load/dump with options
+  def load(_opts, value), do: {:ok, value}
+  def dump(_opts, value), do: {:ok, value}
+
+  # Ecto calls these as embed_as(format, params) and equal?(a, b, params)
+  def embed_as(_format, _opts), do: :self
+  def equal?(a, b, _opts), do: a == b
+
+  # Dump with options and dumper function (3-arity version for ParameterizedType)
+  def dump(value, _dumper, _opts), do: {:ok, value}
+
+  # Load with options and loader function (3-arity version for ParameterizedType)
+  def load(value, _loader, _opts), do: {:ok, value}
+
+  # JSON Schema generation
+  def to_json_schema(opts, context \\ %{}) do
+    base = %{"type" => "number"}
+
+    base
+    |> maybe_add("description", opts[:description], context)
+    |> maybe_add("minimum", opts[:minimum], context)
+    |> maybe_add("maximum", opts[:maximum], context)
+    |> maybe_add("exclusiveMinimum", opts[:exclusiveMinimum], context)
+    |> maybe_add("exclusiveMaximum", opts[:exclusiveMaximum], context)
+    |> maybe_add("multipleOf", opts[:multipleOf], context)
+  end
+
+  defp maybe_add(map, _key, nil, _context), do: map
+
+  defp maybe_add(map, key, value, context) when is_function(value, 1),
+    do: Map.put(map, key, value.(context))
+
+  defp maybe_add(map, key, value, _context), do: Map.put(map, key, value)
+end
diff --git a/lib/instructor/types/integer.ex b/lib/instructor/types/integer.ex
new file mode 100644
index 0000000..8e3f493
--- /dev/null
+++ b/lib/instructor/types/integer.ex
@@ -0,0 +1,75 @@
+defmodule Instructor.Types.Integer do
+  @moduledoc """
+  A custom Ecto type for integers with extended JSON Schema attributes.
+
+  This type extends the basic Ecto integer type with additional JSON Schema properties
+  like description, minimum, maximum, multipleOf, etc.
+
+  ## Example
+      schema "products" do
+        field :quantity, Instructor.Types.Integer,
+          description: "Available quantity",
+          minimum: 0,
+          maximum: 1000
+
+        field :price_cents, Instructor.Types.Integer,
+          description: "Price in cents",
+          minimum: 0,
+          multipleOf: 1
+      end
+  """
+  use Ecto.ParameterizedType
+  use Instructor.EctoType
+
+  # Initialize the type with the given parameters
+  def init(opts), do: Enum.into(opts, %{})
+
+  # The underlying Ecto type
+  def type(_opts), do: :integer
+
+  # Cast with options
+  def cast(value, _opts) when is_integer(value), do: {:ok, value}
+
+  def cast(value, _opts) when is_binary(value) do
+    case Integer.parse(value) do
+      {int, ""} -> {:ok, int}
+      _ -> :error
+    end
+  end
+
+  def cast(_value, _opts), do: :error
+
+  # Load/dump with options
+  def load(_opts, value), do: {:ok, value}
+  def dump(_opts, value), do: {:ok, value}
+
+  # Ecto calls these as embed_as(format, params) and equal?(a, b, params)
+  def embed_as(_format, _opts), do: :self
+  def equal?(a, b, _opts), do: a == b
+
+  # Dump with options and dumper function (3-arity version for ParameterizedType)
+  def dump(value, _dumper, _opts), do: {:ok, value}
+
+  # Load with options and loader function (3-arity version for ParameterizedType)
+  def load(value, _loader, _opts), do: {:ok, value}
+
+  # JSON Schema generation
+  def to_json_schema(opts, context \\ %{}) do
+    base = %{"type" => "integer"}
+
+    base
+    |> maybe_add("description", opts[:description], context)
+    |> maybe_add("minimum", opts[:minimum], context)
+    |> maybe_add("maximum", opts[:maximum], context)
+    |> maybe_add("exclusiveMinimum", opts[:exclusiveMinimum], context)
+    |> maybe_add("exclusiveMaximum", opts[:exclusiveMaximum], context)
+    |> maybe_add("multipleOf", opts[:multipleOf], context)
+  end
+
+  defp maybe_add(map, _key, nil, _context), do: map
+
+  defp maybe_add(map, key, value, context) when is_function(value, 1),
+    do: Map.put(map, key, value.(context))
+
+  defp maybe_add(map, key, value, _context), do: Map.put(map, key, value)
+end
diff --git a/lib/instructor/types/string.ex b/lib/instructor/types/string.ex
new file mode 100644
index 0000000..1a4fe3b
--- /dev/null
+++ b/lib/instructor/types/string.ex
@@ -0,0 +1,67 @@
+defmodule Instructor.Types.String do
+  @moduledoc """
+  A custom Ecto type for strings with extended JSON Schema attributes.
+
+  This type extends the basic Ecto string type with additional JSON Schema properties
+  like description, format, min/max length, pattern, etc.
+
+  ## Example
+      schema "users" do
+        field :name, Instructor.Types.String,
+          description: "User's full name",
+          minLength: 2,
+          maxLength: 50
+
+        field :email, Instructor.Types.String,
+          description: "User's email address",
+          format: "email"
+      end
+  """
+  use Ecto.ParameterizedType
+  use Instructor.EctoType
+
+  # Initialize the type with the given parameters
+  def init(opts), do: Enum.into(opts, %{})
+
+  # The underlying Ecto type
+  def type(_opts), do: :string
+
+  # Cast with options
+  def cast(value, _opts) when is_binary(value), do: {:ok, value}
+  def cast(_value, _opts), do: :error
+
+  # Load with options
+  def load(_opts, value), do: {:ok, value}
+
+  # Dump with options (2-arity version for compatibility)
+  def dump(_opts, value), do: {:ok, value}
+
+  # Dump with options and dumper function (3-arity version for ParameterizedType)
+  def dump(value, _dumper, _opts), do: {:ok, value}
+
+  # Load with options and loader function (3-arity version for ParameterizedType)
+  def load(value, _loader, _opts), do: {:ok, value}
+
+  # Ecto calls these as embed_as(format, params) and equal?(a, b, params)
+  def embed_as(_format, _opts), do: :self
+  def equal?(a, b, _opts), do: a == b
+
+  # JSON Schema generation
+  def to_json_schema(opts, context \\ %{}) do
+    base = %{"type" => "string"}
+
+    base
+    |> maybe_add("description", opts[:description], context)
+    |> maybe_add("minLength", opts[:minLength], context)
+    |> maybe_add("maxLength", opts[:maxLength], context)
+    |> maybe_add("pattern", opts[:pattern], context)
+    |> maybe_add("format", opts[:format], context)
+  end
+
+  defp maybe_add(map, _key, nil, _context), do: map
+
+  defp maybe_add(map, key, value, context) when is_function(value, 1),
+    do: Map.put(map, key, value.(context))
+
+  defp maybe_add(map, key, value, _context), do: Map.put(map, key, value)
+end
diff --git a/lib/mix/tasks/test_types.ex b/lib/mix/tasks/test_types.ex
new file mode 100644
index 0000000..d0959a9
--- /dev/null
+++ b/lib/mix/tasks/test_types.ex
@@ -0,0 +1,65 @@
+defmodule Mix.Tasks.Instructor.TestTypes do
+  @moduledoc """
+  Tests the custom Instructor types by generating a JSON schema for a test schema.
+
+  ## Usage
+
+      mix instructor.test_types
+  """
+  use Mix.Task
+
+  @shortdoc "Tests the custom Instructor types"
+  def run(_) do
+    # Ensure all dependencies are started
+    Mix.Task.run("app.start")
+
+    # Define a test schema that uses our custom types
+    defmodule TestSchema do
+      use Ecto.Schema
+      use Instructor
+
+      @primary_key false
+      embedded_schema do
+        field :name, Instructor.Types.String,
+          description: "The name of the test item",
+          minLength: 3,
+          maxLength: 50
+
+        field :count, Instructor.Types.Integer,
+          description: "The count of items",
+          minimum: 0,
+          maximum: 100
+
+        field :status, Instructor.Types.Enum,
+          values: ["active", "pending", "completed"],
+          description: &__MODULE__.get_description/1
+      end
+
+      def get_description(context) do
+        "This is a test schema with context: #{inspect(context)}"
+      end
+    end
+
+    # Generate JSON schema
+    json_schema = Instructor.JSONSchema.from_ecto_schema(TestSchema, %{status: "active"})
+
+    # Pretty print the schema
+    IO.puts("Generated JSON Schema:")
+    IO.puts(Jason.encode!(Jason.decode!(json_schema), pretty: true))
+
+    Instructor.chat_completion(
+      [
+        model: "gpt-4o-mini",
+        response_model: {TestSchema, %{status: "active"}},
+        mode: :json,
+        messages: [
+          %{
+            role: "user",
+            content: "This is a structured output test, please reply with test data"
+          }
+        ]
+      ],
+      adapter: Instructor.Adapters.OpenAI
+    )
+  end
+end