81 changes: 79 additions & 2 deletions README.md
@@ -15,6 +15,26 @@ def deps do
end
```

## Provider Compatibility

The following table shows which features are supported by each provider:

| Feature | OpenAI | OpenRouter | Ollama | Bedrock |
|---------|--------|------------|--------|---------|
| Basic Chat | ✅ | ✅ | ✅ | ✅ |
| Streaming | ✅ | ✅ | ✅ | ❌ |
| Function Calls | ✅ | ✅ | ❌ | ❌ |
| Auto Function Execution | ✅ | ✅ | ❌ | ❌ |
| Fallback Models | ❌ | ✅ | ❌ | ❌ |
| Provider Routing | ❌ | ✅ | ❌ | ❌ |

### Notes
- **OpenRouter** offers the most comprehensive feature set, including unique capabilities such as fallback models and provider routing (see the sketch below)
- **Bedrock** support is provided via the AWS ExAws integration and requires proper AWS configuration
- **Ollama** requires a running Ollama server instance
- **Function Calls** require the provider to support the OpenAI-compatible function calling format
- **Streaming** is **not** compatible with Tesla retries
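
For the OpenRouter-only rows above, here is a minimal settings sketch. The option names `models` (fallback list) and `provider` (routing preferences) are assumptions borrowed from OpenRouter's request format, not confirmed `provider_opts` keys — check the "Using OpenRouter" section further down for the actual options.

```elixir
# Hypothetical sketch: the :models and :provider keys mirror OpenRouter's
# request fields and may not be the exact provider_opts keys LlmComposer uses.
settings = %LlmComposer.Settings{
  # Module name assumed by analogy with LlmComposer.Providers.Ollama
  provider: LlmComposer.Providers.OpenRouter,
  provider_opts: [
    model: "openai/gpt-4o-mini",
    # Fallback models, tried in order if the primary model fails (assumed key)
    models: ["anthropic/claude-3.5-sonnet", "mistralai/mistral-large"],
    # Provider routing preferences (assumed key)
    provider: %{order: ["OpenAI", "Anthropic"]}
  ],
  system_prompt: "You are a helpful assistant."
}
```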

## Usage

### Simple Bot Definition
@@ -150,7 +170,65 @@ LlmComposer.Message.new(
)
```

No function calls support in Ollama (for now)
**Note:** Ollama does not provide token usage information, so `input_tokens` and `output_tokens` will always be empty in debug logs and response metadata. Function calls are also not supported with Ollama.

### Streaming Responses

LlmComposer supports streaming responses for real-time output, which is particularly useful for long-form content generation. This feature works with providers that support streaming (like Ollama, OpenRouter and OpenAI).

```elixir
# Make sure to configure a Tesla adapter that supports streaming (Finch is recommended)
Application.put_env(:llm_composer, :tesla_adapter, {Tesla.Adapter.Finch, name: MyFinch})
{:ok, finch} = Finch.start_link(name: MyFinch)

defmodule MyStreamingChat do
@settings %LlmComposer.Settings{
provider: LlmComposer.Providers.Ollama,
provider_opts: [model: "llama3.2"],
system_prompt: "You are a creative storyteller.",
stream_response: true
}

def run_streaming_chat() do
messages = [
%LlmComposer.Message{type: :user, content: "Tell me a short story about space exploration"}
]

{:ok, res} = LlmComposer.run_completion(@settings, messages)

# Process the stream and output content in real-time
res.stream
|> LlmComposer.parse_stream_response()
|> Enum.each(fn parsed_data ->
content = get_in(parsed_data, ["message", "content"]) || ""
if content != "", do: IO.write(content)
end)

IO.puts("\n--- Stream complete ---")
end
end

MyStreamingChat.run_streaming_chat()
```

Example of execution:

```
mix run streaming_sample.ex

Once upon a time, in the vast expanse of space, a brave astronaut embarked on a journey to explore distant galaxies. The stars shimmered as the spaceship soared beyond the known universe, uncovering secrets of the cosmos...

--- Stream complete ---
```

**Note:** The `stream_response: true` setting enables streaming mode, and `parse_stream_response/1` filters and decodes the raw stream data into usable chunks.

**Important:** When reading a chat completion as a stream, LlmComposer does not track input/output/cache/thinking tokens. There are two approaches to token counting in this mode:

1. Calculate tokens yourself with a library such as `tiktoken` (for the OpenAI provider).
2. Read the token data from the last stream object if the provider supplies it (currently only OpenRouter does), as sketched below.

With the Ollama provider, tokens are not tracked at all.
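
As a rough illustration of the second approach, the sketch below accumulates the streamed content and keeps the last `"usage"` map it encounters. It assumes OpenRouter emits OpenAI-style chat-completion chunks whose final chunk carries `"prompt_tokens"`/`"completion_tokens"`, and it assumes the OpenRouter provider module name; the exact chunk shape may vary, so treat this as a starting point rather than the library's API.

```elixir
# Sketch only: assumes OpenAI-style chunks where the final OpenRouter chunk
# includes a "usage" map. The OpenRouter provider module name is assumed here.
settings = %LlmComposer.Settings{
  provider: LlmComposer.Providers.OpenRouter,
  provider_opts: [model: "openai/gpt-4o-mini"],
  stream_response: true
}

messages = [%LlmComposer.Message{type: :user, content: "Tell me a short story"}]

{:ok, res} = LlmComposer.run_completion(settings, messages)

{content, usage} =
  res.stream
  |> LlmComposer.parse_stream_response()
  |> Enum.reduce({"", nil}, fn chunk, {acc, usage} ->
    delta =
      case chunk do
        %{"choices" => [%{"delta" => %{"content" => text}} | _]} when is_binary(text) -> text
        _ -> ""
      end

    # Only the last chunk is expected to carry a "usage" map, so keep the latest one seen.
    {acc <> delta, chunk["usage"] || usage}
  end)

IO.puts(content)
IO.inspect(usage, label: "token usage (nil if the provider did not report it)")
```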

### Using OpenRouter

@@ -334,4 +412,3 @@ In this example, the bot first calls OpenAI to understand the user's intent and
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at <https://hexdocs.pm/llm_composer>.

1 change: 1 addition & 0 deletions config/config.exs
@@ -4,6 +4,7 @@ config :llm_composer,
openai_key: "",
ollama_uri: "http://localhost:11434",
open_router_key: "",
tesla_adapter: nil,
timeout: nil

import_config "#{Mix.env()}.exs"
54 changes: 54 additions & 0 deletions lib/llm_composer.ex
@@ -87,6 +87,7 @@ defmodule LlmComposer do
provider_opts =
Keyword.merge(settings.provider_opts,
functions: settings.functions,
stream_response: settings.stream_response,
api_key: settings.api_key
)

@@ -110,6 +111,59 @@
end)
end

@doc """
Processes a raw stream response and returns a parsed stream of message content.

## Parameters
- `stream`: The raw stream object from the LLM response.

## Returns
- A stream that yields decoded chunk maps, with "[DONE]" markers and undecodable chunks filtered out.

## Example

```elixir
# Streaming has been tested with the Finch adapter; other adapters may also work.
Application.put_env(:llm_composer, :tesla_adapter, {Tesla.Adapter.Finch, name: MyFinch})
{:ok, finch} = Finch.start_link(name: MyFinch)

settings = %LlmComposer.Settings{
provider: LlmComposer.Providers.Ollama,
provider_opts: [model: "llama3.2"],
stream_response: true
}

messages = [
%LlmComposer.Message{type: :user, content: "Tell me a short story"}
]

{:ok, res} = LlmComposer.run_completion(settings, messages)

# Process the stream and print each parsed chunk
res.stream
|> LlmComposer.parse_stream_response()
|> Enum.each(fn parsed_data ->
content = get_in(parsed_data, ["message", "content"])
if content, do: IO.write(content)
end)
```
"""
@spec parse_stream_response(Enumerable.t()) :: Enumerable.t()
def parse_stream_response(stream) do
stream
|> Stream.filter(fn chunk -> chunk != "[DONE]" end)
|> Stream.map(fn data ->
case Jason.decode(data) do
{:ok, parsed} ->
parsed

{:error, _} ->
nil
end
end)
|> Stream.filter(fn content -> content != nil and content != "" end)
end

@spec user_prompt(Settings.t(), String.t(), map()) :: String.t()
defp user_prompt(settings, message, opts) do
prompt = Map.get(opts, :user_prompt_prefix, settings.user_prompt_prefix)
53 changes: 53 additions & 0 deletions lib/llm_composer/http_client.ex
@@ -0,0 +1,53 @@
defmodule LlmComposer.HttpClient do
@moduledoc """
Helper module for setting up the Tesla HTTP client and its options.
"""

@default_timeout 50_000

@spec client(binary(), keyword()) :: Tesla.Client.t()
def client(base_url, opts \\ []) do
base_url
|> middlewares(opts)
|> Tesla.client(adapter())
end

@spec adapter() :: term()
defp adapter do
Application.get_env(:llm_composer, :tesla_adapter)
end

@spec middlewares(binary(), keyword()) :: list(term())
defp middlewares(base_url, opts) do
stream = Keyword.get(opts, :stream_response)

resp = [
{
Tesla.Middleware.BaseUrl,
base_url
},
Tesla.Middleware.JSON
]

if stream do
resp ++ [{Tesla.Middleware.SSE, only: :data}]
else
resp ++
[
{Tesla.Middleware.Retry,
delay: :timer.seconds(1),
max_delay: :timer.seconds(10),
max_retries: 10,
should_retry: fn
{:ok, %{status: status}} when status in [429, 500, 503] -> true
{:error, :closed} -> true
_other -> false
end},
{Tesla.Middleware.Timeout,
timeout:
Application.get_env(:llm_composer, :timeout) ||
Keyword.get(opts, :default_timeout, @default_timeout)}
]
end
end
end
44 changes: 41 additions & 3 deletions lib/llm_composer/llm_response.ex
@@ -11,11 +11,12 @@ defmodule LlmComposer.LlmResponse do
@type t() :: %__MODULE__{
actions: [[FunctionCall.t()]] | [FunctionCall.t()],
input_tokens: pos_integer() | nil,
main_response: Message.t(),
main_response: Message.t() | nil,
output_tokens: pos_integer() | nil,
previous_response: map() | nil,
raw: map(),
status: :ok | :error
status: :ok | :error,
stream: nil | Enum.t()
}

defstruct [
@@ -25,7 +26,8 @@
:output_tokens,
:previous_response,
:raw,
:status
:status,
:stream
]

@type model_response :: Tesla.Env.result()
@@ -41,6 +43,24 @@
{:error, resp}
end

# Stream response case
def new(
{status, %{response: stream}} = raw_response,
llm_model
)
when llm_model in [:open_ai, :open_router] and is_function(stream) do
{:ok,
%__MODULE__{
actions: [],
input_tokens: nil,
output_tokens: nil,
stream: stream,
main_response: nil,
raw: raw_response,
status: status
}}
end

def new(
{status,
%{actions: actions, response: %{"choices" => [first_choice | _]} = raw_response}},
@@ -65,6 +85,24 @@
}}
end

# Stream response case for Ollama
def new(
{status, %{response: stream}} = raw_response,
:ollama
)
when is_function(stream) do
{:ok,
%__MODULE__{
actions: [],
input_tokens: nil,
output_tokens: nil,
stream: stream,
main_response: nil,
raw: raw_response,
status: status
}}
end

def new(
{status, %{actions: actions, response: %{"message" => message} = raw_response}},
:ollama
24 changes: 5 additions & 19 deletions lib/llm_composer/providers/ollama.ex
@@ -6,28 +6,12 @@ defmodule LlmComposer.Providers.Ollama do
"""
@behaviour LlmComposer.Provider

use Tesla

alias LlmComposer.HttpClient
alias LlmComposer.LlmResponse
alias LlmComposer.Providers.Utils

@uri Application.compile_env(:llm_composer, :ollama_uri, "http://localhost:11434")

plug(Tesla.Middleware.BaseUrl, @uri)

plug(Tesla.Middleware.JSON)

plug(Tesla.Middleware.Retry,
delay: :timer.seconds(1),
max_delay: :timer.seconds(10),
max_retries: 5,
should_retry: fn
{:ok, %{status: status}} when status in [429, 500, 503] -> true
{:error, :closed} -> true
_other -> false
end
)

@impl LlmComposer.Provider
def name, do: :ollama

@@ -37,11 +21,13 @@
"""
def run(messages, system_message, opts) do
model = Keyword.get(opts, :model)
client = HttpClient.client(@uri, opts)
req_opts = Utils.get_req_opts(opts)

if model do
messages
|> build_request(system_message, model, opts)
|> then(&post("/api/chat", &1))
|> then(&Tesla.post(client, "/api/chat", &1, opts: req_opts))
|> handle_response()
|> LlmResponse.new(name())
else
Expand All @@ -52,7 +38,7 @@ defmodule LlmComposer.Providers.Ollama do
defp build_request(messages, system_message, model, opts) do
base_request = %{
model: model,
stream: false,
stream: Keyword.get(opts, :stream_response, false),
# tools: get_tools(Keyword.get(opts, :functions)),
messages: Utils.map_messages([system_message | messages])
}