Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
59 changes: 1 addition & 58 deletions app/controllers/discourse_ai/admin/ai_personas_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ module Admin
class AiPersonasController < ::Admin::AdminController
requires_plugin ::DiscourseAi::PLUGIN_NAME

before_action :find_ai_persona,
only: %i[show update destroy create_user indexing_status_check]
before_action :find_ai_persona, only: %i[show update destroy create_user]

def index
ai_personas =
Expand Down Expand Up @@ -75,37 +74,6 @@ def destroy
end
end

# Accepts one RAG document upload and stores it as a
# "discourse_ai_rag_upload" Upload created for the current user.
#
# The file is read from params[:file], falling back to the first entry of
# params[:files].
#
# Raises Discourse::InvalidAccess when embeddings are disabled, and
# Discourse::InvalidParameters when no usable file was supplied or when the
# extension / size checks reject it.
#
# Renders the serialized upload on success, or the upload's error messages
# with HTTP 422 when it could not be persisted.
def upload_file
  if !SiteSetting.ai_embeddings_enabled?
    raise Discourse::InvalidAccess.new("Embeddings not enabled")
  end

  # params[:files] may be absent, so use safe navigation; anything that does
  # not look like an uploaded file (including nil) is a bad request rather
  # than a NoMethodError further down.
  file = params[:file] || params[:files]&.first
  raise Discourse::InvalidParameters.new(:file) unless file.respond_to?(:tempfile)

  validate_extension!(file.original_filename)
  validate_file_size!(file.tempfile.size)

  hijack do
    upload =
      UploadCreator.new(
        file.tempfile,
        file.original_filename,
        type: "discourse_ai_rag_upload",
        skip_validations: true,
      ).create_for(current_user.id)

    if upload.persisted?
      render json: UploadSerializer.new(upload)
    else
      render json: failed_json.merge(errors: upload.errors.full_messages), status: 422
    end
  end
end

# Renders, as JSON, whatever RagDocumentFragment.indexing_status reports for
# the persona held in @ai_persona and that persona's uploads.
def indexing_status_check
  persona = @ai_persona
  status = RagDocumentFragment.indexing_status(persona, persona.uploads)
  render json: status
end

private

def find_ai_persona
Expand Down Expand Up @@ -163,31 +131,6 @@ def permit_tools(tools)
end
end
end

# Ensures +filename+ carries one of the extensions accepted for RAG uploads
# ("txt" or "md").
#
# The comparison ignores case, so "NOTES.TXT" is accepted just like
# "notes.txt". A name without an extension is rejected.
#
# @param filename [String] the client-supplied file name
# @raise [Discourse::InvalidParameters] when the extension is not authorized
def validate_extension!(filename)
  authorized_extensions = %w[txt md]

  # File.extname keeps the leading dot and returns "" when there is no
  # extension; strip the dot and normalise case before comparing.
  extension = File.extname(filename).delete_prefix(".").downcase
  return if authorized_extensions.include?(extension)

  raise Discourse::InvalidParameters.new(
          I18n.t("upload.unauthorized", authorized_extensions: authorized_extensions.join(" ")),
        )
end

# Rejects uploads larger than 20 megabytes.
#
# @param filesize [Integer] size of the uploaded file in bytes
# @raise [Discourse::InvalidParameters] when +filesize+ exceeds the limit;
#   the message embeds the limit in human-readable form
def validate_file_size!(filesize)
  limit = 20.megabytes
  return unless filesize > limit

  readable_limit = ActiveSupport::NumberHelper.number_to_human_size(limit)
  raise Discourse::InvalidParameters.new(
          I18n.t("upload.attachments.too_large_humanized", max_size: readable_limit),
        )
end
end
end
end
13 changes: 11 additions & 2 deletions app/controllers/discourse_ai/admin/ai_tools_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,20 @@ def show
end

# Creates an AiTool from the permitted request parameters, recording the
# current user as its creator.
#
# On success the uploads listed under rag_uploads are linked to the new tool
# and the tool is rendered with AiCustomToolSerializer (HTTP 201); otherwise
# the model's errors are rendered.
def create
  # rag_uploads is stripped before building the model: the uploads are linked
  # separately after a successful save. The tool is built exactly once.
  ai_tool = AiTool.new(ai_tool_params.except(:rag_uploads))
  ai_tool.created_by_id = current_user.id

  if ai_tool.save
    RagDocumentFragment.link_target_and_uploads(ai_tool, attached_upload_ids)
    render_serialized(ai_tool, AiCustomToolSerializer, status: :created)
  else
    render_json_error ai_tool
  end
end

def update
if @ai_tool.update(ai_tool_params)
if @ai_tool.update(ai_tool_params.except(:rag_uploads))
RagDocumentFragment.update_target_uploads(@ai_tool, attached_upload_ids)
render_serialized(@ai_tool, AiCustomToolSerializer)
else
render_json_error @ai_tool
Expand Down Expand Up @@ -71,6 +73,10 @@ def test

private

# Collects the :id of every entry under the permitted rag_uploads parameter;
# yields an empty array when the parameter is absent.
def attached_upload_ids
  rag_uploads = ai_tool_params[:rag_uploads].to_a
  rag_uploads.map { |upload| upload[:id] }
end

# Looks up the AiTool identified by params[:id] and stores it in @ai_tool.
def find_ai_tool
  tool_id = params[:id]
  @ai_tool = AiTool.find(tool_id)
end
Expand All @@ -81,6 +87,9 @@ def ai_tool_params
:description,
:script,
:summary,
:rag_chunk_tokens,
:rag_chunk_overlap_tokens,
rag_uploads: [:id],
parameters: [:name, :type, :description, :required, enum: []],
)
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# frozen_string_literal: true

module DiscourseAi
  module Admin
    # Admin endpoints for RAG documents that are shared by every supported
    # target type (AiPersona and AiTool): uploading a source document and
    # checking the indexing status of a target's uploads.
    class RagDocumentFragmentsController < ::Admin::AdminController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      # Extensions accepted for RAG uploads (compared case-insensitively).
      AUTHORIZED_EXTENSIONS = %w[txt md].freeze

      # Renders, as JSON, the indexing status RagDocumentFragment reports for
      # the uploads of the target identified by params[:target_type] and
      # params[:target_id].
      #
      # Raises Discourse::InvalidParameters for any target type other than
      # "AiPersona" or "AiTool".
      def indexing_status_check
        @target =
          case params[:target_type]
          when "AiPersona"
            AiPersona.find(params[:target_id])
          when "AiTool"
            AiTool.find(params[:target_id])
          else
            raise Discourse::InvalidParameters.new("Invalid target type")
          end

        render json: RagDocumentFragment.indexing_status(@target, @target.uploads)
      end

      # Accepts one RAG document upload and stores it as a
      # "discourse_ai_rag_upload" Upload created for the current user.
      #
      # The file is read from params[:file], falling back to the first entry
      # of params[:files].
      #
      # Raises Discourse::InvalidAccess when embeddings are disabled, and
      # Discourse::InvalidParameters when no usable file was supplied or when
      # the extension / size checks reject it.
      #
      # Renders the serialized upload on success, or the upload's error
      # messages with HTTP 422 when it could not be persisted.
      def upload_file
        if !SiteSetting.ai_embeddings_enabled?
          raise Discourse::InvalidAccess.new("Embeddings not enabled")
        end

        # params[:files] may be absent, so use safe navigation; anything that
        # does not look like an uploaded file (including nil) is a bad
        # request rather than a NoMethodError further down.
        file = params[:file] || params[:files]&.first
        raise Discourse::InvalidParameters.new(:file) unless file.respond_to?(:tempfile)

        validate_extension!(file.original_filename)
        validate_file_size!(file.tempfile.size)

        hijack do
          upload =
            UploadCreator.new(
              file.tempfile,
              file.original_filename,
              type: "discourse_ai_rag_upload",
              skip_validations: true,
            ).create_for(current_user.id)

          if upload.persisted?
            render json: UploadSerializer.new(upload)
          else
            render json: failed_json.merge(errors: upload.errors.full_messages), status: 422
          end
        end
      end

      private

      # Raises Discourse::InvalidParameters unless +filename+ ends in one of
      # AUTHORIZED_EXTENSIONS. Case is ignored, so "NOTES.TXT" is accepted.
      def validate_extension!(filename)
        # File.extname keeps the leading dot and returns "" when there is no
        # extension; strip the dot and normalise case before comparing.
        extension = File.extname(filename).delete_prefix(".").downcase
        return if AUTHORIZED_EXTENSIONS.include?(extension)

        raise Discourse::InvalidParameters.new(
                I18n.t(
                  "upload.unauthorized",
                  authorized_extensions: AUTHORIZED_EXTENSIONS.join(" "),
                ),
              )
      end

      # Raises Discourse::InvalidParameters when +filesize+ (in bytes) is
      # larger than 20 megabytes; the message embeds the limit in
      # human-readable form.
      def validate_file_size!(filesize)
        max_size_bytes = 20.megabytes
        return unless filesize > max_size_bytes

        raise Discourse::InvalidParameters.new(
                I18n.t(
                  "upload.attachments.too_large_humanized",
                  max_size: ActiveSupport::NumberHelper.number_to_human_size(max_size_bytes),
                ),
              )
      end
    end
  end
end
6 changes: 4 additions & 2 deletions app/jobs/regular/digest_rag_upload.rb
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def chunk_document(file:, tokenizer:, chunk_tokens:, overlap_tokens:)

while overlap_token_ids.present?
begin
overlap = tokenizer.decode(overlap_token_ids) + split_char
padding = split_char
padding = " " if padding.empty?
overlap = tokenizer.decode(overlap_token_ids) + padding
break if overlap.encoding == Encoding::UTF_8
rescue StandardError
# it is possible that we truncated mid char
Expand All @@ -135,7 +137,7 @@ def chunk_document(file:, tokenizer:, chunk_tokens:, overlap_tokens:)
end

# remove first word it is probably truncated
overlap = overlap.split(" ", 2).last
overlap = overlap.split(/\s/, 2).last.to_s.lstrip
end
end

Expand Down
106 changes: 96 additions & 10 deletions app/models/ai_tool.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ class AiTool < ActiveRecord::Base
validates :script, presence: true, length: { maximum: 100_000 }
validates :created_by_id, presence: true
belongs_to :created_by, class_name: "User"
has_many :rag_document_fragments, dependent: :destroy, as: :target
has_many :upload_references, as: :target, dependent: :destroy
has_many :uploads, through: :upload_references
before_update :regenerate_rag_fragments

def signature
{ name: name, description: description, parameters: parameters.map(&:symbolize_keys) }
Expand All @@ -28,6 +32,82 @@ def bump_persona_cache
AiPersona.persona_cache.flush!
end

# Registered as a before_update callback. When either chunking setting
# (rag_chunk_tokens or rag_chunk_overlap_tokens) has changed, deletes every
# RagDocumentFragment whose target is this record; otherwise does nothing.
# This method only deletes; re-creating the fragments is not done here.
def regenerate_rag_fragments
  chunking_changed = rag_chunk_tokens_changed? || rag_chunk_overlap_tokens_changed?
  return unless chunking_changed

  RagDocumentFragment.where(target: self).delete_all
end

# Returns a JavaScript block comment that serves as a quick reference for the
# tool scripting API: the entry functions (invoke, details), the provided
# objects (http, llm, index) and the stated execution constraints.
# The preset scripts in self.presets interpolate this text at their top.
# The heredoc body is runtime text; edit it only to change what tool authors see.
def self.preamble
<<~JS
/**
* Tool API Quick Reference
*
* Entry Functions
*
* invoke(parameters): Main function. Receives parameters (Object). Must return a JSON-serializable value.
* Example:
* function invoke(parameters) { return "result"; }
*
* details(): Optional. Returns a string describing the tool.
* Example:
* function details() { return "Tool description."; }
*
* Provided Objects
*
* 1. http
* http.get(url, options?): Performs an HTTP GET request.
* Parameters:
* url (string): The request URL.
* options (Object, optional):
* headers (Object): Request headers.
* Returns:
* { status: number, body: string }
*
* http.post(url, options?): Performs an HTTP POST request.
* Parameters:
* url (string): The request URL.
* options (Object, optional):
* headers (Object): Request headers.
* body (string): Request body.
* Returns:
* { status: number, body: string }
*
* Note: Max 20 HTTP requests per execution.
*
* 2. llm
* llm.truncate(text, length): Truncates text to a specified token length.
* Parameters:
* text (string): Text to truncate.
* length (number): Max tokens.
* Returns:
* Truncated string.
*
* 3. index
* index.search(query, options?): Searches indexed documents.
* Parameters:
* query (string): Search query.
* options (Object, optional):
* filenames (Array): Limit search to specific files.
* limit (number): Max fragments (up to 200).
* Returns:
* Array of { fragment: string, metadata: string }
*
* Constraints
*
* Execution Time: ≤ 2000ms
* Memory: ≤ 10MB
* HTTP Requests: ≤ 20 per execution
* Exceeding limits will result in errors or termination.
*
* Security
*
* Sandboxed Environment: No access to system or global objects.
* No File System Access: Cannot read or write files.
*/
JS
end

def self.presets
[
{
Expand All @@ -38,6 +118,7 @@ def self.presets
{ name: "url", type: "string", required: true, description: "The URL to browse" },
],
script: <<~SCRIPT,
#{preamble}
let url;
function invoke(p) {
url = p.url;
Expand Down Expand Up @@ -70,6 +151,7 @@ def self.presets
{ name: "amount", type: "number", description: "Amount to convert eg: 123.45" },
],
script: <<~SCRIPT,
#{preamble}
// note: this script uses the open.er-api.com service, it is only updated
// once every 24 hours, for more up to date rates see: https://www.exchangerate-api.com
function invoke(params) {
Expand Down Expand Up @@ -118,6 +200,7 @@ def self.presets
},
],
script: <<~SCRIPT,
#{preamble}
function invoke(params) {
const apiKey = 'YOUR_ALPHAVANTAGE_API_KEY'; // Replace with your actual API key
const url = `https://www.alphavantage.co/query?function=GLOBAL_QUOTE&symbol=${params.symbol}&apikey=${apiKey}`;
Expand Down Expand Up @@ -154,6 +237,7 @@ def self.presets
summary: "Get real-time stock quotes using AlphaVantage API",
},
{ preset_id: "empty_tool", script: <<~SCRIPT },
#{preamble}
function invoke(params) {
// logic here
return params;
Expand All @@ -173,14 +257,16 @@ def self.presets
#
# Table name: ai_tools
#
# id :bigint not null, primary key
# name :string not null
# description :string not null
# summary :string not null
# parameters :jsonb not null
# script :text not null
# created_by_id :integer not null
# enabled :boolean default(TRUE), not null
# created_at :datetime not null
# updated_at :datetime not null
# id :bigint not null, primary key
# name :string not null
# description :string not null
# summary :string not null
# parameters :jsonb not null
# script :text not null
Comment on lines +261 to +265
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we limit the size of these fields?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah probably but a bit unrelated to this commit, it was there from before

# created_by_id :integer not null
# enabled :boolean default(TRUE), not null
# created_at :datetime not null
# updated_at :datetime not null
# rag_chunk_tokens :integer default(374), not null
# rag_chunk_overlap_tokens :integer default(10), not null
#
6 changes: 1 addition & 5 deletions app/models/rag_document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,7 @@ def indexing_status(persona, uploads)
end

# Publishes +status+ for +upload+ on the MessageBus channel
# "/discourse-ai/rag/<upload id>", restricted to the user who owns the upload.
#
# The status is sent once, on the target-agnostic channel; the older
# persona-specific channel ("/discourse-ai/ai-persona-rag/<id>") is no
# longer used, so subscribers do not receive each update twice.
def publish_status(upload, status)
  MessageBus.publish("/discourse-ai/rag/#{upload.id}", status, user_ids: [upload.user_id])
end
end
end
Expand Down
Loading