discourse · SamSaffron · Jan 14, 2025 · Jan 2, 2025 · Jan 3, 2025 · Jan 6, 2025
diff --git a/.discourse-compatibility b/.discourse-compatibility
@@ -1,3 +1,4 @@
+< 3.4.0.beta4-dev: 20612fde52d3f740cad64823ef8aadb0748b567f
 < 3.4.0.beta3-dev: decf1bb49d737ea15308400f22f89d1d1e71d13d
 < 3.4.0.beta1-dev: 9d887ad4ace8e33c3fe7dbb39237e882c08b4f0b
 < 3.3.0.beta5-dev: 4d8090002f6dcd8e34d41033606bf131fa221475

diff --git a/app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb b/app/controllers/discourse_ai/admin/ai_llm_quotas_controller.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Admin
+    class AiLlmQuotasController < ::Admin::AdminController
+      requires_plugin ::DiscourseAi::PLUGIN_NAME
+
+      def index
+        quotas = LlmQuota.includes(:group)
+
+        render json: {
+                 quotas:
+                   ActiveModel::ArraySerializer.new(quotas, each_serializer: LlmQuotaSerializer),
+               }
+      end
+
+      def create
+        quota = LlmQuota.new(quota_params)
+
+        if quota.save
+          render json: LlmQuotaSerializer.new(quota), status: :created
+        else
+          render_json_error quota
+        end
+      end
+
+      def update
+        quota = LlmQuota.find(params[:id])
+
+        if quota.update(quota_params)
+          render json: LlmQuotaSerializer.new(quota)
+        else
+          render_json_error quota
+        end
+      end
+
+      def destroy
+        quota = LlmQuota.find(params[:id])
+        quota.destroy!
+
+        head :no_content
+      rescue ActiveRecord::RecordNotFound
+        render json: { error: I18n.t("not_found") }, status: 404
+      end
+
+      private
+
+      def quota_params
+        params.require(:quota).permit(
+          :group_id,
+          :llm_model_id,
+          :max_tokens,
+          :max_usages,
+          :duration_seconds,
+        )
+      end
+    end
+  end
+end
diff --git a/app/controllers/discourse_ai/admin/ai_llms_controller.rb b/app/controllers/discourse_ai/admin/ai_llms_controller.rb
@@ -6,7 +6,7 @@ class AiLlmsController < ::Admin::AdminController
       requires_plugin ::DiscourseAi::PLUGIN_NAME
 
       def index
-        llms = LlmModel.all.order(:display_name)
+        llms = LlmModel.all.includes(:llm_quotas).order(:display_name)
 
         render json: {
                  ai_llms:
@@ -40,6 +40,11 @@ def edit
 
       def create
         llm_model = LlmModel.new(ai_llm_params)
+
+        # We could use nested attributes here, but their mechanics are awkward and
+        # lead to a lot of complex debugging; building the quotas by hand is simpler.
+        quota_params.each { |quota| llm_model.llm_quotas.build(quota) } if quota_params
+
         if llm_model.save
           llm_model.toggle_companion_user
           render json: LlmModelSerializer.new(llm_model), status: :created
@@ -51,6 +56,25 @@ def create
       def update
         llm_model = LlmModel.find(params[:id])
 
+        if params[:ai_llm].key?(:llm_quotas)
+          if quota_params
+            existing_quota_group_ids = llm_model.llm_quotas.pluck(:group_id)
+            new_quota_group_ids = quota_params.map { |q| q[:group_id] }
+
+            llm_model
+              .llm_quotas
+              .where(group_id: existing_quota_group_ids - new_quota_group_ids)
+              .destroy_all
+
+            quota_params.each do |quota_param|
+              quota = llm_model.llm_quotas.find_or_initialize_by(group_id: quota_param[:group_id])
+              quota.update!(quota_param)
+            end
+          else
+            llm_model.llm_quotas.destroy_all
+          end
+        end
+
         if llm_model.seeded?
           return render_json_error(I18n.t("discourse_ai.llm.cannot_edit_builtin"), status: 403)
         end
@@ -110,6 +134,19 @@ def test
 
       private
 
+      def quota_params
+        if params[:ai_llm][:llm_quotas].present?
+          params[:ai_llm][:llm_quotas].map do |quota|
+            mapped = {}
+            mapped[:group_id] = quota[:group_id].to_i
+            mapped[:max_tokens] = quota[:max_tokens].to_i if quota[:max_tokens].present?
+            mapped[:max_usages] = quota[:max_usages].to_i if quota[:max_usages].present?
+            mapped[:duration_seconds] = quota[:duration_seconds].to_i
+            mapped
+          end
+        end
+      end
+
       def ai_llm_params(updating: nil)
         return {} if params[:ai_llm].blank?
 

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
@@ -4,6 +4,7 @@ class LlmModel < ActiveRecord::Base
   FIRST_BOT_USER_ID = -1200
   BEDROCK_PROVIDER_NAME = "aws_bedrock"
 
+  has_many :llm_quotas, dependent: :destroy
   belongs_to :user
 
   validates :display_name, presence: true, length: { maximum: 100 }

diff --git a/app/models/llm_quota.rb b/app/models/llm_quota.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+class LlmQuota < ActiveRecord::Base
+  self.table_name = "llm_quotas"
+
+  belongs_to :group
+  belongs_to :llm_model
+  has_many :llm_quota_usages
+
+  validates :group_id, presence: true
+  # we can not validate on create cause it breaks build
+  validates :llm_model_id, presence: true, on: :update
+  validates :duration_seconds, presence: true, numericality: { greater_than: 0 }
+  validates :max_tokens, numericality: { only_integer: true, greater_than: 0, allow_nil: true }
+  validates :max_usages, numericality: { greater_than: 0, allow_nil: true }
+
+  validate :at_least_one_limit
+
+  def self.check_quotas!(llm, user)
+    return true if user.blank?
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    return true if quotas.empty?
+    errors =
+      quotas.map do |quota|
+        usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+        begin
+          usage.check_quota!
+          nil
+        rescue LlmQuotaUsage::QuotaExceededError => e
+          e
+        end
+      end
+
+    return if errors.include?(nil)
+
+    raise errors.first
+  end
+
+  def self.log_usage(llm, user, input_tokens, output_tokens)
+    return if user.blank?
+
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    quotas.each do |quota|
+      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
+    end
+  end
+
+  def available_tokens
+    max_tokens
+  end
+
+  def available_usages
+    max_usages
+  end
+
+  private
+
+  def at_least_one_limit
+    if max_tokens.nil? && max_usages.nil?
+      errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
+    end
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quotas
+#
+#  id               :bigint           not null, primary key
+#  group_id         :bigint           not null
+#  llm_model_id     :bigint           not null
+#  max_tokens       :integer
+#  max_usages       :integer
+#  duration_seconds :integer          not null
+#  created_at       :datetime         not null
+#  updated_at       :datetime         not null
+#
+# Indexes
+#
+#  index_llm_quotas_on_group_id_and_llm_model_id  (group_id,llm_model_id) UNIQUE
+#  index_llm_quotas_on_llm_model_id               (llm_model_id)
+#
diff --git a/app/models/llm_quota_usage.rb b/app/models/llm_quota_usage.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+class LlmQuotaUsage < ActiveRecord::Base
+  self.table_name = "llm_quota_usages"
+
+  QuotaExceededError = Class.new(StandardError)
+
+  belongs_to :user
+  belongs_to :llm_quota
+
+  validates :user_id, presence: true
+  validates :llm_quota_id, presence: true
+  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :started_at, presence: true
+  validates :reset_at, presence: true
+
+  def self.find_or_create_for(user:, llm_quota:)
+    usage = find_or_initialize_by(user: user, llm_quota: llm_quota)
+
+    if usage.new_record?
+      now = Time.current
+      usage.started_at = now
+      usage.reset_at = now + llm_quota.duration_seconds.seconds
+      usage.input_tokens_used = 0
+      usage.output_tokens_used = 0
+      usage.usages = 0
+      usage.save!
+    end
+
+    usage
+  end
+
+  def reset_if_needed!
+    return if Time.current < reset_at
+
+    now = Time.current
+    update!(
+      input_tokens_used: 0,
+      output_tokens_used: 0,
+      usages: 0,
+      started_at: now,
+      reset_at: now + llm_quota.duration_seconds.seconds,
+    )
+  end
+
+  def increment_usage!(input_tokens:, output_tokens:)
+    reset_if_needed!
+
+    increment!(:usages)
+    increment!(:input_tokens_used, input_tokens)
+    increment!(:output_tokens_used, output_tokens)
+  end
+
+  def check_quota!
+    reset_if_needed!
+
+    if quota_exceeded?
+      raise QuotaExceededError.new(
+              I18n.t(
+                "discourse_ai.errors.quota_exceeded",
+                relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.now),
+              ),
+            )
+    end
+  end
+
+  def quota_exceeded?
+    return false if !llm_quota
+
+    (llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) ||
+      (llm_quota.max_usages.present? && usages > llm_quota.max_usages)
+  end
+
+  def total_tokens_used
+    input_tokens_used + output_tokens_used
+  end
+
+  def remaining_tokens
+    return nil if llm_quota.max_tokens.nil?
+    [0, llm_quota.max_tokens - total_tokens_used].max
+  end
+
+  def remaining_usages
+    return nil if llm_quota.max_usages.nil?
+    [0, llm_quota.max_usages - usages].max
+  end
+
+  def percentage_tokens_used
+    return 0 if llm_quota.max_tokens.nil? || llm_quota.max_tokens.zero?
+    [(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min
+  end
+
+  def percentage_usages_used
+    return 0 if llm_quota.max_usages.nil? || llm_quota.max_usages.zero?
+    [(usages.to_f / llm_quota.max_usages * 100).round, 100].min
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quota_usages
+#
+#  id                 :bigint           not null, primary key
+#  user_id            :bigint           not null
+#  llm_quota_id       :bigint           not null
+#  input_tokens_used  :integer          not null
+#  output_tokens_used :integer          not null
+#  usages             :integer          not null
+#  started_at         :datetime         not null
+#  reset_at           :datetime         not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
+# Indexes
+#
+#  index_llm_quota_usages_on_llm_quota_id              (llm_quota_id)
+#  index_llm_quota_usages_on_user_id_and_llm_quota_id  (user_id,llm_quota_id) UNIQUE
+#
diff --git a/app/serializers/llm_model_serializer.rb b/app/serializers/llm_model_serializer.rb
@@ -20,6 +20,7 @@ class LlmModelSerializer < ApplicationSerializer
              :used_by
 
   has_one :user, serializer: BasicUserSerializer, embed: :object
+  has_many :llm_quotas, serializer: LlmQuotaSerializer, embed: :objects
 
   def used_by
     llm_usage =

diff --git a/app/serializers/llm_quota_serializer.rb b/app/serializers/llm_quota_serializer.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class LlmQuotaSerializer < ApplicationSerializer
+  attributes :id, :group_id, :llm_model_id, :max_tokens, :max_usages, :duration_seconds, :group_name
+
+  def group_name
+    object.group.name
+  end
+end
diff --git a/assets/javascripts/discourse/admin/models/ai-llm.js b/assets/javascripts/discourse/admin/models/ai-llm.js
@@ -21,7 +21,7 @@ export default class AiLlm extends RestModel {
   updateProperties() {
     const attrs = this.createProperties();
     attrs.id = this.id;
-
+    attrs.llm_quotas = this.llm_quotas; // sent on update so the server can sync this model's quotas to this list
     return attrs;
   }