FEATURE: llm quotas

SamSaffron · SamSaffron · commit 6d3faf11705f · 2025-01-02T17:20:26.000+11:00
This introduces a new feature for per-group quotas.
diff --git a/app/models/llm_quota.rb b/app/models/llm_quota.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+# A limit on LLM use for one group and one LLM model: a token budget and/or a number of
+# usages, together with the length in seconds (duration_seconds) of the period it covers.
+class LlmQuota < ActiveRecord::Base
+  self.table_name = "llm_quotas"
+
+  belongs_to :group
+  belongs_to :llm_model
+  has_many :llm_quota_usages
+
+  validates :group_id, :llm_model_id, presence: true
+  validates :duration_seconds, presence: true, numericality: { greater_than: 0 }
+  validates :max_tokens, :max_usages, numericality: { greater_than: 0, allow_nil: true }
+
+  validate :at_least_one_limit
+
+  class << self
+    # Placeholder: the body is empty, so this returns nil for every llm/user pair.
+    def within_quota?(llm, user)
+    end
+
+    # Placeholder: the body is empty, so nothing is recorded and nil is returned.
+    def log_usage(llm, user, input_tokens, output_tokens)
+    end
+  end
+
+  # The configured token limit (max_tokens); nil when no token limit is set.
+  def available_tokens
+    max_tokens
+  end
+
+  # The configured usage limit (max_usages); nil when no usage limit is set.
+  def available_usages
+    max_usages
+  end
+
+  private
+
+  # Validation: a quota must set max_tokens, max_usages, or both.
+  def at_least_one_limit
+    return if !max_tokens.nil? || !max_usages.nil?
+
+    errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quotas
+#
+#  id               :bigint           not null, primary key
+#  group_id         :bigint           not null
+#  llm_model_id     :bigint           not null
+#  max_tokens       :integer
+#  max_usages       :integer
+#  duration_seconds :integer          not null
+#  created_at       :datetime         not null
+#  updated_at       :datetime         not null
+#
diff --git a/app/models/llm_quota_usage.rb b/app/models/llm_quota_usage.rb
@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+
+# Tracks how much of one LlmQuota a single user has consumed in the current window
+# (started_at up to reset_at): number of usages plus input and output tokens.
+class LlmQuotaUsage < ActiveRecord::Base
+  self.table_name = "llm_quota_usages"
+
+  QuotaExceededError = Class.new(StandardError)
+
+  belongs_to :user
+  belongs_to :llm_quota
+
+  validates :user_id, :llm_quota_id, presence: true
+  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
+  validates :started_at, :reset_at, presence: true
+
+  # Returns the usage row for the given user and quota, creating it on first use with zeroed
+  # counters and a window that starts now. find_or_create_by! is used so that, on Rails 7.1+,
+  # losing an insert race on the unique [user_id, llm_quota_id] index re-reads the existing
+  # row instead of raising ActiveRecord::RecordNotUnique.
+  def self.find_or_create_for(user:, llm_quota:)
+    find_or_create_by!(user: user, llm_quota: llm_quota) do |usage|
+      now = Time.current
+      usage.started_at = now
+      usage.reset_at = now + llm_quota.duration_seconds.seconds
+      usage.input_tokens_used = 0
+      usage.output_tokens_used = 0
+      usage.usages = 0
+    end
+  end
+
+  # Zeroes every counter and opens a new window once reset_at has passed; otherwise a no-op.
+  def reset_if_needed!
+    now = Time.current
+    return if now < reset_at
+
+    update!(
+      input_tokens_used: 0,
+      output_tokens_used: 0,
+      usages: 0,
+      started_at: now,
+      reset_at: now + llm_quota.duration_seconds.seconds,
+    )
+  end
+
+  # Records one usage and its token counts, resetting an expired window first.
+  # The three counter updates run in a single transaction so they are committed together.
+  def increment_usage!(input_tokens:, output_tokens:)
+    reset_if_needed!
+
+    transaction do
+      increment!(:usages)
+      increment!(:input_tokens_used, input_tokens)
+      increment!(:output_tokens_used, output_tokens)
+    end
+  end
+
+  # Raises QuotaExceededError if, after resetting an expired window, the quota is exceeded.
+  def check_quota!
+    reset_if_needed!
+    return if !quota_exceeded?
+
+    raise QuotaExceededError,
+          I18n.t(
+            "discourse_ai.errors.quota_exceeded",
+            group: llm_quota.group.name,
+            reset_at: reset_at,
+          )
+  end
+
+  # True when a configured limit has been passed; false when the quota record is missing.
+  # NOTE(review): both comparisons are strict, so usages == max_usages still counts as within
+  # quota and a caller checking before each call gets one more through - confirm intended.
+  def quota_exceeded?
+    return false if !llm_quota
+
+    (llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) ||
+      (llm_quota.max_usages.present? && usages > llm_quota.max_usages)
+  end
+
+  # Sum of input and output tokens used in the current window.
+  def total_tokens_used
+    input_tokens_used + output_tokens_used
+  end
+
+  # Tokens left in this window (never negative); nil when no token limit or no quota exists.
+  def remaining_tokens
+    max_tokens = llm_quota&.max_tokens
+    return nil if max_tokens.nil?
+    [0, max_tokens - total_tokens_used].max
+  end
+
+  # Usages left in this window (never negative); nil when no usage limit or no quota exists.
+  def remaining_usages
+    max_usages = llm_quota&.max_usages
+    return nil if max_usages.nil?
+    [0, max_usages - usages].max
+  end
+
+  # Share of the token limit consumed, as a whole percent capped at 100; 0 when unlimited.
+  def percentage_tokens_used
+    max_tokens = llm_quota&.max_tokens
+    return 0 if max_tokens.nil? || max_tokens.zero?
+    [(total_tokens_used.to_f / max_tokens * 100).round, 100].min
+  end
+
+  # Share of the usage limit consumed, as a whole percent capped at 100; 0 when unlimited.
+  def percentage_usages_used
+    max_usages = llm_quota&.max_usages
+    return 0 if max_usages.nil? || max_usages.zero?
+    [(usages.to_f / max_usages * 100).round, 100].min
+  end
+end
+
+# == Schema Information
+#
+# Table name: llm_quota_usages
+#
+#  id                 :bigint           not null, primary key
+#  user_id            :bigint           not null
+#  llm_quota_id       :bigint           not null
+#  input_tokens_used  :integer          not null
+#  output_tokens_used :integer          not null
+#  usages             :integer          not null
+#  started_at         :datetime         not null
+#  reset_at           :datetime         not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
@@ -446,6 +446,8 @@ en:
       bedrock_invalid_url: "Please complete all the fields to use this model."
 
     errors:
+      quota_exceeded: "You have exceeded the quota for this model. Please try again after %{reset_at}."
+      quota_required: "You must specify maximum tokens or usages for this model."
       no_query_specified: The query parameter is required, please specify it.
       no_user_for_persona: The persona specified does not have a user associated with it.
       persona_not_found: The persona specified does not exist. Check the persona_name or persona_id params.
diff --git a/db/migrate/20250102035341_add_llm_quota_tables.rb b/db/migrate/20250102035341_add_llm_quota_tables.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+# Adds the llm_quotas table (one row per group and LLM model) and the llm_quota_usages
+# table (one row per user and quota); unique indexes enforce both pairings.
+class AddLlmQuotaTables < ActiveRecord::Migration[7.2]
+  def change
+    create_table :llm_quotas do |t|
+      t.bigint :group_id, null: false
+      t.bigint :llm_model_id, null: false
+      t.integer :max_tokens
+      t.integer :max_usages
+      t.integer :duration_seconds, null: false
+      t.timestamps
+
+      t.index :llm_model_id
+      t.index %i[group_id llm_model_id], unique: true
+    end
+
+    create_table :llm_quota_usages do |t|
+      t.bigint :user_id, null: false
+      t.bigint :llm_quota_id, null: false
+      t.integer :input_tokens_used, null: false
+      t.integer :output_tokens_used, null: false
+      t.integer :usages, null: false
+      t.datetime :started_at, null: false
+      t.datetime :reset_at, null: false
+      t.timestamps
+
+      t.index :llm_quota_id
+      t.index %i[user_id llm_quota_id], unique: true
+    end
+  end
+end
diff --git a/spec/fabricators/llm_quota_fabricator.rb b/spec/fabricators/llm_quota_fabricator.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+# Spec default: a quota of 1000 tokens and 10 usages over a one-day window.
+Fabricator(:llm_quota) do
+  group
+  llm_model
+  max_tokens 1000
+  max_usages 10
+  duration_seconds 1.day.to_i
+end
diff --git a/spec/fabricators/llm_quota_usage_fabricator.rb b/spec/fabricators/llm_quota_usage_fabricator.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+# Spec default: zeroed counters, a window starting now and resetting one day later.
+Fabricator(:llm_quota_usage) do
+  user
+  llm_quota
+  input_tokens_used 0
+  output_tokens_used 0
+  usages 0
+  started_at { Time.current }
+  reset_at { 1.day.from_now }
+end
diff --git a/spec/models/llm_quota_usage_spec.rb b/spec/models/llm_quota_usage_spec.rb