Implement most of the backend for the LLM quota system (quota checks and usage logging)

SamSaffron · SamSaffron · commit 68aa44005c3f · 2025-01-03T17:18:42.000+11:00
diff --git a/app/models/llm_quota.rb b/app/models/llm_quota.rb
@@ -16,9 +16,31 @@ class LlmQuota < ActiveRecord::Base
   validate :at_least_one_limit
 
   def self.within_quota?(llm, user)
+    return true if user.blank?
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    return true if quotas.empty?
+    quotas.each do |quota|
+      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+      begin
+        usage.check_quota!
+      rescue LlmQuotaUsage::QuotaExceededError
+        return false
+      end
+    end
+
+    true
   end
 
   def self.log_usage(llm, user, input_tokens, output_tokens)
+    return if user.blank?
+
+    quotas = joins(:group).where(llm_model: llm).where(group: user.groups)
+
+    quotas.each do |quota|
+      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
+      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
+    end
   end
 
   def available_tokens
@@ -51,3 +73,8 @@ def at_least_one_limit
 #  created_at       :datetime         not null
 #  updated_at       :datetime         not null
 #
+# Indexes
+#
+#  index_llm_quotas_on_group_id_and_llm_model_id  (group_id,llm_model_id) UNIQUE
+#  index_llm_quotas_on_llm_model_id               (llm_model_id)
+#
diff --git a/app/models/llm_quota_usage.rb b/app/models/llm_quota_usage.rb
@@ -103,14 +103,19 @@ def percentage_usages_used
 #
 # Table name: llm_quota_usages
 #
-#  id                :bigint           not null, primary key
-#  user_id          :bigint           not null
-#  llm_quota_id     :bigint           not null
-#  input_tokens_used :integer          not null
-#  output_tokens_used:integer          not null
-#  usages           :integer          not null
-#  started_at       :datetime         not null
-#  reset_at         :datetime         not null
-#  created_at       :datetime         not null
-#  updated_at       :datetime         not null
+#  id                 :bigint           not null, primary key
+#  user_id            :bigint           not null
+#  llm_quota_id       :bigint           not null
+#  input_tokens_used  :integer          not null
+#  output_tokens_used :integer          not null
+#  usages             :integer          not null
+#  started_at         :datetime         not null
+#  reset_at           :datetime         not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
+# Indexes
+#
+#  index_llm_quota_usages_on_llm_quota_id              (llm_quota_id)
+#  index_llm_quota_usages_on_user_id_and_llm_quota_id  (user_id,llm_quota_id) UNIQUE
 #
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
@@ -65,6 +65,12 @@ def perform_completion!(
           partial_tool_calls: false,
           &blk
         )
+          if !LlmQuota.within_quota?(@llm_model, user)
+            raise LlmQuotaUsage::QuotaExceededError.new(
+                    I18n.t("discourse_ai.errors.quota_exceeded"),
+                  )
+          end
+
           @partial_tool_calls = partial_tool_calls
           model_params = normalize_model_params(model_params)
           orig_blk = blk
@@ -188,10 +194,9 @@ def perform_completion!(
               if log
                 log.raw_response_payload = response_raw
                 final_log_update(log)
-
                 log.response_tokens = tokenizer.size(partials_raw) if log.response_tokens.blank?
                 log.save!
-
+                LlmQuota.log_usage(@llm_model, user, log.request_tokens, log.response_tokens)
                 if Rails.env.development?
                   puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
                 end
diff --git a/spec/models/llm_quota_spec.rb b/spec/models/llm_quota_spec.rb
@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+# Specs for the class-level quota check and usage logging on LlmQuota.
+RSpec.describe LlmQuota do
+  fab!(:group)
+  fab!(:user)
+  fab!(:llm_model)
+  # Every example runs with user already a member of group.
+  before { group.add(user) }
+
+  describe ".within_quota?" do
+    it "returns true when user is nil" do
+      expect(described_class.within_quota?(llm_model, nil)).to be true
+    end
+
+    it "returns true when no quotas exist for the user's groups" do
+      expect(described_class.within_quota?(llm_model, user)).to be true
+    end
+
+    it "returns true when usage is within limits" do
+      quota = Fabricate(:llm_quota, group: group, llm_model: llm_model)
+      _usage =
+        Fabricate(
+          :llm_quota_usage,
+          user: user,
+          llm_quota: quota,
+          input_tokens_used: quota.max_tokens - 100,
+        )
+      # input_tokens_used is set 100 below the quota's max_tokens.
+      expect(described_class.within_quota?(llm_model, user)).to be true
+    end
+
+    it "returns false when usage exceeds token limit" do
+      quota = Fabricate(:llm_quota, group: group, llm_model: llm_model, max_tokens: 1000)
+      _usage = Fabricate(:llm_quota_usage, user: user, llm_quota: quota, input_tokens_used: 1100)
+      # 1100 input tokens used against a 1000-token limit.
+      expect(described_class.within_quota?(llm_model, user)).to be false
+    end
+
+    it "returns false when usage exceeds usage limit" do
+      quota = Fabricate(:llm_quota, group: group, llm_model: llm_model, max_usages: 10)
+      _usage = Fabricate(:llm_quota_usage, user: user, llm_quota: quota, usages: 11)
+      # 11 usages recorded against a limit of 10.
+      expect(described_class.within_quota?(llm_model, user)).to be false
+    end
+
+    it "checks all quotas from user's groups" do
+      group2 = Fabricate(:group)
+      group2.add(user)
+
+      quota1 = Fabricate(:llm_quota, group: group, llm_model: llm_model, max_tokens: 1000)
+      quota2 = Fabricate(:llm_quota, group: group2, llm_model: llm_model, max_tokens: 500)
+
+      Fabricate(:llm_quota_usage, user: user, llm_quota: quota1, input_tokens_used: 900)
+      Fabricate(:llm_quota_usage, user: user, llm_quota: quota2, input_tokens_used: 600)
+      # quota1 is under its limit (900/1000); quota2 is over (600/500).
+      expect(described_class.within_quota?(llm_model, user)).to be false
+    end
+  end
+
+  describe ".log_usage" do
+    it "does nothing when user is nil" do
+      expect { described_class.log_usage(llm_model, nil, 100, 50) }.not_to change(
+        LlmQuotaUsage,
+        :count,
+      )
+    end
+
+    it "creates usage records when none exist" do
+      _quota = Fabricate(:llm_quota, group: group, llm_model: llm_model)
+      # No usage row has been fabricated for this quota.
+      expect { described_class.log_usage(llm_model, user, 100, 50) }.to change(
+        LlmQuotaUsage,
+        :count,
+      ).by(1)
+
+      usage = LlmQuotaUsage.last
+      expect(usage.input_tokens_used).to eq(100)
+      expect(usage.output_tokens_used).to eq(50)
+      expect(usage.usages).to eq(1)
+    end
+
+    it "updates existing usage records" do
+      quota = Fabricate(:llm_quota, group: group, llm_model: llm_model)
+      usage =
+        Fabricate(
+          :llm_quota_usage,
+          user: user,
+          llm_quota: quota,
+          input_tokens_used: 100,
+          output_tokens_used: 50,
+          usages: 1,
+        )
+      # The logged amounts should be added on top of the fabricated counters.
+      described_class.log_usage(llm_model, user, 50, 25)
+
+      usage.reload
+      expect(usage.input_tokens_used).to eq(150)
+      expect(usage.output_tokens_used).to eq(75)
+      expect(usage.usages).to eq(2)
+    end
+
+    it "logs usage for all quotas from user's groups" do
+      group2 = Fabricate(:group)
+      group2.add(user)
+
+      _quota1 = Fabricate(:llm_quota, group: group, llm_model: llm_model)
+      _quota2 = Fabricate(:llm_quota, group: group2, llm_model: llm_model)
+      # One usage row is expected per matching quota.
+      expect { described_class.log_usage(llm_model, user, 100, 50) }.to change(
+        LlmQuotaUsage,
+        :count,
+      ).by(2)
+
+      expect(LlmQuotaUsage.where(user: user).count).to eq(2)
+    end
+  end
+end