Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit d07cf51

Browse files
FEATURE: llm quotas (#1047)
Adds a comprehensive quota management system for LLM models. It provides:
- Per-group token and usage limits (applied to each user in the group) with configurable durations
- Tracking and enforcement of token/usage limits across user groups
- Quota reset periods (hourly, daily, weekly, or custom)
- An admin UI for managing quotas with real-time updates

This system provides granular control over LLM API usage by allowing admins to define limits on both total tokens and number of requests per group. It supports multiple concurrent quotas per model and automatically handles quota resets.

Co-authored-by: Keegan George <[email protected]>
1 parent 20612fd commit d07cf51

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1684
-151
lines changed

.discourse-compatibility

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
< 3.4.0.beta4-dev: 20612fde52d3f740cad64823ef8aadb0748b567f
12
< 3.4.0.beta3-dev: decf1bb49d737ea15308400f22f89d1d1e71d13d
23
< 3.4.0.beta1-dev: 9d887ad4ace8e33c3fe7dbb39237e882c08b4f0b
34
< 3.3.0.beta5-dev: 4d8090002f6dcd8e34d41033606bf131fa221475
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
  module Admin
    # Admin-only JSON endpoints for listing, creating, updating and deleting
    # LlmQuota records.
    class AiLlmQuotasController < ::Admin::AdminController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      # Attributes a client may set on a quota.
      PERMITTED_QUOTA_ATTRIBUTES = %i[
        group_id
        llm_model_id
        max_tokens
        max_usages
        duration_seconds
      ].freeze

      # Renders every quota (groups eager-loaded) under the "quotas" key.
      def index
        serialized_quotas =
          ActiveModel::ArraySerializer.new(
            LlmQuota.includes(:group),
            each_serializer: LlmQuotaSerializer,
          )

        render json: { quotas: serialized_quotas }
      end

      # Creates a quota from params[:quota]; responds 201 with the serialized
      # record, or with the record's validation errors.
      def create
        quota = LlmQuota.new(quota_params)
        return render_json_error(quota) unless quota.save

        render json: LlmQuotaSerializer.new(quota), status: :created
      end

      # Updates the quota identified by params[:id]; responds with the
      # serialized record, or with the record's validation errors.
      def update
        quota = LlmQuota.find(params[:id])
        return render_json_error(quota) unless quota.update(quota_params)

        render json: LlmQuotaSerializer.new(quota)
      end

      # Deletes the quota identified by params[:id]; responds 204, or 404 with
      # a JSON error when no such quota exists.
      def destroy
        begin
          LlmQuota.find(params[:id]).destroy!
          head :no_content
        rescue ActiveRecord::RecordNotFound
          render json: { error: I18n.t("not_found") }, status: 404
        end
      end

      private

      # Strong-parameter filter for the :quota payload.
      def quota_params
        params.require(:quota).permit(*PERMITTED_QUOTA_ATTRIBUTES)
      end
    end
  end
end

app/controllers/discourse_ai/admin/ai_llms_controller.rb

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class AiLlmsController < ::Admin::AdminController
66
requires_plugin ::DiscourseAi::PLUGIN_NAME
77

88
def index
9-
llms = LlmModel.all.order(:display_name)
9+
llms = LlmModel.all.includes(:llm_quotas).order(:display_name)
1010

1111
render json: {
1212
ai_llms:
@@ -40,6 +40,11 @@ def edit
4040

4141
def create
4242
llm_model = LlmModel.new(ai_llm_params)
43+
44+
# we could do nested attributes but the mechanics are not ideal leading
45+
# to lots of complex debugging, this is simpler
46+
quota_params.each { |quota| llm_model.llm_quotas.build(quota) } if quota_params
47+
4348
if llm_model.save
4449
llm_model.toggle_companion_user
4550
render json: LlmModelSerializer.new(llm_model), status: :created
@@ -51,6 +56,25 @@ def create
5156
def update
5257
llm_model = LlmModel.find(params[:id])
5358

59+
if params[:ai_llm].key?(:llm_quotas)
60+
if quota_params
61+
existing_quota_group_ids = llm_model.llm_quotas.pluck(:group_id)
62+
new_quota_group_ids = quota_params.map { |q| q[:group_id] }
63+
64+
llm_model
65+
.llm_quotas
66+
.where(group_id: existing_quota_group_ids - new_quota_group_ids)
67+
.destroy_all
68+
69+
quota_params.each do |quota_param|
70+
quota = llm_model.llm_quotas.find_or_initialize_by(group_id: quota_param[:group_id])
71+
quota.update!(quota_param)
72+
end
73+
else
74+
llm_model.llm_quotas.destroy_all
75+
end
76+
end
77+
5478
if llm_model.seeded?
5579
return render_json_error(I18n.t("discourse_ai.llm.cannot_edit_builtin"), status: 403)
5680
end
@@ -110,6 +134,19 @@ def test
110134

111135
private
112136

137+
# Normalizes the quota definitions submitted under params[:ai_llm][:llm_quotas].
#
# Returns an Array of Hashes with integer :group_id and :duration_seconds, plus
# integer :max_tokens / :max_usages only when the client supplied a non-blank
# value for them. Returns nil when no quotas were submitted, including when the
# :ai_llm key itself is missing, so callers can use the result as a truthiness
# check.
def quota_params
  # dig tolerates a missing :ai_llm key instead of raising NoMethodError on nil
  submitted_quotas = params.dig(:ai_llm, :llm_quotas)
  return if submitted_quotas.blank?

  submitted_quotas.map do |quota|
    mapped = {}
    mapped[:group_id] = quota[:group_id].to_i
    # blank limits are omitted so they stay unset rather than becoming 0
    mapped[:max_tokens] = quota[:max_tokens].to_i if quota[:max_tokens].present?
    mapped[:max_usages] = quota[:max_usages].to_i if quota[:max_usages].present?
    mapped[:duration_seconds] = quota[:duration_seconds].to_i
    mapped
  end
end
149+
113150
def ai_llm_params(updating: nil)
114151
return {} if params[:ai_llm].blank?
115152

app/models/llm_model.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ class LlmModel < ActiveRecord::Base
44
FIRST_BOT_USER_ID = -1200
55
BEDROCK_PROVIDER_NAME = "aws_bedrock"
66

7+
has_many :llm_quotas, dependent: :destroy
78
belongs_to :user
89

910
validates :display_name, presence: true, length: { maximum: 100 }

app/models/llm_quota.rb

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# frozen_string_literal: true
2+
3+
# A usage limit that one group has on one LLM model. A quota caps total tokens
# (max_tokens), number of requests (max_usages), or both, over a window of
# duration_seconds. Consumption is tracked per user in LlmQuotaUsage.
class LlmQuota < ActiveRecord::Base
  self.table_name = "llm_quotas"

  belongs_to :group
  belongs_to :llm_model
  # usage rows are only ever looked up through their quota, so remove them
  # together with it instead of leaving orphans behind
  has_many :llm_quota_usages, dependent: :delete_all

  validates :group_id, presence: true
  # not validated on create: quotas built through an unsaved model's
  # llm_quotas association have no llm_model_id yet
  validates :llm_model_id, presence: true, on: :update
  validates :duration_seconds, presence: true, numericality: { greater_than: 0 }
  validates :max_tokens, numericality: { only_integer: true, greater_than: 0, allow_nil: true }
  validates :max_usages, numericality: { greater_than: 0, allow_nil: true }

  validate :at_least_one_limit

  # Verifies that +user+ may still call +llm+.
  #
  # Returns true when user is blank, when no quota applies to the user's
  # groups, or when at least one applicable quota is not exceeded.
  # Raises the first LlmQuotaUsage::QuotaExceededError when every applicable
  # quota is exceeded.
  def self.check_quotas!(llm, user)
    return true if user.blank?

    # load once; the records are both tested for emptiness and iterated
    quotas = applicable_quotas(llm, user).to_a
    return true if quotas.empty?

    errors =
      quotas.map do |quota|
        usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
        begin
          usage.check_quota!
          nil
        rescue LlmQuotaUsage::QuotaExceededError => e
          e
        end
      end

    # a nil entry means that quota still has room, which is enough to proceed
    return true if errors.include?(nil)

    raise errors.first
  end

  # Records one request and its token counts against every quota that applies
  # to +user+ for +llm+. Does nothing when user is blank.
  def self.log_usage(llm, user, input_tokens, output_tokens)
    return if user.blank?

    applicable_quotas(llm, user).each do |quota|
      usage = LlmQuotaUsage.find_or_create_for(user: user, llm_quota: quota)
      usage.increment_usage!(input_tokens: input_tokens, output_tokens: output_tokens)
    end
  end

  # Quotas on +llm+ whose group is one of +user+'s groups.
  def self.applicable_quotas(llm, user)
    joins(:group).where(llm_model: llm).where(group: user.groups)
  end
  private_class_method :applicable_quotas

  # Token limit configured for this quota (nil when unlimited).
  def available_tokens
    max_tokens
  end

  # Request limit configured for this quota (nil when unlimited).
  def available_usages
    max_usages
  end

  private

  # A quota with neither limit set would restrict nothing, so reject it.
  def at_least_one_limit
    if max_tokens.nil? && max_usages.nil?
      errors.add(:base, I18n.t("discourse_ai.errors.quota_required"))
    end
  end
end
67+
68+
# == Schema Information
69+
#
70+
# Table name: llm_quotas
71+
#
72+
# id :bigint not null, primary key
73+
# group_id :bigint not null
74+
# llm_model_id :bigint not null
75+
# max_tokens :integer
76+
# max_usages :integer
77+
# duration_seconds :integer not null
78+
# created_at :datetime not null
79+
# updated_at :datetime not null
80+
#
81+
# Indexes
82+
#
83+
# index_llm_quotas_on_group_id_and_llm_model_id (group_id,llm_model_id) UNIQUE
84+
# index_llm_quotas_on_llm_model_id (llm_model_id)
85+
#

app/models/llm_quota_usage.rb

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# frozen_string_literal: true
2+
3+
# Tracks how much of one LlmQuota a single user has consumed in the current
# window (started_at..reset_at): request count plus input and output tokens.
class LlmQuotaUsage < ActiveRecord::Base
  self.table_name = "llm_quota_usages"

  QuotaExceededError = Class.new(StandardError)

  belongs_to :user
  belongs_to :llm_quota

  validates :user_id, presence: true
  validates :llm_quota_id, presence: true
  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :started_at, presence: true
  validates :reset_at, presence: true

  # Returns the usage row for +user+ and +llm_quota+, creating a zeroed row
  # whose window starts now when none exists yet.
  #
  # (user_id, llm_quota_id) is unique, so two concurrent requests can both
  # miss the lookup and race on the insert. The loser's RecordNotUnique is
  # rescued and the winner's row is returned. The nested transaction keeps a
  # failed insert from poisoning any transaction the caller already has open.
  def self.find_or_create_for(user:, llm_quota:)
    transaction(requires_new: true) do
      find_or_create_by!(user: user, llm_quota: llm_quota) do |usage|
        now = Time.current
        usage.started_at = now
        usage.reset_at = now + llm_quota.duration_seconds.seconds
        usage.input_tokens_used = 0
        usage.output_tokens_used = 0
        usage.usages = 0
      end
    end
  rescue ActiveRecord::RecordNotUnique
    find_by!(user: user, llm_quota: llm_quota)
  end

  # Starts a fresh window with zeroed counters once reset_at has passed;
  # otherwise does nothing.
  def reset_if_needed!
    now = Time.current
    return if now < reset_at

    update!(
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
    )
  end

  # Records one request plus its token counts, rolling the window over first
  # when it has expired. Returns self with the counters refreshed.
  def increment_usage!(input_tokens:, output_tokens:)
    reset_if_needed!

    # one UPDATE for all three counters so they cannot drift apart;
    # nil token counts are recorded as 0
    self.class.update_counters(
      id,
      usages: 1,
      input_tokens_used: input_tokens.to_i,
      output_tokens_used: output_tokens.to_i,
    )

    reload
  end

  # Raises QuotaExceededError, with a message saying when the window resets,
  # if this usage is over its quota. Rolls the window over first when expired.
  def check_quota!
    reset_if_needed!
    return unless quota_exceeded?

    raise QuotaExceededError,
          I18n.t(
            "discourse_ai.errors.quota_exceeded",
            relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.current),
          )
  end

  # True when either configured limit has been surpassed. A usage without a
  # quota record is never considered exceeded.
  #
  # NOTE(review): the comparisons are strict, so a user sitting exactly at a
  # limit is still allowed one more request - confirm this is intended.
  def quota_exceeded?
    return false if !llm_quota

    (llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) ||
      (llm_quota.max_usages.present? && usages > llm_quota.max_usages)
  end

  # Input plus output tokens consumed in the current window.
  def total_tokens_used
    input_tokens_used + output_tokens_used
  end

  # Tokens left in the current window (never negative); nil when the quota has
  # no token limit.
  def remaining_tokens
    return nil if llm_quota.max_tokens.nil?
    [0, llm_quota.max_tokens - total_tokens_used].max
  end

  # Requests left in the current window (never negative); nil when the quota
  # has no request limit.
  def remaining_usages
    return nil if llm_quota.max_usages.nil?
    [0, llm_quota.max_usages - usages].max
  end

  # Share of the token limit consumed, as a whole percentage capped at 100;
  # 0 when there is no token limit.
  def percentage_tokens_used
    return 0 if llm_quota.max_tokens.nil? || llm_quota.max_tokens.zero?
    [(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min
  end

  # Share of the request limit consumed, as a whole percentage capped at 100;
  # 0 when there is no request limit.
  def percentage_usages_used
    return 0 if llm_quota.max_usages.nil? || llm_quota.max_usages.zero?
    [(usages.to_f / llm_quota.max_usages * 100).round, 100].min
  end
end
100+
101+
# == Schema Information
102+
#
103+
# Table name: llm_quota_usages
104+
#
105+
# id :bigint not null, primary key
106+
# user_id :bigint not null
107+
# llm_quota_id :bigint not null
108+
# input_tokens_used :integer not null
109+
# output_tokens_used :integer not null
110+
# usages :integer not null
111+
# started_at :datetime not null
112+
# reset_at :datetime not null
113+
# created_at :datetime not null
114+
# updated_at :datetime not null
115+
#
116+
# Indexes
117+
#
118+
# index_llm_quota_usages_on_llm_quota_id (llm_quota_id)
119+
# index_llm_quota_usages_on_user_id_and_llm_quota_id (user_id,llm_quota_id) UNIQUE
120+
#

app/serializers/llm_model_serializer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class LlmModelSerializer < ApplicationSerializer
2020
:used_by
2121

2222
has_one :user, serializer: BasicUserSerializer, embed: :object
23+
has_many :llm_quotas, serializer: LlmQuotaSerializer, embed: :objects
2324

2425
def used_by
2526
llm_usage =
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# frozen_string_literal: true
2+
3+
# JSON representation of an LlmQuota: its own columns plus the name of the
# group it applies to.
class LlmQuotaSerializer < ApplicationSerializer
  attributes :id, :group_id, :llm_model_id, :max_tokens, :max_usages, :duration_seconds, :group_name

  # Name of the quota's group, or nil when the group record is missing, so
  # one dangling group_id cannot break serialization of a whole quota list.
  def group_name
    object.group&.name
  end
end

assets/javascripts/discourse/admin/models/ai-llm.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ export default class AiLlm extends RestModel {
2121
updateProperties() {
2222
const attrs = this.createProperties();
2323
attrs.id = this.id;
24-
24+
attrs.llm_quotas = this.llm_quotas;
2525
return attrs;
2626
}
2727

0 commit comments

Comments
 (0)