Skip to content

Commit 23a4897

Browse files
authored
Merge pull request #244 from linagora/revert-234-feat/file_quota
Revert "Feat/file quota"
2 parents 347fa75 + 5e29485 commit 23a4897

File tree

17 files changed

+31
-592
lines changed

17 files changed

+31
-592
lines changed

.github/workflows/api_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,4 @@ jobs:
8888
if: always()
8989
working-directory: .github/workflows/api_tests
9090
run: |
91-
docker compose down -v
91+
docker compose down -v

.github/workflows/api_tests/docker-compose.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ services:
107107
- RAY_ENABLE_UV_RUN_RUNTIME_ENV=0
108108
- RAY_memory_monitor_refresh_ms=0
109109
- PROMPTS_DIR=../prompts/example1
110-
- DEFAULT_FILE_QUOTA=10
111110
- AUTH_TOKEN=${AUTH_TOKEN:-test-admin-token}
112111
- SUPER_ADMIN_MODE=${SUPER_ADMIN_MODE:-true}
113112
ports:
@@ -129,4 +128,4 @@ services:
129128
start_period: 60s
130129

131130
volumes:
132-
test_data:
131+
test_data:

.gitmodules

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@
44
[submodule "extern/indexer-ui"]
55
path = extern/indexer-ui
66
url = https://github.com/linagora/openrag-admin-ui.git
7-
branch = main

.hydra_config/config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ rdb:
4646
port: ${oc.env:POSTGRES_PORT, 5432}
4747
user: ${oc.env:POSTGRES_USER, root}
4848
password: ${oc.env:POSTGRES_PASSWORD, root_password}
49-
default_file_quota: ${oc.decode:${oc.env:DEFAULT_FILE_QUOTA, -1}}
5049

5150
reranker:
5251
enable: ${oc.decode:${oc.env:RERANKER_ENABLED, true}}
@@ -172,4 +171,4 @@ ray:
172171
num_replicas: ${oc.decode:${oc.env:RAY_SERVE_NUM_REPLICAS, 1}}
173172
host: ${oc.env:RAY_SERVE_HOST, 0.0.0.0}
174173
port: ${oc.env:RAY_SERVE_PORT, 8080}
175-
chainlit_port: ${oc.env:CHAINLIT_PORT, 8090}
174+
chainlit_port: ${oc.env:CHAINLIT_PORT, 8090}

docs/content/docs/documentation/data_model.md

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ erDiagram
3939
varchar token UK
4040
boolean is_admin
4141
datetime created_at
42-
int file_quota
43-
int file_count
4442
}
4543
4644
partition_memberships {
@@ -64,10 +62,8 @@ Stores information about API users and administrators.
6462
| `token` | String (unique, hashed) | SHA-256 hash of the user's API token |
6563
| `is_admin` | Boolean | Marks system administrator users |
6664
| `created_at` | DateTime | Timestamp of creation |
67-
| `file_quota` | Integer (nullable) | Max files allowed for that user |
68-
| `file_count`| Integer (default=0) | Number of uploaded files.|
69-
**Relationships**
70-
- `memberships`: one-to-many → `PartitionMembership`
65+
66+
**Relationships:** `memberships` one-to-many → `PartitionMembership`
7167

7268
---
7369

@@ -245,17 +241,6 @@ flowchart LR
245241

246242
---
247243

248-
## **File Quotas**
249-
Limits the number of files a user can upload (indexed files + pending tasks).
250-
251-
- Admins always have unlimited quota and can update quota for a given user
252-
- `DEFAULT_FILE_QUOTA < 0` to disable quota checking.
253-
- `DEFAULT_FILE_QUOTA >= 0` to set a default quota for all users.
254-
255-
The default value `DEFAULT_FILE_QUOTA` is -1, meaning that file quota checking is bypassed.
256-
257-
---
258-
259244
## Token Handling
260245

261246
- Tokens are generated at user creation time (`or-<random hex>`)

docs/content/docs/documentation/env_vars.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ The PostgreSQL database is configured using the following environment variables:
155155
| `POSTGRES_USER` | str | root | Username for database authentication |
156156
| `POSTGRES_PASSWORD` | str | root_password | Password for database authentication |
157157

158+
158159
## Chat Pipeline
159160
### LLM & VLM Configuration
160161
The system uses two types of language models:
@@ -417,11 +418,11 @@ The following environment variables configure the FastAPI server and control acc
417418
| `APP_PORT` | `number` | `8000` | Port number on which the FastAPI application listens for incoming requests. |
418419
| `AUTH_TOKEN` | `string` | `EMPTY` | An authentication token is required to access protected API endpoints. By default, this token corresponds to the API key of the created admin (see [Admin Bootstrapping](/openrag/documentation/user_auth/#2-admin-bootstrapping)). If left empty, authentication is disabled. |
419420
| `SUPER_ADMIN_MODE` | `boolean` | `false` | Enables super admin privileges when set to `true`, [granting unrestricted access](/openrag/documentation/data_model/#access-control) to all operations and bypassing standard access controls. This is intended for debugging. |
420-
| `DEFAULT_FILE_QUOTA` | `int` | `-1` | Default per-user file quota. `<0` disables quotas globally; `>=0` sets the default limit when a user has no explicit quota. |
421421
|`API_NUM_WORKERS`|`int`|1|Number of uvicorn workers|
422422
| `PREFERRED_URL_SCHEME` | `string` | `null` | URL scheme (`http` or `https`) used when generating URLs in API responses (e.g., `task_status_url`). When running behind a reverse proxy that terminates SSL, set this to `https` to ensure generated URLs use the correct scheme. If unset, the scheme from the incoming request is used. |
423423

424424

425+
425426
:::caution[Security Notice]
426427
Always set a strong **`AUTH_TOKEN`** in production environments. Never leave it empty or use default values in production deployments.
427428
:::
@@ -438,4 +439,4 @@ Always set a strong **`AUTH_TOKEN`** in production environments. Never leave it
438439
### Chainlit
439440
[See this](/openrag/documentation/setup_chainlit_ui_auth/) for chainlit authentication
440441

441-
[See this](/openrag/documentation/chainlit_data_persistency/) for chainlit data persistency
442+
[See this](/openrag/documentation/chainlit_data_persistency/) for chainlit data persistency

openrag/components/indexer/indexer.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -146,15 +146,15 @@ async def insert_documents(self, chunks, user):
146146
await vectordb.async_add_documents.remote(chunks, user)
147147

148148
@ray.method(concurrency_group="delete")
149-
async def delete_file(self, file_id: str, partition: str, user: dict) -> bool:
149+
async def delete_file(self, file_id: str, partition: str) -> bool:
150150
log = self.logger.bind(file_id=file_id, partition=partition)
151151
vectordb = ray.get_actor("Vectordb", namespace="openrag")
152152
if not self.enable_insertion:
153153
log.error("Vector database is not enabled, but delete_file was called.")
154154
return False
155155

156156
try:
157-
await vectordb.delete_file.remote(file_id, partition, user_id=user.get("id"))
157+
await vectordb.delete_file.remote(file_id, partition)
158158
log.info("Deleted file from partition.", file_id=file_id, partition=partition)
159159

160160
except Exception as e:
@@ -180,7 +180,7 @@ async def update_file_metadata(
180180
for doc in docs:
181181
doc.metadata.update(metadata)
182182

183-
await self.delete_file(file_id, partition, user=user)
183+
await self.delete_file(file_id, partition)
184184
await vectordb.async_add_documents.remote(docs, user=user)
185185

186186
log.info("Metadata updated for file.")
@@ -380,16 +380,3 @@ async def get_pool_info(self) -> dict[str, int]:
380380
"max_tasks_per_worker": MAX_TASKS_PER_WORKER,
381381
"total_capacity": POOL_SIZE * MAX_TASKS_PER_WORKER,
382382
}
383-
384-
@ray.method(concurrency_group="queue_info")
385-
async def get_user_pending_task_count(self, user_id: int) -> int:
386-
"""Count tasks for a user that are not yet COMPLETED or FAILED."""
387-
async with self.lock:
388-
task_ids = self.user_index.get(user_id, set())
389-
pending_states = {"QUEUED", "SERIALIZING", "CHUNKING", "INSERTING"}
390-
count = 0
391-
for tid in task_ids:
392-
info = self.tasks.get(tid)
393-
if info and info.state in pending_states:
394-
count += 1
395-
return count

openrag/components/indexer/vectordb/utils.py

Lines changed: 3 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import secrets
44
from datetime import datetime
55

6-
from config import load_config
76
from sqlalchemy import (
87
JSON,
98
Boolean,
@@ -16,7 +15,6 @@
1615
String,
1716
UniqueConstraint,
1817
create_engine,
19-
func,
2018
text,
2119
)
2220
from sqlalchemy.orm import (
@@ -32,9 +30,6 @@
3230
from utils.logger import get_logger
3331

3432
logger = get_logger()
35-
config = load_config()
36-
37-
DEFAULT_FILE_QUOTA = config.rdb.get("default_file_quota", -1)
3833

3934
Base = declarative_base()
4035

@@ -116,8 +111,7 @@ class User(Base):
116111
token = Column(String, unique=True, nullable=True, index=True)
117112
is_admin = Column(Boolean, default=False, nullable=False)
118113
created_at = Column(DateTime, default=datetime.now, nullable=False)
119-
file_quota = Column(Integer, nullable=True, default=None)
120-
file_count = Column(Integer, nullable=False, default=0)
114+
121115
memberships = relationship("PartitionMembership", back_populates="user", cascade="all, delete-orphan")
122116

123117

@@ -157,7 +151,6 @@ def __init__(self, database_url: str, logger=logger):
157151
self.Session = sessionmaker(bind=self.engine)
158152
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
159153
self._ensure_admin_user(AUTH_TOKEN)
160-
self.file_quota_per_user = DEFAULT_FILE_QUOTA
161154

162155
except Exception as e:
163156
raise VDBConnectionError(
@@ -260,12 +253,6 @@ def add_file_to_partition(
260253
)
261254

262255
session.add(file)
263-
264-
# Increment file_count for the user
265-
user = session.query(User).filter(User.id == user_id).first()
266-
if user:
267-
user.file_count = User.file_count + 1
268-
269256
session.commit()
270257
log.info("Added file successfully")
271258
return True
@@ -274,7 +261,7 @@ def add_file_to_partition(
274261
log.exception("Error adding file to partition")
275262
raise
276263

277-
def remove_file_from_partition(self, file_id: str, partition: str, user_id: int):
264+
def remove_file_from_partition(self, file_id: str, partition: str):
278265
"""Remove a file from its partition - Optimized without join"""
279266
log = self.logger.bind(file_id=file_id, partition=partition)
280267
with self.Session() as session:
@@ -283,12 +270,6 @@ def remove_file_from_partition(self, file_id: str, partition: str, user_id: int)
283270
file = session.query(File).filter(File.file_id == file_id, File.partition_name == partition).first()
284271
if file:
285272
session.delete(file)
286-
287-
# Decrement file_count for the user
288-
user = session.query(User).filter(User.id == user_id).first()
289-
if user and user.file_count > 0:
290-
user.file_count = func.greatest(0, User.file_count - 1)
291-
292273
session.commit()
293274
log.info(f"Removed file {file_id} from partition {partition}")
294275
return True
@@ -299,20 +280,11 @@ def remove_file_from_partition(self, file_id: str, partition: str, user_id: int)
299280
log.error(f"Error removing file: {e}")
300281
raise e
301282

302-
def delete_partition(self, partition: str, user_id: int):
283+
def delete_partition(self, partition: str):
303284
"""Delete a partition and all its files"""
304285
with self.Session() as session:
305286
partition_obj = session.query(Partition).filter_by(partition=partition).first()
306287
if partition_obj:
307-
# Count files in the partition before deletion
308-
file_count = session.query(File).filter(File.partition_name == partition).count()
309-
310-
# Decrement file_count for the user
311-
if file_count > 0:
312-
user = session.query(User).filter(User.id == user_id).first()
313-
if user:
314-
user.file_count = func.greatest(0, User.file_count - file_count)
315-
316288
session.delete(partition_obj) # Will delete all files due to cascade
317289
session.commit()
318290
self.logger.info("Deleted partition", partition=partition)
@@ -359,22 +331,17 @@ def create_user(
359331
display_name: str | None = None,
360332
external_user_id: str | None = None,
361333
is_admin: bool = False,
362-
file_quota: int | None = None,
363334
) -> dict:
364335
"""Create a user and generate an API token for them."""
365336
with self.Session() as s:
366337
token = f"or-{secrets.token_hex(16)}"
367338
hashed_token = self.hash_token(token)
368339

369-
if self.file_quota_per_user > 0 and file_quota is None:
370-
file_quota = self.file_quota_per_user # default to default quota
371-
372340
user = User(
373341
display_name=display_name,
374342
external_user_id=external_user_id,
375343
token=hashed_token,
376344
is_admin=is_admin,
377-
file_quota=file_quota,
378345
)
379346
s.add(user)
380347
s.commit()
@@ -386,8 +353,6 @@ def create_user(
386353
"external_user_id": user.external_user_id,
387354
"token": token,
388355
"is_admin": user.is_admin,
389-
"file_quota": user.file_quota,
390-
"file_count": user.file_count,
391356
}
392357

393358
def list_users(self) -> list[dict]:
@@ -399,8 +364,6 @@ def list_users(self) -> list[dict]:
399364
"display_name": u.display_name,
400365
"external_user_id": u.external_user_id,
401366
"is_admin": u.is_admin,
402-
"file_quota": u.file_quota,
403-
"file_count": u.file_count,
404367
"created_at": u.created_at.isoformat(),
405368
}
406369
for u in users
@@ -427,8 +390,6 @@ def get_user_by_token(self, token: str) -> dict | None:
427390
"display_name": user.display_name,
428391
"external_user_id": user.external_user_id,
429392
"is_admin": user.is_admin,
430-
"file_quota": user.file_quota,
431-
"file_count": user.file_count,
432393
"memberships": memberships,
433394
}
434395

@@ -452,8 +413,6 @@ def get_user_by_id(self, user_id: int) -> dict | None:
452413
"display_name": user.display_name,
453414
"external_user_id": user.external_user_id,
454415
"is_admin": user.is_admin,
455-
"file_quota": user.file_quota,
456-
"file_count": user.file_count,
457416
"memberships": memberships,
458417
}
459418

@@ -481,8 +440,6 @@ def regenerate_user_token(self, user_id: int) -> dict:
481440
"external_user_id": user.external_user_id,
482441
"token": new_token,
483442
"is_admin": user.is_admin,
484-
"file_quota": user.file_quota,
485-
"file_count": user.file_count,
486443
}
487444

488445
# Memberships
@@ -574,29 +531,6 @@ def user_is_partition_member(self, user_id: int, partition: str) -> bool:
574531
with self.Session() as s:
575532
return s.query(PartitionMembership).filter_by(user_id=user_id, partition_name=partition).first() is not None
576533

577-
def update_user_quota(self, user_id: int, file_quota: int | None) -> dict:
578-
"""
579-
Update a user's file quota.
580-
- None: Use global default (DEFAULT_FILE_QUOTA env var)
581-
- <0: Unlimited
582-
- >=0: Specific limit
583-
"""
584-
with self.Session() as s:
585-
user = s.query(User).filter(User.id == user_id).first()
586-
user.file_quota = file_quota
587-
s.commit()
588-
self.logger.info(f"Updated file_quota for user {user_id} to {file_quota}")
589-
s.refresh(user)
590-
591-
return {
592-
"id": user.id,
593-
"display_name": user.display_name,
594-
"external_user_id": user.external_user_id,
595-
"is_admin": user.is_admin,
596-
"file_quota": user.file_quota,
597-
"file_count": user.file_count,
598-
}
599-
600534
def hash_token(self, token: str) -> str:
601535
"""Return a SHA-256 hash of a token string."""
602536
return hashlib.sha256(token.encode("utf-8")).hexdigest()

0 commit comments

Comments
 (0)