33import uuid_utils .compat as uuid
44from django .contrib .postgres .search import SearchVectorField
55from django .db import models
6+ from django .db .models import QuerySet
67from django .db .models .signals import pre_delete
78from django .dispatch import receiver
89from mptt .fields import TreeForeignKey
910from mptt .models import MPTTModel
1011
1112from common .db .sql_execute import select_one
1213from common .mixins .app_model_mixin import AppModelMixin
14+ from common .utils .common import get_sha256_hash
1315from models_provider .models import Model
1416from users .models import User
1517
@@ -221,6 +223,19 @@ class SearchMode(models.TextChoices):
221223 blend = 'blend'
222224
223225
class FileSourceType(models.TextChoices):
    """Category of the resource a stored file belongs to.

    The category determines when the file is removed and what ``source_id``
    holds for it. Every member's stored value equals its member name.
    """

    # Knowledge base: the file is deleted together with its knowledge base;
    # source_id is the knowledge base id.
    KNOWLEDGE = "KNOWLEDGE"
    # Application: the file is deleted together with its application;
    # source_id is the application id.
    APPLICATION = "APPLICATION"
    # Temporary, 30 minutes: the data is cleaned up after 30 minutes;
    # source_id is TEMPORARY_30_MINUTE.
    TEMPORARY_30_MINUTE = "TEMPORARY_30_MINUTE"
    # Temporary, 120 minutes: the data is cleaned up after 120 minutes;
    # source_id is TEMPORARY_120_MINUTE.
    # NOTE: the stored value used to be spelled "TEMPORARY_100_MINUTE", which
    # matched neither the member name nor the 120-minute retention it names.
    # Rows or migrations that already persisted the old spelling need updating.
    TEMPORARY_120_MINUTE = "TEMPORARY_120_MINUTE"
    # Temporary, 1 day: the data is cleaned up after 1 day;
    # source_id is TEMPORARY_1_DAY.
    TEMPORARY_1_DAY = "TEMPORARY_1_DAY"
237+
238+
224239class VectorField (models .Field ):
225240 def db_type (self , connection ):
226241 return 'vector'
@@ -246,16 +261,25 @@ class Meta:
246261class File (AppModelMixin ):
247262 id = models .UUIDField (primary_key = True , max_length = 128 , default = uuid .uuid7 , editable = False , verbose_name = "主键id" )
248263 file_name = models .CharField (max_length = 256 , verbose_name = "文件名称" , default = "" )
249- workspace_id = models .CharField (max_length = 64 , verbose_name = "工作空间id" , default = "default" , db_index = True )
264+ file_size = models .IntegerField (verbose_name = "文件大小" , default = 0 )
265+ sha256_hash = models .CharField (verbose_name = "文件sha256_hash标识" , default = "" )
266+ source_type = models .CharField (verbose_name = "资源类型" , choices = FileSourceType ,
267+ default = FileSourceType .TEMPORARY_120_MINUTE .value )
268+ source_id = models .CharField (verbose_name = "资源id" , default = FileSourceType .TEMPORARY_120_MINUTE .value )
250269 loid = models .IntegerField (verbose_name = "loid" )
251270 meta = models .JSONField (verbose_name = "文件关联数据" , default = dict )
252271
253272 class Meta :
254273 db_table = "file"
255274
def save(self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None):
    """Persist the file row, storing ``bytea`` as a PostgreSQL large object.

    The content hash of ``bytea`` is recorded on the row and used to look for
    an existing ``File`` with the same hash. When one exists its large object
    is reused (both rows share one ``loid``); otherwise a new large object is
    created with ``lo_from_bytea``.

    :param bytea: raw file content to store and hash.
    :param force_insert: forwarded to ``Model.save``.
    :param force_update: forwarded to ``Model.save``.
    :param using: database alias, forwarded to ``Model.save``.
    :param update_fields: forwarded to ``Model.save``; when given it must
        include ``loid`` and ``sha256_hash`` for the values set here to be
        written.
    """
    content_hash = get_sha256_hash(bytea)
    # Record the hash on this row. Without it the column keeps its default
    # ("") and the lookup below can never match a previously saved file.
    self.sha256_hash = content_hash
    duplicate = QuerySet(File).filter(sha256_hash=content_hash).first()
    if duplicate is not None:
        # Same content already stored: point at the existing large object.
        self.loid = duplicate.loid
    else:
        # Passing 0 as the oid lets PostgreSQL assign an unused one.
        result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea])
        self.loid = result['loid']
    # Forward the caller's options instead of silently discarding them.
    super().save(force_insert=force_insert, force_update=force_update, using=using,
                 update_fields=update_fields)
260284
261285 def get_bytes (self ):
@@ -265,4 +289,6 @@ def get_bytes(self):
265289
@receiver(pre_delete, sender=File)
def on_delete_file(sender, instance, **kwargs):
    """Unlink the large object of a ``File`` that is about to be deleted.

    A large object can be referenced by several ``File`` rows, so it is only
    unlinked when no other row points at the same ``loid``.

    :param sender: the model class sending the signal (``File``).
    :param instance: the ``File`` instance being deleted.
    :param kwargs: remaining signal arguments, unused.
    """
    shared = QuerySet(File).filter(loid=instance.loid).exclude(id=instance.id).exists()
    if shared:
        # Another row still uses this large object; keep it.
        return
    # Bind the oid as a query parameter rather than formatting it into the SQL.
    select_one('SELECT lo_unlink(%s)', [instance.loid])
0 commit comments