Skip to content

Commit 9844447

Browse files
committed
[IMP] dms_import: improve performance, reduce batch size, and bypass heavy compute fields
1 parent f755c12 commit 9844447

File tree

1 file changed

+161
-108
lines changed

1 file changed

+161
-108
lines changed

dms_import/hooks.py

Lines changed: 161 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).
33

44
import logging
5+
import os
6+
from collections import defaultdict
57
from random import randint
68

79
from psycopg2.sql import SQL, Identifier
@@ -37,68 +39,48 @@ def _get_or_create_default_storage(env):
3739
return db_storage
3840

3941

40-
def _assign_dms_groups(cr, env, folder_id, new_dir):
41-
"""Migrate read/write groups from a documents.folder to dms.access.groups."""
42-
DmsAccessGroups = env["dms.access.group"]
43-
all_new_groups = DmsAccessGroups.browse()
42+
def _batch_fetch_folder_relations(cr, folder_ids):
43+
write_groups = defaultdict(list)
44+
read_groups = defaultdict(list)
45+
tags_by_folder = defaultdict(list)
4446

45-
# Handle Write Groups
4647
if table_exists(cr, "documents_folder_res_groups_rel"):
4748
cr.execute(
4849
SQL(
49-
"""
50-
SELECT res_groups_id FROM {}
51-
WHERE documents_folder_id = %s
52-
"""
53-
).format(Identifier("documents_folder_res_groups_rel")),
54-
(folder_id,),
50+
"""SELECT documents_folder_id, res_groups_id
51+
FROM documents_folder_res_groups_rel
52+
WHERE documents_folder_id = ANY(%s)"""
53+
),
54+
(list(folder_ids),),
5555
)
56-
group_ids = [r[0] for r in cr.fetchall()]
57-
if group_ids:
58-
write_group = DmsAccessGroups.create(
59-
{
60-
"name": f"{new_dir.name} Write Group",
61-
"perm_create": True,
62-
"perm_write": True,
63-
"perm_unlink": True,
64-
"group_ids": [Command.set(group_ids)],
65-
}
66-
)
67-
all_new_groups |= write_group
56+
for folder_id, group_id in cr.fetchall():
57+
write_groups[folder_id].append(group_id)
6858

69-
# Handle Read Groups
7059
if table_exists(cr, "documents_folder_read_groups"):
7160
cr.execute(
7261
SQL(
73-
"""
74-
SELECT res_groups_id FROM {}
75-
WHERE documents_folder_id = %s
76-
"""
77-
).format(Identifier("documents_folder_read_groups")),
78-
(folder_id,),
62+
"""SELECT documents_folder_id, res_groups_id
63+
FROM documents_folder_read_groups
64+
WHERE documents_folder_id = ANY(%s)"""
65+
),
66+
(list(folder_ids),),
7967
)
80-
read_group_ids = [r[0] for r in cr.fetchall()]
81-
if read_group_ids:
82-
read_group = DmsAccessGroups.create(
83-
{
84-
"name": f"{new_dir.name} Read Group",
85-
"group_ids": [Command.set(read_group_ids)],
86-
}
87-
)
88-
all_new_groups |= read_group
68+
for folder_id, group_id in cr.fetchall():
69+
read_groups[folder_id].append(group_id)
8970

90-
# If dont have any group, we need to assign a group to visible folder
91-
if not all_new_groups:
92-
all_new_groups = DmsAccessGroups.create(
93-
{
94-
"name": f"{new_dir.name} Default Group",
95-
"perm_create": True,
96-
"perm_write": True,
97-
"perm_unlink": True,
98-
"group_ids": [Command.set(env.ref("base.group_user").ids)],
99-
}
71+
if table_exists(cr, "documents_facet") and table_exists(cr, "documents_tag"):
72+
cr.execute(
73+
SQL(
74+
"""SELECT f.folder_id, t.id
75+
FROM {} t JOIN {} f ON f.id = t.facet_id
76+
WHERE f.folder_id = ANY(%s)"""
77+
).format(Identifier("documents_tag"), Identifier("documents_facet")),
78+
(list(folder_ids),),
10079
)
101-
return all_new_groups
80+
for folder_id, tag_id in cr.fetchall():
81+
tags_by_folder[folder_id].append(tag_id)
82+
83+
return write_groups, read_groups, tags_by_folder
10284

10385

10486
def _create_unique_directory(DmsDirectory, vals):
@@ -131,7 +113,7 @@ def migrate_documents_tags(cr, env, lang):
131113
tag_mapping = {}
132114
category_mapping = {}
133115

134-
# 1. FACETSCATEGORIES
116+
# 1. Facetscategories
135117
if table_exists(cr, "documents_facet"):
136118
cr.execute(
137119
SQL("SELECT id, name->>%s AS name FROM {}").format(
@@ -166,7 +148,7 @@ def migrate_documents_tags(cr, env, lang):
166148
if norm and norm in existing_categories:
167149
category_mapping[f["id"]] = existing_categories[norm].id
168150

169-
# 2. TAGS
151+
# 2. Tags
170152
existing_tags = {
171153
(_normalize(tag.name), tag.category_id.id or False): tag
172154
for tag in DmsTag.search([])
@@ -227,7 +209,7 @@ def migrate_documents_tags(cr, env, lang):
227209
return tag_mapping, category_mapping
228210

229211

230-
def migrate_documents_folders(cr, env, lang, tag_mapping, category_mapping):
212+
def migrate_documents_folders(cr, env, lang, tag_mapping):
231213
"""Migrate documents.folder to dms.directory, optimized for batch processing."""
232214
if not table_exists(cr, "documents_folder"):
233215
_logger.warning("Skipping folder migration: 'documents_folder' not found.")
@@ -242,11 +224,22 @@ def migrate_documents_folders(cr, env, lang, tag_mapping, category_mapping):
242224
(lang,),
243225
)
244226
folders = cr.dictfetchall()
227+
if not folders:
228+
_logger.info("No folders to migrate.")
229+
return {}
245230

246231
DmsDirectory = env["dms.directory"]
232+
DmsAccessGroups = env["dms.access.group"]
247233
folder_mapping = {}
248234
storage = _get_or_create_default_storage(env)
235+
default_user_group_id = env.ref("base.group_user").id
249236

237+
folder_ids = [f["id"] for f in folders]
238+
write_groups, read_groups, tags_by_folder = _batch_fetch_folder_relations(
239+
cr, folder_ids
240+
)
241+
access_groups_to_create_vals = []
242+
dirs_to_update_tags = defaultdict(list)
250243
for folder in folders:
251244
try:
252245
parent_id = folder_mapping.get(folder["parent_folder_id"])
@@ -263,49 +256,84 @@ def migrate_documents_folders(cr, env, lang, tag_mapping, category_mapping):
263256
"Created dms.directory: '%s' (ID: %s)", new_dir.name, new_dir.id
264257
)
265258

266-
# Assign groups
267-
dms_groups = _assign_dms_groups(cr, env, folder["id"], new_dir)
268-
dms_groups._compute_users() # trigger to recompute count_users
269-
new_dir.group_ids = [Command.set(dms_groups.ids)]
270-
_logger.info(
271-
"Assigned %s groups to directory '%s'",
272-
dms_groups.mapped("name"),
273-
new_dir.name,
274-
)
259+
# Assign groups using pre-fetched data
260+
has_groups = False
261+
write_group_ids = write_groups.get(folder["id"], [])
262+
if write_group_ids:
263+
has_groups = True
264+
vals = {
265+
"name": f"{new_dir.name} Write Group",
266+
"perm_create": True,
267+
"perm_write": True,
268+
"perm_unlink": True,
269+
"group_ids": [Command.set(write_group_ids)],
270+
"_dir_id": new_dir.id,
271+
}
272+
access_groups_to_create_vals.append(vals)
273+
read_group_ids = read_groups.get(folder["id"], [])
274+
if read_group_ids:
275+
has_groups = True
276+
vals = {
277+
"name": f"{new_dir.name} Read Group",
278+
"group_ids": [Command.set(read_group_ids)],
279+
"_dir_id": new_dir.id,
280+
}
281+
access_groups_to_create_vals.append(vals)
282+
# In case no group, directory will be invisible, so I decide to assign a new group
283+
# this group has full permision like no group in document folder
284+
if not has_groups:
285+
vals = {
286+
"name": f"{new_dir.name} Default Group",
287+
"perm_create": True,
288+
"perm_write": True,
289+
"perm_unlink": True,
290+
"group_ids": [Command.set([default_user_group_id])],
291+
"_dir_id": new_dir.id,
292+
}
293+
access_groups_to_create_vals.append(vals)
275294

276295
# Assign Tags
277-
if table_exists(cr, "documents_facet") and table_exists(
278-
cr, "documents_tag"
279-
):
280-
cr.execute(
281-
SQL(
282-
"""SELECT t.id FROM {} t
283-
JOIN {} f ON f.id = t.facet_id
284-
WHERE f.folder_id = %s"""
285-
).format(
286-
Identifier("documents_tag"),
287-
Identifier("documents_facet"),
288-
),
289-
(folder["id"],),
290-
)
291-
dms_tag_ids = [
292-
tag_mapping.get(r[0])
293-
for r in cr.fetchall()
294-
if tag_mapping.get(r[0])
295-
]
296-
if dms_tag_ids:
297-
new_dir.tag_ids = [Command.set(dms_tag_ids)]
296+
folder_tag_ids = [
297+
tag_mapping[tag_id]
298+
for tag_id in tags_by_folder.get(folder["id"], [])
299+
if tag_id in tag_mapping
300+
]
301+
if folder_tag_ids:
302+
dirs_to_update_tags[tuple(sorted(folder_tag_ids))].append(new_dir.id)
298303

299304
except Exception:
300305
_logger.exception(
301306
"Error migrating folder ID %s (%s)", folder["id"], folder["name"]
302307
)
303308

309+
for tag_ids_tuple, dir_ids in dirs_to_update_tags.items():
310+
DmsDirectory.browse(dir_ids).write(
311+
{"tag_ids": [Command.set(list(tag_ids_tuple))]}
312+
)
313+
314+
if access_groups_to_create_vals:
315+
new_groups = DmsAccessGroups.create(
316+
[
317+
{k: v for k, v in vals.items() if k != "_dir_id"}
318+
for vals in access_groups_to_create_vals
319+
]
320+
)
321+
# Recompute all user counts at once
322+
new_groups.invalidate_model(fnames=["users"])
323+
324+
dirs_to_update_groups = defaultdict(list)
325+
for vals, group in zip(access_groups_to_create_vals, new_groups):
326+
dirs_to_update_groups[vals["_dir_id"]].append(group.id)
327+
328+
for dir_id, group_ids in dirs_to_update_groups.items():
329+
DmsDirectory.browse(dir_id).write({"group_ids": [Command.set(group_ids)]})
330+
304331
_logger.info("Successfully migrated %d folders.", len(folder_mapping))
305332
return folder_mapping
306333

307334

308-
def migrate_documents_files(cr, env, folder_mapping, tag_mapping, category_mapping):
335+
# flake8: noqa: C901
336+
def migrate_documents_files(cr, env, folder_mapping, tag_mapping):
309337
"""Migrate documents.document to dms.file, optimized for batch processing."""
310338
if not table_exists(cr, "documents_document") or not folder_mapping:
311339
_logger.warning("Skipping file migration: table or folder mapping is missing.")
@@ -323,8 +351,10 @@ def migrate_documents_files(cr, env, folder_mapping, tag_mapping, category_mappi
323351
return
324352

325353
DmsFile = env["dms.file"]
326-
327-
for batch_of_ids in split_every(models.INSERT_BATCH_SIZE * 10, all_doc_ids):
354+
Attachment = env["ir.attachment"]
355+
filestore = Attachment._filestore()
356+
BATCH_SIZE = models.INSERT_BATCH_SIZE
357+
for batch_of_ids in split_every(BATCH_SIZE, all_doc_ids):
328358
_logger.info("Migrating files batch with %d documents", len(batch_of_ids))
329359
cr.execute(
330360
SQL(
@@ -337,6 +367,16 @@ def migrate_documents_files(cr, env, folder_mapping, tag_mapping, category_mappi
337367
if not documents:
338368
continue
339369

370+
attachment_ids = [
371+
d["attachment_id"] for d in documents if d.get("attachment_id")
372+
]
373+
attachments_data = {
374+
att["id"]: att["store_fname"]
375+
for att in Attachment.with_context(active_test=False).search_read(
376+
domain=[("id", "in", attachment_ids)], fields=["store_fname"]
377+
)
378+
}
379+
340380
# Batch-fetch tag relations for the current batch
341381
tags_by_doc = {}
342382
if table_exists(cr, "document_tag_rel"):
@@ -360,33 +400,50 @@ def migrate_documents_files(cr, env, folder_mapping, tag_mapping, category_mappi
360400
)
361401
continue
362402

403+
att_id = doc["attachment_id"]
404+
if not att_id:
405+
_logger.warning("Skipping doc %s: no attachment_id.", doc["id"])
406+
continue
407+
store_fname = attachments_data.get(att_id)
408+
if not store_fname:
409+
_logger.warning(
410+
"Skipping doc %s: attachment %s has no store_fname or does not exist.",
411+
doc["id"],
412+
att_id,
413+
)
414+
continue
415+
if not os.path.exists(os.path.join(filestore, store_fname)):
416+
_logger.warning(
417+
"Skipping doc %s: file not found in filestore for attachment %s (%s)",
418+
doc["id"],
419+
att_id,
420+
store_fname,
421+
)
422+
continue
423+
363424
files_to_create_vals.append(
364425
{
365426
"name": doc["name"],
366427
"directory_id": directory_id,
367428
"active": doc["active"],
368-
"attachment_id": doc["attachment_id"],
429+
"attachment_id": att_id,
369430
"tag_ids": [Command.set(tags_by_doc.get(doc["id"], []))],
370-
"_old_attachment_id": doc["attachment_id"],
371431
}
372432
)
373433

374434
if files_to_create_vals:
375-
new_files = DmsFile.create(
376-
[
377-
{k: v for k, v in vals.items() if k != "_old_attachment_id"}
378-
for vals in files_to_create_vals
379-
]
380-
)
435+
# avoid _compute_content in dms.file
436+
with DmsFile.env.norecompute():
437+
new_files = DmsFile.create(files_to_create_vals)
381438

382-
attachments_to_update = {}
439+
attachments_to_update = defaultdict(list)
383440
for vals, new_file in zip(files_to_create_vals, new_files):
384-
if vals["_old_attachment_id"]:
385-
attachments_to_update[vals["_old_attachment_id"]] = new_file.id
441+
if vals.get("attachment_id"):
442+
attachments_to_update[new_file.id].append(vals["attachment_id"])
386443

387-
if attachments_to_update:
388-
for att_id, file_id in attachments_to_update.items():
389-
env["ir.attachment"].browse(att_id).write(
444+
for file_id, att_ids in attachments_to_update.items():
445+
if att_ids:
446+
Attachment.browse(att_ids).write(
390447
{"res_model": "dms.file", "res_id": file_id}
391448
)
392449

@@ -403,15 +460,11 @@ def post_init_hook(cr, registry):
403460
try:
404461
lang = get_lang(env).code or env.lang
405462
_logger.info(
406-
"Starting migration from EE 'documents' to OCA 'dms' using lang '%s'", lang
463+
"Starting migration from EE 'documents' to CE 'dms' using lang '%s'", lang
407464
)
408-
409-
tag_mapping, category_mapping = migrate_documents_tags(cr, env, lang)
410-
folder_mapping = migrate_documents_folders(
411-
cr, env, lang, tag_mapping, category_mapping
412-
)
413-
migrate_documents_files(cr, env, folder_mapping, tag_mapping, category_mapping)
414-
465+
tag_mapping, _ = migrate_documents_tags(cr, env, lang)
466+
folder_mapping = migrate_documents_folders(cr, env, lang, tag_mapping)
467+
migrate_documents_files(cr, env, folder_mapping, tag_mapping)
415468
_logger.info("Migration from 'documents' to 'dms' completed successfully.")
416469
except Exception:
417470
_logger.exception("Migration from 'documents' to 'dms' failed!")

0 commit comments

Comments
 (0)