Skip to content

Commit 9149fcb

Browse files
fix: improve unref data cleanup performance (#247) (#248)
Co-authored-by: Aleksandr Karpov <aleksandr.v.karpov.qubership@gmail.com>
1 parent a389b81 commit 9149fcb

File tree

3 files changed

+157
-68
lines changed

3 files changed

+157
-68
lines changed

qubership-apihub-service/repository/UnreferencedDataCleanupRepository.go

Lines changed: 83 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
// Copyright 2024-2025 NetCracker Technology Corporation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
115
package repository
216

317
import (
@@ -79,20 +93,48 @@ func (u unreferencedDataCleanupRepositoryImpl) DeleteUnreferencedOperationData(c
7993
}
8094
logger.Debugf(ctx, "Found %d operation data entities to delete in current batch", len(dataHash))
8195

82-
err = u.countRelatedDataForOperationDataTx(ctx, tx, dataHash, &deletedItems)
96+
logger.Tracef(ctx, "Deleting related data for operation data with hash: %v", dataHash)
97+
98+
logger.Debug(ctx, "Deleting related data from ts_graphql_operation_data")
99+
deleteGQLDataQuery := `DELETE FROM ts_graphql_operation_data WHERE data_hash IN (?)`
100+
gqlResult, err := tx.ExecContext(ctx, deleteGQLDataQuery, pg.In(dataHash))
101+
if err != nil {
102+
return fmt.Errorf("failed to delete records from ts_graphql_operation_data: %w", err)
103+
}
104+
deletedItems.TSGQLOperationData = gqlResult.RowsAffected()
105+
106+
logger.Debug(ctx, "Deleting related data from ts_rest_operation_data")
107+
deleteRestDataQuery := `DELETE FROM ts_rest_operation_data WHERE data_hash IN (?)`
108+
restResult, err := tx.ExecContext(ctx, deleteRestDataQuery, pg.In(dataHash))
83109
if err != nil {
84-
return fmt.Errorf("failed to count operation data related data: %w", err)
110+
return fmt.Errorf("failed to delete records from ts_rest_operation_data: %w", err)
85111
}
112+
deletedItems.TSRestOperationData = restResult.RowsAffected()
113+
114+
logger.Debug(ctx, "Deleting related data from ts_operation_data")
115+
deleteOpDataQuery := `DELETE FROM ts_operation_data WHERE data_hash IN (?)`
116+
opResult, err := tx.ExecContext(ctx, deleteOpDataQuery, pg.In(dataHash))
117+
if err != nil {
118+
return fmt.Errorf("failed to delete records from ts_operation_data: %w", err)
119+
}
120+
deletedItems.TSOperationData = opResult.RowsAffected()
121+
122+
logger.Debug(ctx, "Deleting related data from fts_operation_data")
123+
deleteFTSDataQuery := `DELETE FROM fts_operation_data WHERE data_hash IN (?)`
124+
ftsResult, err := tx.ExecContext(ctx, deleteFTSDataQuery, pg.In(dataHash))
125+
if err != nil {
126+
return fmt.Errorf("failed to delete records from fts_operation_data: %w", err)
127+
}
128+
deletedItems.FTSOperationData = ftsResult.RowsAffected()
86129

87130
logger.Tracef(ctx, "Deleting operation data with hash: %v", dataHash)
88-
deleteOperationDataQuery := `
89-
DELETE FROM operation_data
90-
WHERE data_hash IN (?)`
131+
132+
logger.Debug(ctx, "Deleting unreferenced data from operation_data")
133+
deleteOperationDataQuery := `DELETE FROM operation_data WHERE data_hash IN (?)`
91134
_, err = tx.ExecContext(ctx, deleteOperationDataQuery, pg.In(dataHash))
92135
if err != nil {
93136
return fmt.Errorf("failed to delete operation data: %w", err)
94137
}
95-
96138
deletedItems.OperationData = len(dataHash)
97139

98140
var cleanupRun entity.UnreferencedDataCleanupEntity
@@ -125,54 +167,27 @@ func (u unreferencedDataCleanupRepositoryImpl) DeleteUnreferencedOperationData(c
125167
return deletedItems.OperationData +
126168
deletedItems.TSGQLOperationData +
127169
deletedItems.TSRestOperationData +
128-
deletedItems.TSRestOperationData +
170+
deletedItems.TSOperationData +
129171
deletedItems.FTSOperationData, err
130172
}
131173

132-
func (u unreferencedDataCleanupRepositoryImpl) countRelatedDataForOperationDataTx(ctx context.Context, tx *pg.Tx, dataHash []string, deletedItems *entity.DeletedItemsCounts) error {
133-
_, err := tx.QueryOneContext(ctx, pg.Scan(&deletedItems.TSGQLOperationData),
134-
`SELECT COUNT(*) FROM ts_graphql_operation_data WHERE data_hash IN (?)`, pg.In(dataHash))
135-
if err != nil {
136-
return err
137-
}
138-
139-
_, err = tx.QueryOneContext(ctx, pg.Scan(&deletedItems.TSRestOperationData),
140-
`SELECT COUNT(*) FROM ts_rest_operation_data WHERE data_hash IN (?)`, pg.In(dataHash))
141-
if err != nil {
142-
return err
143-
}
144-
145-
_, err = tx.QueryOneContext(ctx, pg.Scan(&deletedItems.TSOperationData),
146-
`SELECT COUNT(*) FROM ts_operation_data WHERE data_hash IN (?)`, pg.In(dataHash))
147-
if err != nil {
148-
return err
149-
}
150-
151-
_, err = tx.QueryOneContext(ctx, pg.Scan(&deletedItems.FTSOperationData),
152-
`SELECT COUNT(*) FROM fts_operation_data WHERE data_hash IN (?)`, pg.In(dataHash))
153-
if err != nil {
154-
return err
155-
}
156-
157-
return nil
158-
}
159-
160174
func (u unreferencedDataCleanupRepositoryImpl) DeleteUnreferencedOperationGroupTemplates(ctx context.Context, runId string, batchSize int) (int, error) {
161175
var deletedTemplates int
162176

163177
err := u.cp.GetConnection().RunInTransaction(ctx, func(tx *pg.Tx) error {
164178
deleteUnreferencedOperationGroupTemplatesQuery := `
165-
WITH to_delete AS (
166-
SELECT ogt.checksum
167-
FROM operation_group_template ogt
168-
WHERE NOT EXISTS (
169-
SELECT 1 FROM operation_group og WHERE og.template_checksum = ogt.checksum
170-
)
171-
ORDER BY ogt.checksum
172-
LIMIT ?
173-
)
174-
DELETE FROM operation_group_template
175-
WHERE checksum IN (SELECT checksum FROM to_delete)`
179+
DELETE FROM operation_group_template ogt
180+
USING (
181+
SELECT checksum
182+
FROM operation_group_template ogt2
183+
WHERE NOT EXISTS (
184+
SELECT 1 FROM operation_group og WHERE og.template_checksum = ogt2.checksum
185+
)
186+
ORDER BY ogt2.checksum
187+
LIMIT ?
188+
) del
189+
WHERE ogt.checksum = del.checksum;`
190+
176191
res, err := tx.ExecContext(ctx, deleteUnreferencedOperationGroupTemplatesQuery, batchSize)
177192
if err != nil {
178193
return fmt.Errorf("failed to delete unreferenced operation group templates: %w", err)
@@ -214,17 +229,17 @@ func (u unreferencedDataCleanupRepositoryImpl) DeleteUnreferencedSrcArchives(ctx
214229

215230
err := u.cp.GetConnection().RunInTransaction(ctx, func(tx *pg.Tx) error {
216231
deleteUnreferencedSrcArchivesQuery := `
217-
WITH to_delete AS (
218-
SELECT psa.checksum
219-
FROM published_sources_archives psa
220-
WHERE NOT EXISTS (
221-
SELECT 1 FROM published_sources ps WHERE ps.archive_checksum = psa.checksum
222-
)
223-
ORDER BY psa.checksum
224-
LIMIT ?
225-
)
226-
DELETE FROM published_sources_archives
227-
WHERE checksum IN (SELECT checksum FROM to_delete)`
232+
DELETE FROM published_sources_archives psa
233+
USING (
234+
SELECT checksum
235+
FROM published_sources_archives psa2
236+
WHERE NOT EXISTS (
237+
SELECT 1 FROM published_sources ps WHERE ps.archive_checksum = psa2.checksum
238+
)
239+
ORDER BY psa2.checksum
240+
LIMIT ?
241+
) del
242+
WHERE psa.checksum = del.checksum;`
228243
res, err := tx.ExecContext(ctx, deleteUnreferencedSrcArchivesQuery, batchSize)
229244
if err != nil {
230245
return fmt.Errorf("failed to delete unreferenced source archives: %w", err)
@@ -266,17 +281,17 @@ func (u unreferencedDataCleanupRepositoryImpl) DeleteUnreferencedPublishedData(c
266281

267282
err := u.cp.GetConnection().RunInTransaction(ctx, func(tx *pg.Tx) error {
268283
deleteUnreferencedPublishDataQuery := `
269-
WITH to_delete AS (
270-
SELECT pd.checksum
271-
FROM published_data pd
272-
WHERE NOT EXISTS (
273-
SELECT 1 FROM published_version_revision_content pvrc WHERE pvrc.checksum = pd.checksum
274-
)
275-
ORDER BY pd.checksum
276-
LIMIT ?
277-
)
278-
DELETE FROM published_data
279-
WHERE checksum IN (SELECT checksum FROM to_delete)`
284+
DELETE FROM published_data pd
285+
USING (
286+
SELECT checksum
287+
FROM published_data pd2
288+
WHERE NOT EXISTS (
289+
SELECT 1 FROM published_version_revision_content pvrc WHERE pvrc.checksum = pd2.checksum
290+
)
291+
ORDER BY pd2.checksum
292+
LIMIT ?
293+
) del
294+
WHERE pd.checksum = del.checksum;`
280295
res, err := tx.ExecContext(ctx, deleteUnreferencedPublishDataQuery, batchSize)
281296
if err != nil {
282297
return fmt.Errorf("failed to delete unreferenced publish data: %w", err)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
DROP INDEX IF EXISTS operation_data_hash_idx;
2+
DROP INDEX IF EXISTS pvrc_checksum_idx;
3+
4+
ALTER TABLE ts_graphql_operation_data DROP CONSTRAINT IF EXISTS ts_graphql_operation_data_operation_data_hash_fk;
5+
ALTER TABLE ts_operation_data DROP CONSTRAINT IF EXISTS ts_operation_data_operation_data_hash_fk;
6+
ALTER TABLE ts_rest_operation_data DROP CONSTRAINT IF EXISTS ts_rest_operation_data_operation_data_hash_fk;
7+
ALTER TABLE fts_operation_data DROP CONSTRAINT IF EXISTS fts_operation_data_operation_data_hash_fk;
8+
ALTER TABLE published_version_revision_content DROP CONSTRAINT IF EXISTS published_version_revision_content_published_data_fk;
9+
10+
ALTER TABLE ts_graphql_operation_data
11+
ADD CONSTRAINT ts_graphql_operation_data_operation_data_hash_fk
12+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
13+
ON UPDATE CASCADE ON DELETE CASCADE;
14+
15+
ALTER TABLE ts_operation_data
16+
ADD CONSTRAINT ts_operation_data_operation_data_hash_fk
17+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
18+
ON UPDATE CASCADE ON DELETE CASCADE;
19+
20+
ALTER TABLE ts_rest_operation_data
21+
ADD CONSTRAINT "FK_operation_data"
22+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
23+
ON UPDATE CASCADE ON DELETE CASCADE;
24+
25+
ALTER TABLE fts_operation_data
26+
ADD CONSTRAINT fts_operation_data_operation_data_hash_fk
27+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
28+
ON UPDATE CASCADE ON DELETE CASCADE;
29+
30+
ALTER TABLE published_version_revision_content
31+
ADD CONSTRAINT "FK_published_data"
32+
FOREIGN KEY (checksum,package_id) REFERENCES published_data (checksum,package_id)
33+
ON UPDATE CASCADE ON DELETE CASCADE;
34+
35+
DROP INDEX IF EXISTS ix_operation_pvrt;
36+
DROP INDEX IF EXISTS ix_build_id_text;
37+
DROP INDEX IF EXISTS ix_activity_tracking_package_e_type;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
CREATE INDEX IF NOT EXISTS operation_data_hash_idx ON operation (data_hash);
2+
CREATE INDEX IF NOT EXISTS pvrc_checksum_idx ON published_version_revision_content(checksum);
3+
4+
ALTER TABLE ts_graphql_operation_data DROP CONSTRAINT IF EXISTS ts_graphql_operation_data_operation_data_hash_fk;
5+
ALTER TABLE ts_operation_data DROP CONSTRAINT IF EXISTS ts_operation_data_operation_data_hash_fk;
6+
ALTER TABLE ts_rest_operation_data DROP CONSTRAINT IF EXISTS "FK_operation_data";
7+
ALTER TABLE fts_operation_data DROP CONSTRAINT IF EXISTS fts_operation_data_operation_data_hash_fk;
8+
ALTER TABLE published_version_revision_content DROP CONSTRAINT IF EXISTS "FK_published_data";
9+
10+
ALTER TABLE ts_graphql_operation_data
11+
ADD CONSTRAINT ts_graphql_operation_data_operation_data_hash_fk
12+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
13+
ON UPDATE CASCADE;
14+
15+
ALTER TABLE ts_operation_data
16+
ADD CONSTRAINT ts_operation_data_operation_data_hash_fk
17+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
18+
ON UPDATE CASCADE;
19+
20+
ALTER TABLE ts_rest_operation_data
21+
ADD CONSTRAINT ts_rest_operation_data_operation_data_hash_fk
22+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
23+
ON UPDATE CASCADE;
24+
25+
ALTER TABLE fts_operation_data
26+
ADD CONSTRAINT fts_operation_data_operation_data_hash_fk
27+
FOREIGN KEY (data_hash) REFERENCES operation_data (data_hash)
28+
ON UPDATE CASCADE;
29+
30+
ALTER TABLE published_version_revision_content
31+
ADD CONSTRAINT published_version_revision_content_published_data_fk
32+
FOREIGN KEY (checksum,package_id) REFERENCES published_data (checksum,package_id)
33+
ON UPDATE CASCADE;
34+
35+
CREATE INDEX IF NOT EXISTS ix_operation_pvrt ON operation (package_id, version, revision, type);
36+
CREATE INDEX IF NOT EXISTS ix_build_id_text ON build ((build_id::text));
37+
CREATE INDEX IF NOT EXISTS ix_activity_tracking_package_e_type ON activity_tracking (package_id, e_type);

0 commit comments

Comments
 (0)