Skip to content

Commit 2910d11

Browse files
committed
refactor(address-service): DOMA-12746 optimize merge-duplicate-addresses script with batch processing and progress tracking
Add batch property reference checking to reduce database queries, implement progress bar visualization, and improve logging format. Replace per-address isAddressReferenced calls with a single batched getReferencedAddressIds query. Add total record count display and page-based progress indicators.
1 parent a37d1b1 commit 2910d11

File tree

1 file changed

+59
-21
lines changed

1 file changed

+59
-21
lines changed

apps/address-service/bin/local/merge-duplicate-addresses.js

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const conf = require('@open-condo/config')
2424

2525
const PropertyGQL = generateGqlQueries('Property', '{ id addressKey }')
2626
const AddressGQL = generateGqlQueries('Address', '{ id key possibleDuplicateOf { id key } }')
27+
const PROGRESS_BAR_WIDTH = 20
2728

2829
const RESOLVE_ADDRESS_DUPLICATE_MUTATION = gql`
2930
mutation resolveAddressDuplicate ($data: ResolveAddressDuplicateInput!) {
@@ -61,14 +62,16 @@ async function createClients () {
6162
* Check if an address id is referenced by any Property in condo.
6263
* Property.addressKey stores Address.id.
6364
*/
64-
async function isAddressReferenced (condoClient, addressId) {
65-
if (!addressId) return false
65+
async function getReferencedAddressIds (condoClient, addressIds) {
66+
const uniqueAddressIds = [...new Set(addressIds.filter(Boolean))]
67+
if (uniqueAddressIds.length === 0) return new Set()
68+
6669
const properties = await condoClient.getModels({
6770
modelGql: PropertyGQL,
68-
where: { addressKey: addressId, deletedAt: null },
69-
first: 1,
71+
where: { addressKey_in: uniqueAddressIds, deletedAt: null },
7072
})
71-
return properties.length > 0
73+
74+
return new Set(properties.map((property) => property.addressKey).filter(Boolean))
7275
}
7376

7477
/**
@@ -100,6 +103,13 @@ async function resolveDuplicate (addressClient, addressId, winnerId) {
100103
return data.result.status
101104
}
102105

106+
/**
 * Render a fixed-width ASCII progress bar, e.g. `[#####---------------]`.
 *
 * @param {number} current - Number of items completed so far.
 * @param {number} total - Total number of items; values that are not > 0 are treated as 1.
 * @param {number} [width=PROGRESS_BAR_WIDTH] - Number of cells between the brackets.
 * @returns {string} `width` cells of `#` (done) and `-` (remaining) wrapped in `[` `]`.
 */
function formatProgressBar (current, total, width = PROGRESS_BAR_WIDTH) {
    const safeTotal = total > 0 ? total : 1
    // String.prototype.repeat throws RangeError on a negative count, so the
    // width is normalised to a non-negative integer before it is used.
    const safeWidth = Number.isFinite(width) && width > 0 ? Math.floor(width) : 0
    const rawRatio = current / safeTotal
    // Clamp to [0, 1]: a negative or NaN `current` must not produce a negative
    // or NaN cell count (which would throw or collapse the bar to "[]").
    const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(Math.max(rawRatio, 0), 1)
    const filled = Math.round(ratio * safeWidth)
    return `[${'#'.repeat(filled)}${'-'.repeat(safeWidth - filled)}]`
}
112+
103113
async function main (args) {
104114
const isDryRun = args.includes('--dry-run')
105115
if (isDryRun) {
@@ -110,54 +120,76 @@ async function main (args) {
110120
console.info('Signed in to condo and address-service')
111121

112122
const pageSize = 100
123+
const duplicateWhere = { possibleDuplicateOf_is_null: false, deletedAt: null }
124+
const totalRecords = await addressClient.getCount({
125+
modelGql: AddressGQL,
126+
where: duplicateWhere,
127+
})
128+
const totalPages = Math.ceil(totalRecords / pageSize)
129+
130+
console.info(`Total to process: ${totalRecords} records across ~${totalPages} pages (pageSize=${pageSize})`)
131+
113132
let skip = 0
133+
let pageNumber = 0
114134
let totalProcessed = 0
115135
let totalMerged = 0
116136
let totalSkipped = 0
117137

118138
let addresses
119139
do {
140+
pageNumber++
120141
addresses = await addressClient.getModels({
121142
modelGql: AddressGQL,
122-
where: { possibleDuplicateOf_is_null: false, deletedAt: null },
143+
where: duplicateWhere,
123144
first: pageSize,
124145
skip,
125146
sortBy: ['createdAt_ASC'],
126147
})
127148

149+
if (addresses.length === 0) break
150+
151+
console.info(`\nPage ${pageNumber}/${totalPages || '?'}: ${addresses.length} records`)
152+
153+
const referencedAddressIds = await getReferencedAddressIds(
154+
condoClient,
155+
addresses.flatMap((address) => [address.id, address.possibleDuplicateOf && address.possibleDuplicateOf.id])
156+
)
157+
128158
let pageSkipped = 0
159+
let pageProcessed = 0
129160

130161
for (const address of addresses) {
131162
totalProcessed++
163+
pageProcessed++
164+
165+
const progress = formatProgressBar(pageProcessed, addresses.length)
166+
const progressLine = `${progress} batch=${pageNumber}/${totalPages || '?'} page ${pageProcessed}/${addresses.length} global=${totalProcessed}/${totalRecords} merged=${totalMerged} skipped=${totalSkipped}`
132167
const target = address.possibleDuplicateOf
133168

134169
if (!target) {
135-
console.info(`\n Processing: ${address.id} (key: ${address.key})`)
136-
console.info(' SKIP: possibleDuplicateOf target is null (possibly soft-deleted)')
170+
console.info(`${progressLine} | SKIP null target | current=${address.id || '-'}`)
171+
console.info(` current key: ${address.key || '-'}`)
137172
totalSkipped++
138173
pageSkipped++
139174
continue
140175
}
141176

142-
console.info(`\n Processing: ${address.id} (key: ${address.key})`)
143-
console.info(` Target: ${target.id} (key: ${target.key})`)
144-
145177
// Check which address id is actually used in condo Properties
146-
const currentReferenced = await isAddressReferenced(condoClient, address.id)
147-
const targetReferenced = await isAddressReferenced(condoClient, target.id)
178+
const currentReferenced = referencedAddressIds.has(address.id)
179+
const targetReferenced = referencedAddressIds.has(target.id)
148180

149181
let winner, loser
150182

151183
if (currentReferenced && targetReferenced) {
152-
console.info(' SKIP: both addresses are referenced in condo Properties')
184+
console.info(`${progressLine} | SKIP both referenced | current=${address.id || '-'} target=${target.id || '-'}`)
185+
console.info(` current key: ${address.key || '-'}`)
186+
console.info(` target key: ${target.key || '-'}`)
153187
totalSkipped++
154188
pageSkipped++
155189
continue
156190
} else if (currentReferenced) {
157-
console.info(' SKIP: current address (duplicate) is referenced in condo Properties, but the mutation requires the target to be the winner')
158-
totalSkipped++
159-
pageSkipped++
160-
continue
191+
winner = address
192+
loser = target
161193
} else if (targetReferenced) {
162194
winner = target
163195
loser = address
@@ -167,15 +199,19 @@ async function main (args) {
167199
loser = address
168200
}
169201

170-
console.info(` Winner: ${winner.id}, Loser: ${loser.id} (current=${currentReferenced ? 'ref' : '-'}, target=${targetReferenced ? 'ref' : '-'})`)
202+
console.info(
203+
`${progressLine} | MERGE winner=${winner.id} loser=${loser.id} current=${currentReferenced ? 'ref' : '-'} target=${targetReferenced ? 'ref' : '-'}`
204+
)
205+
console.info(` current key: ${address.key || '-'}`)
206+
console.info(` target key: ${target.key || '-'}`)
171207

172208
if (!isDryRun) {
173209
try {
174210
const status = await resolveDuplicate(addressClient, address.id, winner.id)
175-
console.info(` Result: ${status}`)
211+
console.info(` result: ${status}`)
176212
totalMerged++
177213
} catch (err) {
178-
console.info(` SKIP (server error): ${err.message}`)
214+
console.info(` SKIP (server error): ${err.message}`)
179215
totalSkipped++
180216
pageSkipped++
181217
}
@@ -185,6 +221,8 @@ async function main (args) {
185221
}
186222
}
187223

224+
console.info(`Page ${pageNumber} done: processed=${pageProcessed}, merged=${totalMerged}, skipped=${totalSkipped}`)
225+
188226
// Advance skip past records that remain in query results (skipped/unmerged).
189227
// In non-dry-run mode merged records disappear from the query, so only
190228
// skipped ones need to be stepped over. In dry-run nothing is removed.

0 commit comments

Comments
 (0)