Skip to content

Commit 141522d

Browse files
committed
Improve logging and memory use in user import process
Enhance logging to clearly distinguish between new and existing users during import and metadata enrichment. Add null checks before clearing LicenseLookups to prevent exceptions. Only include LicenseLookups when tenant SKUs are unavailable to reduce memory usage and avoid OOM errors. Refactor existing user update logic for better error handling and reporting. Improve final summary logs to show counts of inserted and updated users.
1 parent de7779b commit 141522d

File tree

3 files changed

+47
-27
lines changed

3 files changed

+47
-27
lines changed

src/AnalyticsEngine/WebJob.Office365ActivityImporter.Engine/Graph/User/UserInsertProcessor.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public UserInsertProcessor(AnalyticsLogger telemetry, UserBatchProcessor batchPr
7373
_telemetry.LogInformation($"User import - Phase 1: Bulk insert completed");
7474

7575
// PHASE 2: Load inserted users and enrich with metadata
76-
_telemetry.LogInformation($"User import - Phase 2: Starting metadata enrichment...");
76+
_telemetry.LogInformation($"User import - Phase 2: Starting metadata enrichment for {usersToInsert.Count.ToString("N0")} new users (existing users will be updated separately)...");
7777
var insertedUserUpns = usersToInsert.Select(u => u.UserPrincipalName.ToLower()).ToList();
7878
var insertedDbUsers = await EnrichInsertedUsersWithMetadata(
7979
db,
@@ -85,7 +85,7 @@ public UserInsertProcessor(AnalyticsLogger telemetry, UserBatchProcessor batchPr
8585
userMetaCache,
8686
updateAction);
8787

88-
_telemetry.LogInformation($"User import - Phase 2: Metadata enrichment completed for {insertedDbUsers.Count.ToString("N0")} users");
88+
_telemetry.LogInformation($"User import - Phase 2: Metadata enrichment completed for {insertedDbUsers.Count.ToString("N0")} new users");
8989

9090
// Cleanup
9191
existingUpns.Clear();
@@ -256,7 +256,7 @@ private DataTable CreateUserDataTable(List<GraphUser> graphUsers)
256256
var estimatedTotalMs = elapsedMs / percentDone * 100;
257257
var remainingMs = estimatedTotalMs - elapsedMs;
258258
var remaining = TimeSpan.FromMilliseconds(remainingMs);
259-
_telemetry.LogInformation($"User import - Enriched metadata for {enrichedUsers.Count.ToString("N0")}/{insertedUserUpns.Count.ToString("N0")} users ({percentDone:F1}% done, estimated {remaining.Hours}h {remaining.Minutes}m {remaining.Seconds}s remaining)");
259+
_telemetry.LogInformation($"User import - Enriched metadata for {enrichedUsers.Count.ToString("N0")}/{insertedUserUpns.Count.ToString("N0")} new users ({percentDone:F1}% done, estimated {remaining.Hours}h {remaining.Minutes}m {remaining.Seconds}s remaining)");
260260

261261
// Clear change tracker to free memory after each batch
262262
_batchProcessor.DetachAllEntitiesExceptLookups(db);

src/AnalyticsEngine/WebJob.Office365ActivityImporter.Engine/Graph/User/UserLicenseProcessor.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ await db.Database.ExecuteSqlCommandAsync(
6969
// Clear in-memory license collections so new lookups are added cleanly
7070
foreach (var user in graphFoundDbUsers)
7171
{
72-
user.LicenseLookups.Clear();
72+
if (user.LicenseLookups != null)
73+
user.LicenseLookups.Clear();
7374
}
7475

7576
foreach (var sku in skus)

src/AnalyticsEngine/WebJob.Office365ActivityImporter.Engine/Graph/User/UserMetadataUpdater.cs

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,15 @@ public async Task InsertAndUpdateDatabaseFromExternalUsers()
101101
// Get SKUs from tenant
102102
var skus = await _userLoader.LoadTenantSkus();
103103

104-
// Load DB user data without tracking
105-
var allDbUsers = await db.users.AsNoTracking().Include(u => u.LicenseLookups).ToListAsync();
104+
// Load DB user data without tracking.
105+
// Only include license lookups when we'll need per-user license processing
106+
// (i.e. tenant-level SKUs unavailable). Including them unconditionally loads
107+
// hundreds of thousands of extra entities that are never read in the common
108+
// path and can cause an out-of-memory crash before the existing-user metadata
109+
// update runs.
110+
var allDbUsers = skus == null
111+
? await db.users.AsNoTracking().Include(u => u.LicenseLookups).ToListAsync()
112+
: await db.users.AsNoTracking().ToListAsync();
106113
_telemetry.LogInformation($"User import - loaded {allDbUsers.Count.ToString("N0")} users from database");
107114

108115
// Create lookup dictionaries for performance - pre-allocate capacity
@@ -126,7 +133,8 @@ public async Task InsertAndUpdateDatabaseFromExternalUsers()
126133

127134
// Insert any user we've not seen so far
128135
var insertedDbUsers = await InsertMissingUsers(db, allActiveGraphUsers, graphMentionedExistingDbUsers, skus == null);
129-
136+
_telemetry.LogInformation($"User import - Insert phase completed. {insertedDbUsers.Count.ToString("N0")} new users inserted.");
137+
130138
// Reload newly inserted users WITH TRACKING and update dictionaries
131139
// This ensures they're properly tracked when used as managers in ProcessExistingUsersInBatches
132140
if (insertedDbUsers.Count > 0)
@@ -199,27 +207,38 @@ public async Task InsertAndUpdateDatabaseFromExternalUsers()
199207
// path (no per-user Graph calls needed for licenses).
200208
// When SKUs are NOT available we must fall back to the EF-per-entity
201209
// path because each user needs individual Graph license queries.
202-
if (skus != null)
210+
_telemetry.LogInformation($"User import - Starting metadata update for {notInsertedUpns.Count.ToString("N0")} existing users...");
211+
int existingUsersUpdated = 0;
212+
try
203213
{
204-
await _batchProcessor.BulkUpdateExistingUsers(
205-
db,
206-
allActiveGraphUsers,
207-
notInsertedUpns,
208-
dbUsersByUpn,
209-
dbUsersByAadId,
210-
_dataMapper.GraphUsersByAadId,
211-
_userMetaCache);
214+
if (skus != null)
215+
{
216+
existingUsersUpdated = await _batchProcessor.BulkUpdateExistingUsers(
217+
db,
218+
allActiveGraphUsers,
219+
notInsertedUpns,
220+
dbUsersByUpn,
221+
dbUsersByAadId,
222+
_dataMapper.GraphUsersByAadId,
223+
_userMetaCache);
224+
}
225+
else
226+
{
227+
existingUsersUpdated = await _batchProcessor.ProcessExistingUsersInBatches(
228+
db,
229+
allActiveGraphUsers,
230+
notInsertedUpns,
231+
dbUsersByUpn,
232+
dbUsersByAadId,
233+
async (graphUser, dbUser) => await UpdateDbUserWithGraphData(db, graphUser, allActiveGraphUsers, new List<Common.Entities.User>(), dbUser, true, dbUsersByAadId),
234+
BATCH_SIZE);
235+
}
236+
_telemetry.LogInformation($"User import - Completed metadata update for {existingUsersUpdated.ToString("N0")} existing users");
212237
}
213-
else
238+
catch (Exception ex)
214239
{
215-
await _batchProcessor.ProcessExistingUsersInBatches(
216-
db,
217-
allActiveGraphUsers,
218-
notInsertedUpns,
219-
dbUsersByUpn,
220-
dbUsersByAadId,
221-
async (graphUser, dbUser) => await UpdateDbUserWithGraphData(db, graphUser, allActiveGraphUsers, new List<Common.Entities.User>(), dbUser, true, dbUsersByAadId),
222-
BATCH_SIZE);
240+
_telemetry.LogError($"User import - ERROR updating existing users: {ex.Message}");
241+
throw;
223242
}
224243

225244
// Combine inserted & modified db users for SKU processing
@@ -251,13 +270,13 @@ await _batchProcessor.ProcessExistingUsersInBatches(
251270
await db.SaveChangesAsync();
252271
}
253272

273+
_telemetry.LogInformation($"{DateTime.Now.ToShortTimeString()} User import - complete. Inserted {insertedDbUsers.Count.ToString("N0")} new users, updated metadata for {existingUsersUpdated.ToString("N0")} existing users (from {allActiveGraphUsers.Count.ToString("N0")} Graph users)");
274+
254275
// Final cleanup
255276
dbUsersByUpn.Clear();
256277
dbUsersByAadId.Clear();
257278
allActiveGraphUsers.Clear();
258279
allProcessedDbUsers.Clear();
259-
260-
_telemetry.LogInformation($"{DateTime.Now.ToShortTimeString()} User import - inserted {insertedDbUsers.Count.ToString("N0")} new users and updated {notInsertedUpns.Count.ToString("N0")} from Graph API");
261280
}
262281
}
263282

0 commit comments

Comments
 (0)