Skip to content

Commit 4243006

Browse files
committed
Bulk user update: add fast SQL path & perf tests
Implement high-performance bulk update for existing users using temp table and SQL UPDATE JOIN, bypassing EF per-entity tracking when tenant SKUs are available. Add UserMetadataUpdaterPerformanceTests to measure insert/update throughput and verify correctness. Optimize license lookup creation to use UserId FK directly. Expose GraphUsersByAadId for efficient lookups. Update tests and loader to support new logic. These changes greatly improve scalability for large tenants.
1 parent 7bdfd08 commit 4243006

File tree

7 files changed

+591
-23
lines changed

7 files changed

+591
-23
lines changed

src/AnalyticsEngine/Tests.UnitTests/FakeLoaderClasses/FakeUserMetadataLoader.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ public FakeUserMetadataLoader(
3434

3535
public Task<List<GraphUser>> LoadAllActiveUsers()
3636
{
37-
return Task.FromResult(_fakeUsers);
37+
// Return a defensive copy so callers that Clear() the result
38+
// do not wipe the original list (InsertAndUpdateDatabaseFromExternalUsers does this).
39+
return Task.FromResult(new List<GraphUser>(_fakeUsers));
3840
}
3941

4042
public Task<IGraphServiceSubscribedSkusCollectionPage> LoadTenantSkus()

src/AnalyticsEngine/Tests.UnitTests/Tests.UnitTests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
<Compile Include="UserMetadataUpdaterLicenseTests.cs" />
3636
<Compile Include="UserMetadataUpdaterManagerTests.cs" />
3737
<Compile Include="UserMetadataUpdaterMetadataTests.cs" />
38+
<Compile Include="UserMetadataUpdaterPerformanceTests.cs" />
3839
<Content Include="Generated\**" />
3940
</ItemGroup>
4041
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
using Common.Entities;
2+
using Common.Entities.Config;
3+
using DataUtils;
4+
using Microsoft.Graph;
5+
using Microsoft.VisualStudio.TestTools.UnitTesting;
6+
using System;
7+
using System.Collections.Generic;
8+
using System.Data.Entity;
9+
using System.Diagnostics;
10+
using System.Linq;
11+
using System.Threading.Tasks;
12+
using Tests.UnitTests.FakeLoaderClasses;
13+
using WebJob.Office365ActivityImporter.Engine.Graph;
14+
15+
namespace Tests.UnitTests
16+
{
17+
/// <summary>
18+
/// Performance tests for UserMetadataUpdater measuring insert and update throughput
19+
/// </summary>
20+
[TestClass]
21+
public class UserMetadataUpdaterPerformanceTests
22+
{
23+
// Value pools for generated test users. GenerateGraphUsers picks entries with
// (index % Length), so the metadata assigned to a given user index is deterministic.
private static readonly string[] Departments = new[]
{
    "IT", "HR", "Finance", "Marketing", "Sales", "Engineering", "Legal", "Operations"
};

private static readonly string[] JobTitles = new[]
{
    "Developer", "Manager", "Analyst", "Director", "VP", "Engineer", "Consultant", "Lead"
};

private static readonly string[] Offices = new[]
{
    "Building A", "Building B", "Building C", "Remote", "HQ"
};

// Countries is used for both Country and UsageLocation of a generated user.
private static readonly string[] Countries = new[] { "US", "UK", "DE", "FR", "JP" };

private static readonly string[] States = new[] { "WA", "CA", "NY", "TX", "IL" };

private static readonly string[] Companies = new[] { "Contoso", "Fabrikam", "Northwind" };
29+
30+
/// <summary>
/// Seeds 1000 generated users through <see cref="UserMetadataUpdater"/>, then runs a second
/// pass over the same user names with different department, job title, postal code and office.
/// Both passes are timed and logged; the second pass must finish in under 60 seconds.
/// A non-null SKU page is supplied, which (per the original author's note) routes the
/// second pass through the bulk SQL update path.
/// </summary>
[TestMethod]
public async Task UserMetadataUpdater_Update1000ExistingUsers_WithSkus_Performance()
{
    const int TotalUsers = 1000;
    const long UpdateBudgetMs = 60000;

    var logger = AnalyticsLogger.ConsoleOnlyTracer();
    var appConfig = new AppConfig();
    var testPrefix = $"perfsku{DateTime.Now.Ticks}";
    var firstUserUpn = $"{testPrefix}_user0@test.com";

    var initialUsers = GenerateGraphUsers(TotalUsers, testPrefix);

    // Non-null (empty) SKU page so that the bulk update path is the one exercised.
    var emptySkuPage = new GraphServiceSubscribedSkusCollectionPage();

    try
    {
        // ---------- Phase 1: insert ----------
        var seedLoader = new FakeUserMetadataLoader(initialUsers, emptySkuPage);
        var seedUpdater = new UserMetadataUpdater(logger, appConfig, seedLoader);

        var insertTimer = new Stopwatch();
        insertTimer.Start();
        await seedUpdater.InsertAndUpdateDatabaseFromExternalUsers();
        insertTimer.Stop();
        var insertMs = insertTimer.ElapsedMilliseconds;

        logger.LogInformation($"PERF: Insert {TotalUsers} users took {insertMs}ms");

        // Every generated user must be present before the update pass is measured.
        using (var db = new AnalyticsEntitiesContext())
        {
            var insertedCount = await db.users
                .Where(u => u.UserPrincipalName.StartsWith(testPrefix))
                .CountAsync();
            Assert.AreEqual(TotalUsers, insertedCount, "All users should be inserted");
        }

        // ---------- Phase 2: same user names, shifted metadata ----------
        // A fresh list is generated because the original author notes that phase 1
        // consumes its input list (allActiveGraphUsers.Clear()).
        var changedUsers = GenerateGraphUsers(TotalUsers, testPrefix);
        var position = 0;
        foreach (var changedUser in changedUsers)
        {
            // Shift each lookup by one slot so the value differs from what phase 1 stored.
            var shifted = position + 1;
            changedUser.Department = Departments[shifted % Departments.Length];
            changedUser.JobTitle = JobTitles[shifted % JobTitles.Length];
            changedUser.PostalCode = $"{20000 + position}";
            changedUser.OfficeLocation = Offices[shifted % Offices.Length];
            position++;
        }

        var updateLoader = new FakeUserMetadataLoader(changedUsers, emptySkuPage);
        var updateUpdater = new UserMetadataUpdater(logger, appConfig, updateLoader);

        var updateTimer = new Stopwatch();
        updateTimer.Start();
        await updateUpdater.InsertAndUpdateDatabaseFromExternalUsers();
        updateTimer.Stop();
        var updateMs = updateTimer.ElapsedMilliseconds;

        logger.LogInformation($"PERF: Update {TotalUsers} existing users took {updateMs}ms");

        // ---------- Correctness: row count unchanged, user 0 carries the shifted values ----------
        using (var db = new AnalyticsEntitiesContext())
        {
            var query = db.users
                .Include(u => u.Department)
                .Include(u => u.JobTitle)
                .Include(u => u.OfficeLocation)
                .Where(u => u.UserPrincipalName.StartsWith(testPrefix));
            var storedUsers = await query.ToListAsync();

            Assert.AreEqual(TotalUsers, storedUsers.Count, "User count should not change");

            var userZero = storedUsers.First(u => u.UserPrincipalName == firstUserUpn);
            Assert.AreEqual(Departments[1], userZero.Department?.Name, "Department should be updated");
            Assert.AreEqual(JobTitles[1], userZero.JobTitle?.Name, "Job title should be updated");
            Assert.AreEqual("20000", userZero.PostalCode, "PostalCode should be updated");
            Assert.AreEqual(Offices[1], userZero.OfficeLocation?.Name, "OfficeLocation should be updated");
        }

        // Performance gate: the 1000-user update must finish within the 60 second budget.
        var withinBudget = updateMs < UpdateBudgetMs;
        Assert.IsTrue(withinBudget, $"Update took {updateMs}ms, expected under 60000ms");

        logger.LogInformation($"=== Results: Insert={insertMs}ms, Update={updateMs}ms ===");
    }
    finally
    {
        // Always remove this run's rows, even when an assertion above failed.
        await CleanupTestUsers(testPrefix);
    }
}
116+
117+
/// <summary>
/// Inserts 1000 users, then updates them with changed metadata.
/// Measures throughput without tenant-level SKUs (original EF per-entity path).
/// The insert is verified before the update is timed, so a failed insert is reported
/// as such rather than surfacing later as an update failure.
/// </summary>
[TestMethod]
public async Task UserMetadataUpdater_Update1000ExistingUsers_WithoutSkus_Performance()
{
    const int USER_COUNT = 1000;
    var telemetry = AnalyticsLogger.ConsoleOnlyTracer();
    var config = new AppConfig();

    // UTC ticks: the prefix only has to be unique per run, and UTC does not repeat
    // values when the local clock is moved back (e.g. at a DST change).
    var testPrefix = $"perfnosku{DateTime.UtcNow.Ticks}";
    var firstUserUpn = $"{testPrefix}_user0@test.com";

    var graphUsers = GenerateGraphUsers(USER_COUNT, testPrefix);

    // Null SKUs → per-user processing (original EF path)
    try
    {
        // --- Phase 1: Insert ---
        var insertLoader = new FakeUserMetadataLoader(graphUsers);
        var insertUpdater = new UserMetadataUpdater(telemetry, config, insertLoader);

        var insertSw = Stopwatch.StartNew();
        await insertUpdater.InsertAndUpdateDatabaseFromExternalUsers();
        insertSw.Stop();

        telemetry.LogInformation($"PERF (no SKU): Insert {USER_COUNT} users took {insertSw.ElapsedMilliseconds}ms");

        // Verify insert (same check as the WithSkus test) before measuring the update.
        using (var db = new AnalyticsEntitiesContext())
        {
            var count = await db.users.CountAsync(u => u.UserPrincipalName.StartsWith(testPrefix));
            Assert.AreEqual(USER_COUNT, count, "All users should be inserted");
        }

        // --- Phase 2: Re-generate users with modified metadata ---
        // Each lookup index is shifted by one so the value differs from phase 1.
        var updatedGraphUsers = GenerateGraphUsers(USER_COUNT, testPrefix);
        for (int i = 0; i < updatedGraphUsers.Count; i++)
        {
            updatedGraphUsers[i].Department = Departments[(i + 1) % Departments.Length];
            updatedGraphUsers[i].JobTitle = JobTitles[(i + 1) % JobTitles.Length];
            updatedGraphUsers[i].PostalCode = $"{20000 + i}";
        }

        var updateLoader = new FakeUserMetadataLoader(updatedGraphUsers);
        var updateUpdater = new UserMetadataUpdater(telemetry, config, updateLoader);

        var updateSw = Stopwatch.StartNew();
        await updateUpdater.InsertAndUpdateDatabaseFromExternalUsers();
        updateSw.Stop();

        telemetry.LogInformation($"PERF (no SKU): Update {USER_COUNT} existing users took {updateSw.ElapsedMilliseconds}ms");

        // Verify correctness
        using (var db = new AnalyticsEntitiesContext())
        {
            var updatedUsers = await db.users
                .Include(u => u.Department)
                .Include(u => u.JobTitle)
                .Where(u => u.UserPrincipalName.StartsWith(testPrefix))
                .ToListAsync();

            Assert.AreEqual(USER_COUNT, updatedUsers.Count, "User count should not change");

            // FirstOrDefault + IsNotNull gives a readable assertion failure instead of
            // the InvalidOperationException that First() throws when nothing matches.
            var firstUser = updatedUsers.FirstOrDefault(u => u.UserPrincipalName == firstUserUpn);
            Assert.IsNotNull(firstUser, $"User '{firstUserUpn}' should exist after the update");
            Assert.AreEqual(Departments[1], firstUser.Department?.Name, "Department should be updated");
            Assert.AreEqual(JobTitles[1], firstUser.JobTitle?.Name, "Job title should be updated");
            Assert.AreEqual("20000", firstUser.PostalCode, "PostalCode should be updated");
        }

        // Performance gate: the per-entity path gets a 120 second budget for 1000 users.
        Assert.IsTrue(updateSw.ElapsedMilliseconds < 120000,
            $"Update took {updateSw.ElapsedMilliseconds}ms, expected under 120000ms");

        telemetry.LogInformation($"=== Results (no SKU): Insert={insertSw.ElapsedMilliseconds}ms, Update={updateSw.ElapsedMilliseconds}ms ===");
    }
    finally
    {
        // Always remove this run's rows, even when an assertion above failed.
        await CleanupTestUsers(testPrefix);
    }
}
189+
190+
/// <summary>
/// Builds <paramref name="count"/> enabled test users named
/// "<paramref name="prefix"/>_user{index}@test.com" (used for both UPN and mail).
/// Each user gets a fresh GUID as Id and metadata picked from the static value pools
/// by index modulo pool length; the postal code is 10000 + index.
/// </summary>
/// <param name="count">Number of users to generate.</param>
/// <param name="prefix">Prefix placed in front of every generated user name.</param>
/// <returns>A new list holding the generated users in index order.</returns>
private static List<GraphUser> GenerateGraphUsers(int count, string prefix)
{
    return Enumerable.Range(0, count)
        .Select(index =>
        {
            // UPN and Mail share one address; Country and UsageLocation share one code.
            var address = $"{prefix}_user{index}@test.com";
            var countryCode = Countries[index % Countries.Length];

            return new GraphUser
            {
                UserPrincipalName = address,
                Id = Guid.NewGuid().ToString(),
                AccountEnabled = true,
                Mail = address,
                PostalCode = $"{10000 + index}",
                Department = Departments[index % Departments.Length],
                JobTitle = JobTitles[index % JobTitles.Length],
                OfficeLocation = Offices[index % Offices.Length],
                Country = countryCode,
                State = States[index % States.Length],
                CompanyName = Companies[index % Companies.Length],
                UsageLocation = countryCode
            };
        })
        .ToList();
}
213+
214+
/// <summary>
/// Removes the rows created by one test run: every dbo.users row whose user_name starts
/// with <paramref name="prefix"/>, preceded by the dbo.user_license_type_lookups rows
/// that reference those users.
/// </summary>
/// <param name="prefix">Unique user-name prefix generated by the calling test.</param>
/// <exception cref="ArgumentException">
/// <paramref name="prefix"/> is null or empty; the resulting pattern would be "%" and
/// match every user in the database.
/// </exception>
private static async Task CleanupTestUsers(string prefix)
{
    if (string.IsNullOrEmpty(prefix))
    {
        throw new ArgumentException("A non-empty prefix is required; an empty prefix would delete all users.", nameof(prefix));
    }

    // Bound as a parameter (@p0) instead of being spliced into the SQL text.
    // LIKE wildcards inside the prefix (%, _, [) still act as wildcards, as before.
    var likePattern = prefix + "%";

    using (var db = new AnalyticsEntitiesContext())
    {
        // Delete license lookups first (FK constraint)
        await db.Database.ExecuteSqlCommandAsync(
            "DELETE FROM dbo.user_license_type_lookups WHERE user_id IN (SELECT id FROM dbo.users WHERE user_name LIKE @p0)",
            likePattern);
        await db.Database.ExecuteSqlCommandAsync(
            "DELETE FROM dbo.users WHERE user_name LIKE @p0",
            likePattern);
    }
}
225+
}
226+
}

0 commit comments

Comments
 (0)