Skip to content

Commit 9b7e995

Browse files
committed
Refactor Add-CIPPDbItem for pipeline streaming and batch efficiency
Add-CIPPDbItem now supports pipeline input for memory-efficient streaming, improved batch processing, and automatic count recording via -AddCount. Updated related cache scripts to use streaming and batch features, reducing memory usage and simplifying code. Added Set-CIPPDbCacheTestData.ps1 for generating large test datasets. Enhanced Add-CIPPAzDataTableEntity with performance logging.
1 parent 3929058 commit 9b7e995

File tree

7 files changed

+257
-81
lines changed

7 files changed

+257
-81
lines changed

Modules/CIPPCore/Public/Add-CIPPAzDataTableEntity.ps1

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ function Add-CIPPAzDataTableEntity {
4040
$MaxRowSize = 500000 - 100
4141
$MaxSize = 30kb
4242

43+
$startTime = Get-Date
44+
$entityCount = @($Entity).Count
45+
$totalValidationTime = 0
46+
$totalAddTime = 0
47+
Write-Information "[Add-CIPPAzDataTableEntity] Processing $entityCount entities"
48+
4349
foreach ($SingleEnt in @($Entity)) {
4450
try {
4551
# Skip null entities
@@ -62,6 +68,7 @@ function Add-CIPPAzDataTableEntity {
6268
}
6369

6470
# Additional validation for AzBobbyTables compatibility
71+
$validationStart = Get-Date
6572
try {
6673
# Ensure all property values are not null for string properties
6774
if ($SingleEnt -is [hashtable]) {
@@ -84,8 +91,15 @@ function Add-CIPPAzDataTableEntity {
8491
} catch {
8592
Write-Warning "Error during entity validation: $($_.Exception.Message)"
8693
}
94+
$validationEnd = Get-Date
95+
$validationDuration = ($validationEnd - $validationStart).TotalMilliseconds
96+
$totalValidationTime += $validationDuration
8797

98+
$addStart = Get-Date
8899
Add-AzDataTableEntity @Parameters -Entity $SingleEnt -ErrorAction Stop
100+
$addEnd = Get-Date
101+
$addDuration = ($addEnd - $addStart).TotalMilliseconds
102+
$totalAddTime += $addDuration
89103

90104
} catch [System.Exception] {
91105
if ($_.Exception.ErrorCode -in @('PropertyValueTooLarge', 'EntityTooLarge', 'RequestBodyTooLarge')) {
@@ -237,4 +251,10 @@ function Add-CIPPAzDataTableEntity {
237251
}
238252
}
239253
}
254+
255+
$endTime = Get-Date
256+
$totalDuration = [math]::Round(($endTime - $startTime).TotalSeconds, 2)
257+
$avgValidation = [math]::Round($totalValidationTime / $entityCount, 2)
258+
$avgAdd = [math]::Round($totalAddTime / $entityCount, 2)
259+
Write-Debug "[Add-CIPPAzDataTableEntity] Completed $entityCount entities in ${totalDuration}s (avg validation: ${avgValidation}ms, avg add: ${avgAdd}ms)"
240260
}

Modules/CIPPCore/Public/Add-CIPPDbItem.ps1

Lines changed: 143 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,30 @@ function Add-CIPPDbItem {
44
Add items to the CIPP Reporting database
55
66
.DESCRIPTION
7-
Adds items to the CippReportingDB table with support for bulk inserts and count mode
7+
Adds items to the CippReportingDB table with support for bulk inserts, count mode, and pipeline streaming
88
99
.PARAMETER TenantFilter
1010
The tenant domain or GUID (used as partition key)
1111
1212
.PARAMETER Type
1313
The type of data being stored (used in row key)
1414
15-
.PARAMETER Data
16-
Array of items to add to the database
15+
.PARAMETER InputObject
16+
Items to add to the database. Accepts pipeline input for memory-efficient streaming.
17+
Alias: Data (for backward compatibility)
1718
1819
.PARAMETER Count
19-
If specified, stores a single row with count of each object property as separate properties
20+
If specified, stores a single row with count of items processed
21+
22+
.PARAMETER AddCount
23+
If specified, automatically records the total count after all items have been processed. The count row is only written when at least one item was processed.
2024
2125
.EXAMPLE
2226
Add-CIPPDbItem -TenantFilter 'contoso.onmicrosoft.com' -Type 'Groups' -Data $GroupsData
2327
28+
.EXAMPLE
29+
New-GraphGetRequest -uri '...' | Add-CIPPDbItem -TenantFilter 'contoso.onmicrosoft.com' -Type 'Users' -AddCount
30+
2431
.EXAMPLE
2532
Add-CIPPDbItem -TenantFilter 'contoso.onmicrosoft.com' -Type 'Groups' -Data $GroupsData -Count
2633
#>
@@ -32,92 +39,167 @@ function Add-CIPPDbItem {
3239
[Parameter(Mandatory = $true)]
3340
[string]$Type,
3441

35-
[Parameter(Mandatory = $true)]
42+
[Parameter(Mandatory = $true, ValueFromPipeline = $true)]
43+
[Alias('Data')]
3644
[AllowEmptyCollection()]
37-
[array]$Data,
45+
$InputObject,
46+
47+
[Parameter(Mandatory = $false)]
48+
[switch]$Count,
3849

3950
[Parameter(Mandatory = $false)]
40-
[switch]$Count
51+
[switch]$AddCount
4152
)
4253

43-
try {
54+
begin {
55+
# Set up pipeline state; counters are kept in a hashtable so the nested Invoke-FlushBatch helper can update them
4456
$Table = Get-CippTable -tablename 'CippReportingDB'
57+
$BatchAccumulator = [System.Collections.Generic.List[hashtable]]::new(500)
58+
$State = @{
59+
TotalProcessed = 0
60+
BatchNumber = 0
61+
}
4562

4663
# Helper function to format RowKey values by removing disallowed characters
4764
function Format-RowKey {
4865
param([string]$RowKey)
49-
50-
# Remove disallowed characters: / \ # ? and control characters (U+0000 to U+001F and U+007F to U+009F)
5166
$sanitized = $RowKey -replace '[/\\#?]', '_' -replace '[\u0000-\u001F\u007F-\u009F]', ''
52-
5367
return $sanitized
5468
}
5569

56-
if ($Count) {
57-
$Entity = @{
58-
PartitionKey = $TenantFilter
59-
RowKey = Format-RowKey "$Type-Count"
60-
DataCount = [int]$Data.Count
61-
}
70+
# Function to flush current batch
71+
function Invoke-FlushBatch {
72+
param($State)
73+
if ($BatchAccumulator.Count -eq 0) { return }
74+
75+
$State.BatchNumber++
76+
$batchSize = $BatchAccumulator.Count
77+
$MemoryBeforeGC = [System.GC]::GetTotalMemory($false)
78+
$flushStart = Get-Date
79+
80+
try {
81+
# Entities are already in the accumulator, just write them
82+
$writeStart = Get-Date
83+
Add-CIPPAzDataTableEntity @Table -Entity $BatchAccumulator.ToArray() -Force | Out-Null
84+
$writeEnd = Get-Date
85+
$writeDuration = [math]::Round(($writeEnd - $writeStart).TotalSeconds, 2)
86+
$State.TotalProcessed += $batchSize
87+
88+
} finally {
89+
# Clear and GC
90+
$gcStart = Get-Date
91+
$BatchAccumulator.Clear()
92+
93+
# Single GC pass is sufficient - aggressive GC was causing slowdown
94+
[System.GC]::Collect()
6295

63-
Add-CIPPAzDataTableEntity @Table -Entity $Entity -Force | Out-Null
96+
$flushEnd = Get-Date
97+
$gcDuration = [math]::Round(($flushEnd - $gcStart).TotalSeconds, 2)
98+
$flushDuration = [math]::Round(($flushEnd - $flushStart).TotalSeconds, 2)
99+
$MemoryAfterGC = [System.GC]::GetTotalMemory($false)
100+
$FreedMB = [math]::Round(($MemoryBeforeGC - $MemoryAfterGC) / 1MB, 2)
101+
$CurrentMemoryMB = [math]::Round($MemoryAfterGC / 1MB, 2)
102+
Write-Debug "Batch $($State.BatchNumber): ${flushDuration}s total (write: ${writeDuration}s, gc: ${gcDuration}s) | Processed: $($State.TotalProcessed) | Memory: ${CurrentMemoryMB}MB | Freed: ${FreedMB}MB"
103+
}
104+
}
64105

65-
} else {
66-
#Get the existing type entries and nuke them. This ensures we don't have stale data.
106+
if (-not $Count.IsPresent) {
107+
# Delete existing entries for this type
67108
$Filter = "PartitionKey eq '{0}' and RowKey ge '{1}-' and RowKey lt '{1}0'" -f $TenantFilter, $Type
68109
$ExistingEntities = Get-CIPPAzDataTableEntity @Table -Filter $Filter
69110
if ($ExistingEntities) {
70111
Remove-AzDataTableEntity @Table -Entity $ExistingEntities -Force | Out-Null
71112
}
113+
$AllocatedMemoryMB = [math]::Round([System.GC]::GetTotalMemory($false) / 1MB, 2)
114+
Write-Debug "Starting $Type import for $TenantFilter | Allocated Memory: ${AllocatedMemoryMB}MB | Batch Size: 500"
115+
}
116+
}
72117

73-
# Calculate batch size based on available memory
74-
$AvailableMemory = [System.GC]::GetTotalMemory($false)
75-
$AvailableMemoryMB = [math]::Round($AvailableMemory / 1MB, 2)
118+
process {
119+
# Process each item from pipeline
120+
if ($null -eq $InputObject) { return }
76121

77-
# Estimate item size from first item (with fallback)
78-
$EstimatedItemSizeBytes = 1KB # Default assumption
79-
if ($Data.Count -gt 0) {
80-
$SampleJson = $Data[0] | ConvertTo-Json -Depth 10 -Compress
81-
$EstimatedItemSizeBytes = [System.Text.Encoding]::UTF8.GetByteCount($SampleJson)
82-
}
122+
# If Count mode and InputObject is an integer, use it directly as count
123+
if ($Count.IsPresent -and $InputObject -is [int]) {
124+
$State.TotalProcessed = $InputObject
125+
return
126+
}
83127

84-
# Use 25% of available memory for batch processing, with min/max bounds
85-
$TargetBatchMemoryMB = [Math]::Max(50, $AvailableMemoryMB * 0.25)
86-
$CalculatedBatchSize = [Math]::Floor(($TargetBatchMemoryMB * 1MB) / $EstimatedItemSizeBytes)
87-
# Reduce max to 500 to prevent OOM with large datasets
88-
$BatchSize = [Math]::Max(100, [Math]::Min(500, $CalculatedBatchSize))
89-
90-
$TotalCount = $Data.Count
91-
$ProcessedCount = 0
92-
Write-Information "Adding $TotalCount items of type $Type to CIPP Reporting DB for tenant $TenantFilter | Available Memory: ${AvailableMemoryMB}MB | Target Memory: ${TargetBatchMemoryMB}MB | Calculated: $CalculatedBatchSize | Batch Size: $BatchSize (est. item size: $([math]::Round($EstimatedItemSizeBytes/1KB, 2))KB)"
93-
for ($i = 0; $i -lt $TotalCount; $i += $BatchSize) {
94-
$BatchEnd = [Math]::Min($i + $BatchSize, $TotalCount)
95-
$Batch = $Data[$i..($BatchEnd - 1)]
96-
97-
$Entities = foreach ($Item in $Batch) {
98-
$ItemId = $Item.id ?? $Item.ExternalDirectoryObjectId ?? $Item.Identity ?? $Item.skuId
99-
@{
100-
PartitionKey = $TenantFilter
101-
RowKey = Format-RowKey "$Type-$ItemId"
102-
Data = [string]($Item | ConvertTo-Json -Depth 10 -Compress)
103-
Type = $Type
104-
}
105-
}
128+
# Handle both single items and arrays (for backward compatibility)
129+
$ItemsToProcess = if ($InputObject -is [array]) {
130+
$InputObject
131+
} else {
132+
@($InputObject)
133+
}
106134

107-
Add-CIPPAzDataTableEntity @Table -Entity $Entities -Force | Out-Null
108-
$ProcessedCount += $Batch.Count
135+
# If Count mode, just count items without processing
136+
if ($Count.IsPresent) {
137+
$itemCount = if ($ItemsToProcess -is [array]) { $ItemsToProcess.Count } else { 1 }
138+
$State.TotalProcessed += $itemCount
139+
return
140+
}
109141

110-
# Clear batch variables to free memory
111-
$Entities = $null
112-
$Batch = $null
113-
[System.GC]::Collect()
142+
foreach ($Item in $ItemsToProcess) {
143+
if ($null -eq $Item) { continue }
144+
145+
# Convert to entity
146+
$ItemId = $Item.ExternalDirectoryObjectId ?? $Item.id ?? $Item.Identity ?? $Item.skuId
147+
$Entity = @{
148+
PartitionKey = $TenantFilter
149+
RowKey = Format-RowKey "$Type-$ItemId"
150+
Data = [string]($Item | ConvertTo-Json -Depth 10 -Compress)
151+
Type = $Type
114152
}
115153

154+
$BatchAccumulator.Add($Entity)
155+
156+
# Flush when batch reaches 500 items
157+
if ($BatchAccumulator.Count -ge 500) {
158+
Invoke-FlushBatch -State $State
159+
}
116160
}
117-
Write-LogMessage -API 'CIPPDbItem' -tenant $TenantFilter -message "Added $($Data.Count) items of type $Type$(if ($Count) { ' (count mode)' })" -sev Debug
161+
}
162+
163+
end {
164+
try {
165+
# Flush any remaining items in final partial batch
166+
if ($BatchAccumulator.Count -gt 0) {
167+
Invoke-FlushBatch -State $State
168+
}
169+
170+
if ($Count.IsPresent) {
171+
# Store count record
172+
$Entity = @{
173+
PartitionKey = $TenantFilter
174+
RowKey = Format-RowKey "$Type-Count"
175+
DataCount = [int]$State.TotalProcessed
176+
}
177+
Add-CIPPAzDataTableEntity @Table -Entity $Entity -Force | Out-Null
178+
}
118179

119-
} catch {
120-
Write-LogMessage -API 'CIPPDbItem' -tenant $TenantFilter -message "Failed to add items of type $Type : $($_.Exception.Message)" -sev Error -LogData (Get-CippException -Exception $_)
121-
throw
180+
Write-LogMessage -API 'CIPPDbItem' -tenant $TenantFilter `
181+
-message "Added $($State.TotalProcessed) items of type $Type$(if ($Count.IsPresent) { ' (count mode)' })" -sev Debug
182+
183+
} catch {
184+
Write-LogMessage -API 'CIPPDbItem' -tenant $TenantFilter `
185+
-message "Failed to add items of type $Type : $($_.Exception.Message)" -sev Error `
186+
-LogData (Get-CippException -Exception $_)
187+
Write-Debug "[Add-CIPPDbItem] $TenantFilter - $(Get-CippException -Exception $_ | ConvertTo-Json -Depth 5 -Compress)"
188+
throw
189+
} finally {
190+
# Record count if AddCount was specified
191+
if ($AddCount.IsPresent -and $State.TotalProcessed -gt 0) {
192+
try {
193+
Add-CIPPDbItem -TenantFilter $TenantFilter -Type $Type -InputObject $State.TotalProcessed -Count
194+
} catch {
195+
Write-LogMessage -API 'CIPPDbItem' -tenant $TenantFilter `
196+
-message "Failed to record count for $Type : $($_.Exception.Message)" -sev Warning
197+
}
198+
}
199+
200+
# Final cleanup
201+
$BatchAccumulator = $null
202+
[System.GC]::Collect()
203+
}
122204
}
123205
}

Modules/CIPPCore/Public/Entrypoints/Activity Triggers/Mailbox Permissions/Push-StoreMailboxPermissions.ps1

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,15 @@ function Push-StoreMailboxPermissions {
6969

7070
# Store all permissions together as MailboxPermissions
7171
if ($AllPermissions.Count -gt 0) {
72-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'MailboxPermissions' -Data $AllPermissions
73-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'MailboxPermissions' -Data $AllPermissions -Count
72+
$AllPermissions | Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'MailboxPermissions' -AddCount
7473
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message "Cached $($AllPermissions.Count) mailbox permission records" -sev Info
7574
} else {
7675
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'No mailbox permissions found to cache' -sev Info
7776
}
7877

7978
# Store calendar permissions separately
8079
if ($AllCalendarPermissions.Count -gt 0) {
81-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CalendarPermissions' -Data $AllCalendarPermissions
82-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CalendarPermissions' -Data $AllCalendarPermissions -Count
80+
$AllCalendarPermissions | Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CalendarPermissions' -AddCount
8381
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message "Cached $($AllCalendarPermissions.Count) calendar permission records" -sev Info
8482
} else {
8583
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'No calendar permissions found to cache' -sev Info

Modules/CIPPCore/Public/Set-CIPPDBCacheCASMailboxes.ps1

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,9 @@ function Set-CIPPDBCacheCASMailboxes {
1515
try {
1616
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'Caching CAS mailboxes' -sev Debug
1717

18-
# Use Generic List for better memory efficiency with large datasets
19-
$CASMailboxList = New-ExoRequest -tenantid $TenantFilter -cmdlet 'Get-CasMailbox'
20-
21-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CASMailbox' -Data $CASMailboxList
22-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CASMailbox' -Data $CASMailboxList -Count
23-
$CASMailboxList = $null
18+
# Stream CAS mailboxes directly to batch processor
19+
New-ExoRequest -tenantid $TenantFilter -cmdlet 'Get-CasMailbox' |
20+
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'CASMailbox' -AddCount
2421

2522
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'Cached CAS mailboxes successfully' -sev Debug
2623

Modules/CIPPCore/Public/Set-CIPPDBCacheMailboxes.ps1

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,8 @@ function Set-CIPPDBCacheMailboxes {
4444
MessageCopyForSentAsEnabled))
4545
}
4646

47-
$Mailboxes = $MailboxList.ToArray()
48-
$RawMailboxes = $null
49-
$MailboxList.Clear()
50-
$MailboxList = $null
47+
$Mailboxes | Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Mailboxes' -AddCount
5148

52-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Mailboxes' -Data $Mailboxes
53-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Mailboxes' -Data $Mailboxes -Count
5449
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message "Cached $($Mailboxes.Count) mailboxes successfully" -sev Debug
5550

5651
# Start orchestrator to cache mailbox permissions in batches

Modules/CIPPCore/Public/Set-CIPPDBCacheUsers.ps1

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ function Set-CIPPDBCacheUsers {
1515
try {
1616
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'Caching users' -sev Debug
1717

18-
$Users = New-GraphGetRequest -uri 'https://graph.microsoft.com/beta/users?$top=999' -tenantid $TenantFilter
19-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Users' -Data $Users
20-
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Users' -Data $Users -Count
21-
$Users = $null
18+
# Stream users directly from Graph API to batch processor
19+
New-GraphGetRequest -uri 'https://graph.microsoft.com/beta/users?$top=999' -tenantid $TenantFilter |
20+
Add-CIPPDbItem -TenantFilter $TenantFilter -Type 'Users' -AddCount
21+
2222
Write-LogMessage -API 'CIPPDBCache' -tenant $TenantFilter -message 'Cached users successfully' -sev Debug
2323

2424
} catch {

0 commit comments

Comments
 (0)