Skip to content

Commit 7322ce4

Browse files
committed
Update SpreadsheetWrangler.ps1
Improved performance of the SKU list lookup (from 22 minutes to 5 minutes) and ensured the GS spreadsheets are processed in numerical order.
1 parent 1cff1f0 commit 7322ce4

File tree

1 file changed

+95
-13
lines changed

1 file changed

+95
-13
lines changed

SpreadsheetWrangler.ps1

Lines changed: 95 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,21 @@ function Process-SKUList {
700700

701701
Write-Log "Imported SKU list with $($skuListData.Count) rows" "White"
702702

703-
# Get all combined spreadsheets
704-
$combinedFiles = Get-ChildItem -Path $CombinedSpreadsheetPath -Filter "Combined_Spreadsheet_*.xlsx"
703+
# Create a hashtable for fast SKU lookups indexed by TID
704+
Write-Log "Creating SKU lookup table for faster processing..." "White"
705+
$skuLookup = @{}
706+
foreach ($item in $skuListData) {
707+
if ($item.TID) {
708+
# Convert TID to string to ensure consistent lookup
709+
$tidKey = $item.TID.ToString().Trim()
710+
$skuLookup[$tidKey] = $item
711+
}
712+
}
713+
Write-Log "Created lookup table with $($skuLookup.Count) SKUs" "White"
714+
715+
# Get all combined spreadsheets and sort them numerically
716+
$combinedFiles = Get-ChildItem -Path $CombinedSpreadsheetPath -Filter "Combined_Spreadsheet_*.xlsx" |
717+
Sort-Object { [int]($_.Name -replace 'Combined_Spreadsheet_(\d+)\.xlsx', '$1') }
705718

706719
if ($combinedFiles.Count -eq 0) {
707720
Write-Log "No combined spreadsheets found in: $CombinedSpreadsheetPath" "Yellow"
@@ -713,6 +726,9 @@ function Process-SKUList {
713726
$totalFiles = $combinedFiles.Count
714727
$processedFiles = 0
715728

729+
# Create an array to hold all missing matches for the GS_Missing spreadsheet
730+
$missingData = @()
731+
716732
foreach ($combinedFile in $combinedFiles) {
717733
# Extract the number from the combined spreadsheet filename
718734
if ($combinedFile.Name -match "Combined_Spreadsheet_(\d+)\.xlsx") {
@@ -758,23 +774,27 @@ function Process-SKUList {
758774
continue
759775
}
760776

761-
# Find matching row(s) in SKU list
762-
$matchingRows = $skuListData | Where-Object { $_.'TID' -eq $tcgplayerId }
777+
# Use hashtable for fast lookup instead of filtering the entire SKU list
778+
# Convert TCGplayer Id to string to ensure consistent lookup
779+
$tcgplayerIdKey = $tcgplayerId.ToString().Trim()
780+
$matchedRow = $skuLookup[$tcgplayerIdKey]
763781

764-
if (-not $matchingRows -or $matchingRows.Count -eq 0) {
782+
if (-not $matchedRow) {
765783
Write-Log " No match found in SKU list for TCGplayer Id: $tcgplayerId" "Yellow"
766784
$noMatchCount++
785+
786+
# Add the unmatched row to the missingData array
787+
# Create a clone of the row to avoid reference issues
788+
$missingRow = [PSCustomObject]@{}
789+
foreach ($prop in $row.PSObject.Properties) {
790+
$missingRow | Add-Member -MemberType NoteProperty -Name $prop.Name -Value $prop.Value
791+
}
792+
$missingData += $missingRow
767793
continue
768794
}
769795

770-
if ($matchingRows.Count -gt 1) {
771-
Write-Log " Multiple matches found in SKU list for TCGplayer Id: $tcgplayerId" "Yellow"
772-
$multipleMatchCount++
773-
continue
774-
}
775-
776-
# Get the matched row
777-
$matchedRow = $matchingRows[0]
796+
# Check for multiple matches is no longer needed with hashtable approach
797+
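# as we're storing one SKU per TID in the hashtable (if the SKU list contains duplicate TIDs, later rows overwrite earlier ones, so the last row wins and no "multiple matches" warning is logged)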
778798

779799
# Extract required data
780800
$gmeSku = $matchedRow.'GME SKU'
@@ -853,11 +873,73 @@ function Process-SKUList {
853873
Write-Log " Could not extract number from filename: $($combinedFile.Name)" "Yellow"
854874
}
855875

876+
# If we found unmatched rows in this spreadsheet, add a separator for the next spreadsheet
877+
if ($noMatchCount -gt 0 -and $processedFiles -lt ($totalFiles - 1)) {
878+
# Get property names from the first row to ensure consistent structure
879+
if ($missingData.Count -gt 0) {
880+
$firstRow = $missingData[0]
881+
$propNames = $firstRow.PSObject.Properties.Name
882+
883+
# Create a separator row with the spreadsheet name in the first column
884+
$separatorRow = [PSCustomObject]@{}
885+
foreach ($propName in $propNames) {
886+
if ($propName -eq 'TCGplayer Id') {
887+
$separatorRow | Add-Member -MemberType NoteProperty -Name $propName -Value "COMBINED_SPREADSHEET_$fileNumber"
888+
} else {
889+
$separatorRow | Add-Member -MemberType NoteProperty -Name $propName -Value $null
890+
}
891+
}
892+
$missingData += $separatorRow
893+
}
894+
}
895+
856896
$processedFiles++
857897
$progressPercentage = [int](($processedFiles / $totalFiles) * 100)
858898
Update-ProgressBar $progressPercentage
859899
}
860900

901+
# Create the GS_Missing spreadsheet if we have any missing data
902+
if ($missingData.Count -gt 0) {
903+
try {
904+
$gsMissingFilePath = Join-Path -Path $FinalOutputPath -ChildPath "GS_Missing.xlsx"
905+
906+
# Ensure the export path exists
907+
$exportDir = Split-Path -Path $gsMissingFilePath -Parent
908+
if (-not (Test-Path -Path $exportDir)) {
909+
New-Item -Path $exportDir -ItemType Directory -Force | Out-Null
910+
}
911+
912+
Write-Log "Exporting $($missingData.Count) unmatched rows to GS_Missing.xlsx..." "White"
913+
914+
$exportParams = @{
915+
Path = $gsMissingFilePath
916+
WorksheetName = "Sheet1"
917+
AutoSize = $true
918+
TableName = "MissingData"
919+
TableStyle = "Medium2"
920+
ErrorAction = "Stop"
921+
}
922+
923+
# Export with error handling
924+
$missingData | Export-Excel @exportParams
925+
926+
Write-Log "Created GS_Missing.xlsx with $($missingData.Count) unmatched rows" "Green"
927+
} catch {
928+
Write-Log "Error creating GS_Missing.xlsx: $_" "Red"
929+
930+
# Fallback method if Export-Excel fails
931+
try {
932+
Write-Log "Attempting alternative export method..." "Yellow"
933+
$missingData | ConvertTo-Csv -NoTypeInformation | Out-File -FilePath "$FinalOutputPath\GS_Missing.csv" -Encoding UTF8
934+
Write-Log "Created GS_Missing.csv as fallback" "Green"
935+
} catch {
936+
Write-Log "Fallback export also failed: $_" "Red"
937+
}
938+
}
939+
} else {
940+
Write-Log "No unmatched rows found, GS_Missing.xlsx not created" "Yellow"
941+
}
942+
861943
Write-Log "SKU list processing completed." "Cyan"
862944
Update-ProgressBar 100
863945

0 commit comments

Comments
 (0)