kubev2v
diff --git a/internal/handlers/v1alpha1/estimation_test.go b/internal/handlers/v1alpha1/estimation_test.go
Lines changed: 2 additions & 2 deletions
diff --git a/internal/service/estimation_test.go b/internal/service/estimation_test.go
Lines changed: 11 additions & 8 deletions
diff --git a/pkg/duckdb_parser/builder.go b/pkg/duckdb_parser/builder.go
Lines changed: 1 addition & 1 deletion
diff --git a/pkg/duckdb_parser/inventory_builder_test.go b/pkg/duckdb_parser/inventory_builder_test.go
Lines changed: 0 additions & 114 deletions
diff --git a/pkg/duckdb_parser/templates/COMPLEXITY.md b/pkg/duckdb_parser/templates/COMPLEXITY.md
Lines changed: 34 additions & 51 deletions
diff --git a/pkg/duckdb_parser/templates/complexity_distribution_query.go.tmpl b/pkg/duckdb_parser/templates/complexity_distribution_query.go.tmpl
Lines changed: 1 addition & 1 deletion
diff --git a/pkg/duckdb_parser/templates/populate_complexity.go.tmpl b/pkg/duckdb_parser/templates/populate_complexity.go.tmpl
Lines changed: 5 additions & 29 deletions
@@ -503,8 +503,8 @@ var _ = Describe("estimation handler", func() {
 				// createTestInventoryForComplexityHandler has 3 distinct OS names
 				Expect(response.OsRatings).To(HaveLen(3))
 				Expect(response.OsRatings["Red Hat Enterprise Linux 9 (64-bit)"]).To(Equal(1))
-				Expect(response.OsRatings["CentOS 7 (64-bit)"]).To(Equal(2))
-				Expect(response.OsRatings["FreeBSD (64-bit)"]).To(Equal(0))
+				Expect(response.OsRatings["CentOS 7 (64-bit)"]).To(Equal(1))
+				Expect(response.OsRatings["FreeBSD (64-bit)"]).To(Equal(3))
 			})
 
 			It("returns disk scores in canonical order 1 through 4", func() {
 
@@ -166,15 +166,18 @@ var _ = Describe("EstimationService", func() {
 				result, err := estimationSrv.CalculateMigrationComplexity(ctx, assessmentID, clusterID)
 
 				Expect(err).To(BeNil())
-				// score 0: FreeBSD (5 VMs, unclassified) — always first in canonical order
+				// score 0: no unknown entries
 				Expect(result.ComplexityByOS[0].Score).To(Equal(0))
-				Expect(result.ComplexityByOS[0].VMCount).To(Equal(5))
-				// score 1: Red Hat (100 VMs)
+				Expect(result.ComplexityByOS[0].VMCount).To(Equal(0))
+				// score 1: Red Hat (100 VMs) + CentOS 7 (20 VMs)
 				Expect(result.ComplexityByOS[1].Score).To(Equal(1))
-				Expect(result.ComplexityByOS[1].VMCount).To(Equal(100))
-				// score 2: CentOS (20 VMs)
+				Expect(result.ComplexityByOS[1].VMCount).To(Equal(120))
+				// score 2: no medium entries
 				Expect(result.ComplexityByOS[2].Score).To(Equal(2))
-				Expect(result.ComplexityByOS[2].VMCount).To(Equal(20))
+				Expect(result.ComplexityByOS[2].VMCount).To(Equal(0))
+				// score 3: FreeBSD (5 VMs)
+				Expect(result.ComplexityByOS[3].Score).To(Equal(3))
+				Expect(result.ComplexityByOS[3].VMCount).To(Equal(5))
 			})
 
 			It("maps disk tier labels to correct scores with correct size values", func() {
@@ -302,8 +305,8 @@ var _ = Describe("EstimationService", func() {
 				// defaultOsInfo has 3 distinct OS names
 				Expect(result.OSRatings).To(HaveLen(3))
 				Expect(result.OSRatings["Red Hat Enterprise Linux 9 (64-bit)"]).To(Equal(1))
-				Expect(result.OSRatings["CentOS 7 (64-bit)"]).To(Equal(2))
-				Expect(result.OSRatings["FreeBSD (64-bit)"]).To(Equal(0))
+				Expect(result.OSRatings["CentOS 7 (64-bit)"]).To(Equal(1))
+				Expect(result.OSRatings["FreeBSD (64-bit)"]).To(Equal(3))
 			})
 		})
 
 
@@ -343,7 +343,7 @@ func generateOSCaseClauses() string {
 	for keyword, score := range complexity.OSDifficultyScores {
 		level := scoreToLevel[score]
 		clauses = append(clauses, fmt.Sprintf(
-			"            WHEN effective_os LIKE '%%%s%%' THEN '%s'", keyword, level))
+			"            WHEN LOWER(effective_os) LIKE '%%%s%%' THEN '%s'", strings.ToLower(keyword), level))
 	}
 	sort.Strings(clauses) // deterministic output
 	return strings.Join(clauses, "\n")
 
@@ -831,117 +831,3 @@ func TestBuildInventory_VMsWithSharedDisksCount(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, 1, countCluster2, "VMs with shared disks in cluster2 only")
 }
-
-// TestBuildInventory_ComplexityDistribution tests that complexity is computed and distributed correctly.
-func TestBuildInventory_ComplexityDistribution(t *testing.T) {
-	parser, _, cleanup := setupTestParser(t, &testValidator{})
-	defer cleanup()
-
-	// VMs with different OS types to test complexity classification
-	vms := []map[string]string{
-		// Red Hat (Easy OS) + small disk -> Easy (1)
-		{"VM": "vm-rhel", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
-		// Windows (Medium OS) + small disk -> Medium (2)
-		{"VM": "vm-win", "VM ID": "vm-002", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Microsoft Windows Server 2019"},
-		// Ubuntu (Hard OS) + small disk -> Medium (2)
-		{"VM": "vm-ubuntu", "VM ID": "vm-003", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Ubuntu Linux 22.04"},
-		// Oracle (Database) -> White Glove (4) regardless of disk
-		{"VM": "vm-oracle", "VM ID": "vm-004", "Host": "esxi-host-1", "CPUs": "8", "Memory": "16384", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Oracle Linux 8"},
-		// Unknown OS -> Unsupported (0)
-		{"VM": "vm-unknown", "VM ID": "vm-005", "Host": "esxi-host-1", "CPUs": "2", "Memory": "4096", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Some Unknown OS"},
-	}
-	hosts := []map[string]string{
-		{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
-	}
-
-	tmpFile := createTestExcel(t, defaultStandardSheets(vms, hosts)...)
-
-	ctx := context.Background()
-	_, err := parser.IngestRvTools(ctx, tmpFile)
-	require.NoError(t, err)
-
-	inv, err := parser.BuildInventory(ctx)
-	require.NoError(t, err)
-
-	// Verify complexity distribution is populated
-	require.NotNil(t, inv.VCenter.VMs.DistributionByComplexity, "DistributionByComplexity should be populated")
-
-	dist := inv.VCenter.VMs.DistributionByComplexity
-	// Complexity levels: 0=Unsupported, 1=Easy, 2=Medium, 3=Hard, 4=WhiteGlove
-	// Expected: 1 Easy, 2 Medium (Windows + Ubuntu), 1 White Glove (Oracle), 1 Unsupported
-	assert.Equal(t, 1, dist["0"], "1 VM with unknown OS should be Unsupported (0)")
-	assert.Equal(t, 1, dist["1"], "1 VM with Red Hat should be Easy (1)")
-	assert.Equal(t, 2, dist["2"], "2 VMs (Windows, Ubuntu) should be Medium (2)")
-	assert.Equal(t, 1, dist["4"], "1 VM with Oracle should be WhiteGlove (4)")
-}
-
-// TestComplexityDistribution_WithDiskSize tests that disk size affects complexity.
-func TestComplexityDistribution_WithDiskSize(t *testing.T) {
-	parser, _, cleanup := setupTestParser(t, &testValidator{})
-	defer cleanup()
-
-	// Red Hat VM with large disk should be Hard (3) due to disk size
-	vms := []map[string]string{
-		{"VM": "vm-rhel-large", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
-	}
-	hosts := []map[string]string{
-		{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
-	}
-
-	// Add large disk (25 TB = 25 * 1024 * 1024 MiB = 26214400 MiB) -> Hard disk tier
-	vDiskHeaders := []string{
-		"VM ID", "Disk Key", "Unit #", "Path", "Disk Path", "Capacity MiB",
-		"Sharing mode", "Raw", "Shared Bus", "Disk Mode", "Disk UUID",
-		"Thin", "Controller", "Label", "SCSI Unit #",
-	}
-	disks := []map[string]string{
-		{"VM ID": "vm-001", "Disk Key": "2000", "Unit #": "0", "Capacity MiB": "26214400"},
-	}
-
-	sheets := append(defaultStandardSheets(vms, hosts), NewExcelSheet("vDisk", vDiskHeaders, disks))
-	tmpFile := createTestExcel(t, sheets...)
-
-	ctx := context.Background()
-	_, err := parser.IngestRvTools(ctx, tmpFile)
-	require.NoError(t, err)
-
-	dist, err := parser.ComplexityDistribution(ctx, Filters{})
-	require.NoError(t, err)
-
-	// Red Hat (Easy OS) + Hard disk tier = Hard (3)
-	assert.Equal(t, 1, dist["3"], "Red Hat with 25TB disk should be Hard (3)")
-	assert.Equal(t, 0, dist["1"], "No VMs should be Easy (1) with large disk")
-}
-
-// TestComplexityDistribution_ClusterFilter tests filtering by cluster.
-func TestComplexityDistribution_ClusterFilter(t *testing.T) {
-	parser, _, cleanup := setupTestParser(t, &testValidator{})
-	defer cleanup()
-
-	vms := []map[string]string{
-		{"VM": "vm-rhel", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
-		{"VM": "vm-win", "VM ID": "vm-002", "Host": "esxi-host-2", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster2", "Datacenter": "dc1", "OS according to the VMware Tools": "Microsoft Windows Server 2019"},
-	}
-	hosts := []map[string]string{
-		{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
-		{"Datacenter": "dc1", "Cluster": "cluster2", "# Cores": "16", "# CPU": "2", "Object ID": "host-002", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-2", "Config status": "green"},
-	}
-
-	tmpFile := createTestExcel(t, defaultStandardSheets(vms, hosts)...)
-
-	ctx := context.Background()
-	_, err := parser.IngestRvTools(ctx, tmpFile)
-	require.NoError(t, err)
-
-	// Filter by cluster1 - should only see the Red Hat VM (Easy=1)
-	dist1, err := parser.ComplexityDistribution(ctx, Filters{Cluster: "cluster1"})
-	require.NoError(t, err)
-	assert.Equal(t, 1, dist1["1"], "cluster1 should have 1 Easy (1) VM")
-	assert.Equal(t, 0, dist1["2"], "cluster1 should have no Medium (2) VMs")
-
-	// Filter by cluster2 - should only see the Windows VM (Medium=2)
-	dist2, err := parser.ComplexityDistribution(ctx, Filters{Cluster: "cluster2"})
-	require.NoError(t, err)
-	assert.Equal(t, 0, dist2["1"], "cluster2 should have no Easy (1) VMs")
-	assert.Equal(t, 1, dist2["2"], "cluster2 should have 1 Medium (2) VM")
-}
@@ -1,67 +1,50 @@
-# VM Migration Complexity Scoring
+# Complexity scoring in the duckdb_parser package
 
-This document describes how migration complexity is calculated for each VM.
+Score definitions and OS/disk heuristics are owned by the `pkg/estimations/complexity` package — see its README for the full reference.
 
-## Overview
+This document covers only what is specific to how complexity is computed during ingestion.
 
-Each VM is assigned a complexity score (0-4) based on two factors:
-- **OS type** - The operating system running on the VM
-- **Total disk size** - Combined capacity of all attached disks
+---
 
-## Complexity Levels
+## How OS classification works in the templates
 
-| Level | Value | Description |
-|-------|-------|-------------|
-| Unsupported | 0 | OS not recognized or not supported for migration |
-| Easy | 1 | Simple migration with minimal effort |
-| Medium | 2 | Standard migration requiring some attention |
-| Hard | 3 | Complex migration requiring significant effort |
-| White Glove | 4 | Requires manual intervention and special handling |
+OS scoring is not hardcoded in the SQL templates. Instead, the CASE clauses are **generated at query-build time** from `OSDifficultyScores` in `pkg/estimations/complexity/complexity.go`.
 
-## OS Classification
+The template placeholder `{{.OSCaseClauses}}` in `populate_complexity.go.tmpl` is replaced with one `WHEN ... THEN '...'` clause per entry in that map, ensuring a single source of truth for OS scores across both the API response path and the ingestion path.
 
-The OS is determined from VMware Tools or configuration file data.
+## Disk size tiers in SQL
 
-**Source of truth:** `pkg/estimations/complexity/complexity.go` — see `OSDifficultyScores` map.
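+The disk thresholds are expressed directly in SQL within `populate_complexity.go.tmpl`. The conditions below are listed in evaluation order, so each row applies only to VMs that no earlier row matched: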
 
-The SQL template (`populate_complexity.go.tmpl`) auto-generates OS CASE clauses from this map at query build time, ensuring a single source of truth.
+| SQL condition | Tier label | Score |
+|---|---|---|
+| `total_disk_tb < 10` | Easy | 1 |
+| `total_disk_tb < 20` | Medium | 2 |
+| `total_disk_tb < 50` | Hard | 3 |
+| `else` | White Glove | 4 |
 
-| Score | Patterns |
-|-------|----------|
-| 1 (Easy) | Red Hat, Rocky Linux |
-| 2 (Medium) | CentOS, Windows |
-| 3 (Hard) | Ubuntu, SUSE Linux Enterprise |
-| 4 (Database/White Glove) | Oracle, Microsoft SQL |
-| 0 (Unsupported) | Any other OS |
+Note: these thresholds are currently hardcoded in the template separately from `DiskSizeScores` in the complexity package. They must be kept in sync manually if the tier boundaries change.
 
-## Disk Size Tiers
+## Combined OS × disk matrix
 
-Total disk capacity (sum of all vdisks) determines the disk tier:
+The final per-VM score is computed in `populate_complexity.go.tmpl` by combining the OS level and disk level:
 
-| Tier | Size Range |
-|------|------------|
-| Easy | 0 - 10 TB |
-| Medium | 10 - 20 TB |
-| Hard | 20 - 50 TB |
-| White Glove | > 50 TB |
+| OS level | Disk level | Final score |
+|---|---|---|
+| unknown | any | 0 |
+| database | any | 4 |
+| easy | easy / medium | 1 |
+| easy | hard / white glove | 3 |
+| medium | easy / medium | 2 |
+| medium | hard / white glove | 3 |
+| hard | easy / medium | 2 |
+| hard | hard / white glove | 3 |
 
-## Combined Complexity Matrix
+The result is stored in the `OsDiskComplexity` column of the `vinfo` table during ingestion.
 
-The final complexity is calculated by combining OS and disk levels:
+## Relevant templates
 
-| OS Level | Disk Level | Final Complexity |
-|----------|------------|------------------|
-| Unsupported | Any | 0 (Unsupported) |
-| Database | Any | 4 (White Glove) |
-| Easy | Easy/Medium | 1 (Easy) |
-| Easy | Hard/WG | 3 (Hard) |
-| Medium | Easy/Medium | 2 (Medium) |
-| Medium | Hard/WG | 3 (Hard) |
-| Hard | Easy/Medium | 2 (Medium) |
-| Hard | Hard/WG | 3 (Hard) |
-
-## Implementation
-
-The complexity is computed via SQL in `populate_complexity.go.tmpl` during data ingestion. The computed value is stored in the `OsDiskComplexity` column of the `vinfo` table.
-
-The distribution query (`complexity_distribution_query.go.tmpl`) aggregates VMs by complexity level for reporting.
+| Template | Purpose |
+|---|---|
+| `populate_complexity.go.tmpl` | Computes and stores `OsDiskComplexity` for every VM row |
+| `complexity_distribution_query.go.tmpl` | Aggregates VM counts by `OsDiskComplexity` for reporting |
@@ -2,7 +2,7 @@
 Complexity Distribution Query Template - Returns VM distribution by migration complexity level.
 
 Complexity levels:
-  0 = Unsupported
+  0 = Unknown
   1 = Easy
   2 = Medium
   3 = Hard
 
@@ -1,37 +1,13 @@
 {{- /*
 Populate Complexity Template - Computes per-VM migration complexity from OS type and disk size.
 
-Complexity levels (stored as INTEGER):
-  0 = Unsupported
-  1 = Easy
-  2 = Medium
-  3 = Hard
-  4 = White Glove
+Complexity levels (stored as INTEGER): 0=Unknown, 1=Easy, 2=Medium, 3=Hard, 4=WhiteGlove.
+See pkg/estimations/complexity/README.md for score definitions and heuristics.
 
-OS difficulty mapping:
-  Easy:       Red Hat, Rocky Linux
-  Medium:     CentOS, Windows
-  Hard:       Ubuntu, SUSE Linux Enterprise
-  Database:   Oracle, Microsoft SQL (always White Glove regardless of disk)
-  Unsupported: anything else (always 0)
+OS CASE clauses are injected via {{.OSCaseClauses}}, generated from OSDifficultyScores at
+query-build time. Disk thresholds and the OS×disk combination matrix are hardcoded below.
 
-Disk size tiers (total disk capacity from vdisk, converted to TB):
-  Easy:       0-10 TB
-  Medium:     10-20 TB
-  Hard:       20-50 TB
-  White Glove: >50 TB
-
-Combined matrix:
-  OS Easy   + Disk Easy/Medium  → 1 (Easy)
-  OS Easy   + Disk Hard/WG      → 3 (Hard)
-  OS Medium + Disk Easy/Medium  → 2 (Medium)
-  OS Medium + Disk Hard/WG      → 3 (Hard)
-  OS Hard   + Disk Easy/Medium  → 2 (Medium)
-  OS Hard   + Disk Hard/WG      → 3 (Hard)
-  OS Database (any disk)        → 4 (White Glove)
-  OS Unsupported (any disk)     → 0
-
-No template parameters required.
+No template parameters required (OSCaseClauses is pre-rendered before template execution).
 */ -}}
 WITH vm_os AS (
     SELECT
Original file line number	Diff line number	Diff line change
`@@ -343,7 +343,7 @@ func generateOSCaseClauses() string {`
`343`	`343`	`for keyword, score := range complexity.OSDifficultyScores {`
`344`	`344`	`level := scoreToLevel[score]`
`345`	`345`	`clauses = append(clauses, fmt.Sprintf(`
`346`		`- " WHEN effective_os LIKE '%%%s%%' THEN '%s'", keyword, level))`
	`346`	`+ " WHEN LOWER(effective_os) LIKE '%%%s%%' THEN '%s'", strings.ToLower(keyword), level))`
`347`	`347`	`}`
`348`	`348`	`sort.Strings(clauses) // deterministic output`
`349`	`349`	`return strings.Join(clauses, "\n")`