Skip to content

Commit a1f1b26

Browse files
amalimov and openshift-merge-bot[bot]
authored and committed
ECOPROJECT-4173 | feat: Align "Complexity by OS" heuristics to those found in actual VMAs
This PR updates the complexity scores of known OSes according to ratings found in VMA files. Signed-off-by: Ami Malimovka <amalimov@redhat.com>
1 parent 4ccf2aa commit a1f1b26

File tree

11 files changed

+365
-299
lines changed

11 files changed

+365
-299
lines changed

internal/handlers/v1alpha1/estimation_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,8 @@ var _ = Describe("estimation handler", func() {
503503
// createTestInventoryForComplexityHandler has 3 distinct OS names
504504
Expect(response.OsRatings).To(HaveLen(3))
505505
Expect(response.OsRatings["Red Hat Enterprise Linux 9 (64-bit)"]).To(Equal(1))
506-
Expect(response.OsRatings["CentOS 7 (64-bit)"]).To(Equal(2))
507-
Expect(response.OsRatings["FreeBSD (64-bit)"]).To(Equal(0))
506+
Expect(response.OsRatings["CentOS 7 (64-bit)"]).To(Equal(1))
507+
Expect(response.OsRatings["FreeBSD (64-bit)"]).To(Equal(3))
508508
})
509509

510510
It("returns disk scores in canonical order 1 through 4", func() {

internal/service/estimation_test.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,18 @@ var _ = Describe("EstimationService", func() {
166166
result, err := estimationSrv.CalculateMigrationComplexity(ctx, assessmentID, clusterID)
167167

168168
Expect(err).To(BeNil())
169-
// score 0: FreeBSD (5 VMs, unclassified) — always first in canonical order
169+
// score 0: no unknown entries
170170
Expect(result.ComplexityByOS[0].Score).To(Equal(0))
171-
Expect(result.ComplexityByOS[0].VMCount).To(Equal(5))
172-
// score 1: Red Hat (100 VMs)
171+
Expect(result.ComplexityByOS[0].VMCount).To(Equal(0))
172+
// score 1: Red Hat (100 VMs) + CentOS 7 (20 VMs)
173173
Expect(result.ComplexityByOS[1].Score).To(Equal(1))
174-
Expect(result.ComplexityByOS[1].VMCount).To(Equal(100))
175-
// score 2: CentOS (20 VMs)
174+
Expect(result.ComplexityByOS[1].VMCount).To(Equal(120))
175+
// score 2: no medium entries
176176
Expect(result.ComplexityByOS[2].Score).To(Equal(2))
177-
Expect(result.ComplexityByOS[2].VMCount).To(Equal(20))
177+
Expect(result.ComplexityByOS[2].VMCount).To(Equal(0))
178+
// score 3: FreeBSD (5 VMs)
179+
Expect(result.ComplexityByOS[3].Score).To(Equal(3))
180+
Expect(result.ComplexityByOS[3].VMCount).To(Equal(5))
178181
})
179182

180183
It("maps disk tier labels to correct scores with correct size values", func() {
@@ -302,8 +305,8 @@ var _ = Describe("EstimationService", func() {
302305
// defaultOsInfo has 3 distinct OS names
303306
Expect(result.OSRatings).To(HaveLen(3))
304307
Expect(result.OSRatings["Red Hat Enterprise Linux 9 (64-bit)"]).To(Equal(1))
305-
Expect(result.OSRatings["CentOS 7 (64-bit)"]).To(Equal(2))
306-
Expect(result.OSRatings["FreeBSD (64-bit)"]).To(Equal(0))
308+
Expect(result.OSRatings["CentOS 7 (64-bit)"]).To(Equal(1))
309+
Expect(result.OSRatings["FreeBSD (64-bit)"]).To(Equal(3))
307310
})
308311
})
309312

pkg/duckdb_parser/builder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ func generateOSCaseClauses() string {
343343
for keyword, score := range complexity.OSDifficultyScores {
344344
level := scoreToLevel[score]
345345
clauses = append(clauses, fmt.Sprintf(
346-
" WHEN effective_os LIKE '%%%s%%' THEN '%s'", keyword, level))
346+
" WHEN LOWER(effective_os) LIKE '%%%s%%' THEN '%s'", strings.ToLower(keyword), level))
347347
}
348348
sort.Strings(clauses) // deterministic output
349349
return strings.Join(clauses, "\n")

pkg/duckdb_parser/inventory_builder_test.go

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -831,117 +831,3 @@ func TestBuildInventory_VMsWithSharedDisksCount(t *testing.T) {
831831
require.NoError(t, err)
832832
assert.Equal(t, 1, countCluster2, "VMs with shared disks in cluster2 only")
833833
}
834-
835-
// TestBuildInventory_ComplexityDistribution tests that complexity is computed and distributed correctly.
836-
func TestBuildInventory_ComplexityDistribution(t *testing.T) {
837-
parser, _, cleanup := setupTestParser(t, &testValidator{})
838-
defer cleanup()
839-
840-
// VMs with different OS types to test complexity classification
841-
vms := []map[string]string{
842-
// Red Hat (Easy OS) + small disk -> Easy (1)
843-
{"VM": "vm-rhel", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
844-
// Windows (Medium OS) + small disk -> Medium (2)
845-
{"VM": "vm-win", "VM ID": "vm-002", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Microsoft Windows Server 2019"},
846-
// Ubuntu (Hard OS) + small disk -> Medium (2)
847-
{"VM": "vm-ubuntu", "VM ID": "vm-003", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Ubuntu Linux 22.04"},
848-
// Oracle (Database) -> White Glove (4) regardless of disk
849-
{"VM": "vm-oracle", "VM ID": "vm-004", "Host": "esxi-host-1", "CPUs": "8", "Memory": "16384", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Oracle Linux 8"},
850-
// Unknown OS -> Unsupported (0)
851-
{"VM": "vm-unknown", "VM ID": "vm-005", "Host": "esxi-host-1", "CPUs": "2", "Memory": "4096", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Some Unknown OS"},
852-
}
853-
hosts := []map[string]string{
854-
{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
855-
}
856-
857-
tmpFile := createTestExcel(t, defaultStandardSheets(vms, hosts)...)
858-
859-
ctx := context.Background()
860-
_, err := parser.IngestRvTools(ctx, tmpFile)
861-
require.NoError(t, err)
862-
863-
inv, err := parser.BuildInventory(ctx)
864-
require.NoError(t, err)
865-
866-
// Verify complexity distribution is populated
867-
require.NotNil(t, inv.VCenter.VMs.DistributionByComplexity, "DistributionByComplexity should be populated")
868-
869-
dist := inv.VCenter.VMs.DistributionByComplexity
870-
// Complexity levels: 0=Unsupported, 1=Easy, 2=Medium, 3=Hard, 4=WhiteGlove
871-
// Expected: 1 Easy, 2 Medium (Windows + Ubuntu), 1 White Glove (Oracle), 1 Unsupported
872-
assert.Equal(t, 1, dist["0"], "1 VM with unknown OS should be Unsupported (0)")
873-
assert.Equal(t, 1, dist["1"], "1 VM with Red Hat should be Easy (1)")
874-
assert.Equal(t, 2, dist["2"], "2 VMs (Windows, Ubuntu) should be Medium (2)")
875-
assert.Equal(t, 1, dist["4"], "1 VM with Oracle should be WhiteGlove (4)")
876-
}
877-
878-
// TestComplexityDistribution_WithDiskSize tests that disk size affects complexity.
879-
func TestComplexityDistribution_WithDiskSize(t *testing.T) {
880-
parser, _, cleanup := setupTestParser(t, &testValidator{})
881-
defer cleanup()
882-
883-
// Red Hat VM with large disk should be Hard (3) due to disk size
884-
vms := []map[string]string{
885-
{"VM": "vm-rhel-large", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
886-
}
887-
hosts := []map[string]string{
888-
{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
889-
}
890-
891-
// Add large disk (25 TB = 25 * 1024 * 1024 MiB = 26214400 MiB) -> Hard disk tier
892-
vDiskHeaders := []string{
893-
"VM ID", "Disk Key", "Unit #", "Path", "Disk Path", "Capacity MiB",
894-
"Sharing mode", "Raw", "Shared Bus", "Disk Mode", "Disk UUID",
895-
"Thin", "Controller", "Label", "SCSI Unit #",
896-
}
897-
disks := []map[string]string{
898-
{"VM ID": "vm-001", "Disk Key": "2000", "Unit #": "0", "Capacity MiB": "26214400"},
899-
}
900-
901-
sheets := append(defaultStandardSheets(vms, hosts), NewExcelSheet("vDisk", vDiskHeaders, disks))
902-
tmpFile := createTestExcel(t, sheets...)
903-
904-
ctx := context.Background()
905-
_, err := parser.IngestRvTools(ctx, tmpFile)
906-
require.NoError(t, err)
907-
908-
dist, err := parser.ComplexityDistribution(ctx, Filters{})
909-
require.NoError(t, err)
910-
911-
// Red Hat (Easy OS) + Hard disk tier = Hard (3)
912-
assert.Equal(t, 1, dist["3"], "Red Hat with 25TB disk should be Hard (3)")
913-
assert.Equal(t, 0, dist["1"], "No VMs should be Easy (1) with large disk")
914-
}
915-
916-
// TestComplexityDistribution_ClusterFilter tests filtering by cluster.
917-
func TestComplexityDistribution_ClusterFilter(t *testing.T) {
918-
parser, _, cleanup := setupTestParser(t, &testValidator{})
919-
defer cleanup()
920-
921-
vms := []map[string]string{
922-
{"VM": "vm-rhel", "VM ID": "vm-001", "Host": "esxi-host-1", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster1", "Datacenter": "dc1", "OS according to the VMware Tools": "Red Hat Enterprise Linux 8"},
923-
{"VM": "vm-win", "VM ID": "vm-002", "Host": "esxi-host-2", "CPUs": "4", "Memory": "8192", "Powerstate": "poweredOn", "Cluster": "cluster2", "Datacenter": "dc1", "OS according to the VMware Tools": "Microsoft Windows Server 2019"},
924-
}
925-
hosts := []map[string]string{
926-
{"Datacenter": "dc1", "Cluster": "cluster1", "# Cores": "16", "# CPU": "2", "Object ID": "host-001", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-1", "Config status": "green"},
927-
{"Datacenter": "dc1", "Cluster": "cluster2", "# Cores": "16", "# CPU": "2", "Object ID": "host-002", "# Memory": "65536", "Model": "ESXi", "Vendor": "VMware", "Host": "esxi-host-2", "Config status": "green"},
928-
}
929-
930-
tmpFile := createTestExcel(t, defaultStandardSheets(vms, hosts)...)
931-
932-
ctx := context.Background()
933-
_, err := parser.IngestRvTools(ctx, tmpFile)
934-
require.NoError(t, err)
935-
936-
// Filter by cluster1 - should only see the Red Hat VM (Easy=1)
937-
dist1, err := parser.ComplexityDistribution(ctx, Filters{Cluster: "cluster1"})
938-
require.NoError(t, err)
939-
assert.Equal(t, 1, dist1["1"], "cluster1 should have 1 Easy (1) VM")
940-
assert.Equal(t, 0, dist1["2"], "cluster1 should have no Medium (2) VMs")
941-
942-
// Filter by cluster2 - should only see the Windows VM (Medium=2)
943-
dist2, err := parser.ComplexityDistribution(ctx, Filters{Cluster: "cluster2"})
944-
require.NoError(t, err)
945-
assert.Equal(t, 0, dist2["1"], "cluster2 should have no Easy (1) VMs")
946-
assert.Equal(t, 1, dist2["2"], "cluster2 should have 1 Medium (2) VM")
947-
}
Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,50 @@
1-
# VM Migration Complexity Scoring
1+
# Complexity scoring in the duckdb_parser package
22

3-
This document describes how migration complexity is calculated for each VM.
3+
Score definitions and OS/disk heuristics are owned by the `pkg/estimations/complexity` package — see its README for the full reference.
44

5-
## Overview
5+
This document covers only what is specific to how complexity is computed during ingestion.
66

7-
Each VM is assigned a complexity score (0-4) based on two factors:
8-
- **OS type** - The operating system running on the VM
9-
- **Total disk size** - Combined capacity of all attached disks
7+
---
108

11-
## Complexity Levels
9+
## How OS classification works in the templates
1210

13-
| Level | Value | Description |
14-
|-------|-------|-------------|
15-
| Unsupported | 0 | OS not recognized or not supported for migration |
16-
| Easy | 1 | Simple migration with minimal effort |
17-
| Medium | 2 | Standard migration requiring some attention |
18-
| Hard | 3 | Complex migration requiring significant effort |
19-
| White Glove | 4 | Requires manual intervention and special handling |
11+
OS scoring is not hardcoded in the SQL templates. Instead, the CASE clauses are **generated at query-build time** from `OSDifficultyScores` in `pkg/estimations/complexity/complexity.go`.
2012

21-
## OS Classification
13+
The template placeholder `{{.OSCaseClauses}}` in `populate_complexity.go.tmpl` is replaced with one `WHEN ... THEN '...'` clause per entry in that map, ensuring a single source of truth for OS scores across both the API response path and the ingestion path.
2214

23-
The OS is determined from VMware Tools or configuration file data.
15+
## Disk size tiers in SQL
2416

25-
**Source of truth:** `pkg/estimations/complexity/complexity.go` — see `OSDifficultyScores` map.
17+
The disk thresholds are expressed directly in SQL within `populate_complexity.go.tmpl`:
2618

27-
The SQL template (`populate_complexity.go.tmpl`) auto-generates OS CASE clauses from this map at query build time, ensuring a single source of truth.
19+
| SQL condition | Tier label | Score |
20+
|---|---|---|
21+
| `total_disk_tb < 10` | Easy | 1 |
22+
| `total_disk_tb < 20` | Medium | 2 |
23+
| `total_disk_tb < 50` | Hard | 3 |
24+
| `else` | White Glove | 4 |
2825

29-
| Score | Patterns |
30-
|-------|----------|
31-
| 1 (Easy) | Red Hat, Rocky Linux |
32-
| 2 (Medium) | CentOS, Windows |
33-
| 3 (Hard) | Ubuntu, SUSE Linux Enterprise |
34-
| 4 (Database/White Glove) | Oracle, Microsoft SQL |
35-
| 0 (Unsupported) | Any other OS |
26+
Note: these thresholds are currently hardcoded in the template separately from `DiskSizeScores` in the complexity package. They must be kept in sync manually if the tier boundaries change.
3627

37-
## Disk Size Tiers
28+
## Combined OS × disk matrix
3829

39-
Total disk capacity (sum of all vdisks) determines the disk tier:
30+
The final per-VM score is computed in `populate_complexity.go.tmpl` by combining the OS level and disk level:
4031

41-
| Tier | Size Range |
42-
|------|------------|
43-
| Easy | 0 - 10 TB |
44-
| Medium | 10 - 20 TB |
45-
| Hard | 20 - 50 TB |
46-
| White Glove | > 50 TB |
32+
| OS level | Disk level | Final score |
33+
|---|---|---|
34+
| unknown | any | 0 |
35+
| database | any | 4 |
36+
| easy | easy / medium | 1 |
37+
| easy | hard / white glove | 3 |
38+
| medium | easy / medium | 2 |
39+
| medium | hard / white glove | 3 |
40+
| hard | easy / medium | 2 |
41+
| hard | hard / white glove | 3 |
4742

48-
## Combined Complexity Matrix
43+
The result is stored in the `OsDiskComplexity` column of the `vinfo` table during ingestion.
4944

50-
The final complexity is calculated by combining OS and disk levels:
45+
## Relevant templates
5146

52-
| OS Level | Disk Level | Final Complexity |
53-
|----------|------------|------------------|
54-
| Unsupported | Any | 0 (Unsupported) |
55-
| Database | Any | 4 (White Glove) |
56-
| Easy | Easy/Medium | 1 (Easy) |
57-
| Easy | Hard/WG | 3 (Hard) |
58-
| Medium | Easy/Medium | 2 (Medium) |
59-
| Medium | Hard/WG | 3 (Hard) |
60-
| Hard | Easy/Medium | 2 (Medium) |
61-
| Hard | Hard/WG | 3 (Hard) |
62-
63-
## Implementation
64-
65-
The complexity is computed via SQL in `populate_complexity.go.tmpl` during data ingestion. The computed value is stored in the `OsDiskComplexity` column of the `vinfo` table.
66-
67-
The distribution query (`complexity_distribution_query.go.tmpl`) aggregates VMs by complexity level for reporting.
47+
| Template | Purpose |
48+
|---|---|
49+
| `populate_complexity.go.tmpl` | Computes and stores `OsDiskComplexity` for every VM row |
50+
| `complexity_distribution_query.go.tmpl` | Aggregates VM counts by `OsDiskComplexity` for reporting |

pkg/duckdb_parser/templates/complexity_distribution_query.go.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Complexity Distribution Query Template - Returns VM distribution by migration complexity level.
33
44
Complexity levels:
5-
0 = Unsupported
5+
0 = Unknown
66
1 = Easy
77
2 = Medium
88
3 = Hard

pkg/duckdb_parser/templates/populate_complexity.go.tmpl

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,13 @@
11
{{- /*
22
Populate Complexity Template - Computes per-VM migration complexity from OS type and disk size.
33
4-
Complexity levels (stored as INTEGER):
5-
0 = Unsupported
6-
1 = Easy
7-
2 = Medium
8-
3 = Hard
9-
4 = White Glove
4+
Complexity levels (stored as INTEGER): 0=Unknown, 1=Easy, 2=Medium, 3=Hard, 4=WhiteGlove.
5+
See pkg/estimations/complexity/README.md for score definitions and heuristics.
106
11-
OS difficulty mapping:
12-
Easy: Red Hat, Rocky Linux
13-
Medium: CentOS, Windows
14-
Hard: Ubuntu, SUSE Linux Enterprise
15-
Database: Oracle, Microsoft SQL (always White Glove regardless of disk)
16-
Unsupported: anything else (always 0)
7+
OS CASE clauses are injected via {{.OSCaseClauses}}, generated from OSDifficultyScores at
8+
query-build time. Disk thresholds and the OS×disk combination matrix are hardcoded below.
179
18-
Disk size tiers (total disk capacity from vdisk, converted to TB):
19-
Easy: 0-10 TB
20-
Medium: 10-20 TB
21-
Hard: 20-50 TB
22-
White Glove: >50 TB
23-
24-
Combined matrix:
25-
OS Easy + Disk Easy/Medium → 1 (Easy)
26-
OS Easy + Disk Hard/WG → 3 (Hard)
27-
OS Medium + Disk Easy/Medium → 2 (Medium)
28-
OS Medium + Disk Hard/WG → 3 (Hard)
29-
OS Hard + Disk Easy/Medium → 2 (Medium)
30-
OS Hard + Disk Hard/WG → 3 (Hard)
31-
OS Database (any disk) → 4 (White Glove)
32-
OS Unsupported (any disk) → 0
33-
34-
No template parameters required.
10+
No caller-supplied template parameters required (OSCaseClauses is pre-rendered before template execution).
3511
*/ -}}
3612
WITH vm_os AS (
3713
SELECT

0 commit comments

Comments
 (0)