Support additional GPU config values (#90)

bashofmann · web-flow · commit d1e11b34dd2d · 2025-03-07T21:02:43.000+01:00
* Support additional GPU config values See https://qdrant.tech/documentation/guides/running-with-gpu/ * Update docs * Add validation
diff --git a/api/v1/qdrantcluster_types.go b/api/v1/qdrantcluster_types.go
@@ -181,9 +181,44 @@ func (s QdrantClusterSpec) GetServicePerNode() bool {
 }
 
 type GPU struct {
-	// GPUType specifies the type of the GPU to use.
+	// GPUType specifies the type of the GPU to use. If set, GPU indexing is enabled.
 	// +kubebuilder:validation:Enum=nvidia;amd
 	GPUType GPUType `json:"gpuType"`
+	// ForceHalfPrecision for `f32` values while indexing.
+	// `f16` conversion will take place
+	// only inside GPU memory and won't affect storage type.
+	// +kubebuilder:default=false
+	ForceHalfPrecision bool `json:"forceHalfPrecision"`
+	// DeviceFilter for GPU devices by hardware name. Case-insensitive.
+	// List of substrings to match against the gpu device name.
+	// Example: ["nvidia"]
+	// If not specified, all devices are accepted.
+	// +kubebuilder:validation:MinItems:=1
+	// +optional
+	DeviceFilter []string `json:"deviceFilter,omitempty"`
+	// Devices is a list of explicit GPU devices to use.
+	// If the host has multiple GPUs, this option allows selecting specific devices
+	// by their index in the list of found devices.
+	// If `deviceFilter` is set, indexes are applied after filtering.
+	// If not specified, all devices are accepted.
+	// +kubebuilder:validation:MinItems:=1
+	// +optional
+	Devices []string `json:"devices,omitempty"`
+	// ParallelIndexes is the number of parallel indexes to run on the GPU.
+	// +kubebuilder:default=1
+	// +kubebuilder:validation:Minimum:=1
+	ParallelIndexes int `json:"parallelIndexes"`
+	// GroupsCount is the number of Vulkan "groups" used on the GPU.
+	// In other words, how many points can be indexed in parallel by the GPU.
+	// Optimal value might depend on the GPU model.
+	// Proportional, but not necessarily equal, to the physical number of warps.
+	// Do not change this value unless you know what you are doing.
+	// +optional
+	// +kubebuilder:validation:Minimum:=1
+	GroupsCount int `json:"groupsCount,omitempty"`
+	// AllowIntegrated specifies whether to allow integrated GPUs to be used.
+	// +kubebuilder:default=false
+	AllowIntegrated bool `json:"allowIntegrated"`
 }
 
 func (g *GPU) GetGPUType() GPUType {
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
diff --git a/charts/qdrant-kubernetes-api/templates/region-crds/qdrant.io_qdrantclusters.yaml b/charts/qdrant-kubernetes-api/templates/region-crds/qdrant.io_qdrantclusters.yaml
@@ -300,14 +300,66 @@ spec:
                 description: GPU specifies GPU configuration for the cluster. If this
                   field is not set, no GPU will be used.
                 properties:
+                  allowIntegrated:
+                    default: false
+                    description: AllowIntegrated specifies whether to allow integrated
+                      GPUs to be used.
+                    type: boolean
+                  deviceFilter:
+                    description: |-
+                      DeviceFilter for GPU devices by hardware name. Case-insensitive.
+                      List of substrings to match against the gpu device name.
+                      Example: ["nvidia"]
+                      If not specified, all devices are accepted.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                  devices:
+                    description: |-
+                      Devices is a list of explicit GPU devices to use.
+                      If the host has multiple GPUs, this option allows selecting specific devices
+                      by their index in the list of found devices.
+                      If `deviceFilter` is set, indexes are applied after filtering.
+                      If not specified, all devices are accepted.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                  forceHalfPrecision:
+                    default: false
+                    description: |-
+                      ForceHalfPrecision for `f32` values while indexing.
+                      `f16` conversion will take place
+                      only inside GPU memory and won't affect storage type.
+                    type: boolean
                   gpuType:
-                    description: GPUType specifies the type of the GPU to use.
+                    description: GPUType specifies the type of the GPU to use. If
+                      set, GPU indexing is enabled.
                     enum:
                     - nvidia
                     - amd
                     type: string
+                  groupsCount:
+                    description: |-
+                      GroupsCount is the number of Vulkan "groups" used on the GPU.
+                      In other words, how many points can be indexed in parallel by the GPU.
+                      Optimal value might depend on the GPU model.
+                      Proportional, but not necessarily equal, to the physical number of warps.
+                      Do not change this value unless you know what you are doing.
+                    minimum: 1
+                    type: integer
+                  parallelIndexes:
+                    default: 1
+                    description: ParallelIndexes is the number of parallel indexes
+                      to run on the GPU.
+                    minimum: 1
+                    type: integer
                 required:
+                - allowIntegrated
+                - forceHalfPrecision
                 - gpuType
+                - parallelIndexes
                 type: object
               id:
                 description: Id specifies the unique identifier of the cluster
diff --git a/crds/qdrant.io_qdrantclusters.yaml b/crds/qdrant.io_qdrantclusters.yaml
@@ -299,14 +299,66 @@ spec:
                 description: GPU specifies GPU configuration for the cluster. If this
                   field is not set, no GPU will be used.
                 properties:
+                  allowIntegrated:
+                    default: false
+                    description: AllowIntegrated specifies whether to allow integrated
+                      GPUs to be used.
+                    type: boolean
+                  deviceFilter:
+                    description: |-
+                      DeviceFilter for GPU devices by hardware name. Case-insensitive.
+                      List of substrings to match against the gpu device name.
+                      Example: ["nvidia"]
+                      If not specified, all devices are accepted.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                  devices:
+                    description: |-
+                      Devices is a list of explicit GPU devices to use.
+                      If the host has multiple GPUs, this option allows selecting specific devices
+                      by their index in the list of found devices.
+                      If `deviceFilter` is set, indexes are applied after filtering.
+                      If not specified, all devices are accepted.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                  forceHalfPrecision:
+                    default: false
+                    description: |-
+                      ForceHalfPrecision for `f32` values while indexing.
+                      `f16` conversion will take place
+                      only inside GPU memory and won't affect storage type.
+                    type: boolean
                   gpuType:
-                    description: GPUType specifies the type of the GPU to use.
+                    description: GPUType specifies the type of the GPU to use. If
+                      set, GPU indexing is enabled.
                     enum:
                     - nvidia
                     - amd
                     type: string
+                  groupsCount:
+                    description: |-
+                      GroupsCount is the number of Vulkan "groups" used on the GPU.
+                      In other words, how many points can be indexed in parallel by the GPU.
+                      Optimal value might depend on the GPU model.
+                      Proportional, but not necessarily equal, to the physical number of warps.
+                      Do not change this value unless you know what you are doing.
+                    minimum: 1
+                    type: integer
+                  parallelIndexes:
+                    default: 1
+                    description: ParallelIndexes is the number of parallel indexes
+                      to run on the GPU.
+                    minimum: 1
+                    type: integer
                 required:
+                - allowIntegrated
+                - forceHalfPrecision
                 - gpuType
+                - parallelIndexes
                 type: object
               id:
                 description: Id specifies the unique identifier of the cluster
diff --git a/docs/api.md b/docs/api.md
@@ -105,7 +105,13 @@ _Appears in:_
 
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
-| `gpuType` _[GPUType](#gputype)_ | GPUType specifies the type of the GPU to use. |  | Enum: [nvidia amd] <br /> |
+| `gpuType` _[GPUType](#gputype)_ | GPUType specifies the type of the GPU to use. If set, GPU indexing is enabled. |  | Enum: [nvidia amd] <br /> |
+| `forceHalfPrecision` _boolean_ | ForceHalfPrecision for `f32` values while indexing.<br />`f16` conversion will take place<br />only inside GPU memory and won't affect storage type. | false |  |
+| `deviceFilter` _string array_ | DeviceFilter for GPU devices by hardware name. Case-insensitive.<br />List of substrings to match against the gpu device name.<br />Example: ["nvidia"]<br />If not specified, all devices are accepted. |  | MinItems: 1 <br /> |
+| `devices` _string array_ | Devices is a list of explicit GPU devices to use.<br />If the host has multiple GPUs, this option allows selecting specific devices<br />by their index in the list of found devices.<br />If `deviceFilter` is set, indexes are applied after filtering.<br />If not specified, all devices are accepted. |  | MinItems: 1 <br /> |
+| `parallelIndexes` _integer_ | ParallelIndexes is the number of parallel indexes to run on the GPU. | 1 | Minimum: 1 <br /> |
+| `groupsCount` _integer_ | GroupsCount is the number of Vulkan "groups" used on the GPU.<br />In other words, how many points can be indexed in parallel by the GPU.<br />Optimal value might depend on the GPU model.<br />Proportional, but not necessarily equal, to the physical number of warps.<br />Do not change this value unless you know what you are doing. |  | Minimum: 1 <br /> |
+| `allowIntegrated` _boolean_ | AllowIntegrated specifies whether to allow integrated GPUs to be used. | false |  |
 
 
 #### GPUType