diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs
index 70f81740..2a00534b 100644
--- a/crates/nvvm/src/lib.rs
+++ b/crates/nvvm/src/lib.rs
@@ -430,73 +430,62 @@ impl NvvmArch {
}
}
- /// Get all target features up to and including this architecture.
+ /// Gets all target features up to and including this architecture. This effectively answers
+ /// the question "for a given compilation target, what architectural features can be used?"
///
- /// # PTX Forward-Compatibility Rules (per NVIDIA documentation):
+ /// # Examples
///
- /// - **No suffix** (compute_XX): PTX is forward-compatible across all future architectures.
- /// Example: compute_70 runs on CC 7.0, 8.x, 9.x, 10.x, 12.x, and all future GPUs.
+ /// ```
+ /// # use nvvm::NvvmArch;
+ /// let features = NvvmArch::Compute53.all_target_features();
+ /// assert_eq!(
+ /// features,
+ /// vec!["compute_35", "compute_37", "compute_50", "compute_52", "compute_53"]
+ /// );
+ /// ```
///
- /// - **Family-specific 'f' suffix** (compute_XXf): Forward-compatible within the same major
- /// version family. Supports devices with same major CC and equal or higher minor CC.
- /// Example: compute_100f runs on CC 10.0, 10.3, and future 10.x devices, but NOT on 11.x.
- ///
- /// - **Architecture-specific 'a' suffix** (compute_XXa): The code only runs on GPUs of that
- /// specific CC and no others. No forward or backward compatibility whatsoever.
- /// These features are primarily related to Tensor Core programming.
- /// Example: compute_100a ONLY runs on CC 10.0, not on 10.3, 10.1, 9.0, or any other version.
+ /// # External resources
///
/// For more details on family and architecture-specific features, see:
///
pub fn all_target_features(&self) -> Vec<String> {
- let mut features: Vec<String> = if self.is_architecture_variant() {
- // 'a' variants: include all available instructions for the architecture
- // This means: all base variants up to same version, all 'f' variants with same major and <= minor, plus itself
- let base_features: Vec<String> = NvvmArch::iter()
- .filter(|arch| {
- arch.is_base_variant() && arch.capability_value() <= self.capability_value()
- })
- .map(|arch| arch.target_feature())
- .collect();
-
- let family_features: Vec<String> = NvvmArch::iter()
- .filter(|arch| {
- arch.is_family_variant()
- && arch.major_version() == self.major_version()
- && arch.minor_version() <= self.minor_version()
- })
- .map(|arch| arch.target_feature())
- .collect();
+ // All lower-or-equal baseline features are included.
+ let included_baseline = |arch: &NvvmArch| {
+ arch.is_base_variant() && arch.capability_value() <= self.capability_value()
+ };
- base_features
- .into_iter()
- .chain(family_features)
- .chain(std::iter::once(self.target_feature()))
+ // All lower-or-equal-with-same-major-version family features are included.
+ let included_family = |arch: &NvvmArch| {
+ arch.is_family_variant()
+ && arch.major_version() == self.major_version()
+ && arch.minor_version() <= self.minor_version()
+ };
+
+ if self.is_architecture_variant() {
+ // Architecture-specific ('a' suffix) features include:
+ // - all lower-or-equal baseline features
+ // - all lower-or-equal-with-same-major-version family features
+ // - itself
+ NvvmArch::iter()
+ .filter(|arch| included_baseline(arch) || included_family(arch) || arch == self)
+ .map(|arch| arch.target_feature())
.collect()
} else if self.is_family_variant() {
- // 'f' variants: same major version with equal or higher minor version
+ // Family-specific ('f' suffix) features include:
+ // - all lower-or-equal baseline features
+ // - all lower-or-equal-with-same-major-version family features
NvvmArch::iter()
- .filter(|arch| {
- // Include base variants with same major and >= minor version
- arch.is_base_variant()
- && arch.major_version() == self.major_version()
- && arch.minor_version() >= self.minor_version()
- })
+ .filter(|arch| included_baseline(arch) || included_family(arch))
.map(|arch| arch.target_feature())
- .chain(std::iter::once(self.target_feature())) // Add the 'f' variant itself
.collect()
} else {
- // Base variants: all base architectures from lower or equal versions
+ // Baseline (no suffix) features include:
+ // - all lower-or-equal baseline features
NvvmArch::iter()
- .filter(|arch| {
- arch.is_base_variant() && arch.capability_value() <= self.capability_value()
- })
+ .filter(included_baseline)
.map(|arch| arch.target_feature())
.collect()
- };
-
- features.sort();
- features
+ }
}
/// Create an iterator over all architectures from Compute35 up to and including this one
@@ -700,12 +689,11 @@ impl NvvmProgram {
#[cfg(test)]
mod tests {
+ use super::*;
use std::str::FromStr;
#[test]
fn nvvm_arch_capability_value() {
- use crate::NvvmArch;
-
assert_eq!(NvvmArch::Compute35.capability_value(), 35);
assert_eq!(NvvmArch::Compute37.capability_value(), 37);
assert_eq!(NvvmArch::Compute50.capability_value(), 50);
@@ -726,8 +714,6 @@ mod tests {
#[test]
fn nvvm_arch_major_minor_version() {
- use crate::NvvmArch;
-
// Test major/minor version extraction
assert_eq!(NvvmArch::Compute35.major_version(), 3);
assert_eq!(NvvmArch::Compute35.minor_version(), 5);
@@ -747,34 +733,22 @@ mod tests {
}
#[test]
- fn nvvm_arch_target_feature_format_base_variants() {
- use crate::NvvmArch;
-
- // Test base variants format
+ fn nvvm_arch_target_feature() {
+ // Test baseline features
assert_eq!(NvvmArch::Compute35.target_feature(), "compute_35");
assert_eq!(NvvmArch::Compute61.target_feature(), "compute_61");
assert_eq!(NvvmArch::Compute90.target_feature(), "compute_90");
assert_eq!(NvvmArch::Compute100.target_feature(), "compute_100");
assert_eq!(NvvmArch::Compute120.target_feature(), "compute_120");
- }
-
- #[test]
- fn nvvm_arch_target_feature_format_family_variants() {
- use crate::NvvmArch;
- // Test family ('f') variants format
+ // Test family-specific ('f') features
assert_eq!(NvvmArch::Compute100f.target_feature(), "compute_100f");
assert_eq!(NvvmArch::Compute101f.target_feature(), "compute_101f");
assert_eq!(NvvmArch::Compute103f.target_feature(), "compute_103f");
assert_eq!(NvvmArch::Compute120f.target_feature(), "compute_120f");
assert_eq!(NvvmArch::Compute121f.target_feature(), "compute_121f");
- }
- #[test]
- fn nvvm_arch_target_feature_format_architecture_variants() {
- use crate::NvvmArch;
-
- // Test architecture ('a') variants format
+ // Test architecture-specific ('a') features
assert_eq!(NvvmArch::Compute90a.target_feature(), "compute_90a");
assert_eq!(NvvmArch::Compute100a.target_feature(), "compute_100a");
assert_eq!(NvvmArch::Compute101a.target_feature(), "compute_101a");
@@ -784,24 +758,32 @@ mod tests {
}
#[test]
- fn nvvm_arch_all_target_features_includes_lower_capabilities() {
- use crate::NvvmArch;
+ fn nvvm_arch_all_target_features() {
+ assert_eq!(
+ NvvmArch::Compute35.all_target_features(),
+ vec!["compute_35"]
+ );
- // Compute35 only includes itself
- let compute35_features = NvvmArch::Compute35.all_target_features();
- assert_eq!(compute35_features, vec!["compute_35"]);
+ assert_eq!(
+ NvvmArch::Compute50.all_target_features(),
+ vec!["compute_35", "compute_37", "compute_50"],
+ );
- // Compute50 includes all lower base capabilities
- let compute50_features = NvvmArch::Compute50.all_target_features();
assert_eq!(
- compute50_features,
- vec!["compute_35", "compute_37", "compute_50"]
+ NvvmArch::Compute61.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ ]
);
- // Compute61 includes all lower base capabilities
- let compute61_features = NvvmArch::Compute61.all_target_features();
assert_eq!(
- compute61_features,
+ NvvmArch::Compute70.all_target_features(),
vec![
"compute_35",
"compute_37",
@@ -809,76 +791,12 @@ mod tests {
"compute_52",
"compute_53",
"compute_60",
- "compute_61"
+ "compute_61",
+ "compute_62",
+ "compute_70",
]
);
- // Test 'a' variant - includes all available instructions for the architecture
- // This means: all base variants up to same version, all 'f' variants with same major and <= minor, plus itself
- let compute90a_features = NvvmArch::Compute90a.all_target_features();
- // Should include all base up to 90
- assert!(compute90a_features.contains(&"compute_35".to_string()));
- assert!(compute90a_features.contains(&"compute_90".to_string()));
- // Should include the 'a' variant itself
- assert!(compute90a_features.contains(&"compute_90a".to_string()));
- // Should NOT include any 'f' variants (90 has no 'f' variants)
-
- // Test compute100a - should include base variants, and 100f
- let compute100a_features = NvvmArch::Compute100a.all_target_features();
- // Should include all base up to 100
- assert!(compute100a_features.contains(&"compute_90".to_string()));
- assert!(compute100a_features.contains(&"compute_100".to_string()));
- // Should include 100f (same major, <= minor)
- assert!(compute100a_features.contains(&"compute_100f".to_string()));
- // Should NOT include 101f or 103f (higher minor)
- assert!(!compute100a_features.contains(&"compute_101f".to_string()));
- assert!(!compute100a_features.contains(&"compute_103f".to_string()));
- // Should include itself
- assert!(compute100a_features.contains(&"compute_100a".to_string()));
-
- // Test compute101a
- let compute101a_features = NvvmArch::Compute101a.all_target_features();
- // Should include all base up to 101
- assert!(compute101a_features.contains(&"compute_100".to_string()));
- assert!(compute101a_features.contains(&"compute_101".to_string()));
- // Should include 100f and 101f (same major, <= minor)
- assert!(compute101a_features.contains(&"compute_100f".to_string()));
- assert!(compute101a_features.contains(&"compute_101f".to_string()));
- // Should NOT include 103f (higher minor)
- assert!(!compute101a_features.contains(&"compute_103f".to_string()));
- // Should include itself
- assert!(compute101a_features.contains(&"compute_101a".to_string()));
-
- // Test 'f' variant - includes same major version with >= minor
- let compute120f_features = NvvmArch::Compute120f.all_target_features();
- assert!(compute120f_features.contains(&"compute_120".to_string()));
- assert!(compute120f_features.contains(&"compute_121".to_string())); // Higher minor included
- assert!(compute120f_features.contains(&"compute_120f".to_string())); // Self included
- assert!(!compute120f_features.contains(&"compute_120a".to_string())); // No 'a' variants
- assert!(!compute120f_features.contains(&"compute_121f".to_string())); // No other 'f' variants
- assert!(!compute120f_features.contains(&"compute_121a".to_string())); // No 'a' variants
- // Should NOT include different major versions
- assert!(!compute120f_features.contains(&"compute_100".to_string()));
- assert!(!compute120f_features.contains(&"compute_90".to_string()));
-
- // Test 'f' variant with 100f
- let compute100f_features = NvvmArch::Compute100f.all_target_features();
- assert!(compute100f_features.contains(&"compute_100".to_string())); // Same version base
- assert!(compute100f_features.contains(&"compute_101".to_string())); // Higher minor
- assert!(compute100f_features.contains(&"compute_103".to_string())); // Higher minor
- assert!(compute100f_features.contains(&"compute_100f".to_string())); // Self
- assert!(!compute100f_features.contains(&"compute_101f".to_string())); // No other 'f' variants
- assert!(!compute100f_features.contains(&"compute_90".to_string())); // Different major
-
- // Test 'f' variant with 101f
- let compute101f_features = NvvmArch::Compute101f.all_target_features();
- assert!(!compute101f_features.contains(&"compute_100".to_string())); // Lower minor NOT included
- assert!(compute101f_features.contains(&"compute_101".to_string())); // Same version base
- assert!(compute101f_features.contains(&"compute_103".to_string())); // Higher minor included
- assert!(compute101f_features.contains(&"compute_101f".to_string())); // Self
- assert!(!compute101f_features.contains(&"compute_101a".to_string())); // No 'a' variants
-
- // Compute90 includes lower base capabilities
let compute90_features = NvvmArch::Compute90.all_target_features();
assert_eq!(
compute90_features,
@@ -898,80 +816,219 @@ mod tests {
"compute_86",
"compute_87",
"compute_89",
- "compute_90"
+ "compute_90",
]
);
- }
-
- #[test]
- fn target_feature_synthesis_supports_conditional_compilation_patterns() {
- use crate::NvvmArch;
-
- // When targeting Compute61, should enable all lower capabilities
- let features = NvvmArch::Compute61.all_target_features();
- // Should enable compute_60 (for f64 atomics)
- assert!(features.contains(&"compute_60".to_string()));
-
- // Should enable compute_50 (for 64-bit integer atomics)
- assert!(features.contains(&"compute_50".to_string()));
+ assert_eq!(
+ NvvmArch::Compute90a.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_90a",
+ ]
+ );
- // Should enable compute_35 (baseline)
- assert!(features.contains(&"compute_35".to_string()));
+ assert_eq!(
+ NvvmArch::Compute100a.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_100f",
+ "compute_100a",
+ ]
+ );
- // Should enable the target itself
- assert!(features.contains(&"compute_61".to_string()));
+ assert_eq!(
+ NvvmArch::Compute100f.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_100f",
+ ]
+ );
- // Should NOT enable higher capabilities
- assert!(!features.contains(&"compute_62".to_string()));
- assert!(!features.contains(&"compute_70".to_string()));
- }
+ assert_eq!(
+ NvvmArch::Compute101a.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_100f",
+ "compute_101",
+ "compute_101f",
+ "compute_101a",
+ ]
+ );
- #[test]
- fn target_feature_synthesis_enables_correct_cfg_patterns() {
- use crate::NvvmArch;
-
- // Test that targeting Compute70 enables appropriate cfg patterns
- let features = NvvmArch::Compute70.all_target_features();
-
- // These should all be true for compute_70 target
- let expected_enabled = [
- "compute_35",
- "compute_37",
- "compute_50",
- "compute_52",
- "compute_53",
- "compute_60",
- "compute_61",
- "compute_62",
- "compute_70",
- ];
+ assert_eq!(
+ NvvmArch::Compute101f.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_100f",
+ "compute_101",
+ "compute_101f",
+ ]
+ );
- for feature in expected_enabled {
- assert!(
- features.contains(&feature.to_string()),
- "Compute70 should enable {} for cfg(target_feature = \"{}\")",
- feature,
- feature
- );
- }
+ assert_eq!(
+ NvvmArch::Compute120.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_101",
+ "compute_103",
+ "compute_120",
+ ]
+ );
- // These should NOT be enabled for compute_70 target
- let expected_disabled = ["compute_72", "compute_75", "compute_80", "compute_90"];
+ assert_eq!(
+ NvvmArch::Compute120f.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_101",
+ "compute_103",
+ "compute_120",
+ "compute_120f",
+ ]
+ );
- for feature in expected_disabled {
- assert!(
- !features.contains(&feature.to_string()),
- "Compute70 should NOT enable {}",
- feature
- );
- }
+ assert_eq!(
+ NvvmArch::Compute120a.all_target_features(),
+ vec![
+ "compute_35",
+ "compute_37",
+ "compute_50",
+ "compute_52",
+ "compute_53",
+ "compute_60",
+ "compute_61",
+ "compute_62",
+ "compute_70",
+ "compute_72",
+ "compute_75",
+ "compute_80",
+ "compute_86",
+ "compute_87",
+ "compute_89",
+ "compute_90",
+ "compute_100",
+ "compute_101",
+ "compute_103",
+ "compute_120",
+ "compute_120f",
+ "compute_120a",
+ ]
+ );
}
#[test]
fn nvvm_arch_iter_up_to_includes_only_lower_or_equal() {
- use crate::NvvmArch;
-
// Compute35 only includes itself
let archs: Vec<_> = NvvmArch::Compute35.iter_up_to().collect();
assert_eq!(archs, vec![NvvmArch::Compute35]);
@@ -1010,8 +1067,8 @@ mod tests {
#[test]
fn options_parse_correctly() {
- use crate::NvvmArch::*;
- use crate::NvvmOption::{self, *};
+ use NvvmArch::*;
+ use NvvmOption::{self, *};
let opts = vec![
"-g",
@@ -1066,8 +1123,6 @@ mod tests {
#[test]
fn nvvm_arch_variant_checks() {
- use crate::NvvmArch;
-
// Base variants
assert!(NvvmArch::Compute90.is_base_variant());
assert!(NvvmArch::Compute120.is_base_variant());
@@ -1088,8 +1143,6 @@ mod tests {
#[test]
fn nvvm_arch_base_architecture() {
- use crate::NvvmArch;
-
// Base variants return themselves
assert_eq!(NvvmArch::Compute90.base_architecture(), NvvmArch::Compute90);
assert_eq!(
@@ -1097,7 +1150,7 @@ mod tests {
NvvmArch::Compute120
);
- // Floating-point variants return base
+ // Family-specific variants return base
assert_eq!(
NvvmArch::Compute120f.base_architecture(),
NvvmArch::Compute120
@@ -1120,94 +1173,46 @@ mod tests {
#[test]
fn nvvm_arch_get_variants() {
- use crate::NvvmArch;
-
// Architecture with only base variant
let compute80_variants = NvvmArch::Compute80.get_variants();
assert_eq!(compute80_variants, vec![NvvmArch::Compute80]);
// Architecture with architecture and base variants
- let mut compute90_variants = NvvmArch::Compute90.get_variants();
- compute90_variants.sort_by_key(|v| format!("{:?}", v));
assert_eq!(
- compute90_variants,
+ NvvmArch::Compute90.get_variants(),
vec![NvvmArch::Compute90, NvvmArch::Compute90a]
);
// Architecture with all three variants
- let mut compute120_variants = NvvmArch::Compute120.get_variants();
- compute120_variants.sort_by_key(|v| format!("{:?}", v));
- assert_eq!(
- compute120_variants,
- vec![
- NvvmArch::Compute120,
- NvvmArch::Compute120a,
- NvvmArch::Compute120f
- ]
- );
-
- // Getting variants from a variant returns all variants
- let compute120f_variants = NvvmArch::Compute120f.get_variants();
- assert_eq!(compute120f_variants.len(), 3);
- assert!(compute120f_variants.contains(&NvvmArch::Compute120));
- assert!(compute120f_variants.contains(&NvvmArch::Compute120f));
- assert!(compute120f_variants.contains(&NvvmArch::Compute120a));
- }
-
- #[test]
- fn nvvm_arch_a_suffix_includes_all_available_instructions() {
- use crate::NvvmArch;
-
- // Test that 'a' suffix variants include all available instructions for the architecture
- // While they only RUN on exact CC, they enable all base and family features during compilation
-
- // Test Compute90a
- let features = NvvmArch::Compute90a.all_target_features();
- assert!(features.contains(&"compute_90a".to_string())); // Includes itself
- assert!(features.contains(&"compute_90".to_string())); // Includes base
- assert!(features.contains(&"compute_80".to_string())); // Includes lower versions
- assert!(!features.contains(&"compute_100".to_string())); // Does NOT include higher versions
-
- // Test Compute100a
- let features = NvvmArch::Compute100a.all_target_features();
- assert!(features.contains(&"compute_100a".to_string())); // Includes itself
- assert!(features.contains(&"compute_100".to_string())); // Includes base
- assert!(features.contains(&"compute_100f".to_string())); // Includes family variant
- assert!(features.contains(&"compute_90".to_string())); // Includes lower base versions
- assert!(!features.contains(&"compute_90a".to_string())); // Does NOT include other 'a' variants
- assert!(!features.contains(&"compute_101f".to_string())); // Does NOT include higher minor family variants
-
- // Test Compute120a
- let features = NvvmArch::Compute120a.all_target_features();
- assert!(features.contains(&"compute_120a".to_string())); // Includes itself
- assert!(features.contains(&"compute_120".to_string())); // Includes base
- assert!(features.contains(&"compute_120f".to_string())); // Includes family variant (same minor)
- assert!(features.contains(&"compute_100".to_string())); // Includes lower base versions
- assert!(!features.contains(&"compute_121f".to_string())); // Does NOT include higher minor family variants
+ let expected120 = vec![
+ NvvmArch::Compute120,
+ NvvmArch::Compute120f,
+ NvvmArch::Compute120a,
+ ];
+ assert_eq!(NvvmArch::Compute120.get_variants(), expected120);
+ assert_eq!(NvvmArch::Compute120f.get_variants(), expected120);
+ assert_eq!(NvvmArch::Compute120a.get_variants(), expected120);
}
#[test]
fn nvvm_arch_variants_for_capability() {
- use crate::NvvmArch;
-
// Capability with single variant
- let compute75_variants = NvvmArch::variants_for_capability(75);
- assert_eq!(compute75_variants, vec![NvvmArch::Compute75]);
+ assert_eq!(
+ NvvmArch::variants_for_capability(75),
+ vec![NvvmArch::Compute75]
+ );
// Capability with multiple variants
- let mut compute101_variants = NvvmArch::variants_for_capability(101);
- compute101_variants.sort_by_key(|v| format!("{:?}", v));
assert_eq!(
- compute101_variants,
+ NvvmArch::variants_for_capability(101),
vec![
NvvmArch::Compute101,
+ NvvmArch::Compute101f,
NvvmArch::Compute101a,
- NvvmArch::Compute101f
]
);
// Non-existent capability
- let compute999_variants = NvvmArch::variants_for_capability(999);
- assert!(compute999_variants.is_empty());
+ assert!(NvvmArch::variants_for_capability(999).is_empty());
}
}
diff --git a/guide/src/guide/compute_capabilities.md b/guide/src/guide/compute_capabilities.md
index 432522c7..617169fb 100644
--- a/guide/src/guide/compute_capabilities.md
+++ b/guide/src/guide/compute_capabilities.md
@@ -74,9 +74,9 @@ CudaBuilder::new("kernels")
.unwrap();
// In your kernel code:
-#[cfg(target_feature = "compute_60")] // ✓ Pass (older compute capability)
-#[cfg(target_feature = "compute_70")] // ✓ Pass (current compute capability)
-#[cfg(target_feature = "compute_80")] // ✗ Fail (newer compute capability)
+#[cfg(target_feature = "compute_60")] // ✓ Pass (lower base variant)
+#[cfg(target_feature = "compute_70")] // ✓ Pass (this base variant)
+#[cfg(target_feature = "compute_80")] // ✗ Fail (higher base variant)
```
### Family Suffix ('f')
@@ -99,13 +99,13 @@ CudaBuilder::new("kernels")
.unwrap();
// In your kernel code:
-#[cfg(target_feature = "compute_100")] // ✗ Fail (10.0 < 10.1)
-#[cfg(target_feature = "compute_101")] // ✓ Pass (equal major, equal minor)
-#[cfg(target_feature = "compute_103")] // ✓ Pass (equal major, greater minor)
+#[cfg(target_feature = "compute_90")] // ✓ Pass (lower base variant)
+#[cfg(target_feature = "compute_100")] // ✓ Pass (lower base variant)
+#[cfg(target_feature = "compute_100f")] // ✓ Pass (lower 'f' variant)
+#[cfg(target_feature = "compute_101")] // ✓ Pass (this base variant)
#[cfg(target_feature = "compute_101f")] // ✓ Pass (the 'f' variant itself)
-#[cfg(target_feature = "compute_100f")] // ✗ Fail (other 'f' variant)
-#[cfg(target_feature = "compute_90")] // ✗ Fail (different major)
-#[cfg(target_feature = "compute_110")] // ✗ Fail (different major)
+#[cfg(target_feature = "compute_103")] // ✗ Fail (higher base variant)
+#[cfg(target_feature = "compute_110")] // ✗ Fail (higher base variant)
```
### Architecture Suffix ('a')
@@ -130,12 +130,12 @@ CudaBuilder::new("kernels")
.unwrap();
// In your kernel code:
-#[cfg(target_feature = "compute_100a")] // ✓ Pass (the 'a' variant itself)
-#[cfg(target_feature = "compute_100")] // ✓ Pass (base variant)
#[cfg(target_feature = "compute_90")] // ✓ Pass (lower base variant)
+#[cfg(target_feature = "compute_100")] // ✓ Pass (base variant)
#[cfg(target_feature = "compute_100f")] // ✓ Pass (family variant with same major/minor)
-#[cfg(target_feature = "compute_101f")] // ✗ Fail (family variant with higher minor)
-#[cfg(target_feature = "compute_110")] // ✗ Fail (higher major version)
+#[cfg(target_feature = "compute_100a")] // ✓ Pass (the 'a' variant itself)
+#[cfg(target_feature = "compute_101f")] // ✗ Fail (higher family variant)
+#[cfg(target_feature = "compute_110")] // ✗ Fail (higher base variant)
```
Note: While the 'a' variant enables all these features during compilation (allowing you to use all available instructions), the generated PTX code will still only run on the exact GPU architecture specified.