Skip to content

Commit 9318997

Browse files
committed
Add SIMD support
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent c725c8b commit 9318997

File tree

18 files changed

+1015
-213
lines changed

18 files changed

+1015
-213
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"Cargo.toml",
44
// guest crates for testing, not part of the workspace
55
"src/tests/rust_guests/simpleguest/Cargo.toml",
6+
"src/tests/rust_guests/simdguest/Cargo.toml",
67
"src/tests/rust_guests/callbackguest/Cargo.toml"
78
]
89
}

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ exclude = [
2222
"src/tests/rust_guests/dummyguest",
2323
"src/tests/rust_guests/simpleguest",
2424
"src/tests/rust_guests/witguest",
25+
"src/tests/rust_guests/simdguest",
2526
]
2627

2728
[workspace.package]

Justfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-unknown-n
1313
dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-unknown-none"
1414
callbackguest_source := "src/tests/rust_guests/callbackguest/target/x86_64-unknown-none"
1515
witguest_source := "src/tests/rust_guests/witguest/target/x86_64-unknown-none"
16+
simdguest_source := "src/tests/rust_guests/simdguest/target/x86_64-unknown-none"
1617
rust_guests_bin_dir := "src/tests/rust_guests/bin"
1718

1819
################
@@ -43,12 +44,14 @@ build-rust-guests target=default-target: (witguest-wit)
4344
cd src/tests/rust_guests/simpleguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4445
cd src/tests/rust_guests/dummyguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4546
cd src/tests/rust_guests/witguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
47+
cd src/tests/rust_guests/simdguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4648

4749
@move-rust-guests target=default-target:
4850
cp {{ callbackguest_source }}/{{ target }}/callbackguest* {{ rust_guests_bin_dir }}/{{ target }}/
4951
cp {{ simpleguest_source }}/{{ target }}/simpleguest* {{ rust_guests_bin_dir }}/{{ target }}/
5052
cp {{ dummyguest_source }}/{{ target }}/dummyguest* {{ rust_guests_bin_dir }}/{{ target }}/
5153
cp {{ witguest_source }}/{{ target }}/witguest* {{ rust_guests_bin_dir }}/{{ target }}/
54+
cp {{ simdguest_source }}/{{ target }}/simdguest* {{ rust_guests_bin_dir }}/{{ target }}/
5255

5356
build-and-move-rust-guests: (build-rust-guests "debug") (move-rust-guests "debug") (build-rust-guests "release") (move-rust-guests "release")
5457
build-and-move-c-guests: (build-c-guests "debug") (move-c-guests "debug") (build-c-guests "release") (move-c-guests "release")
@@ -61,6 +64,7 @@ clean-rust:
6164
cd src/tests/rust_guests/dummyguest && cargo clean
6265
cd src/tests/rust_guests/callbackguest && cargo clean
6366
cd src/tests/rust_guests/witguest && cargo clean
67+
cd src/tests/rust_guests/simdguest && cargo clean
6468
cd src/tests/rust_guests/witguest && rm -f interface.wasm
6569
git clean -fdx src/tests/c_guests/bin src/tests/rust_guests/bin
6670

@@ -149,6 +153,7 @@ fmt-check:
149153
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml -- --check
150154
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml -- --check
151155
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml -- --check
156+
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml -- --check
152157
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -- --check
153158

154159
check-license-headers:
@@ -160,6 +165,7 @@ fmt-apply:
160165
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml
161166
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml
162167
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml
168+
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml
163169
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml
164170

165171
clippy target=default-target: (witguest-wit)
@@ -169,6 +175,7 @@ clippy-guests target=default-target: (witguest-wit)
169175
cd src/tests/rust_guests/simpleguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
170176
cd src/tests/rust_guests/callbackguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
171177
cd src/tests/rust_guests/witguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
178+
cd src/tests/rust_guests/simdguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
172179

173180
clippy-apply-fix-unix:
174181
cargo clippy --fix --all

count_simd_instructions.sh

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/bin/bash

# Script to count SIMD instructions in an ELF binary
# Usage: ./count_simd_instructions.sh <binary_file>
#
# The binary is disassembled once with objdump, and each instruction-set
# family is counted as the number of disassembly lines matching a
# case-insensitive extended regex of mnemonics. The counts are heuristic:
# a line is counted once per family it matches, and families can overlap
# (for example an AVX mnemonic operating on zmm registers), so the total is
# computed over distinct lines instead of summing the per-family counts.

if [ $# -eq 0 ]; then
    echo "Usage: $0 <binary_file>"
    exit 1
fi

BINARY="$1"

if [ ! -f "$BINARY" ]; then
    echo "Error: File '$BINARY' not found"
    exit 1
fi

echo "Analyzing SIMD instructions in: $BINARY"
echo "========================================"

# Disassemble the binary; test objdump's exit status directly on the assignment.
if ! DISASM=$(objdump -d "$BINARY" 2>/dev/null); then
    echo "Error: Failed to disassemble binary. Make sure it's a valid ELF file."
    exit 1
fi

# Print the number of disassembly lines matching the extended regex in $1.
# grep -c prints 0 (and exits non-zero) when nothing matches, which is fine here.
count_matches() {
    grep -c -i -E -- "$1" <<<"$DISASM"
}

# Mnemonic patterns, one per instruction-set family.
# NOTE(review): movq/movd can also appear as general-purpose moves in AT&T
# syntax output, so the SSE2 count may include non-SIMD lines - confirm.
SSE_PATTERN='\b(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd)\b'

SSE2_PATTERN='\b(movdqa|movdqu|movq|movd|paddb|paddw|paddd|paddq|psubb|psubw|psubd|psubq|pmullw|pmuludq|pand|pandn|por|pxor|psllw|pslld|psllq|psrlw|psrld|psrlq|psraw|psrad|packsswb|packssdw|packuswb|punpckhbw|punpckhwd|punpckhdq|punpckhqdq|punpcklbw|punpcklwd|punpckldq|punpcklqdq|pcmpeqb|pcmpeqw|pcmpeqd|pcmpgtb|pcmpgtw|pcmpgtd|pmaxub|pmaxsw|pminub|pminsw|psadbw|pavgb|pavgw)\b'

SSE3_PATTERN='\b(addsubpd|addsubps|haddpd|haddps|hsubpd|hsubps|movddup|movshdup|movsldup|lddqu)\b'

SSSE3_PATTERN='\b(pabsb|pabsw|pabsd|palignr|phaddb|phaddw|phaddd|phaddsw|phsubb|phsubw|phsubd|phsubsw|pmaddubsw|pmulhrsw|pshufb|psignb|psignw|psignd)\b'

SSE41_PATTERN='\b(blendpd|blendps|blendvpd|blendvps|dppd|dpps|extractps|insertps|movntdqa|mpsadbw|packusdw|pblendvb|pblendw|pcmpeqq|pextrb|pextrd|pextrq|pextrw|phminposuw|pinsrb|pinsrd|pinsrq|pmaxsb|pmaxsd|pmaxud|pmaxuw|pminsb|pminsd|pminud|pminuw|pmovsxbw|pmovsxbd|pmovsxbq|pmovsxwd|pmovsxwq|pmovsxdq|pmovzxbw|pmovzxbd|pmovzxbq|pmovzxwd|pmovzxwq|pmovzxdq|pmuldq|pmulld|ptest|roundpd|roundps|roundsd|roundss)\b'

SSE42_PATTERN='\b(crc32|pcmpestri|pcmpestrm|pcmpistri|pcmpistrm|pcmpgtq)\b'

# The tail of this list uses real mnemonics (vbroadcastss, vpermilps,
# vperm2f128, vmaskmovps, vtestps, ...); intrinsic-style stems such as
# "broadcast" or "maskload" followed by \b can never match disassembly.
AVX_PATTERN='\bv(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd|movaps|movapd|movups|movupd|movlps|movlpd|movhps|movhpd|movlhps|movhlps|unpcklps|unpcklpd|unpckhps|unpckhpd|addps|addpd|subps|subpd|mulps|mulpd|divps|divpd|sqrtps|sqrtpd|maxps|maxpd|minps|minpd|cmpps|cmppd|andps|andpd|andnps|andnpd|orps|orpd|xorps|xorpd|shufps|shufpd|blendps|blendpd|blendvps|blendvpd|dpps|dppd|roundps|roundpd|roundss|roundsd|insertf128|extractf128|broadcast(ss|sd|f128)|permil(ps|pd)|perm2f128|maskmov(ps|pd)|test(ps|pd))\b'

# Each alternative is a mnemonic stem, so the element-size suffix must be
# consumed before the closing word boundary (vpadd + d, vpshuf + b, ...).
AVX2_PATTERN='\bv(pabs|padd|psub|pmul|pand|por|pxor|psll|psrl|psra|pack|punpck|pcmp|pmax|pmin|psad|pavg|pblend|pbroadcast|perm(d|q|ps|pd|2i128)|pgather|pinsr|pextr|pmovsx|pmovzx|psign|pshuf|palign|pmadd|phsub|phadd)[a-z0-9]*\b'

# zmm registers carry a numeric suffix, and a closing brace at end of line is
# not followed by a word boundary, so neither may be anchored with a bare \b.
AVX512_PATTERN='\b(evex|zmm[0-9]+|k[0-7])\b|\bv.*\{.*\}'

# Count different instruction sets
SSE_COUNT=$(count_matches "$SSE_PATTERN")
SSE2_COUNT=$(count_matches "$SSE2_PATTERN")
SSE3_COUNT=$(count_matches "$SSE3_PATTERN")
SSSE3_COUNT=$(count_matches "$SSSE3_PATTERN")
SSE41_COUNT=$(count_matches "$SSE41_PATTERN")
SSE42_COUNT=$(count_matches "$SSE42_PATTERN")
AVX_COUNT=$(count_matches "$AVX_PATTERN")
AVX2_COUNT=$(count_matches "$AVX2_PATTERN")
AVX512_COUNT=$(count_matches "$AVX512_PATTERN")

echo "SSE instructions: $SSE_COUNT"
echo "SSE2 instructions: $SSE2_COUNT"
echo "SSE3 instructions: $SSE3_COUNT"
echo "SSSE3 instructions: $SSSE3_COUNT"
echo "SSE4.1 instructions: $SSE41_COUNT"
echo "SSE4.2 instructions: $SSE42_COUNT"
echo "AVX instructions: $AVX_COUNT"
echo "AVX2 instructions: $AVX2_COUNT"
echo "AVX-512 instructions: $AVX512_COUNT"
echo "========================================"

# Count each disassembly line at most once, even if it matches several families.
TOTAL=$(count_matches "$SSE_PATTERN|$SSE2_PATTERN|$SSE3_PATTERN|$SSSE3_PATTERN|$SSE41_PATTERN|$SSE42_PATTERN|$AVX_PATTERN|$AVX2_PATTERN|$AVX512_PATTERN")
echo "Total SIMD instructions: $TOTAL"

src/hyperlight_guest/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ Provides only the essential building blocks for interacting with the host enviro
1313

1414
[dependencies]
1515
anyhow = { version = "1.0.98", default-features = false }
16-
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
16+
# serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
1717
hyperlight-common = { workspace = true }

src/hyperlight_guest/src/error.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ limitations under the License.
1717
use alloc::format;
1818
use alloc::string::String;
1919

20+
use anyhow;
2021
use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
21-
use {anyhow, serde_json};
2222

2323
pub type Result<T> = core::result::Result<T, HyperlightGuestError>;
2424

@@ -42,12 +42,3 @@ impl From<anyhow::Error> for HyperlightGuestError {
4242
}
4343
}
4444
}
45-
46-
impl From<serde_json::Error> for HyperlightGuestError {
47-
fn from(error: serde_json::Error) -> Self {
48-
Self {
49-
kind: ErrorCode::GuestError,
50-
message: format!("Error: {:?}", error),
51-
}
52-
}
53-
}

src/hyperlight_host/src/hypervisor/kvm.rs

Lines changed: 119 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::sync::Arc;
2121
use std::sync::Mutex;
2222
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
2323

24-
use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region};
24+
use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region, kvm_xcrs};
2525
use kvm_ioctls::Cap::UserMemory;
2626
use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
2727
use log::LevelFilter;
@@ -37,8 +37,8 @@ use super::handlers::DbgMemAccessHandlerWrapper;
3737
use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
3838
#[cfg(feature = "init-paging")]
3939
use super::{
40-
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE,
41-
EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
40+
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT,
41+
CR4_OSXSAVE, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, XCR0_AVX, XCR0_SSE, XCR0_X87,
4242
};
4343
use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU};
4444
#[cfg(gdb)]
@@ -336,6 +336,7 @@ impl KVMDriver {
336336
})?;
337337

338338
let mut vcpu_fd = vm_fd.create_vcpu(0)?;
339+
Self::setup_cpuid(&kvm, &mut vcpu_fd)?;
339340
Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?;
340341

341342
#[cfg(gdb)]
@@ -409,7 +410,7 @@ impl KVMDriver {
409410
cfg_if::cfg_if! {
410411
if #[cfg(feature = "init-paging")] {
411412
sregs.cr3 = _pml4_addr;
412-
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
413+
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_OSXSAVE;
413414
sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP;
414415
sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX;
415416
sregs.cs.l = 1; // required for 64-bit mode
@@ -419,6 +420,120 @@ impl KVMDriver {
419420
}
420421
}
421422
vcpu_fd.set_sregs(&sregs)?;
423+
424+
// Setup XCR0 (Extended Control Register 0) to enable SIMD features
425+
// This is required for AVX and other SIMD instruction support
426+
// Only set XCR0 if the init-paging feature is enabled
427+
cfg_if::cfg_if! {
    if #[cfg(feature = "init-paging")] {
        // SAFETY: every field written below is an integer, and the value is
        // only handed to `set_xcrs` after being filled in.
        // NOTE(review): assumes `kvm_xcrs` is a plain C struct for which the
        // all-zero bit pattern is valid - confirm against kvm_bindings.
        let mut xcrs: kvm_xcrs = unsafe { std::mem::zeroed() };

        // Exactly one register is programmed: entry 0 targets XCR0.
        xcrs.nr_xcrs = 1;
        xcrs.xcrs[0].xcr = 0; // XCR0 register number
        // Enable x87 FPU state together with SSE and AVX state.
        xcrs.xcrs[0].value = XCR0_X87 | XCR0_SSE | XCR0_AVX;

        // CR4.OSXSAVE has already been requested above, so a failure here
        // would leave the vCPU advertising XSAVE without AVX state enabled.
        // Propagate the error instead of printing it and carrying on.
        vcpu_fd.set_xcrs(&xcrs)?;
        log::debug!(
            "Set XCR0 to {:#x} to enable SIMD features (nr_xcrs={})",
            xcrs.xcrs[0].value,
            xcrs.nr_xcrs
        );
    }
}
450+
451+
Ok(())
452+
}
453+
454+
/// Setup the CPUID for the vCPU to enable SIMD features.
455+
/// This is done by just mirroring the host's CPUID in the guest.
456+
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
457+
fn setup_cpuid(kvm: &Kvm, vcpu_fd: &mut VcpuFd) -> Result<()> {
458+
// Get the supported CPUID from the host machine
459+
let cpuid = kvm.get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
460+
461+
let entries = cpuid.as_slice();
462+
463+
// https://en.wikipedia.org/wiki/CPUID
464+
// sse: EAX=1, EDX bit 25
465+
if !entries
466+
.get(1)
467+
.map(|entry| entry.edx & (1 << 25) != 0)
468+
.unwrap_or(false)
469+
{
470+
return Err(new_error!("SSE support not detected on the host machine"));
471+
}
472+
// sse2 is EAX=1, EDX bit 26
473+
if !entries
474+
.get(1)
475+
.map(|entry| entry.edx & (1 << 26) != 0)
476+
.unwrap_or(false)
477+
{
478+
return Err(new_error!("SSE2 support not detected on the host machine"));
479+
}
480+
// sse3 is EAX=1, ECX bit 0
481+
if !entries
482+
.get(1)
483+
.map(|entry| entry.ecx & (1 << 0) != 0)
484+
.unwrap_or(false)
485+
{
486+
return Err(new_error!("SSE3 support not detected on the host machine"));
487+
}
488+
// ssse3 is EAX=1, ECX bit 9
489+
if !entries
490+
.get(1)
491+
.map(|entry| entry.ecx & (1 << 9) != 0)
492+
.unwrap_or(false)
493+
{
494+
return Err(new_error!("SSSE3 support not detected on the host machine"));
495+
}
496+
// sse4.1 is EAX=1, ECX bit 19
497+
if !entries
498+
.get(1)
499+
.map(|entry| entry.ecx & (1 << 19) != 0)
500+
.unwrap_or(false)
501+
{
502+
return Err(new_error!(
503+
"SSE4.1 support not detected on the host machine"
504+
));
505+
}
506+
// sse4.2 is EAX=1, ECX bit 20
507+
if !entries
508+
.get(1)
509+
.map(|entry| entry.ecx & (1 << 20) != 0)
510+
.unwrap_or(false)
511+
{
512+
return Err(new_error!(
513+
"SSE4.2 support not detected on the host machine"
514+
));
515+
}
516+
// avx is EAX=1, ECX bit 28
517+
if !entries
518+
.get(1)
519+
.map(|entry| entry.ecx & (1 << 28) != 0)
520+
.unwrap_or(false)
521+
{
522+
return Err(new_error!("AVX support not detected on the host machine"));
523+
}
524+
// avx2 is EAX=7, EBX bit 5
525+
if !entries
526+
.get(7)
527+
.map(|entry| entry.ebx & (1 << 5) != 0)
528+
.unwrap_or(false)
529+
{
530+
return Err(new_error!("AVX2 support not detected on the host machine"));
531+
}
532+
533+
// Set the CPUID for the guest's vCPU to be the same as the host's
534+
vcpu_fd.set_cpuid2(&cpuid)?;
535+
println!("CPUID set successfully for SIMD support");
536+
422537
Ok(())
423538
}
424539
}

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ cfg_if::cfg_if! {
8080
pub(crate) const CR4_PAE: u64 = 1 << 5;
8181
pub(crate) const CR4_OSFXSR: u64 = 1 << 9;
8282
pub(crate) const CR4_OSXMMEXCPT: u64 = 1 << 10;
83+
pub(crate) const CR4_OSXSAVE: u64 = 1 << 18;
8384
pub(crate) const CR0_PE: u64 = 1;
8485
pub(crate) const CR0_MP: u64 = 1 << 1;
8586
pub(crate) const CR0_ET: u64 = 1 << 4;
@@ -91,6 +92,11 @@ cfg_if::cfg_if! {
9192
pub(crate) const EFER_LMA: u64 = 1 << 10;
9293
pub(crate) const EFER_SCE: u64 = 1;
9394
pub(crate) const EFER_NX: u64 = 1 << 11;
95+
96+
// XCR0 (Extended Control Register 0) bits for XSAVE features
97+
pub(crate) const XCR0_X87: u64 = 1 << 0; // x87 FPU state
98+
pub(crate) const XCR0_SSE: u64 = 1 << 1; // SSE state (XMM registers)
99+
pub(crate) const XCR0_AVX: u64 = 1 << 2; // AVX state (YMM registers)
94100
}
95101
}
96102

0 commit comments

Comments
 (0)