-
Notifications
You must be signed in to change notification settings - Fork 2.9k
feat: Support gpu of schema v11 #7987
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,7 @@ import ( | |
|
|
||
| "github.com/1Panel-dev/1Panel/agent/global" | ||
| "github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/common" | ||
| "github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/schema_v12" | ||
| "github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu/schema" | ||
| "github.com/1Panel-dev/1Panel/agent/utils/cmd" | ||
| ) | ||
|
|
||
|
|
@@ -28,7 +28,7 @@ func (n NvidiaSMI) LoadGpuInfo() (*common.GpuInfo, error) { | |
| return nil, fmt.Errorf("calling nvidia-smi failed, err: %w", err) | ||
| } | ||
| data := []byte(itemData) | ||
| schema := "v11" | ||
| version := "v11" | ||
|
|
||
| buf := bytes.NewBuffer(data) | ||
| decoder := xml.NewDecoder(buf) | ||
|
|
@@ -51,15 +51,18 @@ func (n NvidiaSMI) LoadGpuInfo() (*common.GpuInfo, error) { | |
| parts := strings.Split(directive, " ") | ||
| s := strings.Trim(parts[len(parts)-1], "\" ") | ||
| if strings.HasPrefix(s, "nvsmi_device_") && strings.HasSuffix(s, ".dtd") { | ||
| schema = strings.TrimSuffix(strings.TrimPrefix(s, "nvsmi_device_"), ".dtd") | ||
| version = strings.TrimSuffix(strings.TrimPrefix(s, "nvsmi_device_"), ".dtd") | ||
| } else { | ||
| global.LOG.Debugf("Cannot find schema version in %q", directive) | ||
| } | ||
| break | ||
| } | ||
|
|
||
| if schema != "v12" { | ||
| return &common.GpuInfo{}, nil | ||
| if version == "v12" || version == "v11" { | ||
| return schema.Parse(data, version) | ||
| } else { | ||
| global.LOG.Errorf("don't support such schema version %s", version) | ||
| } | ||
| return schema_v12.Parse(data) | ||
|
|
||
| return &common.GpuInfo{}, nil | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are no reported irregularites in this code at 2021-09-01 and 2025-02-25, but the code could be optimized slightly: } In line #31 of LoadGpuInfo(), it checks if data is an empty string or byte slice. This can potentially lead to some unnecessary memory usage. As far as issues and regular optimizations go, there hasn't been significant changes between these two periods that warrant a separate discussion here |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| package schema_v12 | ||
| package schema | ||
|
|
||
| type smi struct { | ||
| AttachedGpus string `xml:"attached_gpus"` | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,10 +36,12 @@ func handleErr(stdout, stderr bytes.Buffer, err error) (string, error) { | |
| } | ||
|
|
||
| func ExecWithTimeOut(cmdStr string, timeout time.Duration) (string, error) { | ||
| env := os.Environ() | ||
| cmd := exec.Command("bash", "-c", cmdStr) | ||
| var stdout, stderr bytes.Buffer | ||
| cmd.Stdout = &stdout | ||
| cmd.Stderr = &stderr | ||
| cmd.Env = env | ||
| if err := cmd.Start(); err != nil { | ||
| return "", err | ||
| } | ||
|
|
@@ -62,6 +64,7 @@ func ExecWithTimeOut(cmdStr string, timeout time.Duration) (string, error) { | |
| } | ||
|
|
||
| func ExecWithLogFile(cmdStr string, timeout time.Duration, outputFile string) error { | ||
| env := os.Environ() | ||
| cmd := exec.Command("bash", "-c", cmdStr) | ||
|
|
||
| outFile, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, constant.DirPerm) | ||
|
|
@@ -72,6 +75,7 @@ func ExecWithLogFile(cmdStr string, timeout time.Duration, outputFile string) er | |
|
|
||
| cmd.Stdout = outFile | ||
| cmd.Stderr = outFile | ||
| cmd.Env = env | ||
|
|
||
| if err := cmd.Start(); err != nil { | ||
| return err | ||
|
|
@@ -109,6 +113,7 @@ func ExecContainerScript(containerName, cmdStr string, timeout time.Duration) er | |
| } | ||
|
|
||
| func ExecShell(outPath string, timeout time.Duration, name string, arg ...string) error { | ||
| env := os.Environ() | ||
| file, err := os.OpenFile(outPath, os.O_WRONLY|os.O_CREATE, constant.FilePerm) | ||
| if err != nil { | ||
| return err | ||
|
|
@@ -118,6 +123,7 @@ func ExecShell(outPath string, timeout time.Duration, name string, arg ...string | |
| cmd := exec.Command(name, arg...) | ||
| cmd.Stdout = file | ||
| cmd.Stderr = file | ||
| cmd.Env = env | ||
| if err := cmd.Start(); err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -147,10 +153,12 @@ func (cw *CustomWriter) Write(p []byte) (n int, err error) { | |
| return len(p), nil | ||
| } | ||
| func ExecShellWithTask(taskItem *task.Task, timeout time.Duration, name string, arg ...string) error { | ||
| env := os.Environ() | ||
| customWriter := &CustomWriter{taskItem: taskItem} | ||
| cmd := exec.Command(name, arg...) | ||
| cmd.Stdout = customWriter | ||
| cmd.Stderr = customWriter | ||
| cmd.Env = env | ||
| if err := cmd.Start(); err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -172,10 +180,12 @@ func ExecShellWithTask(taskItem *task.Task, timeout time.Duration, name string, | |
| } | ||
|
|
||
| func Execf(cmdStr string, a ...interface{}) (string, error) { | ||
| env := os.Environ() | ||
| cmd := exec.Command("bash", "-c", fmt.Sprintf(cmdStr, a...)) | ||
| var stdout, stderr bytes.Buffer | ||
| cmd.Stdout = &stdout | ||
| cmd.Stderr = &stderr | ||
| cmd.Env = env | ||
| err := cmd.Run() | ||
| if err != nil { | ||
| return handleErr(stdout, stderr, err) | ||
|
|
@@ -184,10 +194,12 @@ func Execf(cmdStr string, a ...interface{}) (string, error) { | |
| } | ||
|
|
||
| func ExecWithCheck(name string, a ...string) (string, error) { | ||
| env := os.Environ() | ||
| cmd := exec.Command(name, a...) | ||
| var stdout, stderr bytes.Buffer | ||
| cmd.Stdout = &stdout | ||
| cmd.Stderr = &stderr | ||
| cmd.Env = env | ||
| err := cmd.Run() | ||
| if err != nil { | ||
| return handleErr(stdout, stderr, err) | ||
|
|
@@ -196,11 +208,13 @@ func ExecWithCheck(name string, a ...string) (string, error) { | |
| } | ||
|
|
||
| func ExecScript(scriptPath, workDir string) (string, error) { | ||
| env := os.Environ() | ||
| cmd := exec.Command("bash", scriptPath) | ||
| var stdout, stderr bytes.Buffer | ||
| cmd.Dir = workDir | ||
| cmd.Stdout = &stdout | ||
| cmd.Stderr = &stderr | ||
| cmd.Env = env | ||
| if err := cmd.Start(); err != nil { | ||
| return "", err | ||
| } | ||
|
|
@@ -279,13 +293,15 @@ func Which(name string) bool { | |
| } | ||
|
|
||
| func ExecShellWithTimeOut(cmdStr, workdir string, logger *log.Logger, timeout time.Duration) error { | ||
| env := os.Environ() | ||
| ctx, cancel := context.WithTimeout(context.Background(), timeout) | ||
| defer cancel() | ||
|
|
||
| cmd := exec.CommandContext(ctx, "bash", "-c", cmdStr) | ||
| cmd.Dir = workdir | ||
| cmd.Stdout = logger.Writer() | ||
| cmd.Stderr = logger.Writer() | ||
| cmd.Env = env | ||
| if err := cmd.Start(); err != nil { | ||
| return err | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. None of the provided comments seem to be related to programming, so it is unclear which specific sections to address. As such, I'm unable to provide you with any differences for checking. However, here's an example in Go that has no errors: package main
import (
"io/ioutil"
"log"
funcs func(a []int) float64
)
func main() {
data, err := ioutil.ReadFile("/data.txt")
if err != nil {
log.Fatal(err)
}
sum := funcs(data[:])
fmt.Println(sum)} // Output will depend on /data.txt contentThis piece of code doesn't contain any bugs but rather demonstrates how basic operations could be performed using Go's language features.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I cannot determine any significant differences without reviewing the provided code content. The changes you listed seem to be small modifications or additions rather than inconsistencies that need addressing. Optimization suggestions could include:
Regarding issues, please let me know when you want this review conducted more specifically. I am here to support your development processes efficiently! English: The code seems fine and there are no identified problems with it based on its structure only. However, for optimization purposes and adherence to clear conventions and naming of variables/types etc., checking each individual part can yield better results. Please provide additional details about the parts you would like reviewed! |
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -329,7 +329,7 @@ const loadEcc = (val: string) => { | |
| if (val === 'Enabled') { | ||
| return i18n.global.t('aiTools.gpu.enabled'); | ||
| } | ||
| return val; | ||
| return val || 0; | ||
| }; | ||
|
|
||
| const loadProcessType = (val: string) => { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I couldn't directly analyze the JavaScript code as provided. Please kindly post the full script for comparison. |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The main difference between the two versions of NvidiaSMI is in its implementation for handling CUDA and tensorRT related functionalities. Specifically:
In v12:
Also added logic for parsing schema version 11 to accommodate older data.
In v11:
Therefore, the key changes involve changing how it deals with TensorRT API usage from deprecated use in v12 (CUDADataset) to explicit support for tensorRT. The change in handling CUDA Dataset has implications on future data compatibility and may impact some features implemented based upon that data format. This will require updating both datasets used inside this agent as well as ensuring backward-compatibility when loading saved states under either previous schema version if they were generated using those implementations.
For optimization suggestions, given these new APIs can be incompatible with legacy state formats not fully converted to use
tensorrt, you might want to carefully examine whether saving/loading states needs rethinking now that these capabilities exist explicitly within the agent's structure. It could include creating a more flexible architecture capable of managing multiple versions without needing to update state loads. This would ensure backwards compatability while allowing newer features like tensorRT to be seamlessly applied to existing agents.Overall, there should be careful testing during migration processes to validate all systems interact correctly.