Skip to content

Commit c0047cc

Browse files
authored
Merge pull request #507 from nerdalert/osx-train
Get OSX support functioning for generate/train
2 parents 79fd81c + a96e19b commit c0047cc

File tree

4 files changed

+26
-35
lines changed

4 files changed

+26
-35
lines changed

api-server/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ go.work.sum
2323

2424
# binary
2525
api-server
26+
ilab-api-server
2627

2728
# app specific
2829
logs/

api-server/README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,34 @@ go mod download
2121

2222
### Run the Server
2323

24+
### Build
25+
26+
```bash
27+
go build -o ilab-api-server
28+
```
29+
2430
#### For macOS with Metal (MPS)
2531

2632
```bash
27-
go run main.go --base-dir /path/to/base-dir --taxonomy-path /path/to/taxonomy --osx
33+
./ilab-api-server --base-dir /path/to/base-dir --taxonomy-path /path/to/taxonomy --osx --pipeline simple
34+
# Or the full pipeline
35+
./ilab-api-server --base-dir /path/to/base-dir --taxonomy-path /path/to/taxonomy --osx --pipeline full
2836
```
2937

3038
#### For CUDA-enabled environments
3139

40+
Because the device type is CUDA, only the `accelerated` pipeline is available, and it is set as the default.
41+
3242
```bash
33-
go run main.go --base-dir /path/to/base-dir --taxonomy-path /path/to/taxonomy --cuda
43+
./ilab-api-server --base-dir /path/to/base-dir --taxonomy-path /path/to/taxonomy --cuda
3444
```
3545

3646
#### For a RHEL AI machine
3747

38-
- If you're operating on a Red Hat Enterprise Linux AI (RHEL AI) machine, and the ilab binary is already available in your $PATH, you don't need to specify the --base-dir. Additionally, pass CUDA support with `--cuda`.
48+
- If you're running on a Red Hat Enterprise Linux AI (RHEL AI) machine and the `ilab` binary is already available in your `$PATH`, you don't need to specify `--base-dir`. Enable CUDA support with `--cuda`. The `accelerated` pipeline is the only option here and is also the default.
3949

4050
```bash
41-
go run main.go --taxonomy-path ~/.local/share/instructlab/taxonomy/ --rhelai --cuda
51+
./ilab-api-server --taxonomy-path ~/.local/share/instructlab/taxonomy/ --rhelai --cuda
4252
```
4353

4454
The `--rhelai` flag indicates that the ilab binary is available in the system's $PATH and does not require a virtual environment.
@@ -66,10 +76,10 @@ After this, we can cleanup our temp directory as it is no longer required: `rm -
6676

6777
### Example command with paths
6878

69-
Here's an example command for running the server on a macOS machine with Metal support:
79+
Here's an example command for running the server on a macOS machine with Metal support and debugging enabled:
7080

7181
```bash
72-
go run main.go --base-dir /Users/user/code/instructlab --taxonomy-path ~/.local/share/instructlab/taxonomy/ --osx
82+
./ilab-api-server --base-dir /Users/<USERNAME>/<PATH_TO_ILAB>/instructlab/ --taxonomy-path ~/.local/share/instructlab/taxonomy/ --pipeline simple --osx --debug
7383
```
7484

7585
## API Documentation

api-server/jobs.db

12 KB
Binary file not shown.

api-server/main.go

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -509,40 +509,20 @@ func (srv *ILabServer) startTrainJob(modelName, branchName string, epochs *int)
509509
srv.log.Info("No epochs specified; using default number of epochs.")
510510
}
511511

512-
// Additional logic if pipelineType == "simple" (and not rhelai)
513512
if srv.pipelineType == "simple" && !srv.rhelai {
514-
homeDir, err := os.UserHomeDir()
515-
if err != nil {
516-
return "", fmt.Errorf("failed to get user home directory: %v", err)
517-
}
518-
datasetDir := filepath.Join(homeDir, ".local", "share", "instructlab", "datasets")
519-
520-
// Copy the latest knowledge_train_msgs_*.jsonl => train_gen.jsonl
521-
latestTrainFile, err := srv.findLatestFileWithPrefix(datasetDir, "knowledge_train_msgs_")
522-
if err != nil {
523-
return "", fmt.Errorf("failed to find knowledge_train_msgs_*.jsonl file: %v", err)
524-
}
525-
trainGenPath := filepath.Join(datasetDir, "train_gen.jsonl")
526-
if err := srv.overwriteCopy(latestTrainFile, trainGenPath); err != nil {
527-
return "", fmt.Errorf("failed to copy %s to %s: %v", latestTrainFile, trainGenPath, err)
528-
}
513+
// TODO: This works on RHEL but not with ilab from main. --model-path seems to accept only the repo/name here, not the full path. Commented out for now.
514+
//homeDir, err := os.UserHomeDir()
515+
//if err != nil {
516+
// return "", fmt.Errorf("failed to get user home directory: %v", err)
517+
//}
518+
//datasetDir := filepath.Join(homeDir, ".local", "share", "instructlab", "datasets")
529519

530-
// Copy the latest test_ggml-model-*.jsonl => test_gen.jsonl
531-
latestTestFile, err := srv.findLatestFileWithPrefix(datasetDir, "test_ggml-model")
532-
if err != nil {
533-
return "", fmt.Errorf("failed to find test_ggml-model*.jsonl file: %v", err)
534-
}
535-
testGenPath := filepath.Join(datasetDir, "test_gen.jsonl")
536-
if err := srv.overwriteCopy(latestTestFile, testGenPath); err != nil {
537-
return "", fmt.Errorf("failed to copy %s to %s: %v", latestTestFile, testGenPath, err)
538-
}
539-
540-
// Reset cmdArgs to a simpler set
541520
cmdArgs = []string{
542521
"model", "train",
543522
"--pipeline", srv.pipelineType,
544-
fmt.Sprintf("--data-path=%s", datasetDir),
545-
fmt.Sprintf("--model-path=%s", fullModelPath),
523+
"--optimize-memory",
524+
//fmt.Sprintf("--data-path=%s", datasetDir), // Leaving commented out for now until the above todo is resolved.
525+
fmt.Sprintf("--model-path=%s", modelName),
546526
}
547527
if srv.isOSX {
548528
cmdArgs = append(cmdArgs, "--device=mps")

0 commit comments

Comments
 (0)