diff --git a/cmd/cmd.go b/cmd/cmd.go index 04d7cc8044..18d3cac169 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/cmd/bundle" "github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" @@ -37,6 +38,7 @@ func New() *cobra.Command { cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) + cli.AddCommand(labs.New()) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) diff --git a/cmd/internal/test.go b/cmd/internal/test.go new file mode 100644 index 0000000000..19fe8b30d2 --- /dev/null +++ b/cmd/internal/test.go @@ -0,0 +1,21 @@ +package internal + +import ( + "bytes" + "context" + + "github.com/databricks/cli/cmd" +) + +func RunGetOutput(ctx context.Context, args ...string) ([]byte, error) { + root := cmd.New() + args = append(args, "--log-level", "debug") + root.SetArgs(args) + var buf bytes.Buffer + root.SetOut(&buf) + err := root.ExecuteContext(ctx) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/cmd/labs/feature/all.go b/cmd/labs/feature/all.go new file mode 100644 index 0000000000..5b4af962e8 --- /dev/null +++ b/cmd/labs/feature/all.go @@ -0,0 +1,37 @@ +package feature + +import ( + "context" + "fmt" + "os" + "path/filepath" +) + +func LoadAll(ctx context.Context) (features []*Feature, err error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + labsDir, err := os.ReadDir(filepath.Join(home, ".databricks", "labs")) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, err + } + for _, v := range labsDir { + if !v.IsDir() { + continue + } + feature, err := NewFeature(v.Name()) + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Name(), err) + } + err = feature.loadMetadata() + if err != nil { + return nil, fmt.Errorf("%s metadata: %w", v.Name(), err) + } + features = append(features, feature) + } + return features, nil +} diff --git a/cmd/labs/feature/feature.go b/cmd/labs/feature/feature.go new file mode 100644 index 0000000000..5da8fefce9 --- /dev/null +++ b/cmd/labs/feature/feature.go @@ -0,0 +1,198 @@ +package feature + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "github.com/databricks/cli/libs/python" + "gopkg.in/yaml.v2" +) + +type Feature struct { + Name string `json:"name"` + Context string `json:"context,omitempty"` // auth context + Description string `json:"description"` + Hooks struct { + Install string `json:"install,omitempty"` + Uninstall string `json:"uninstall,omitempty"` + } `json:"hooks,omitempty"` + Entrypoint string `json:"entrypoint"` + Commands []struct { + Name string `json:"name"` + Description string `json:"description"` + Flags []struct { + Name string `json:"name"` + Description string `json:"description"` + } `json:"flags,omitempty"` + } `json:"commands,omitempty"` + + version string + path string + checkout *git.Repository +} + +func NewFeature(name string) (*Feature, error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + version := "latest" + split := strings.Split(name, "@") + if len(split) > 2 { + return nil, fmt.Errorf("invalid coordinates: %s", name) + } + if len(split) == 2 { + name = split[0] + version = split[1] + } + path := filepath.Join(home, ".databricks", "labs", name) + checkout, err := git.NewRepository(path) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + return &Feature{ + Name: name, + path: path, + version: version, + checkout: checkout, + }, nil +} + +type release struct { + TagName string `json:"tag_name"` + Draft bool `json:"draft"` + Prerelease bool `json:"prerelease"` + PublishedAt time.Time `json:"published_at"` +} + +func (i *Feature) loadMetadata() error { + raw, err := os.ReadFile(filepath.Join(i.path, "labs.yml")) + if err != nil { + return fmt.Errorf("read labs.yml: %w", err) + } + err = yaml.Unmarshal(raw, i) + if err != nil { + return fmt.Errorf("parse labs.yml: %w", err) + } + return nil +} + +func (i *Feature) fetchLatestVersion(ctx context.Context) (*release, error) { + var tags []release + url := fmt.Sprintf("https://api.github.com/repos/databrickslabs/%s/releases", i.Name) + err := httpCall(ctx, url, &tags) + if err != nil { + return nil, err + } + return &tags[0], nil +} + +func (i *Feature) requestedVersion(ctx context.Context) (string, error) { + if i.version == "latest" { + release, err := i.fetchLatestVersion(ctx) + if err != nil { + return "", err + } + return release.TagName, nil + } + return i.version, nil +} + +func (i *Feature) Install(ctx context.Context) error { + if i.hasFile(".git/HEAD") { + curr, err := process.Background(ctx, []string{ + "git", "tag", "--points-at", "HEAD", + }, process.WithDir(i.path)) + if err != nil { + return err + } + return fmt.Errorf("%s (%s) is already installed", i.Name, curr) + } + url := fmt.Sprintf("https://github.com/databrickslabs/%s", i.Name) + version, err := i.requestedVersion(ctx) + if err != nil { + return err + } + log.Infof(ctx, "Installing %s (%s) into %s", url, version, i.path) + err = git.Clone(ctx, url, version, i.path) + if err != nil { + return err + } + err = i.loadMetadata() + if err != nil { + return fmt.Errorf("labs.yml: %w", err) + } + if i.isPython() { + err := i.installPythonTool(ctx) + if err != nil { + return err + } + } + return nil +} + +const CacheDir = ".databricks" + +func (i *Feature) Run(ctx context.Context, raw []byte) error { + // raw is a JSON-encoded payload that holds things like command name and flags + return i.forwardPython(ctx, filepath.Join(i.path, i.Entrypoint), string(raw)) +} + +func (i *Feature) hasFile(name string) bool { + _, err := os.Stat(filepath.Join(i.path, name)) + return err == nil +} + +func (i *Feature) isPython() bool { + return i.hasFile("setup.py") || i.hasFile("pyproject.toml") +} + +func (i *Feature) venvBinDir() string { + return filepath.Join(i.path, CacheDir, "bin") +} + +func (i *Feature) forwardPython(ctx context.Context, pythonArgs ...string) error { + args := []string{filepath.Join(i.venvBinDir(), "python")} + args = append(args, pythonArgs...) + return process.Forwarded(ctx, args, + process.WithDir(i.path), // we may need to skip it for install step + process.WithEnv("PYTHONPATH", i.path)) +} + +func (i *Feature) installPythonTool(ctx context.Context) error { + pythons, err := python.DetectInterpreters(ctx) + if err != nil { + return err + } + interpreter := pythons.Latest() + log.Debugf(ctx, "Creating Python %s virtual environment in %s", interpreter.Version, i.path) + _, err = process.Background(ctx, []string{ + interpreter.Binary, "-m", "venv", CacheDir, + }, process.WithDir(i.path)) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + log.Debugf(ctx, "Installing dependencies via PIP") + venvPip := filepath.Join(i.venvBinDir(), "pip") + _, err = process.Background(ctx, []string{ + venvPip, "install", ".", + }, process.WithDir(i.path)) + if err != nil { + return fmt.Errorf("pip install: %w", err) + } + if i.Hooks.Install != "" { + installer := filepath.Join(i.path, i.Hooks.Install) + err = i.forwardPython(ctx, installer) + if err != nil { + return fmt.Errorf("%s: %w", i.Hooks.Install, err) + } + } + return nil +} diff --git a/cmd/labs/feature/http_call.go b/cmd/labs/feature/http_call.go new file mode 100644 index 0000000000..d64a72f379 --- /dev/null +++ b/cmd/labs/feature/http_call.go @@ -0,0 +1,29 @@ +package feature + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +func httpCall(ctx context.Context, url string, response any) error { + res, err := http.Get(url) + if err != nil { + return err + } + if res.StatusCode >= 400 { + return fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + raw, err := io.ReadAll(res.Body) + if err != nil { + return err + } + err = json.Unmarshal(raw, response) + if err != nil { + return err + } + return nil +} diff --git a/cmd/labs/install.go b/cmd/labs/install.go new file mode 100644 index 0000000000..6718764fa3 --- /dev/null +++ b/cmd/labs/install.go @@ -0,0 +1,33 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/feature" + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newInstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "install NAME", + Short: "Install a feature", + Args: cobra.ExactArgs(1), + PreRunE: root.MustWorkspaceClient, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + // TODO: context can be on both command and feature level + err := root.MustWorkspaceClient(cmd, args) + if err != nil { + return err + } + // TODO: add account-level init as well + w := root.WorkspaceClient(cmd.Context()) + propagateEnvConfig(w.Config) + + state, err := feature.NewFeature(args[0]) + if err != nil { + return err + } + return state.Install(ctx) + }, + } +} diff --git a/cmd/labs/install_test.go b/cmd/labs/install_test.go new file mode 100644 index 0000000000..724c44e591 --- /dev/null +++ b/cmd/labs/install_test.go @@ -0,0 +1,15 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/cmd/internal" + "github.com/stretchr/testify/assert" +) + +func TestInstallDbx(t *testing.T) { + ctx := context.Background() + _, err := internal.RunGetOutput(ctx, "labs", "install", "dbx@metadata", "--profile", "bogdan") + assert.NoError(t, err) +} diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go new file mode 100644 index 0000000000..f6a6d524cc --- /dev/null +++ b/cmd/labs/labs.go @@ -0,0 +1,131 @@ +package labs + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/databricks/cli/cmd/labs/feature" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "labs", + Short: "Databricks Labs features", + Long: `Manage experimental Databricks Labs apps`, + } + + // TODO: this should be on the top CLI level + cmd.AddGroup(&cobra.Group{ + ID: "labs", + Title: "Databricks Labs", + }) + + cmd.AddCommand( + newListCommand(), + newInstallCommand(), + &cobra.Command{ + Use: "py", + Short: "...", + RunE: func(cmd *cobra.Command, args []string) error { + return nil + }, + }, + ) + + err := infuse(cmd) + if err != nil { + panic(err) + } + + return cmd +} + +type commandInput struct { + Command string `json:"command"` + Flags map[string]any `json:"flags"` + OutputType string `json:"output_type"` +} + +func propagateEnvConfig(cfg *config.Config) error { + for _, a := range config.ConfigAttributes { + if a.IsZero(cfg) { + continue + } + for _, ev := range a.EnvVars { + err := os.Setenv(ev, a.GetString(cfg)) + if err != nil { + return fmt.Errorf("set %s: %w", a.Name, err) + } + } + } + return nil +} + +func infuse(cmd *cobra.Command) error { + ctx := cmd.Context() + all, err := feature.LoadAll(ctx) + if err != nil { + return err + } + for _, f := range all { + group := &cobra.Command{ + Use: f.Name, + Short: f.Description, + GroupID: "labs", + } + cmd.AddCommand(group) + for _, v := range f.Commands { + l := v + definedFlags := v.Flags + vcmd := &cobra.Command{ + Use: v.Name, + Short: v.Description, + RunE: func(cmd *cobra.Command, args []string) error { + flags := cmd.Flags() + if f.Context == "workspace" { + // TODO: context can be on both command and feature level + err = root.MustWorkspaceClient(cmd, args) + if err != nil { + return err + } + // TODO: add account-level init as well + w := root.WorkspaceClient(cmd.Context()) + propagateEnvConfig(w.Config) + } + ci := &commandInput{ + Command: l.Name, + Flags: map[string]any{}, + } + for _, flag := range definedFlags { + v, err := flags.GetString(flag.Name) + if err != nil { + return fmt.Errorf("get %s flag: %w", flag.Name, err) + } + ci.Flags[flag.Name] = v + } + logLevelFlag := flags.Lookup("log-level") + if logLevelFlag != nil { + ci.Flags["log_level"] = logLevelFlag.Value.String() + } + raw, err := json.Marshal(ci) + if err != nil { + return err + } + ctx := cmd.Context() + // actually execute the command + return f.Run(ctx, raw) + }, + } + flags := vcmd.Flags() + for _, flag := range definedFlags { + flags.String(flag.Name, "", flag.Description) + } + group.AddCommand(vcmd) + } + } + return nil +} diff --git a/cmd/labs/list.go b/cmd/labs/list.go new file mode 100644 index 0000000000..c8ea1455d6 --- /dev/null +++ b/cmd/labs/list.go @@ -0,0 +1,89 @@ +package labs + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +type labsMeta struct { + Name string `json:"name"` + Description string `json:"description"` + License string `json:"license"` +} + +func httpCall(ctx context.Context, url string, response any) error { + res, err := http.Get(url) + if err != nil { + return err + } + if res.StatusCode >= 400 { + return fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + raw, err := io.ReadAll(res.Body) + if err != nil { + return err + } + err = json.Unmarshal(raw, response) + if err != nil { + return err + } + return nil +} + +func newListCommand() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description + {{range .}}{{.Name}} {{.Description}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + var repositories []struct { + Name string `json:"name"` + Description string `json:"description"` + Fork bool `json:"fork"` + Arcived bool `json:"archived"` + License struct { + Name string `json:"name"` + } `json:"license"` + } + err := httpCall(ctx, + "https://api.github.com/users/databrickslabs/repos", + &repositories) + if err != nil { + return err + } + info := []labsMeta{} + for _, v := range repositories { + if v.Arcived { + continue + } + if v.Fork { + continue + } + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." + } + info = append(info, labsMeta{ + Name: v.Name, + Description: description, + License: v.License.Name, + }) + } + return cmdio.Render(ctx, info) + }, + } +} diff --git a/libs/process/background.go b/libs/process/background.go new file mode 100644 index 0000000000..f061568072 --- /dev/null +++ b/libs/process/background.go @@ -0,0 +1,32 @@ +package process + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func Background(ctx context.Context, args []string, opts ...execOption) (string, error) { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "running: %s", commandStr) + cmd := exec.CommandContext(ctx, args[0], args[1:]...) + stdout := &bytes.Buffer{} + cmd.Stdin = os.Stdin + cmd.Stdout = stdout + cmd.Stderr = stdout + for _, o := range opts { + err := o(cmd) + if err != nil { + return "", err + } + } + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("%s: %s", commandStr, stdout.String()) + } + return strings.TrimSpace(stdout.String()), nil +} diff --git a/libs/process/forwarded.go b/libs/process/forwarded.go new file mode 100644 index 0000000000..f408290816 --- /dev/null +++ b/libs/process/forwarded.go @@ -0,0 +1,47 @@ +package process + +import ( + "context" + "io" + "os" + "os/exec" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func Forwarded(ctx context.Context, args []string, opts ...execOption) error { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "starting: %s", commandStr) + cmd := exec.CommandContext(ctx, args[0], args[1:]...) + + // make sure to sync on writing to stdout + reader, writer := io.Pipe() + go io.CopyBuffer(os.Stdout, reader, make([]byte, 128)) + defer reader.Close() + defer writer.Close() + cmd.Stdout = writer + cmd.Stderr = writer + + // apply common options + for _, o := range opts { + err := o(cmd) + if err != nil { + return err + } + } + + stdin, err := cmd.StdinPipe() + if err != nil { + return err + } + go io.CopyBuffer(stdin, os.Stdin, make([]byte, 128)) + defer stdin.Close() + + err = cmd.Start() + if err != nil { + return err + } + + return cmd.Wait() +} diff --git a/libs/process/opts.go b/libs/process/opts.go new file mode 100644 index 0000000000..7aebfd3862 --- /dev/null +++ b/libs/process/opts.go @@ -0,0 +1,39 @@ +package process + +import ( + "fmt" + "os" + "os/exec" +) + +type execOption func(*exec.Cmd) error + +func WithEnv(key, value string) execOption { + return func(c *exec.Cmd) error { + if c.Env == nil { + c.Env = os.Environ() + } + v := fmt.Sprintf("%s=%s", key, value) + c.Env = append(c.Env, v) + return nil + } +} + +func WithEnvs(envs map[string]string) execOption { + return func(c *exec.Cmd) error { + for k, v := range envs { + err := WithEnv(k, v)(c) + if err != nil { + return err + } + } + return nil + } +} + +func WithDir(dir string) execOption { + return func(c *exec.Cmd) error { + c.Dir = dir + return nil + } +} diff --git a/libs/python/interpreters.go b/libs/python/interpreters.go new file mode 100644 index 0000000000..633946f849 --- /dev/null +++ b/libs/python/interpreters.go @@ -0,0 +1,95 @@ +package python + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "golang.org/x/mod/semver" +) + +type Interpreter struct { + Version string + Binary string +} + +func (i Interpreter) String() string { + return fmt.Sprintf("%s (%s)", i.Version, i.Binary) +} + +type AllInterpreters []Interpreter + +func (a AllInterpreters) Latest() Interpreter { + return a[len(a)-1] +} + +func DetectInterpreters(ctx context.Context) (AllInterpreters, error) { + found := AllInterpreters{} + paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) + seen := map[string]bool{} + for _, prefix := range paths { + entries, err := os.ReadDir(prefix) + if os.IsNotExist(err) { + // some directories in $PATH may not exist + continue + } + if err != nil { + return nil, fmt.Errorf("listing %s: %w", prefix, err) + } + for _, v := range entries { + if v.IsDir() { + continue + } + if strings.Contains(v.Name(), "-") { + // skip python3-config, python3.10-config, etc + continue + } + if !strings.HasPrefix(v.Name(), "python3") { + continue + } + bin := filepath.Join(prefix, v.Name()) + resolved, err := filepath.EvalSymlinks(bin) + if err != nil { + log.Debugf(ctx, "cannot resolve symlink for %s: %s", bin, resolved) + continue + } + if seen[resolved] { + continue + } + seen[resolved] = true + out, err := process.Background(ctx, []string{resolved, "--version"}) + if err != nil { + // TODO: skip-and-log or return? + return nil, err + } + words := strings.Split(out, " ") + if words[0] != "Python" { + continue + } + lastWord := words[len(words)-1] + version := semver.Canonical("v" + lastWord) + found = append(found, Interpreter{ + Version: version, + Binary: bin, + }) + } + } + if len(found) == 0 { + return nil, fmt.Errorf("no python3 executables found") + } + sort.Slice(found, func(i, j int) bool { + a := found[i].Version + b := found[j].Version + cmp := semver.Compare(a, b) + if cmp != 0 { + return cmp < 0 + } + return a < b + }) + return found, nil +} diff --git a/libs/python/interpreters_test.go b/libs/python/interpreters_test.go new file mode 100644 index 0000000000..d099703197 --- /dev/null +++ b/libs/python/interpreters_test.go @@ -0,0 +1,17 @@ +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAtLeastOnePythonInstalled(t *testing.T) { + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + a := all.Latest() + t.Logf("latest is: %s", a) + assert.True(t, len(all) > 0) +} diff --git a/libs/python/venv.go b/libs/python/venv.go new file mode 100644 index 0000000000..d623866065 --- /dev/null +++ b/libs/python/venv.go @@ -0,0 +1,33 @@ +package python + +import ( + "errors" + "os" + "path/filepath" +) + +// DetectVirtualEnv scans direct subfolders in path to get a valid +// Virtual Environment installation, that is marked by pyvenv.cfg file. +// +// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/ +func DetectVirtualEnvPath(path string) (string, error) { + files, err := os.ReadDir(path) + if err != nil { + return "", err + } + for _, v := range files { + if !v.IsDir() { + continue + } + candidate := filepath.Join(path, v.Name()) + _, err = os.Stat(filepath.Join(candidate, "pyvenv.cfg")) + if errors.Is(err, os.ErrNotExist) { + continue + } + if err != nil { + return "", err + } + return candidate, nil + } + return "", nil +}