campaigns: skip changeset spec creation for cached empty diffs (#397)

LawnGnome · web-flow · commit ff776f642c1b · 2020-11-25T15:39:14.000-08:00
* campaigns: skip changeset spec creation for cached empty diffs It's totally valid and normal for empty diffs to be created when executing campaign specs: sometimes you just don't want anything to change, even though the repo matched the initial query. When this happens, #313 added a check that prevents the changeset spec from being created, and prints a verbose mode message indicating that the repo was skipped: https://sourcegraph.com/github.com/sourcegraph/src-cli@d29ad54eff678d96fb7ebdf75ff95890dce6a1cf/-/blob/internal/campaigns/executor.go?utm_source=VSCode-1.1.0#L273-278 So far, so good. In #374, we made our empty diff handling even better by caching the empty diff: this means that we don't have to recalculate that nothing happened. Unfortunately, the check that exists in the cache miss code path to skip changeset spec creation doesn't exist in the cache hit code path, which means that on subsequent applications of the campaign, a changeset spec with an empty diff will be uploaded, and gitserver will ultimately be very grumpy. By applying the same logic to the cache hit code path, we can filter out these problematic changeset specs. * Extend integration tests to cover the empty diff bug. This also means that we run all the integration tests with cold and warm caches, which should help pick up these issues in future.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ All notable changes to `src-cli` are documented in this file.
 ### Fixed
 
 - The evaluation of the [`repository.branch` attribute](https://docs.sourcegraph.com/campaigns/references/campaign_spec_yaml_reference#on-repository) has been fixed to actually cause the correct version of the repository to be used. [#393](https://github.com/sourcegraph/src-cli/pull/393)
+- Normally, when one or more repositories in a campaign generate an empty diff, a changeset spec isn't created. From src-cli 3.21.9 to 3.22.3, inclusive, re-running a campaign would result in an empty changeset spec being created by mistake if the empty changeset spec was in the execution cache, which would result in errors on Sourcegraph when applying the campaign. This has been fixed, and empty changeset specs in the cache are now treated the same way as uncached changeset specs that are empty: they are skipped, and a message is displayed in `-v` mode indicating the repo that was skipped. [#397](https://github.com/sourcegraph/src-cli/pull/397)
 
 ### Removed
 
diff --git a/internal/campaigns/executor.go b/internal/campaigns/executor.go
@@ -210,9 +210,20 @@ func (x *executor) do(ctx context.Context, task *Task) (err error) {
 				diff = result.Commits[0].Diff
 			}
 
+			status.Cached = true
+
+			// If the cached result resulted in an empty diff, we don't need to
+			// add it to the list of specs that are displayed to the user and
+			// sent to the server. Instead, we can just report that the task is
+			// complete and move on.
+			if len(diff) == 0 {
+				status.FinishedAt = time.Now()
+				x.updateTaskStatus(task, status)
+				return
+			}
+
 			spec := createChangesetSpec(task, diff, x.features)
 
-			status.Cached = true
 			status.ChangesetSpec = spec
 			status.FinishedAt = time.Now()
 			x.updateTaskStatus(task, status)
diff --git a/internal/campaigns/executor_test.go b/internal/campaigns/executor_test.go
@@ -4,6 +4,7 @@ import (
 	"archive/zip"
 	"bytes"
 	"context"
+	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"log"
@@ -14,6 +15,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 
@@ -116,6 +118,21 @@ func TestExecutor_Integration(t *testing.T) {
 				srcCLIRepo.ID: []string{"main.go", "modified-main.go.md", "added-modified-main.go.md"},
 			},
 		},
+		{
+			name:  "empty",
+			repos: []*graphql.Repository{srcCLIRepo},
+			archives: []mockRepoArchive{
+				{repo: srcCLIRepo, files: map[string]string{
+					"README.md": "# Welcome to the README\n",
+					"main.go":   "package main\n\nfunc main() {\n\tfmt.Println(     \"Hello World\")\n}\n",
+				}},
+			},
+			steps: []Step{
+				{Run: `true`, Container: "doesntmatter:13"},
+			},
+			// No changesets should be generated.
+			wantFilesChanged: map[string][]string{},
+		},
 	}
 
 	for _, tc := range tests {
@@ -132,9 +149,10 @@ func TestExecutor_Integration(t *testing.T) {
 			}
 			defer os.Remove(testTempDir)
 
+			cache := newInMemoryExecutionCache()
 			creator := &WorkspaceCreator{dir: testTempDir, client: client}
 			opts := ExecutorOpts{
-				Cache:       &ExecutionNoOpCache{},
+				Cache:       cache,
 				Creator:     creator,
 				TempDir:     testTempDir,
 				Parallelism: runtime.GOMAXPROCS(0),
@@ -144,63 +162,98 @@ func TestExecutor_Integration(t *testing.T) {
 				opts.Timeout = 30 * time.Second
 			}
 
-			executor := newExecutor(opts, client, featuresAllEnabled())
+			// execute contains the actual logic running the tasks on an
+			// executor. We'll run this multiple times to cover both the cache
+			// and non-cache code paths.
+			execute := func() {
+				executor := newExecutor(opts, client, featuresAllEnabled())
 
-			template := &ChangesetTemplate{}
-			for _, r := range tc.repos {
-				executor.AddTask(r, tc.steps, template)
-			}
-
-			executor.Start(context.Background())
-			specs, err := executor.Wait()
-			if tc.wantErrInclude == "" && err != nil {
-				t.Fatalf("execution failed: %s", err)
-			}
-			if err != nil && !strings.Contains(err.Error(), tc.wantErrInclude) {
-				t.Errorf("wrong error. have=%q want included=%q", err, tc.wantErrInclude)
-			}
-			if tc.wantErrInclude != "" {
-				return
-			}
-
-			if have, want := len(specs), len(tc.wantFilesChanged); have != want {
-				t.Fatalf("wrong number of changeset specs. want=%d, have=%d", want, have)
-			}
-
-			for _, spec := range specs {
-				if have, want := len(spec.Commits), 1; have != want {
-					t.Fatalf("wrong number of commits. want=%d, have=%d", want, have)
+				template := &ChangesetTemplate{}
+				for _, r := range tc.repos {
+					executor.AddTask(r, tc.steps, template)
 				}
 
-				fileDiffs, err := diff.ParseMultiFileDiff([]byte(spec.Commits[0].Diff))
-				if err != nil {
-					t.Fatalf("failed to parse diff: %s", err)
+				executor.Start(context.Background())
+				specs, err := executor.Wait()
+				if tc.wantErrInclude == "" && err != nil {
+					t.Fatalf("execution failed: %s", err)
 				}
-
-				wantFiles, ok := tc.wantFilesChanged[spec.BaseRepository]
-				if !ok {
-					t.Fatalf("unexpected file changes in repo %s", spec.BaseRepository)
+				if err != nil && !strings.Contains(err.Error(), tc.wantErrInclude) {
+					t.Errorf("wrong error. have=%q want included=%q", err, tc.wantErrInclude)
+				}
+				if tc.wantErrInclude != "" {
+					return
 				}
 
-				if have, want := len(fileDiffs), len(wantFiles); have != want {
-					t.Fatalf("repo %s: wrong number of fileDiffs. want=%d, have=%d", spec.BaseRepository, want, have)
+				if have, want := len(specs), len(tc.wantFilesChanged); have != want {
+					t.Fatalf("wrong number of changeset specs. want=%d, have=%d", want, have)
 				}
 
-				diffsByName := map[string]*diff.FileDiff{}
-				for _, fd := range fileDiffs {
-					if fd.NewName == "/dev/null" {
-						diffsByName[fd.OrigName] = fd
-					} else {
-						diffsByName[fd.NewName] = fd
+				for _, spec := range specs {
+					if have, want := len(spec.Commits), 1; have != want {
+						t.Fatalf("wrong number of commits. want=%d, have=%d", want, have)
+					}
+
+					fileDiffs, err := diff.ParseMultiFileDiff([]byte(spec.Commits[0].Diff))
+					if err != nil {
+						t.Fatalf("failed to parse diff: %s", err)
+					}
+
+					wantFiles, ok := tc.wantFilesChanged[spec.BaseRepository]
+					if !ok {
+						t.Fatalf("unexpected file changes in repo %s", spec.BaseRepository)
 					}
-				}
 
-				for _, file := range wantFiles {
-					if _, ok := diffsByName[file]; !ok {
-						t.Errorf("%s was not changed (diffsByName=%#v)", file, diffsByName)
+					if have, want := len(fileDiffs), len(wantFiles); have != want {
+						t.Fatalf("repo %s: wrong number of fileDiffs. want=%d, have=%d", spec.BaseRepository, want, have)
 					}
+
+					diffsByName := map[string]*diff.FileDiff{}
+					for _, fd := range fileDiffs {
+						if fd.NewName == "/dev/null" {
+							diffsByName[fd.OrigName] = fd
+						} else {
+							diffsByName[fd.NewName] = fd
+						}
+					}
+
+					for _, file := range wantFiles {
+						if _, ok := diffsByName[file]; !ok {
+							t.Errorf("%s was not changed (diffsByName=%#v)", file, diffsByName)
+						}
+					}
+				}
+			}
+
+			verifyCache := func() {
+				want := len(tc.repos)
+				if tc.wantErrInclude != "" {
+					want = 0
+				}
+
+				// Verify that there is a cache entry for each repo.
+				if have := cache.size(); have != want {
+					t.Errorf("unexpected number of cache entries: have=%d want=%d cache=%+v", have, want, cache)
 				}
 			}
+
+			// Sanity check, since we're going to be looking at the side effects
+			// on the cache.
+			if cache.size() != 0 {
+				t.Fatalf("unexpectedly hot cache: %+v", cache)
+			}
+
+			// Run with a cold cache.
+			t.Run("cold cache", func(t *testing.T) {
+				execute()
+				verifyCache()
+			})
+
+			// Run with a warm cache.
+			t.Run("warm cache", func(t *testing.T) {
+				execute()
+				verifyCache()
+			})
 		})
 	}
 }
@@ -254,3 +307,73 @@ func newZipArchivesMux(t *testing.T, callback http.HandlerFunc, archives ...mock
 
 	return mux
 }
+
+// inMemoryExecutionCache provides an in-memory cache for testing purposes.
+type inMemoryExecutionCache struct {
+	cache map[string][]byte
+	mu    sync.RWMutex
+}
+
+func newInMemoryExecutionCache() *inMemoryExecutionCache {
+	return &inMemoryExecutionCache{
+		cache: make(map[string][]byte),
+	}
+}
+
+func (c *inMemoryExecutionCache) size() int {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return len(c.cache)
+}
+
+func (c *inMemoryExecutionCache) Get(ctx context.Context, key ExecutionCacheKey) (*ChangesetSpec, error) {
+	k, err := key.Key()
+	if err != nil {
+		return nil, err
+	}
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if raw, ok := c.cache[k]; ok {
+		var spec ChangesetSpec
+		if err := json.Unmarshal(raw, &spec); err != nil {
+			return nil, err
+		}
+
+		return &spec, nil
+	}
+	return nil, nil
+}
+
+func (c *inMemoryExecutionCache) Set(ctx context.Context, key ExecutionCacheKey, spec *ChangesetSpec) error {
+	k, err := key.Key()
+	if err != nil {
+		return err
+	}
+
+	v, err := json.Marshal(spec)
+	if err != nil {
+		return err
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.cache[k] = v
+	return nil
+}
+
+func (c *inMemoryExecutionCache) Clear(ctx context.Context, key ExecutionCacheKey) error {
+	k, err := key.Key()
+	if err != nil {
+		return err
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	delete(c.cache, k)
+	return nil
+}