Skip to content

Commit 6b97d72

Browse files
Use workspace manager to setup workspace instead of utils, make prompts short, only keep relevant logs
1 parent 4f08ff2 commit 6b97d72

17 files changed

+359
-323
lines changed

internal/assertions/filesystem_assertion/file_contents_assertion.go

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"errors"
55
"fmt"
66
"os"
7-
"path/filepath"
87
"strings"
98
"unicode"
109

@@ -15,44 +14,42 @@ type FileContentsAssertion struct {
1514
ExpectedContents string
1615
}
1716

18-
func (a FileContentsAssertion) Run(path string, logger *logger.Logger) error {
19-
_, err := os.Stat(path)
17+
func (a FileContentsAssertion) Run(absolutePath string, logger *logger.Logger, shortFilePathConverter func(string) string) error {
18+
_, err := os.Stat(absolutePath)
19+
shortFilePath := shortFilePathConverter(absolutePath)
2020

2121
if err != nil {
2222
if os.IsNotExist(err) {
23-
return fmt.Errorf("Expected file %s does not exist", path)
23+
return fmt.Errorf("Expected file %s does not exist", shortFilePath)
2424
}
2525

26-
return fmt.Errorf("Failed to check file %s: %v", path, err)
26+
return fmt.Errorf("Failed to check file %s: %v", shortFilePath, err)
2727
}
2828

29-
contents, err := os.ReadFile(path)
29+
contents, err := os.ReadFile(absolutePath)
3030

3131
if err != nil {
32-
return fmt.Errorf("Failed to read file %s: %v", path, err)
32+
return fmt.Errorf("Failed to read file %s: %v", shortFilePath, err)
3333
}
3434

3535
// Trim space from the right (File could have an extra \n: That's fine)
3636
fileContentsTrimmed := strings.TrimRightFunc(string(contents), unicode.IsSpace)
37-
fileName := filepath.Base(path)
3837

3938
if fileContentsTrimmed != a.ExpectedContents {
4039
logger.Plainf("Expected contents:")
41-
logger.WithAdditionalSecondaryPrefix(fileName, func() {
40+
logger.WithAdditionalSecondaryPrefix(shortFilePath, func() {
4241
logger.Plainf("%s", a.ExpectedContents)
4342
})
4443

4544
logger.Errorf("Actual contents:")
46-
logger.WithAdditionalSecondaryPrefix(fileName, func() {
45+
logger.WithAdditionalSecondaryPrefix(shortFilePath, func() {
4746
logger.Errorf("%s", fileContentsTrimmed)
4847
})
4948

5049
return errors.New("Expected file contents differ from actual contents")
5150
}
5251

53-
logger.Successf("✔ File %s exists with contents:", path)
54-
55-
logger.WithAdditionalSecondaryPrefix(filepath.Base(path), func() {
52+
logger.WithAdditionalSecondaryPrefix(shortFilePath, func() {
5653
logger.Successf("%s", fileContentsTrimmed)
5754
})
5855

internal/assertions/filesystem_assertion/file_does_not_exist_assertion.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,18 @@ import (
1212
type FileDoesNotExistAssertion struct {
1313
}
1414

15-
func (a FileDoesNotExistAssertion) Run(filePath string, logger *logger.Logger) error {
16-
_, err := os.Stat(filePath)
15+
func (a FileDoesNotExistAssertion) Run(absolutePath string, logger *logger.Logger, shortFilePathConverter func(string) string) error {
16+
_, err := os.Stat(absolutePath)
17+
shortFilePath := shortFilePathConverter(absolutePath)
1718

1819
if err == nil {
19-
return fmt.Errorf("Expected file %s to not exist, but it exists", filePath)
20+
return fmt.Errorf("Expected file %s to not exist, but it exists", shortFilePath)
2021
}
2122

2223
if !errors.Is(err, fs.ErrNotExist) {
23-
return fmt.Errorf("Failed to check existence of %s: %v", filePath, err)
24+
return fmt.Errorf("Failed to check existence of %s: %v", shortFilePath, err)
2425
}
2526

26-
logger.Successf("✔ File %s does not exist", filePath)
27+
logger.Successf("✔ File %s does not exist", shortFilePath)
2728
return nil
2829
}

internal/assertions/filesystem_assertion/filesystem_assertion.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,9 @@ package filesystem_assertion
33
import "github.com/codecrafters-io/tester-utils/logger"
44

55
type FileSystemAssertion interface {
6-
Run(filePath string, logger *logger.Logger) error
6+
// Run takes absFilePath, logger, and shortFilePathConverter
7+
// absFilePath is the absolute path of the file
8+
// logger is the logger used to emit the success/failure logs
9+
// shortFilePathConverter is a function that converts an absolute path to a short file path suitable for logging
10+
Run(absFilePath string, logger *logger.Logger, shortFilePathConverter func(string) string) error
711
}

internal/stage_test_advertise_read_tool.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ import (
1212

1313
func testAdvertiseReadTool(stageHarness *test_case_harness.TestCaseHarness) error {
1414
proxy_server.StartProxyServer(stageHarness)
15-
workspace_manager.BootstrapExecutableWorkspace(stageHarness)
1615
settings_manager.InitializeBypassPermissionSettings(stageHarness)
1716
stageHarness.Executable.TimeoutInMilliseconds = 30 * 1000
17+
workspaceManager := workspace_manager.NewWorkspaceManager()
18+
workspaceManager.BootstrapExecutableWorkspace(stageHarness)
1819

1920
prompt := utils.GetPromptWithGuardRailPrompt(
2021
[]string{

internal/stage_test_agent_loop.go

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ package internal
22

33
import (
44
"fmt"
5-
"path/filepath"
6-
"strings"
75

86
"github.com/codecrafters-io/claude-code-tester/internal/assertions/string_assertion"
97
"github.com/codecrafters-io/claude-code-tester/internal/settings_manager"
@@ -17,53 +15,56 @@ import (
1715

1816
func testAgentLoop(stageHarness *test_case_harness.TestCaseHarness) error {
1917
proxy_server.StartProxyServer(stageHarness)
20-
workspace_manager.BootstrapExecutableWorkspace(stageHarness)
2118
settings_manager.InitializeBypassPermissionSettings(stageHarness)
2219
stageHarness.Executable.TimeoutInMilliseconds = 30 * 1000
23-
workspaceDirPath := stageHarness.Executable.WorkingDir
2420

25-
appDirPath := filepath.Join(workspaceDirPath, "app")
26-
utils.MustCreateDirWithLogging(appDirPath, stageHarness.Logger)
21+
workspaceManager := workspace_manager.NewWorkspaceManager()
22+
workspaceManager.BootstrapExecutableWorkspace(stageHarness)
2723

28-
// Create supporting file
29-
extraFilePath := filepath.Join(appDirPath, random.RandomElementFromArray([]string{"chemical.py", "substance.py", "expiry.py", "duration.py"}))
30-
chemicalExpiryPeriod := random.RandomInt(6, 36) // Random value between 6 and 36 months
31-
extraFileContent := fmt.Sprintf("chemical_expiry_period = %d # months", chemicalExpiryPeriod)
32-
utils.MustCreateFileWithContentsWithLogger(extraFilePath, extraFileContent, stageHarness.Logger)
24+
mainFileName := random.RandomElementFromArray([]string{"main.py", "init.py", "start.py"})
25+
extraFileNameWithoutExtension := random.RandomElementFromArray([]string{"chemical", "substance", "expiry", "duration"})
26+
extraFileName := extraFileNameWithoutExtension + ".py"
27+
readmeFileName := "README.md"
3328

34-
// Create main file
35-
mainFilePath := filepath.Join(
36-
appDirPath,
37-
random.RandomElementFromArray([]string{"main.py", "init.py", "start.py"}),
38-
)
39-
40-
extraFileBaseNameWithoutExtension := strings.TrimSuffix(filepath.Base(extraFilePath), ".py")
41-
mainContent := fmt.Sprintf(`from %s import chemical_expiry_period
29+
mainFileContent := fmt.Sprintf(`from %s import chemical_expiry_period
4230
43-
def main():
44-
print(f"Chemical expiry period: {chemical_expiry_period} months")
31+
def main():
32+
print(f"Chemical expiry period: {chemical_expiry_period} months")
33+
34+
if __name__ == "__main__":
35+
main()
36+
`, extraFileNameWithoutExtension)
4537

46-
if __name__ == "__main__":
47-
main()
48-
`, extraFileBaseNameWithoutExtension)
38+
chemicalExpiryPeriod := random.RandomInt(6, 36)
4939

50-
utils.MustCreateFileWithContentsWithLogger(mainFilePath, mainContent, stageHarness.Logger)
51-
52-
// Create README.md
53-
readmePath := filepath.Join(workspaceDirPath, "README.md")
5440
readmeContent := fmt.Sprintf(`This is a simple python project.
55-
The starting point of this project is app/%s.`, filepath.Base(mainFilePath))
56-
utils.MustCreateFileWithContentsWithLogger(readmePath, readmeContent, stageHarness.Logger)
41+
- The starting point of this project is app/%s.
42+
- The file app/%s contains chemical properties.`, mainFileName, extraFileName)
5743

58-
expectedOutput := fmt.Sprintf("%d", chemicalExpiryPeriod)
44+
workspaceManager.MustCreateFilesWithLogger([]workspace_manager.WorkspaceFile{
45+
{
46+
RelativePath: readmeFileName,
47+
Content: readmeContent,
48+
FileMode: 0644,
49+
},
50+
{
51+
52+
RelativePath: fmt.Sprintf("app/%s", extraFileName),
53+
Content: fmt.Sprintf("chemical_expiry_period = %d # months", chemicalExpiryPeriod),
54+
FileMode: 0644,
55+
},
56+
{
57+
RelativePath: fmt.Sprintf("app/%s", mainFileName),
58+
Content: mainFileContent,
59+
FileMode: 0644,
60+
},
61+
}, stageHarness.Logger)
5962

6063
prompt := utils.GetPromptWithGuardRailPrompt(
6164
[]string{
62-
"Read the README.md, figure out the file that contains the chemical expiry period in months, and get me that value.",
63-
"Read README.md, determine which file contains the chemical expiry period in months, and get me that value.",
64-
"Read the README.md file, find the file that contains the chemical expiry period in months, and get me that value.",
65-
"Read README.md, figure out which file has the chemical expiry period in months, and get me that value.",
66-
"Read the README.md, identify the file containing the chemical expiry period in months, and get me that value.",
65+
"Use README.md to determine the chemical expiry period in months.",
66+
"Find the chemical expiry period in months from README.md.",
67+
"Determine in how many months the chemical expires by reading README.md.",
6768
},
6869
"Number only.",
6970
)
@@ -72,7 +73,7 @@ The starting point of this project is app/%s.`, filepath.Base(mainFilePath))
7273
InputPrompt: prompt,
7374
ExpectedExitCode: 0,
7475
StdoutAssertion: string_assertion.ExactMatchAssertion{
75-
ExpectedValue: expectedOutput,
76+
ExpectedValue: fmt.Sprintf("%d", chemicalExpiryPeriod),
7677
},
7778
}
7879

internal/stage_test_bash_tool.go

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
package internal
22

33
import (
4-
"path/filepath"
5-
64
"github.com/codecrafters-io/claude-code-tester/internal/assertions/filesystem_assertion"
7-
"github.com/codecrafters-io/claude-code-tester/internal/assertions/string_assertion"
85
"github.com/codecrafters-io/claude-code-tester/internal/settings_manager"
96
"github.com/codecrafters-io/claude-code-tester/internal/test_cases"
107
"github.com/codecrafters-io/claude-code-tester/internal/utils"
@@ -15,75 +12,85 @@ import (
1512

1613
func testBashTool(stageHarness *test_case_harness.TestCaseHarness) error {
1714
proxy_server.StartProxyServer(stageHarness)
18-
workspace_manager.BootstrapExecutableWorkspace(stageHarness)
1915
settings_manager.InitializeBypassPermissionSettings(stageHarness)
2016
stageHarness.Executable.TimeoutInMilliseconds = 30 * 1000
21-
workspaceDirPath := stageHarness.Executable.WorkingDir
17+
stageLogger := stageHarness.Logger
2218

23-
appDirPath := filepath.Join(workspaceDirPath, "app")
24-
utils.MustCreateDirWithLogging(appDirPath, stageHarness.Logger)
19+
workspaceManager := workspace_manager.NewWorkspaceManager()
20+
workspaceManager.BootstrapExecutableWorkspace(stageHarness)
21+
22+
// Store file contents in variables
23+
mainJsContent := `async function main() {
24+
const response = await fetch('https://jsonplaceholder.typicode.com/posts/1');
25+
const data = await response.json();
26+
console.log(data);
27+
}
28+
29+
main();`
2530

26-
readmePath := filepath.Join(workspaceDirPath, "README.md")
2731
readmeContent := `# My Project
2832
Uses async js to demonstrate web fetch.
2933
Entry point: app/`
30-
utils.MustCreateFileWithContentsWithLogger(readmePath, readmeContent, stageHarness.Logger)
3134

32-
readmeOldPath := filepath.Join(workspaceDirPath, "README_old.md")
3335
readmeOldContent := `# My project
3436
Uses javascript promise api to demonstrate web fetch.
3537
Entry point: app/`
36-
utils.MustCreateFileWithContentsWithLogger(readmeOldPath, readmeOldContent, stageHarness.Logger)
37-
38-
mainFilePath := filepath.Join(appDirPath, "main.js")
39-
mainContent := `async function main() {
40-
const response = await fetch('https://jsonplaceholder.typicode.com/posts/1');
41-
const data = await response.json();
42-
console.log(data);
43-
}
4438

45-
main();`
46-
utils.MustCreateFileWithContentsWithLogger(mainFilePath, mainContent, stageHarness.Logger)
39+
// Create files using MustCreateFilesWithLogger
40+
workspaceManager.MustCreateFilesWithLogger([]workspace_manager.WorkspaceFile{
41+
{
42+
RelativePath: "app/main.js",
43+
Content: mainJsContent,
44+
FileMode: 0644,
45+
},
46+
{
47+
RelativePath: "README.md",
48+
Content: readmeContent,
49+
FileMode: 0644,
50+
},
51+
{
52+
RelativePath: "README_old.md",
53+
Content: readmeOldContent,
54+
FileMode: 0644,
55+
},
56+
}, stageLogger)
4757

4858
promptTestCase := test_cases.NonInteractiveTestCase{
4959
InputPrompt: utils.GetPromptWithGuardRailPrompt(
5060
[]string{
5161
"Delete the old readme file.",
5262
"Remove the old readme from the project.",
5363
},
54-
"Always respond with `Done`",
64+
"Always respond with `Deleted README_old.md`",
5565
),
56-
StdoutAssertion: string_assertion.ExactMatchAssertion{
57-
ExpectedValue: "Done",
58-
},
5966
ExpectedExitCode: 0,
6067
}
6168

6269
if err := promptTestCase.Run(stageHarness); err != nil {
6370
return err
6471
}
6572

66-
// main.js should be intact
67-
mainJsAssertion := filesystem_assertion.FileContentsAssertion{
68-
ExpectedContents: mainContent,
69-
}
70-
if err := mainJsAssertion.Run(mainFilePath, stageHarness.Logger); err != nil {
73+
stageLogger.Infof("Checking workspace contents")
74+
75+
// Assert that main file is intact
76+
mainFileAbsPath := workspaceManager.ConvertToAbsPath("app/main.js")
77+
mainJsAssertion := filesystem_assertion.FileContentsAssertion{ExpectedContents: mainJsContent}
78+
79+
if err := mainJsAssertion.Run(mainFileAbsPath, stageLogger, workspaceManager.GetRelPathConverter()); err != nil {
7180
return err
7281
}
7382

74-
// New readme should be intact
75-
newReadmeAssertion := filesystem_assertion.FileContentsAssertion{
76-
ExpectedContents: readmeContent,
77-
}
78-
if err := newReadmeAssertion.Run(readmePath, stageHarness.Logger); err != nil {
83+
// Assert that readme is intact
84+
readmePath := workspaceManager.ConvertToAbsPath("README.md")
85+
readmeAssertion := filesystem_assertion.FileContentsAssertion{ExpectedContents: readmeContent}
86+
87+
if err := readmeAssertion.Run(readmePath, stageLogger, workspaceManager.GetRelPathConverter()); err != nil {
7988
return err
8089
}
8190

82-
// Old readme should be deleted
91+
// Assert that old readme is deleted
92+
oldReadmeAbsPath := workspaceManager.ConvertToAbsPath("README_old.md")
8393
oldReadmeAssertion := filesystem_assertion.FileDoesNotExistAssertion{}
84-
if err := oldReadmeAssertion.Run(readmeOldPath, stageHarness.Logger); err != nil {
85-
return err
86-
}
8794

88-
return nil
95+
return oldReadmeAssertion.Run(oldReadmeAbsPath, stageLogger, workspaceManager.GetRelPathConverter())
8996
}

0 commit comments

Comments
 (0)