Merge pull request #41 from priyanshujain/refactor-tests

priyanshujain · web-flow · commit 17b3dcee1e1c · 2026-03-10T18:21:32.000+07:00
fix: flaky and dead test code
diff --git a/agent/provider_test.go b/agent/provider_test.go
@@ -17,7 +17,7 @@ import (
 	"github.com/priyanshujain/openbotkit/provider/openai"
 )
 
-// providerTestCase holds a provider instance and model name for table-driven integration tests.
+// providerTestCase holds a provider instance and model name for provider conformance tests.
 type providerTestCase struct {
 	name     string
 	provider provider.Provider
@@ -81,15 +81,14 @@ func availableProviders(t *testing.T) []providerTestCase {
 	}
 
 	if len(providers) == 0 {
-		t.Skip("no API keys set (ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY) — skipping integration tests")
+		t.Skip("no API keys set (ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY) — skipping provider tests")
 	}
 	return providers
 }
 
-// TestIntegration_AgentLoop tests the full agent loop with a real LLM API.
-// The agent is given a bash tool, asked to run "echo hello", and should
-// return a response containing the command output.
-func TestIntegration_AgentLoop(t *testing.T) {
+// TestProvider_AgentToolExecution verifies each provider can drive the agent
+// loop: user request → tool call → tool execution → text response.
+func TestProvider_AgentToolExecution(t *testing.T) {
 	for _, tc := range availableProviders(t) {
 		t.Run(tc.name, func(t *testing.T) {
 			reg := tools.NewRegistry()
@@ -115,9 +114,9 @@ func TestIntegration_AgentLoop(t *testing.T) {
 	}
 }
 
-// TestIntegration_ToolUseRoundtrip verifies the provider correctly handles a
-// tool_use → tool_result → text response cycle via the real API.
-func TestIntegration_ToolUseRoundtrip(t *testing.T) {
+// TestProvider_ToolUseRoundtrip verifies each provider correctly handles the
+// tool_use → tool_result → text response cycle against the real API.
+func TestProvider_ToolUseRoundtrip(t *testing.T) {
 	for _, tc := range availableProviders(t) {
 		t.Run(tc.name, func(t *testing.T) {
 			toolSchema := provider.Tool{
@@ -170,6 +169,7 @@ func TestIntegration_ToolUseRoundtrip(t *testing.T) {
 							Type: provider.ContentToolResult,
 							ToolResult: &provider.ToolResult{
 								ToolUseID: calls[0].ID,
+								Name:      calls[0].Name,
 								Content:   `{"temperature": "22°C", "condition": "Sunny"}`,
 							},
 						},
@@ -205,8 +205,9 @@ func TestIntegration_ToolUseRoundtrip(t *testing.T) {
 	}
 }
 
-// TestIntegration_Streaming verifies streaming works with the real API.
-func TestIntegration_Streaming(t *testing.T) {
+// TestProvider_Streaming verifies each provider's streaming implementation
+// delivers text deltas and a done event.
+func TestProvider_Streaming(t *testing.T) {
 	for _, tc := range availableProviders(t) {
 		t.Run(tc.name, func(t *testing.T) {
 			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
diff --git a/channel/telegram/integration_test.go b/channel/telegram/integration_test.go
@@ -18,9 +18,9 @@ import (
 	"github.com/priyanshujain/openbotkit/store"
 )
 
-// TestIntegration_SessionWithRealLLM tests the full session lifecycle:
-// message → agent with real Gemini API → response → history saved.
-func TestIntegration_SessionWithRealLLM(t *testing.T) {
+// TestSession_MessageAndHistorySaved verifies the full session lifecycle:
+// message → agent with real Gemini API → response → history saved to DB.
+func TestSession_MessageAndHistorySaved(t *testing.T) {
 	key := testutil.RequireGeminiKey(t)
 
 	dir := t.TempDir()
@@ -105,7 +105,9 @@ func TestIntegration_SessionWithRealLLM(t *testing.T) {
 
 // TestIntegration_SessionWithMemoryInjection tests that user memories are injected
 // into the system prompt when the agent processes a message.
-func TestIntegration_SessionWithMemoryInjection(t *testing.T) {
+// TestSession_MemoryInjectedIntoPrompt verifies memories from the DB appear
+// in the system prompt and the agent can reference them.
+func TestSession_MemoryInjectedIntoPrompt(t *testing.T) {
 	key := testutil.RequireGeminiKey(t)
 
 	dir := t.TempDir()
@@ -171,7 +173,9 @@ func TestIntegration_SessionWithMemoryInjection(t *testing.T) {
 
 // TestIntegration_SessionWithToolUse tests that the agent can use tools
 // (like bash) when processing a Telegram message.
-func TestIntegration_SessionWithToolUse(t *testing.T) {
+// TestSession_ToolUseViaBash verifies the agent can execute bash commands
+// through the tool use loop within a Telegram session.
+func TestSession_ToolUseViaBash(t *testing.T) {
 	key := testutil.RequireGeminiKey(t)
 
 	dir := t.TempDir()
diff --git a/channel/telegram/telegram_test.go b/channel/telegram/telegram_test.go
@@ -4,19 +4,27 @@ import (
 	"io"
 	"sync"
 	"testing"
+	"time"
 
 	tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
 )
 
 type mockBot struct {
-	mu   sync.Mutex
-	sent []tgbotapi.Chattable
+	mu     sync.Mutex
+	sent   []tgbotapi.Chattable
+	notify chan struct{}
 }
 
 func (m *mockBot) Send(c tgbotapi.Chattable) (tgbotapi.Message, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.sent = append(m.sent, c)
+	if m.notify != nil {
+		select {
+		case m.notify <- struct{}{}:
+		default:
+		}
+	}
 	return tgbotapi.Message{}, nil
 }
 
@@ -74,7 +82,7 @@ func TestReceive_EOFOnClose(t *testing.T) {
 }
 
 func TestRequestApproval_SendsKeyboard(t *testing.T) {
-	bot := &mockBot{}
+	bot := &mockBot{notify: make(chan struct{}, 1)}
 	ch := NewChannel(bot, 123)
 
 	done := make(chan bool, 1)
@@ -87,14 +95,10 @@ func TestRequestApproval_SendsKeyboard(t *testing.T) {
 		done <- approved
 	}()
 
-	// Wait for the approval message to be sent
-	for {
-		bot.mu.Lock()
-		n := len(bot.sent)
-		bot.mu.Unlock()
-		if n > 0 {
-			break
-		}
+	select {
+	case <-bot.notify:
+	case <-time.After(5 * time.Second):
+		t.Fatal("timed out waiting for approval message")
 	}
 
 	bot.mu.Lock()
diff --git a/internal/server/api_test.go b/internal/server/api_test.go
@@ -70,8 +70,9 @@ func newLocalBackend(t *testing.T) servertest.Backend {
 	}
 }
 
-// TestServer_Local runs the full server test suite against a local httptest server.
-func TestServer_Local(t *testing.T) {
+// TestServerAPI runs the server API contract tests (auth, CRUD, validation,
+// DB proxy) against a local httptest server.
+func TestServerAPI(t *testing.T) {
 	if _, err := exec.LookPath("sqlite3"); err != nil {
 		t.Skip("sqlite3 not in PATH")
 	}
diff --git a/internal/server/docker_test.go b/internal/server/docker_test.go
@@ -15,9 +15,9 @@ import (
 	"github.com/testcontainers/testcontainers-go/wait"
 )
 
-// TestServer_Docker runs the full server test suite against a Docker container
-// built from the repo's Dockerfile. Requires Docker to be running.
-func TestServer_Docker(t *testing.T) {
+// TestServerAPI_Docker runs the server API contract tests against a Docker
+// container built from the repo's Dockerfile. Requires Docker to be running.
+func TestServerAPI_Docker(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping Docker test in short mode")
 	}
diff --git a/internal/servertest/suite.go b/internal/servertest/suite.go
@@ -6,10 +6,10 @@ import (
 	"github.com/priyanshujain/openbotkit/remote"
 )
 
-// Backend provides a configured client for running the server test suite.
-// The suite exercises all server functionality through the remote.Client,
-// making it backend-agnostic: the same tests run against a local httptest
-// server and a Docker container.
+// Backend provides a configured client for running the server API contract
+// tests. The suite verifies auth, CRUD, validation, and DB proxy behavior
+// through the remote.Client — backend-agnostic so the same tests run against
+// a local httptest server and a Docker container.
 type Backend struct {
 	Client       *remote.Client
 	NoAuthClient *remote.Client
diff --git a/memory/integration_test.go b/memory/integration_test.go
@@ -73,7 +73,9 @@ func (pl *providerLLM) Chat(ctx context.Context, req provider.ChatRequest) (*pro
 	return pl.p.Chat(ctx, req)
 }
 
-func TestIntegration_Extract(t *testing.T) {
+// TestExtract_WithRealLLM verifies fact extraction from conversation messages
+// produces valid categorized facts.
+func TestExtract_WithRealLLM(t *testing.T) {
 	for _, tc := range availableProviders(t) {
 		t.Run(tc.name, func(t *testing.T) {
 			llm := &providerLLM{p: tc.provider, model: tc.model}
@@ -110,7 +112,10 @@ func TestIntegration_Extract(t *testing.T) {
 	}
 }
 
-func TestIntegration_ExtractAndReconcile(t *testing.T) {
+// TestExtractAndReconcile_WithRealLLM verifies the full pipeline: extract facts
+// from messages, reconcile against existing DB (add new, update changed, skip
+// duplicates).
+func TestExtractAndReconcile_WithRealLLM(t *testing.T) {
 	for _, tc := range availableProviders(t) {
 		t.Run(tc.name, func(t *testing.T) {
 			db := testDB(t)
diff --git a/source/whatsapp/auth_test.go b/source/whatsapp/auth_test.go
@@ -218,7 +218,7 @@ func TestWaitForHistorySync_QuietPeriod(t *testing.T) {
 	waitForHistorySync(ch, 5*time.Second, 100*time.Millisecond)
 	elapsed := time.Since(start)
 
-	if elapsed < 100*time.Millisecond || elapsed > 500*time.Millisecond {
+	if elapsed < 100*time.Millisecond || elapsed > 5*time.Second {
 		t.Fatalf("expected ~100ms quiet period, got %v", elapsed)
 	}
 }
@@ -238,7 +238,7 @@ func TestWaitForHistorySync_ResetOnMultipleEvents(t *testing.T) {
 
 	// First signal at t=0 starts 100ms quiet timer; second at t=80ms resets it.
 	// Should return around t=180ms.
-	if elapsed < 150*time.Millisecond || elapsed > 500*time.Millisecond {
+	if elapsed < 150*time.Millisecond || elapsed > 5*time.Second {
 		t.Fatalf("expected ~180ms (reset quiet period), got %v", elapsed)
 	}
 }
@@ -265,7 +265,7 @@ func TestWaitForHistorySync_DeadlineExpires(t *testing.T) {
 	elapsed := time.Since(start)
 	close(stop)
 
-	if elapsed < 180*time.Millisecond || elapsed > 500*time.Millisecond {
+	if elapsed < 180*time.Millisecond || elapsed > 5*time.Second {
 		t.Fatalf("expected ~200ms deadline, got %v", elapsed)
 	}
 }
@@ -277,7 +277,7 @@ func TestWaitForHistorySync_NoSignal(t *testing.T) {
 	waitForHistorySync(ch, 200*time.Millisecond, 100*time.Millisecond)
 	elapsed := time.Since(start)
 
-	if elapsed < 180*time.Millisecond || elapsed > 500*time.Millisecond {
+	if elapsed < 180*time.Millisecond || elapsed > 5*time.Second {
 		t.Fatalf("expected ~200ms deadline (no signal), got %v", elapsed)
 	}
 }
diff --git a/source/whatsapp/sync_test.go b/source/whatsapp/sync_test.go
@@ -168,8 +168,3 @@ func TestTruncate(t *testing.T) {
 	}
 }
 
-func TestChatName(t *testing.T) {
-	// chatName is not exported, but we test it via the events.Message path.
-	// For now, test the helper directly since we're in the same package.
-	// chatName returns push name for non-group chats.
-}

Original file line number	Diff line number	Diff line change
`@@ -70,8 +70,9 @@ func newLocalBackend(t *testing.T) servertest.Backend {`
`70`	`70`	`}`
`71`	`71`	`}`
`72`	`72`
`73`		`-// TestServer_Local runs the full server test suite against a local httptest server.`
`74`		`-func TestServer_Local(t *testing.T) {`
	`73`	`+// TestServerAPI runs the server API contract tests (auth, CRUD, validation,`
	`74`	`+// DB proxy) against a local httptest server.`
	`75`	`+func TestServerAPI(t *testing.T) {`
`75`	`76`	`if _, err := exec.LookPath("sqlite3"); err != nil {`
`76`	`77`	`t.Skip("sqlite3 not in PATH")`
`77`	`78`	`}`
Original file line number	Diff line number	Diff line change
`@@ -15,9 +15,9 @@ import (`
`15`	`15`	`"github.com/testcontainers/testcontainers-go/wait"`
`16`	`16`	`)`
`17`	`17`
`18`		`-// TestServer_Docker runs the full server test suite against a Docker container`
`19`		`-// built from the repo's Dockerfile. Requires Docker to be running.`
`20`		`-func TestServer_Docker(t *testing.T) {`
	`18`	`+// TestServerAPI_Docker runs the server API contract tests against a Docker`
	`19`	`+// container built from the repo's Dockerfile. Requires Docker to be running.`
	`20`	`+func TestServerAPI_Docker(t *testing.T) {`
`21`	`21`	`if testing.Short() {`
`22`	`22`	`t.Skip("skipping Docker test in short mode")`
`23`	`23`	`}`
Original file line number	Diff line number	Diff line change
`@@ -218,7 +218,7 @@ func TestWaitForHistorySync_QuietPeriod(t *testing.T) {`
`218`	`218`	`waitForHistorySync(ch, 5time.Second, 100time.Millisecond)`
`219`	`219`	`elapsed := time.Since(start)`
`220`	`220`
`221`		`- if elapsed < 100time.Millisecond \|\| elapsed > 500time.Millisecond {`
	`221`	`+ if elapsed < 100time.Millisecond \|\| elapsed > 5time.Second {`
`222`	`222`	`t.Fatalf("expected ~100ms quiet period, got %v", elapsed)`
`223`	`223`	`}`
`224`	`224`	`}`
`@@ -238,7 +238,7 @@ func TestWaitForHistorySync_ResetOnMultipleEvents(t *testing.T) {`
`238`	`238`
`239`	`239`	`// First signal at t=0 starts 100ms quiet timer; second at t=80ms resets it.`
`240`	`240`	`// Should return around t=180ms.`
`241`		`- if elapsed < 150time.Millisecond \|\| elapsed > 500time.Millisecond {`
	`241`	`+ if elapsed < 150time.Millisecond \|\| elapsed > 5time.Second {`
`242`	`242`	`t.Fatalf("expected ~180ms (reset quiet period), got %v", elapsed)`
`243`	`243`	`}`
`244`	`244`	`}`
`@@ -265,7 +265,7 @@ func TestWaitForHistorySync_DeadlineExpires(t *testing.T) {`
`265`	`265`	`elapsed := time.Since(start)`
`266`	`266`	`close(stop)`
`267`	`267`
`268`		`- if elapsed < 180time.Millisecond \|\| elapsed > 500time.Millisecond {`
	`268`	`+ if elapsed < 180time.Millisecond \|\| elapsed > 5time.Second {`
`269`	`269`	`t.Fatalf("expected ~200ms deadline, got %v", elapsed)`
`270`	`270`	`}`
`271`	`271`	`}`
`@@ -277,7 +277,7 @@ func TestWaitForHistorySync_NoSignal(t *testing.T) {`
`277`	`277`	`waitForHistorySync(ch, 200time.Millisecond, 100time.Millisecond)`
`278`	`278`	`elapsed := time.Since(start)`
`279`	`279`
`280`		`- if elapsed < 180time.Millisecond \|\| elapsed > 500time.Millisecond {`
	`280`	`+ if elapsed < 180time.Millisecond \|\| elapsed > 5time.Second {`
`281`	`281`	`t.Fatalf("expected ~200ms deadline (no signal), got %v", elapsed)`
`282`	`282`	`}`
`283`	`283`	`}`
Original file line number	Diff line number	Diff line change
`@@ -168,8 +168,3 @@ func TestTruncate(t *testing.T) {`
`168`	`168`	`}`
`169`	`169`	`}`
`170`	`170`
`171`		`-func TestChatName(t *testing.T) {`
`172`		`- // chatName is not exported, but we test it via the events.Message path.`
`173`		`- // For now, test the helper directly since we're in the same package.`
`174`		`- // chatName returns push name for non-group chats.`
`175`		`-}`