Skip to content

Commit a81f8ea

Browse files
Migrate scraper tests to real PostgreSQL database with Drizzle ORM
Co-authored-by: git <[email protected]>
1 parent 87107ef commit a81f8ea

File tree

1 file changed

+145
-79
lines changed

1 file changed

+145
-79
lines changed

sitio/src/routes/api/internal/scraper/scrap/server.test.ts

Lines changed: 145 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,22 @@
1-
import { describe, it, expect, beforeEach, vi } from 'vitest';
1+
import { describe, it, expect, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
22
import { POST } from './+server.js';
33
import { GET as lastIdsGET } from '../last-ids/+server.js';
44
import type { RequestEvent } from '@sveltejs/kit';
55
import { nanoid } from 'nanoid';
6+
import { drizzle } from 'drizzle-orm/postgres-js';
7+
import postgres from 'postgres';
8+
import * as schema from '../../../../../schema.js';
9+
import { eq, sql } from 'drizzle-orm';
610

7-
// Mock the database module
8-
vi.mock('$lib/db/index.js', () => ({
9-
db: {
10-
query: {
11-
scraperTokens: {
12-
findFirst: vi.fn()
13-
},
14-
tweets: {
15-
findMany: vi.fn()
16-
}
17-
},
18-
transaction: vi.fn(),
19-
insert: vi.fn(),
20-
update: vi.fn(),
21-
}
22-
}));
11+
// Set up test database URL
12+
const TEST_DATABASE_URL = 'postgresql://testuser:testpass@localhost:5432/milei_test';
2313

24-
// Import the mocked db
25-
import { db } from '$lib/db/index.js';
14+
// Override the DATABASE_URL environment variable for testing
15+
process.env.DATABASE_URL = TEST_DATABASE_URL;
16+
17+
// Create database connection for testing
18+
const client = postgres(TEST_DATABASE_URL, { max: 1 });
19+
const testDb = drizzle(client, { schema });
2620

2721
// Mock request/response for testing
2822
function mockRequest(body: any, headers: Record<string, string> = {}): RequestEvent {
@@ -85,9 +79,80 @@ const validScrapData = {
8579

8680
const validToken = 'test-token-123';
8781

88-
describe('Scraper API Endpoints', () => {
89-
beforeEach(() => {
90-
vi.clearAllMocks();
82+
describe('Scraper API Real Database Tests', () => {
83+
beforeAll(async () => {
84+
// Create the database schema
85+
await testDb.execute(sql`
86+
CREATE TABLE IF NOT EXISTS db_scraper_tokens (
87+
id SERIAL PRIMARY KEY,
88+
token TEXT NOT NULL
89+
);
90+
`);
91+
92+
await testDb.execute(sql`
93+
CREATE TABLE IF NOT EXISTS db_scraps (
94+
id SERIAL PRIMARY KEY,
95+
uid TEXT UNIQUE,
96+
at TIMESTAMP WITH TIME ZONE NOT NULL,
97+
cuenta_id TEXT,
98+
total_tweets_seen INTEGER
99+
);
100+
`);
101+
102+
await testDb.execute(sql`
103+
CREATE TABLE IF NOT EXISTS db_liked_tweets (
104+
url TEXT PRIMARY KEY,
105+
first_seen_at TIMESTAMP WITH TIME ZONE NOT NULL,
106+
last_seen_at TIMESTAMP WITH TIME ZONE,
107+
text TEXT,
108+
scrap_id INTEGER
109+
);
110+
`);
111+
112+
await testDb.execute(sql`
113+
CREATE TABLE IF NOT EXISTS db_retweets (
114+
poster_id TEXT NOT NULL,
115+
poster_handle TEXT,
116+
post_id TEXT NOT NULL,
117+
first_seen_at TIMESTAMP WITH TIME ZONE NOT NULL,
118+
retweet_at TIMESTAMP WITH TIME ZONE NOT NULL,
119+
posted_at TIMESTAMP WITH TIME ZONE NOT NULL,
120+
text TEXT,
121+
scrap_id INTEGER,
122+
PRIMARY KEY (poster_id, post_id)
123+
);
124+
`);
125+
126+
await testDb.execute(sql`
127+
CREATE TABLE IF NOT EXISTS db_tweets (
128+
id TEXT PRIMARY KEY,
129+
twitter_scraper_json JSONB NOT NULL,
130+
captured_at TIMESTAMP WITH TIME ZONE NOT NULL
131+
);
132+
`);
133+
134+
// Insert test token
135+
await testDb.insert(schema.scraperTokens).values({
136+
token: validToken
137+
});
138+
});
139+
140+
beforeEach(async () => {
141+
// Clean up test data before each test
142+
await testDb.delete(schema.tweets);
143+
await testDb.delete(schema.retweets);
144+
await testDb.delete(schema.likedTweets);
145+
await testDb.delete(schema.scraps);
146+
});
147+
148+
afterAll(async () => {
149+
// Clean up after all tests
150+
await testDb.execute(sql`DROP TABLE IF EXISTS db_tweets CASCADE;`);
151+
await testDb.execute(sql`DROP TABLE IF EXISTS db_retweets CASCADE;`);
152+
await testDb.execute(sql`DROP TABLE IF EXISTS db_liked_tweets CASCADE;`);
153+
await testDb.execute(sql`DROP TABLE IF EXISTS db_scraps CASCADE;`);
154+
await testDb.execute(sql`DROP TABLE IF EXISTS db_scraper_tokens CASCADE;`);
155+
await client.end();
91156
});
92157

93158
describe('POST /api/internal/scraper/scrap', () => {
@@ -104,9 +169,6 @@ describe('Scraper API Endpoints', () => {
104169
});
105170

106171
it('should reject requests with invalid token', async () => {
107-
// Mock database to return no token
108-
vi.mocked(db.query.scraperTokens.findFirst).mockResolvedValue(undefined);
109-
110172
const request = mockRequest(validScrapData, {
111173
Authorization: 'Bearer invalid-token',
112174
});
@@ -121,12 +183,6 @@ describe('Scraper API Endpoints', () => {
121183
});
122184

123185
it('should reject requests with invalid scrap data', async () => {
124-
// Mock database to return a valid token
125-
vi.mocked(db.query.scraperTokens.findFirst).mockResolvedValue({
126-
id: 1,
127-
token: validToken
128-
});
129-
130186
const request = mockRequest(
131187
{ invalidField: 'invalid' },
132188
{ Authorization: `Bearer ${validToken}` }
@@ -140,77 +196,87 @@ describe('Scraper API Endpoints', () => {
140196
}
141197
});
142198

143-
it('should accept valid scrap data with valid token', async () => {
144-
// Mock database interactions
145-
vi.mocked(db.query.scraperTokens.findFirst).mockResolvedValue({
146-
id: 1,
147-
token: validToken
148-
});
149-
150-
// Mock transaction
151-
const mockTransaction = vi.fn(async (callback) => {
152-
const mockTx = {
153-
insert: vi.fn().mockReturnValue({
154-
values: vi.fn().mockReturnValue({
155-
returning: vi.fn().mockReturnValue({
156-
onConflictDoNothing: vi.fn().mockResolvedValue([{ id: 123 }])
157-
}),
158-
onConflictDoUpdate: vi.fn().mockResolvedValue([{ id: 123 }])
159-
})
160-
}),
161-
update: vi.fn().mockReturnValue({
162-
set: vi.fn().mockReturnValue({
163-
where: vi.fn().mockResolvedValue({})
164-
})
165-
}),
166-
query: {
167-
scraps: {
168-
findFirst: vi.fn()
169-
}
170-
}
171-
};
172-
return await callback(mockTx);
173-
});
174-
175-
vi.mocked(db.transaction).mockImplementation(mockTransaction);
176-
199+
it('should successfully process valid scrap data with PostgreSQL', async () => {
177200
const request = mockRequest(validScrapData, {
178201
Authorization: `Bearer ${validToken}`,
179202
});
180203

181204
const response = await POST(request);
182-
183205
expect(response.status).toBe(200);
206+
184207
const data = await response.json();
185208
expect(data).toHaveProperty('scrapId');
186209
expect(typeof data.scrapId).toBe('number');
210+
211+
// Verify data was inserted into PostgreSQL database
212+
const scraps = await testDb.select().from(schema.scraps);
213+
expect(scraps.length).toBe(1);
214+
expect(scraps[0].uid).toBe(validScrapData.uid);
215+
216+
const likedTweets = await testDb.select().from(schema.likedTweets);
217+
expect(likedTweets.length).toBe(1);
218+
expect(likedTweets[0].url).toBe(validScrapData.likedTweets[0].url);
219+
220+
const retweets = await testDb.select().from(schema.retweets);
221+
expect(retweets.length).toBe(1);
222+
expect(retweets[0].posterId).toBe(validScrapData.retweets[0].posterId);
223+
224+
const tweets = await testDb.select().from(schema.tweets);
225+
expect(tweets.length).toBe(1);
226+
expect(tweets[0].id).toBe(validScrapData.tweets[0].id);
227+
});
228+
229+
it('should handle PostgreSQL conflict resolution correctly', async () => {
230+
// First request
231+
const request1 = mockRequest(validScrapData, {
232+
Authorization: `Bearer ${validToken}`,
233+
});
234+
235+
const response1 = await POST(request1);
236+
expect(response1.status).toBe(200);
237+
238+
// Second request with same UID should use PostgreSQL onConflictDoNothing
239+
const request2 = mockRequest(validScrapData, {
240+
Authorization: `Bearer ${validToken}`,
241+
});
242+
243+
const response2 = await POST(request2);
244+
expect(response2.status).toBe(200);
245+
246+
// Should still only have one scrap record due to PostgreSQL unique constraint
247+
const scraps = await testDb.select().from(schema.scraps);
248+
expect(scraps.length).toBe(1);
187249
});
188250
});
189251

190252
describe('GET /api/internal/scraper/last-ids', () => {
191-
it('should return last tweet IDs', async () => {
192-
// Mock database to return some tweet IDs
193-
vi.mocked(db.query.tweets.findMany).mockResolvedValue([
194-
{ id: 'tweet123', twitterScraperJson: {}, capturedAt: new Date() },
195-
{ id: 'tweet456', twitterScraperJson: {}, capturedAt: new Date() },
196-
{ id: 'tweet789', twitterScraperJson: {}, capturedAt: new Date() }
253+
it('should return last tweet IDs from PostgreSQL', async () => {
254+
// Insert test tweets into PostgreSQL
255+
await testDb.insert(schema.tweets).values([
256+
{
257+
id: 'tweet1',
258+
twitterScraperJson: { test: 'data1' },
259+
capturedAt: new Date('2023-01-01')
260+
},
261+
{
262+
id: 'tweet2',
263+
twitterScraperJson: { test: 'data2' },
264+
capturedAt: new Date('2023-01-02')
265+
}
197266
]);
198267

199268
const response = await lastIdsGET();
200-
201269
expect(response.status).toBe(200);
202270

203271
const data = await response.json();
204272
expect(Array.isArray(data)).toBe(true);
205-
expect(data).toEqual(['tweet123', 'tweet456', 'tweet789']);
273+
expect(data.length).toBe(2);
274+
expect(data).toContain('tweet1');
275+
expect(data).toContain('tweet2');
206276
});
207277

208-
it('should handle empty tweet results', async () => {
209-
// Mock database to return empty results
210-
vi.mocked(db.query.tweets.findMany).mockResolvedValue([]);
211-
278+
it('should handle empty PostgreSQL results', async () => {
212279
const response = await lastIdsGET();
213-
214280
expect(response.status).toBe(200);
215281

216282
const data = await response.json();

0 commit comments

Comments
 (0)