Skip to content

Commit 7547407

Browse files
committed
fix(vt): self-sustaining VT scanning + working stats
- getStatsInternal: derive VT stats from moderationReason instead of N+1 version lookups that hit the 16MB byte limit
- UI: read cached vtAnalysis from version docs instead of hitting the live VT API on every page view
- Backfill: add vt-cache-backfill cron (30min) with self-scheduling to drain the backlog of skills missing cached vtAnalysis
- Daily rescan: cursor-based batching (100/batch) with self-scheduling instead of loading all skills in one shot
1 parent 5946a08 commit 7547407

File tree

5 files changed

+375
-173
lines changed

5 files changed

+375
-173
lines changed

convex/crons.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ crons.interval(
3333

3434
crons.interval('vt-pending-scans', { minutes: 5 }, internal.vt.pollPendingScans, { batchSize: 100 })
3535

// Every 30 minutes, run the VT cache backfill in batches of 100. Per the commit
// description, this drains the backlog of skills missing a cached vtAnalysis.
crons.interval('vt-cache-backfill', { minutes: 30 }, internal.vt.backfillActiveSkillsVTCache, {
  batchSize: 100,
})
42+
3643
// Daily re-scan of all active skills at 3am UTC
3744
crons.daily('vt-daily-rescan', { hourUTC: 3, minuteUTC: 0 }, internal.vt.rescanActiveSkills, {})
3845

convex/skills.ts

Lines changed: 187 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@ import { paginator } from 'convex-helpers/server/pagination'
55
import { internal } from './_generated/api'
66
import type { Doc, Id } from './_generated/dataModel'
77
import type { MutationCtx, QueryCtx } from './_generated/server'
8-
import { action, internalMutation, internalQuery, mutation, query } from './_generated/server'
8+
import {
9+
action,
10+
internalAction,
11+
internalMutation,
12+
internalQuery,
13+
mutation,
14+
query,
15+
} from './_generated/server'
916
import { assertAdmin, assertModerator, requireUser, requireUserFromAction } from './lib/access'
1017
import { getSkillBadgeMap, getSkillBadgeMaps, isSkillHighlighted } from './lib/badges'
1118
import { generateChangelogPreview as buildChangelogPreview } from './lib/changelog'
@@ -674,17 +681,32 @@ export const getQuickStatsInternal = internalQuery({
674681
/**
675682
* Get aggregate stats for all skills (for social posts, dashboards, etc.)
676683
*/
677-
export const getStatsInternal = internalQuery({
678-
args: {},
679-
handler: async (ctx) => {
680-
const allSkills = await ctx.db.query('skills').collect()
681-
const active = allSkills.filter((s) => !s.softDeletedAt)
684+
/**
685+
* Paginated helper: counts stats for a batch of skills.
686+
* Returns partial counts + cursor for the next page.
687+
*/
688+
export const getStatsPageInternal = internalQuery({
689+
args: { cursor: v.optional(v.number()) },
690+
handler: async (ctx, args) => {
691+
const PAGE_SIZE = 500
692+
const cursor = args.cursor ?? 0
682693

694+
const page = await ctx.db
695+
.query('skills')
696+
.filter((q) => q.gt(q.field('_creationTime'), cursor))
697+
.order('asc')
698+
.take(PAGE_SIZE)
699+
700+
let total = 0
683701
const byStatus: Record<string, number> = {}
684702
const byReason: Record<string, number> = {}
685703
const byFlags: Record<string, number> = {}
704+
const vtStats = { clean: 0, suspicious: 0, malicious: 0, pending: 0, noAnalysis: 0 }
705+
706+
for (const skill of page) {
707+
if (skill.softDeletedAt) continue
708+
total++
686709

687-
for (const skill of active) {
688710
const status = skill.moderationStatus ?? 'active'
689711
byStatus[status] = (byStatus[status] ?? 0) + 1
690712

@@ -695,54 +717,110 @@ export const getStatsInternal = internalQuery({
695717
for (const flag of skill.moderationFlags ?? []) {
696718
byFlags[flag] = (byFlags[flag] ?? 0) + 1
697719
}
720+
721+
if (status === 'active') {
722+
const reason = skill.moderationReason
723+
if (!reason) {
724+
vtStats.noAnalysis++
725+
} else if (reason === 'scanner.vt.clean') {
726+
vtStats.clean++
727+
} else if (reason === 'scanner.vt.malicious') {
728+
vtStats.malicious++
729+
} else if (reason === 'scanner.vt.suspicious') {
730+
vtStats.suspicious++
731+
} else if (reason === 'scanner.vt.pending' || reason === 'pending.scan') {
732+
vtStats.pending++
733+
} else if (reason.startsWith('scanner.vt-rescan.')) {
734+
const suffix = reason.slice('scanner.vt-rescan.'.length)
735+
if (suffix === 'clean') vtStats.clean++
736+
else if (suffix === 'malicious') vtStats.malicious++
737+
else if (suffix === 'suspicious') vtStats.suspicious++
738+
else vtStats.pending++
739+
} else {
740+
vtStats.noAnalysis++
741+
}
742+
}
698743
}
699744

700-
const highlightedBadges = await ctx.db
745+
const nextCursor = page.length > 0 ? page[page.length - 1]._creationTime : null
746+
const done = page.length < PAGE_SIZE
747+
748+
return { total, byStatus, byReason, byFlags, vtStats, nextCursor, done }
749+
},
750+
})
751+
/**
 * Returns the number of `skillBadges` rows whose `kind` is `'highlighted'`.
 * Reads through the `by_kind_at` index and counts the collected rows.
 */
export const getHighlightedCountInternal = internalQuery({
  args: {},
  handler: async (ctx) => {
    const highlightedBadges = ctx.db
      .query('skillBadges')
      .withIndex('by_kind_at', (q) => q.eq('kind', 'highlighted'))
    const { length } = await highlightedBadges.collect()
    return length
  },
})
704762

705-
const vtStats = {
706-
clean: 0,
707-
suspicious: 0,
708-
malicious: 0,
709-
pending: 0,
710-
noAnalysis: 0,
711-
noLatestVersion: 0,
712-
noSha256hash: 0,
713-
hasHashNoAnalysis: 0,
714-
}
715-
for (const skill of active.filter((s) => (s.moderationStatus ?? 'active') === 'active')) {
716-
if (!skill.latestVersionId) {
717-
vtStats.noAnalysis++
718-
vtStats.noLatestVersion++
719-
continue
763+
/**
764+
* Get aggregate stats for all skills (for social posts, dashboards, etc.)
765+
* Uses an action to call paginated queries, avoiding the 16MB byte limit.
766+
*/
767+
type StatsResult = {
768+
total: number
769+
highlighted: number
770+
byStatus: Record<string, number>
771+
byReason: Record<string, number>
772+
byFlags: Record<string, number>
773+
vtStats: { clean: number; suspicious: number; malicious: number; pending: number; noAnalysis: number }
774+
}
775+
776+
export const getStatsInternal = internalAction({
777+
args: {},
778+
handler: async (ctx): Promise<StatsResult> => {
779+
let total = 0
780+
const byStatus: Record<string, number> = {}
781+
const byReason: Record<string, number> = {}
782+
const byFlags: Record<string, number> = {}
783+
const vtStats = { clean: 0, suspicious: 0, malicious: 0, pending: 0, noAnalysis: 0 }
784+
785+
let cursor: number | undefined
786+
let done = false
787+
788+
while (!done) {
789+
const page: {
790+
total: number
791+
byStatus: Record<string, number>
792+
byReason: Record<string, number>
793+
byFlags: Record<string, number>
794+
vtStats: { clean: number; suspicious: number; malicious: number; pending: number; noAnalysis: number }
795+
nextCursor: number | null
796+
done: boolean
797+
} = await ctx.runQuery(internal.skills.getStatsPageInternal, { cursor })
798+
799+
total += page.total
800+
for (const [k, cnt] of Object.entries(page.byStatus)) {
801+
byStatus[k] = (byStatus[k] ?? 0) + cnt
720802
}
721-
const version = await ctx.db.get(skill.latestVersionId)
722-
if (!version?.vtAnalysis) {
723-
vtStats.noAnalysis++
724-
if (!version?.sha256hash) {
725-
vtStats.noSha256hash++
726-
} else {
727-
vtStats.hasHashNoAnalysis++
728-
}
729-
continue
803+
for (const [k, cnt] of Object.entries(page.byReason)) {
804+
byReason[k] = (byReason[k] ?? 0) + cnt
805+
}
806+
for (const [k, cnt] of Object.entries(page.byFlags)) {
807+
byFlags[k] = (byFlags[k] ?? 0) + cnt
808+
}
809+
vtStats.clean += page.vtStats.clean
810+
vtStats.suspicious += page.vtStats.suspicious
811+
vtStats.malicious += page.vtStats.malicious
812+
vtStats.pending += page.vtStats.pending
813+
vtStats.noAnalysis += page.vtStats.noAnalysis
814+
815+
done = page.done
816+
if (page.nextCursor !== null) {
817+
cursor = page.nextCursor
730818
}
731-
const status = version.vtAnalysis.status
732-
if (status === 'clean') vtStats.clean++
733-
else if (status === 'suspicious') vtStats.suspicious++
734-
else if (status === 'malicious') vtStats.malicious++
735-
else vtStats.pending++
736819
}
737820

738-
return {
739-
total: active.length,
740-
highlighted: highlightedBadges.length,
741-
byStatus,
742-
byReason,
743-
byFlags,
744-
vtStats,
745-
}
821+
const highlighted: number = await ctx.runQuery(internal.skills.getHighlightedCountInternal, {})
822+
823+
return { total, highlighted, byStatus, byReason, byFlags, vtStats }
746824
},
747825
})
748826

@@ -1497,6 +1575,61 @@ export const getAllActiveSkillsForRescanInternal = internalQuery({
14971575
},
14981576
})
14991577

/**
 * Cursor-based batch query for the daily rescan.
 *
 * Reads `skills` in ascending `_creationTime` order, starting strictly after
 * `cursor`, and returns up to `batchSize` skills that are not soft-deleted,
 * have moderation status `active` (missing status counts as `active`), and
 * whose latest version has a `sha256hash`.
 *
 * @param cursor    `_creationTime` of the last skill examined by the previous
 *                  call; omit (or pass 0) to start from the beginning.
 * @param batchSize Maximum number of skills to return (default 100, minimum 1).
 * @returns `skills` — the eligible batch; `nextCursor` — the `_creationTime`
 *          of the last skill actually examined (pass it to the next call);
 *          `done` — true only when every fetched candidate was examined and
 *          the fetch came back short, i.e. nothing is left after `nextCursor`.
 */
export const getActiveSkillBatchForRescanInternal = internalQuery({
  args: {
    cursor: v.optional(v.number()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    // Clamp to >= 1: a batch size of 0 would fetch nothing, report done=false,
    // and leave the cursor unchanged, so a self-scheduling caller would spin.
    const batchSize = Math.max(1, Math.floor(args.batchSize ?? 100))
    const cursor = args.cursor ?? 0
    // Over-fetch to account for candidates dropped by the filters below.
    const fetchLimit = batchSize * 3

    // Use the built-in by_creation_time index so the read starts at the cursor
    // instead of re-scanning the table from the beginning on every batch.
    const candidates = await ctx.db
      .query('skills')
      .withIndex('by_creation_time', (q) => q.gt('_creationTime', cursor))
      .order('asc')
      .take(fetchLimit)

    const results: Array<{
      skillId: Id<'skills'>
      versionId: Id<'skillVersions'>
      sha256hash: string
      slug: string
    }> = []
    let nextCursor = cursor
    let examined = 0

    for (const skill of candidates) {
      // Stop BEFORE advancing the cursor: the skill that would overflow the
      // batch has not been examined yet and must be picked up by the next call.
      if (results.length >= batchSize) break

      nextCursor = skill._creationTime
      examined++

      // Filter out soft-deleted and non-active
      if (skill.softDeletedAt) continue
      if ((skill.moderationStatus ?? 'active') !== 'active') continue
      if (!skill.latestVersionId) continue

      const version = await ctx.db.get(skill.latestVersionId)
      if (!version?.sha256hash) continue

      results.push({
        skillId: skill._id,
        versionId: version._id,
        sha256hash: version.sha256hash,
        slug: skill.slug,
      })
    }

    // Done only if the table is exhausted (short fetch) AND we did not stop
    // early with unexamined candidates still in hand.
    const done = examined === candidates.length && candidates.length < fetchLimit

    return { skills: results, nextCursor, done }
  },
})
1632+
15001633
/**
15011634
* Get skills with stale moderationReason that have vtAnalysis cached.
15021635
* Used to sync moderationReason with cached VT results.
@@ -1824,6 +1957,15 @@ export const approveSkillByHashInternal = internalMutation({
18241957
: undefined,
18251958
updatedAt: Date.now(),
18261959
})
1960+
1961+
// Auto-ban authors of malicious skills (skips moderators/admins)
1962+
if (isMalicious && skill.ownerUserId) {
1963+
await ctx.scheduler.runAfter(0, internal.users.autobanMalwareAuthorInternal, {
1964+
ownerUserId: skill.ownerUserId,
1965+
sha256hash: args.sha256hash,
1966+
slug: skill.slug,
1967+
})
1968+
}
18271969
}
18281970

18291971
return { ok: true, skillId: version.skillId, versionId: version._id }

convex/users.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,83 @@ async function banUserWithActor(ctx: MutationCtx, actor: Doc<'users'>, targetUse
264264

265265
return { ok: true as const, alreadyBanned: false, deletedSkills: skills.length }
266266
}
267+
/**
 * System-level ban of the owner of a skill that VT flagged as malicious.
 *
 * No human actor is involved. Steps, in order:
 *   1. Bail out if the user is missing, already has `deletedAt` set, or has
 *      the `admin` / `moderator` role (those are never auto-banned).
 *   2. Soft-delete every skill owned by the user that is not already soft-deleted.
 *   3. Revoke every API token of the user that is not already revoked.
 *   4. Mark the user as deleted and reset their role to `'user'`.
 *   5. Clear the user's telemetry via `clearUserTelemetryInternal`.
 *   6. Write a `user.autoban.malware` audit log entry.
 *
 * @param ownerUserId User to ban.
 * @param sha256hash  Hash of the malicious artifact (recorded in the audit log).
 * @param slug        Slug of the offending skill (audit log and warning message).
 * @returns `{ ok: false, reason }` when skipped, `{ ok: true, alreadyBanned: true }`
 *          when the user was already deleted, otherwise
 *          `{ ok: true, alreadyBanned: false, deletedSkills }` where `deletedSkills`
 *          is the total number of skills owned by the user.
 */
export const autobanMalwareAuthorInternal = internalMutation({
  args: {
    ownerUserId: v.id('users'),
    sha256hash: v.string(),
    slug: v.string(),
  },
  handler: async (ctx, args) => {
    const { ownerUserId, sha256hash, slug } = args

    const target = await ctx.db.get(ownerUserId)
    if (!target) {
      return { ok: false, reason: 'user_not_found' }
    }
    if (target.deletedAt) {
      return { ok: true, alreadyBanned: true }
    }

    // Never auto-ban moderators or admins
    const isProtected = target.role === 'admin' || target.role === 'moderator'
    if (isProtected) {
      console.log(`[autoban] Skipping ${target.handle ?? ownerUserId}: role=${target.role}`)
      return { ok: false, reason: 'protected_role' }
    }

    // One timestamp shared by every write of this ban.
    const now = Date.now()

    // Soft-delete all their skills
    const ownedSkills = await ctx.db
      .query('skills')
      .withIndex('by_owner', (q) => q.eq('ownerUserId', ownerUserId))
      .collect()
    for (const owned of ownedSkills.filter((s) => !s.softDeletedAt)) {
      await ctx.db.patch(owned._id, { softDeletedAt: now, updatedAt: now })
    }

    // Revoke all API tokens
    const userTokens = await ctx.db
      .query('apiTokens')
      .withIndex('by_user', (q) => q.eq('userId', ownerUserId))
      .collect()
    for (const live of userTokens.filter((t) => !t.revokedAt)) {
      await ctx.db.patch(live._id, { revokedAt: now })
    }

    // Ban the user
    await ctx.db.patch(ownerUserId, {
      deletedAt: now,
      role: 'user',
      updatedAt: now,
    })

    await ctx.runMutation(internal.telemetry.clearUserTelemetryInternal, {
      userId: ownerUserId,
    })

    // Audit log — the target doubles as the actor since there's no human actor
    const deletedSkills = ownedSkills.length
    await ctx.db.insert('auditLogs', {
      actorUserId: ownerUserId,
      action: 'user.autoban.malware',
      targetType: 'user',
      targetId: ownerUserId,
      metadata: {
        trigger: 'vt.malicious',
        sha256hash,
        slug,
        deletedSkills,
      },
      createdAt: now,
    })

    console.warn(`[autoban] Banned ${target.handle ?? ownerUserId} — malicious skill: ${slug}`)

    return { ok: true, alreadyBanned: false, deletedSkills }
  },
})

0 commit comments

Comments
 (0)