Skip to content

Commit 04d2af6

Browse files
committed
address review
1 parent dd12ee0 commit 04d2af6

File tree

7 files changed

+692
-88
lines changed

7 files changed

+692
-88
lines changed

packages/rss-db-collection/README.md

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ RSS/Atom feed collection for TanStack DB - sync data from RSS and Atom feeds wit
55
## Features
66

77
- **📡 RSS & Atom Support**: Dedicated option creators for RSS 2.0 and Atom 1.0 feeds
8-
- **🔄 Automatic Polling**: Configurable polling intervals with intelligent error recovery and manual refresh capability
9-
- **✨ Deduplication**: Built-in deduplication based on feed item IDs/GUIDs
8+
- **🔄 Smart Polling**: Configurable polling intervals with automatic detection based on feed metadata (`sy:updatePeriod`/`sy:updateFrequency`)
9+
- **✨ Content-Aware Deduplication**: Built-in deduplication that detects content changes for existing GUIDs and treats them as updates
10+
- **📅 RFC-Compliant Date Parsing**: Strict RFC 2822/3339 date parsing for reliable timezone handling
1011
- **🔧 Transform Functions**: Custom transform functions to normalize feed data to your schema
1112
- **📝 Full TypeScript Support**: Complete type safety with schema inference
1213
- **🎛️ Mutation Handlers**: Support for `onInsert`, `onUpdate`, and `onDelete` callbacks
@@ -83,9 +84,51 @@ const atomFeed = createCollection({
8384
})
8485
```
8586

87+
## Smart Features
88+
89+
### Smart Polling Intervals
90+
91+
The RSS collection automatically detects optimal polling intervals based on feed metadata:
92+
93+
- **RSS Syndication**: Uses `<sy:updatePeriod>` and `<sy:updateFrequency>` tags when available
94+
- **Default**: 5 minutes for all feeds when syndication tags are not present
95+
96+
```typescript
97+
// The collection will automatically detect and use appropriate intervals
98+
const feed = createCollection({
99+
...rssCollectionOptions({
100+
feedUrl: "https://blog.example.com/feed.xml",
101+
// No pollingInterval specified - uses the feed's sy:updatePeriod/sy:updateFrequency when present, otherwise the 5-minute default
102+
}),
103+
})
104+
```
105+
106+
### Content-Aware Deduplication
107+
108+
Unlike simple GUID-based deduplication, this collection detects when feed items with the same GUID have changed content and treats them as updates:
109+
110+
- **New Items**: Items with unseen GUIDs are inserted
111+
- **Content Changes**: Items with existing GUIDs but changed content are updated
112+
- **No Changes**: Items with existing GUIDs and unchanged content are ignored
113+
114+
This ensures that corrections, updates, or content changes in feed items are properly reflected in your database.
115+
116+
### RFC-Compliant Date Parsing
117+
118+
The collection uses strict RFC 2822 (RSS) and RFC 3339 (Atom) date parsing to avoid locale-dependent issues:
119+
120+
```typescript
121+
import { parseFeedDate } from "@tanstack/rss-db-collection"
122+
123+
// Handles various date formats reliably
124+
const date1 = parseFeedDate("Mon, 25 Dec 2023 10:30:00 GMT") // RFC 2822
125+
const date2 = parseFeedDate("2023-12-25T10:30:00Z") // RFC 3339
126+
const date3 = parseFeedDate("2023-12-25T10:30:00+01:00") // RFC 3339 with offset
127+
```
128+
86129
## Configuration Options
87130

88-
### RSS Collection Configuration
131+
### RSS Collection Options
89132

90133
```typescript
91134
interface RSSCollectionConfig {
@@ -94,7 +137,7 @@ interface RSSCollectionConfig {
94137
getKey: (item: T) => string // Extract unique key from item
95138

96139
// Optional
97-
pollingInterval?: number // Polling interval in ms (default: 300000 = 5 minutes)
140+
pollingInterval?: number // Polling interval in ms (default: 300000 = 5 minutes, unless the feed's sy:updatePeriod/sy:updateFrequency specifies one)
98141
startPolling?: boolean // Start polling immediately (default: true)
99142
maxSeenItems?: number // Max items to track for deduplication (default: 1000)
100143

packages/rss-db-collection/src/index.ts

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,26 +60,23 @@
6060
*/
6161

6262
// RSS collection functionality
63-
export {
64-
rssCollectionOptions,
65-
type RSSCollectionConfig,
66-
type RSSItem,
67-
} from "./rss"
63+
export { rssCollectionOptions, type RSSCollectionConfig } from "./rss"
6864

6965
// Atom collection functionality
70-
export {
71-
atomCollectionOptions,
72-
type AtomCollectionConfig,
73-
type AtomItem,
74-
} from "./rss"
66+
export { atomCollectionOptions, type AtomCollectionConfig } from "./rss"
7567

7668
// Shared types and utilities
69+
export { type FeedCollectionUtils } from "./rss"
70+
71+
// Feed item types
7772
export {
73+
type RSSItem,
74+
type AtomItem,
7875
type FeedItem,
7976
type FeedType,
8077
type HTTPOptions,
81-
type FeedCollectionUtils,
82-
} from "./rss"
78+
type ParsedFeedData,
79+
} from "./types"
8380

8481
// Error types
8582
export {

packages/rss-db-collection/src/rss.ts

Lines changed: 55 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ import {
88
InvalidPollingIntervalError,
99
UnsupportedFeedFormatError,
1010
} from "./errors"
11+
import {
12+
detectSmartPollingInterval,
13+
getContentHash,
14+
parseFeedDate,
15+
} from "./utils"
1116
import type {
1217
CollectionConfig,
1318
DeleteMutationFnParams,
@@ -17,66 +22,10 @@ import type {
1722
UtilsRecord,
1823
} from "@tanstack/db"
1924
import type { StandardSchemaV1 } from "@standard-schema/spec"
25+
import type { AtomItem, FeedItem, HTTPOptions, RSSItem } from "./types"
2026

2127
const debug = DebugModule.debug(`ts/db:rss`)
2228

23-
/**
24-
* Types for RSS feed items
25-
*/
26-
export interface RSSItem {
27-
title?: string
28-
description?: string
29-
link?: string
30-
guid?: string
31-
pubDate?: string | Date
32-
author?: string
33-
category?: string | Array<string>
34-
enclosure?: {
35-
url: string
36-
type?: string
37-
length?: string
38-
}
39-
[key: string]: any
40-
}
41-
42-
/**
43-
* Types for Atom feed items
44-
*/
45-
export interface AtomItem {
46-
title?: string | { $text?: string; type?: string }
47-
summary?: string | { $text?: string; type?: string }
48-
content?: string | { $text?: string; type?: string }
49-
link?:
50-
| string
51-
| { href?: string; rel?: string; type?: string }
52-
| Array<{ href?: string; rel?: string; type?: string }>
53-
id?: string
54-
updated?: string | Date
55-
published?: string | Date
56-
author?: string | { name?: string; email?: string; uri?: string }
57-
category?:
58-
| string
59-
| { term?: string; label?: string }
60-
| Array<{ term?: string; label?: string }>
61-
[key: string]: any
62-
}
63-
64-
export type FeedItem = RSSItem | AtomItem
65-
66-
/**
67-
* Feed type detection
68-
*/
69-
export type FeedType = `rss` | `atom` | `auto`
70-
71-
/**
72-
* HTTP options for fetching feeds
73-
*/
74-
export interface HTTPOptions {
75-
timeout?: number
76-
headers?: Record<string, string>
77-
userAgent?: string
78-
}
79-
8029
/**
8130
* Base configuration interface for feed collection options
8231
*/
@@ -305,7 +254,7 @@ function parseFeed(xmlContent: string, parserOptions: any = {}): ParsedFeed {
305254
function defaultRSSTransform(item: RSSItem): RSSItem {
306255
return {
307256
...item,
308-
pubDate: item.pubDate ? new Date(item.pubDate) : undefined,
257+
pubDate: item.pubDate ? parseFeedDate(item.pubDate) : undefined,
309258
}
310259
}
311260

@@ -340,10 +289,10 @@ function defaultAtomTransform(item: AtomItem): AtomItem {
340289

341290
// Handle dates
342291
if (item.updated) {
343-
normalized.updated = new Date(item.updated)
292+
normalized.updated = parseFeedDate(item.updated)
344293
}
345294
if (item.published) {
346-
normalized.published = new Date(item.published)
295+
normalized.published = parseFeedDate(item.published)
347296
}
348297

349298
// Handle author
@@ -447,7 +396,7 @@ function createFeedCollectionOptions<
447396
) {
448397
const {
449398
feedUrl,
450-
pollingInterval = 300000, // 5 minutes default
399+
pollingInterval: userPollingInterval,
451400
httpOptions = {},
452401
startPolling = true,
453402
maxSeenItems = 1000,
@@ -461,6 +410,10 @@ function createFeedCollectionOptions<
461410
...restConfig
462411
} = config
463412

413+
// Smart polling interval detection
414+
let pollingInterval =
415+
userPollingInterval !== undefined ? userPollingInterval : 300000 // Default 5 minutes
416+
464417
// Validation
465418
if (!feedUrl) {
466419
throw new FeedURLRequiredError()
@@ -470,7 +423,10 @@ function createFeedCollectionOptions<
470423
}
471424

472425
// State management
473-
let seenItems = new Map<string, { id: string; lastSeen: number }>()
426+
let seenItems = new Map<
427+
string,
428+
{ id: string; lastSeen: number; contentHash: string }
429+
>()
474430
let syncParams:
475431
| Parameters<
476432
SyncConfig<ResolveType<TExplicit, TSchema, TFallback>, TKey>[`sync`]
@@ -544,10 +500,22 @@ function createFeedCollectionOptions<
544500
throw new UnsupportedFeedFormatError(feedUrl)
545501
}
546502

503+
// Detect smart polling interval on first fetch
504+
if (!userPollingInterval) {
505+
const parser = new XMLParser(parserOptions)
506+
const feedData = parser.parse(xmlContent)
507+
const smartInterval = detectSmartPollingInterval(feedData)
508+
if (smartInterval !== pollingInterval) {
509+
pollingInterval = smartInterval
510+
debug(`Updated polling interval to ${pollingInterval}ms`)
511+
}
512+
}
513+
547514
const { begin, write, commit } = params
548515
begin()
549516

550517
let newItemsCount = 0
518+
let updatedItemsCount = 0
551519
const currentTime = Date.now()
552520

553521
for (const rawItem of parsedFeed.items) {
@@ -572,22 +540,41 @@ function createFeedCollectionOptions<
572540

573541
// Generate unique ID for deduplication
574542
const itemId = getItemId(rawItem, parsedFeed.type)
543+
const contentHash = getContentHash(rawItem)
575544

576545
// Check if we've seen this item before
577546
const seen = seenItems.get(itemId)
578547

579548
if (!seen) {
580549
// New item
581-
seenItems.set(itemId, { id: itemId, lastSeen: currentTime })
550+
seenItems.set(itemId, {
551+
id: itemId,
552+
lastSeen: currentTime,
553+
contentHash,
554+
})
582555

583556
write({
584557
type: `insert`,
585558
value: transformedItem,
586559
})
587560

588561
newItemsCount++
562+
} else if (seen.contentHash !== contentHash) {
563+
// Item exists but content has changed - treat as update
564+
seenItems.set(itemId, {
565+
...seen,
566+
lastSeen: currentTime,
567+
contentHash,
568+
})
569+
570+
write({
571+
type: `update`,
572+
value: transformedItem,
573+
})
574+
575+
updatedItemsCount++
589576
} else {
590-
// Update last seen time
577+
// Item exists and content hasn't changed - just update last seen time
591578
seenItems.set(itemId, { ...seen, lastSeen: currentTime })
592579
}
593580
}
@@ -597,6 +584,9 @@ function createFeedCollectionOptions<
597584
if (newItemsCount > 0) {
598585
debug(`Added ${newItemsCount} new items from feed`)
599586
}
587+
if (updatedItemsCount > 0) {
588+
debug(`Updated ${updatedItemsCount} existing items from feed`)
589+
}
600590

601591
// Clean up old items periodically
602592
cleanupSeenItems()
@@ -694,6 +684,7 @@ function createFeedCollectionOptions<
694684
getKey,
695685
sync,
696686
startSync: true,
687+
rowUpdateMode: `full`,
697688
onInsert,
698689
onUpdate,
699690
onDelete,

0 commit comments

Comments
 (0)