inked/inked-worker.js at main · christophcunningham/inked · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
 * Worker entry point: downloads an RSS or Atom feed and returns its entries
 * as JSON.
 *
 * Query parameters:
 *   url   - (required) address of the feed to fetch.
 *   count - (optional) maximum number of items to return; defaults to 75.
 *
 * Responses (all JSON, all carrying the headers built by corsHeaders()):
 *   200 { status: 'ok', items: [{ title, link, pubDate, content }] }
 *   400 { error }          when `url` is missing
 *   502 { error, detail }  when the feed cannot be fetched or read
 */
export default {
  async fetch(request) {
    const DEFAULT_COUNT = 75;
    const JSON_TYPE = 'application/json';

    const url = new URL(request.url);
    const feedUrl = url.searchParams.get('url');

    // A missing, non-numeric, zero or negative `count` falls back to the default.
    const requestedCount = Number.parseInt(url.searchParams.get('count') || '', 10);
    const count = requestedCount > 0 ? requestedCount : DEFAULT_COUNT;

    if (!feedUrl) {
      // Error responses need the CORS headers too, otherwise browser callers
      // cannot read the error body.
      return new Response(JSON.stringify({ error: 'Missing url parameter' }), {
        status: 400,
        headers: corsHeaders(JSON_TYPE),
      });
    }

    // NOTE(review): feedUrl is caller-supplied and fetched as-is. If this
    // endpoint is publicly reachable, consider restricting the scheme/hosts.
    let xml;
    try {
      const feedRes = await fetch(feedUrl, {
        headers: {
          'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
          'Accept-Language': 'en-US,en;q=0.9',
        },
      });

      // An upstream error page is not a feed; report it instead of returning
      // an "ok" response with no items.
      if (!feedRes.ok) {
        return new Response(
          JSON.stringify({
            error: 'Failed to fetch feed',
            detail: `Upstream responded with HTTP ${feedRes.status}`,
          }),
          { status: 502, headers: corsHeaders(JSON_TYPE) },
        );
      }

      // Reading the body can fail as well, so it stays inside the try block.
      xml = await feedRes.text();
    } catch (e) {
      return new Response(JSON.stringify({ error: 'Failed to fetch feed', detail: e.message }), {
        status: 502,
        headers: corsHeaders(JSON_TYPE),
      });
    }

    const items = [];

    // Try RSS <item> blocks first, then Atom <entry> blocks.
    // Everything before the first opening tag is dropped by slice(1).
    const rssItems = xml.split(/<item[\s>]/i).slice(1);
    const atomItems = xml.split(/<entry[\s>]/i).slice(1);

    for (const raw of rssItems) {
      if (items.length >= count) break;
      const item = raw.split(/<\/item>/i)[0];
      const title   = decodeEntities(tag(item, 'title'));
      const link    = linkTag(item) || tag(item, 'guid');
      const pubDate = tag(item, 'pubDate') || tag(item, 'dc:date');
      const content = tagNS(item, 'content:encoded') || tag(item, 'description');
      // Entries without a title are skipped.
      if (title) items.push({ title, link, pubDate, content });
    }

    // Atom is only consulted when the RSS pass produced nothing.
    if (items.length === 0) {
      for (const raw of atomItems) {
        if (items.length >= count) break;
        const entry = raw.split(/<\/entry>/i)[0];
        const title   = decodeEntities(tag(entry, 'title'));
        const link    = attr(entry, 'link', 'href') || tagInner(entry, 'link');
        const pubDate = tag(entry, 'published') || tag(entry, 'updated');
        const content = tag(entry, 'content') || tag(entry, 'summary');
        if (title) items.push({ title, link, pubDate, content });
      }
    }

    return new Response(JSON.stringify({ status: 'ok', items }), {
      headers: corsHeaders(JSON_TYPE),
    });
  },
};

// ── Helpers ──────────────────────────────────────────────────────────────────

/**
 * Build the header set shared by the JSON responses.
 *
 * @param {string} contentType - value for the Content-Type header.
 * @returns {Object} headers allowing any origin and carrying
 *   `Cache-Control: s-maxage=300`.
 */
function corsHeaders(contentType) {
  const headers = { 'Content-Type': contentType };
  headers['Access-Control-Allow-Origin'] = '*';
  headers['Cache-Control'] = 's-maxage=300';
  return headers;
}

/**
 * Extract the text of the first `<tagName>` element in `xml`.
 *
 * A CDATA section at the start of the element is tried first (across the
 * whole input); otherwise the raw text up to the closing tag is returned.
 * Matching is case-insensitive and regex-based, not a real XML parse.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tagName - element name, may include a namespace prefix.
 * @returns {string} trimmed content, or '' when the element is not found.
 */
function tag(xml, tagName) {
  // Escape every regex metacharacter so the name is matched literally.
  const name = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookahead stops `title` from matching longer names such as `titlebar`.
  const open = `<${name}(?![\\w:.-])[^>]*>`;

  // Feeds often put a newline or indentation between the opening tag and the
  // CDATA section; allow it so the wrapper is not returned as content.
  const cdata = xml.match(new RegExp(`${open}\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>`, 'i'));
  if (cdata) return cdata[1].trim();

  const plain = xml.match(new RegExp(`${open}([\\s\\S]*?)<\\/${name}\\s*>`, 'i'));
  return plain ? plain[1].trim() : '';
}

/**
 * Extract the text of the first namespaced element (e.g. `content:encoded`).
 *
 * A CDATA section at the start of the element is tried first; otherwise the
 * raw text up to the closing tag is returned. Matching is case-insensitive
 * and regex-based, not a real XML parse.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} fullName - qualified element name such as `prefix:local`.
 * @returns {string} trimmed content, or '' when the element is not found.
 */
function tagNS(xml, fullName) {
  // Escape every regex metacharacter so the name is matched literally.
  const name = fullName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookahead keeps the name from matching as a prefix of a longer name.
  const open = `<${name}(?![\\w:.-])[^>]*>`;

  // Tolerate whitespace/newlines between the opening tag and the CDATA
  // section so the wrapper is not returned as part of the content.
  const cdata = xml.match(new RegExp(`${open}\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>`, 'i'));
  if (cdata) return cdata[1].trim();

  const plain = xml.match(new RegExp(`${open}([\\s\\S]*?)<\\/${name}\\s*>`, 'i'));
  return plain ? plain[1].trim() : '';
}

/**
 * Extract the URL text of an RSS `<link>` element.
 *
 * Only the text (or CDATA section) directly following the first `<link ...>`
 * opening tag is read; no closing tag is required, since RSS links are often
 * written as a bare text node.
 *
 * @param {string} xml - RSS item fragment.
 * @returns {string} trimmed link text, or '' when none is found.
 */
function linkTag(xml) {
  // The lookahead keeps `<link` from matching longer element names.
  // Either a CDATA-wrapped value (group 1) or plain text up to the next
  // tag (group 2) is accepted.
  const m = xml.match(/<link(?![\w:.-])[^>]*>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+))/i);
  if (!m) return '';
  const text = m[1] !== undefined ? m[1] : m[2];
  return text.trim();
}

/**
 * Return the inner text of the first `<tagName>...</tagName>` pair
 * (used as the text fallback for Atom `<link>`).
 *
 * The span between the tag name and the closing `>` may not contain a `/`,
 * so an opening tag containing a slash (e.g. a self-closing one) is not
 * matched. Matching is case-insensitive.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tagName - element name, inserted into the pattern as-is.
 * @returns {string} trimmed inner content, or '' when nothing matches.
 */
function tagInner(xml, tagName) {
  const pattern = new RegExp(`<${tagName}[^/]*?>([\\s\\S]*?)<\\/${tagName}>`, 'i');
  const found = xml.match(pattern);
  if (!found) {
    return '';
  }
  const [, inner] = found;
  return inner.trim();
}

/**
 * Extract an attribute value from the first `<tagName ...>` that carries it.
 *
 * Both double- and single-quoted values are accepted (XML allows either),
 * as is whitespace around the `=`. Matching is case-insensitive.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tagName - element name, inserted into the pattern as-is.
 * @param {string} attrName - attribute name, inserted into the pattern as-is.
 * @returns {string} trimmed attribute value, or '' when not found.
 */
function attr(xml, tagName, attrName) {
  // The lookahead keeps `link` from matching longer names such as `linkset`.
  // Group 1 holds a double-quoted value, group 2 a single-quoted one.
  const pattern = new RegExp(
    `<${tagName}(?![\\w:.-])[^>]*\\s${attrName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`,
    'i',
  );
  const m = xml.match(pattern);
  if (!m) return '';
  const value = m[1] !== undefined ? m[1] : m[2];
  return value.trim();
}

/**
 * Strip CDATA wrappers and decode the common named entities plus decimal
 * and hexadecimal character references.
 *
 * Replacements run in sequence with `&amp;` first, so doubly-escaped input
 * such as `&amp;lt;` ends up as `<`.
 *
 * @param {string} str - text that may contain entities or CDATA sections.
 * @returns {string} decoded text.
 */
function decodeEntities(str) {
  // String.fromCharCode truncates to 16 bits, which corrupts code points
  // above U+FFFF (emoji etc.). fromCodePoint handles them, but throws on
  // out-of-range values, so those references are left untouched.
  const fromReference = (entity, codePoint) =>
    Number.isInteger(codePoint) && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : entity;

  return str
    .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&apos;/g, "'")
    .replace(/&#(\d+);/g, (entity, n) => fromReference(entity, Number(n)))
    .replace(/&#x([0-9a-f]+);/gi, (entity, h) => fromReference(entity, Number.parseInt(h, 16)));
}