-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserper.js
More file actions
125 lines (111 loc) · 3.51 KB
/
serper.js
File metadata and controls
125 lines (111 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
* Serper API: scrape a URL to plain text.
* Uses X-API-KEY header and scrape endpoint from env.
* Parses JSON response (text field) and formats horoscope only (no links/CTAs).
*/
// Scrape endpoint and horoscope base URL, both read from the environment when the module loads.
const { SERPER_SCRAPE_URL, HOROSCOPE_BASE_URL } = process.env;
// Footer markers: the first line containing any of these (compared case-insensitively)
// begins the trailing section that is discarded. They are expected to appear only AFTER
// every content section (Daily, Health, Love, Career & Money).
// NOTE(review): entries are matched as substrings, so short generic ones such as
// 'to read ' or 'weekly and' would also match ordinary horoscope prose and end the
// output early — confirm against real page text.
const HOROSCOPE_CUTOFF_PHRASES = [
  'to unfold what lies further ahead',
  'take a look at your',
  'weekly and',
  'monthly horoscope',
  'to read ',
  'horoscope in hindi',
  'rashifal today',
  'daily horoscope highlighting',
  'horoscope for other zodiac signs',
  'back to horoscope main page',
  'related links',
];
// Promo/CTA markers: a line containing any of these is left out of the output,
// but processing continues with the lines that follow it.
const HOROSCOPE_SKIP_PHRASES = [
  'understand compatibility',
  'check love percentage',
  'love calculator',
];
/**
 * Tells whether a line marks the beginning of the footer/links area to be dropped.
 * The line is lower-cased and trimmed, then checked for any HOROSCOPE_CUTOFF_PHRASES
 * entry as a substring. Blank lines never match.
 * @param {string} line - One line of scraped text
 * @returns {boolean} True when the line contains a cutoff phrase
 */
const isCutoffLine = (line) => {
  const normalized = line.toLowerCase().trim();
  if (normalized.length === 0) {
    return false;
  }
  for (const marker of HOROSCOPE_CUTOFF_PHRASES) {
    if (normalized.includes(marker)) {
      return true;
    }
  }
  return false;
};
/**
 * Tells whether a single line should be left out of the output (e.g. a promo line)
 * while the remaining lines are still processed.
 * The line is lower-cased and trimmed, then checked for any HOROSCOPE_SKIP_PHRASES
 * entry as a substring. Blank lines never match.
 * @param {string} line - One line of scraped text
 * @returns {boolean} True when the line contains a skip phrase
 */
const isSkipLine = (line) => {
  const haystack = line.toLowerCase().trim();
  return haystack !== '' && HOROSCOPE_SKIP_PHRASES.some((needle) => haystack.includes(needle));
};
/**
 * Reduce scraped text to the horoscope content only.
 *
 * The text is split on newlines, each line is trimmed and blank lines are discarded.
 * Everything from the first cutoff line (see isCutoffLine) onwards is dropped, lines
 * flagged by isSkipLine are removed, and the survivors are joined with a blank line
 * between them.
 * @param {string} raw - Raw text from scrape (e.g. Serper JSON .text); falsy values yield ''
 * @returns {string} Formatted horoscope only
 */
const formatHoroscopeOnly = (raw) => {
  const source = (raw || '').trim();
  if (source === '') {
    return '';
  }

  const candidates = source
    .split(/\n+/)
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);

  // Index of the first footer line; -1 means no footer was found and every line is content.
  const footerStart = candidates.findIndex((entry) => isCutoffLine(entry));
  const content = footerStart === -1 ? candidates : candidates.slice(0, footerStart);

  return content
    .filter((entry) => !isSkipLine(entry))
    .join('\n\n')
    .trim();
};
/**
 * Scrape a URL through the configured Serper scrape endpoint and return its content as text.
 *
 * Sends a POST to SERPER_SCRAPE_URL with a JSON body `{ url }` and the API key in the
 * `X-API-KEY` header. If the response body parses as JSON and has a string `text` field,
 * that field (trimmed) is returned; otherwise the raw response body is returned unchanged.
 *
 * @param {string} apiKey - Serper API key
 * @param {string} url - Full URL to scrape
 * @returns {Promise<string>} Scraped content as text
 * @throws {Error} If SERPER_SCRAPE_URL is not configured, or the response status is not OK
 */
const scrapeUrl = async (apiKey, url) => {
  // Fail with an actionable message instead of handing an undefined endpoint to fetch().
  if (!SERPER_SCRAPE_URL) {
    throw new Error('Serper scrape failed: SERPER_SCRAPE_URL is not configured');
  }

  const response = await fetch(SERPER_SCRAPE_URL, {
    method: 'POST',
    headers: {
      'X-API-KEY': apiKey,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ url }),
  });
  if (!response.ok) {
    throw new Error(`Serper scrape failed: ${response.status}`);
  }

  // Read the body once as text so it can be returned verbatim when it is not JSON.
  const body = await response.text();
  try {
    const data = JSON.parse(body);
    if (data && typeof data.text === 'string') return data.text.trim();
  } catch {
    // Deliberate best-effort: the body is not JSON, so fall through and use it as-is.
  }
  // No logging here: the body can be an entire scraped page.
  return body;
};
/**
 * Get the horoscope for a zodiac sign.
 *
 * Builds the page URL by appending the URI-encoded, trimmed sign to HOROSCOPE_BASE_URL,
 * scrapes it with scrapeUrl, and reduces the result with formatHoroscopeOnly.
 *
 * @param {string} apiKey - Serper API key
 * @param {string} sign - Zodiac sign for query param (e.g. "aries", "leo")
 * @returns {Promise<string>} Formatted horoscope content only (no links/CTAs)
 * @throws {Error} If HOROSCOPE_BASE_URL is not configured
 * @throws {TypeError} If sign is not a non-empty string
 */
const getHoroscope = async (apiKey, sign) => {
  // Without these guards a missing value is interpolated as the literal text "undefined"
  // and a bogus URL is sent to the scraper.
  if (!HOROSCOPE_BASE_URL) {
    throw new Error('HOROSCOPE_BASE_URL is not configured');
  }
  if (typeof sign !== 'string' || sign.trim() === '') {
    throw new TypeError('sign must be a non-empty string');
  }

  const url = `${HOROSCOPE_BASE_URL}${encodeURIComponent(sign.trim())}`;
  const raw = await scrapeUrl(apiKey, url);
  return formatHoroscopeOnly(raw);
};
// CommonJS exports of this module.
module.exports = { scrapeUrl, formatHoroscopeOnly, getHoroscope };