Skip to content

Commit 2173d44

Browse files
committed
feat: fetch page titles for links added via telegram
1 parent 61dc6b6 commit 2173d44

File tree

4 files changed

+91
-54
lines changed

4 files changed

+91
-54
lines changed

app.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const express = require('express')
77
function connect(success, error) {
88
function onConnectFailed(err) {
99
setTimeout(() => { error() }, 0)
10+
console.error(err)
1011
throw new Error(`unable to connect to database at ${config.db}`)
1112
}
1213

app/controllers/remote.js

Lines changed: 9 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
const express = require('express'),
22
router = express.Router(),
3-
axios = require('axios'),
4-
htmlparser = require('htmlparser2'),
5-
cache = require('memory-cache'),
6-
config = require('../../config/config'),
73
morgan = require('../../config/middlewares/morgan')(),
8-
logger = require('./../../config/log')()
4+
logger = require('./../../config/log')(),
5+
PageMetaService = require('../services/page_meta')
96

10-
const CACHE_TIMEOUT = 1000 * 60 * 60 * 24
7+
const pageMetaService = new PageMetaService()
118

129
module.exports = function (app) {
1310
app.use('/api/remote', router)
@@ -40,53 +37,13 @@ module.exports = function (app) {
4037

4138
if (!url || !url.length) return res.makeError(400, 'No url given')
4239

43-
function sendTitle(t) {
44-
cache.put(url, t, CACHE_TIMEOUT)
45-
return res.send({ title: t })
46-
}
47-
48-
const cachedValue = cache.get(url)
49-
if (cachedValue) return sendTitle(cachedValue)
50-
else if (cache.keys().includes(url)) return sendTitle(null)
51-
52-
cache.put(url, null, CACHE_TIMEOUT) // prevent redirect loops
53-
54-
axios({
55-
method: 'head',
56-
url: url,
57-
headers: { 'Accept': 'text/html' }
58-
}).then((response) => {
59-
const contentType = response.headers['content-type']
60-
const contentLenth = parseInt(response['content-length']) / 1000
61-
if (!contentType.includes('text/html') && (!contentLenth || contentLenth > config.maxHtmlSizeKb)) {
62-
sendTitle(null)
63-
return null
64-
}
65-
return axios({
66-
method: 'get',
67-
url: url,
68-
headers: { 'Accept': 'text/html' }
40+
pageMetaService.fetchTitle(url)
41+
.then((title) => {
42+
return res.send({ title })
6943
})
70-
}).then((response) => {
71-
if (!response) return
72-
const contentType = response.headers['content-type']
73-
if (!contentType.startsWith('text/html')) return sendTitle(null)
74-
const handler = new htmlparser.DomHandler((error, dom) => {
75-
if (error) return res.makeError(404, 'Not found')
76-
const htmlNode = dom.filter((n) => { return n.type === 'tag' && n.name === 'html' })[0]
77-
const headNode = htmlNode.children.filter((n) => { return n.type === 'tag' && n.name === 'head' })[0]
78-
const titleNode = headNode.children.filter((n) => { return n.type === 'tag' && n.name === 'title' })[0]
79-
let title = titleNode.children[0].data.trim()
80-
title = title.replace(/&#(\d+);/g, (match, dec) => {
81-
return String.fromCharCode(dec)
82-
}).replace(/&.+;/g, '')
83-
return sendTitle(title)
44+
.catch((err) => {
45+
logger.error(`Failed to resolve title for URL ${url} - ${err}`)
46+
return res.makeError(404, 'Not found')
8447
})
85-
const parser = new htmlparser.Parser(handler)
86-
parser.parseComplete(response.data)
87-
}).catch((err) => {
88-
logger.error(`Failed to resolve title for URL ${url} - ${err}`)
89-
return res.makeError(404, 'Not found')
90-
})
9148
})
9249
}

app/controllers/telegram.js

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ const fs = require('fs'),
1414
tgutils = require('./utils/telegram'),
1515
addLink = require('./utils/collection').addLink,
1616
fetchCollections = require('./utils/collection').fetchCollections,
17-
addShortlink = require('./utils/shortlink').addShortlink
17+
addShortlink = require('./utils/shortlink').addShortlink,
18+
PageMetaService = require('./../services/page_meta')
1819

1920
const otps = {}
2021
const pendingLinks = {} // by user
22+
const pageMetaService = new PageMetaService()
2123

2224
const CMD_START = 'CMD_START',
2325
CMD_LOGOUT = 'CMD_LOGOUT',
@@ -169,7 +171,12 @@ const commandProcessors = {
169171
const link = pendingLinks[user._id]
170172
const index = parseInt(args[0]) - 1
171173

172-
fetchCollections(user)
174+
pageMetaService.fetchTitle(link.url)
175+
.then((title) => {
176+
link.description = title
177+
})
178+
.catch((err) => null) // ignore and continue without setting title
179+
.then(() => fetchCollections(user))
173180
.then(({ data }) => {
174181
// this is not robust to the collections changing while the user does their choice
175182
if (index < 0 || index >= data.length) return tgutils.doRequest('sendMessage', { chat_id: rawMessage.chat.id, text: '❌ Invalid collection.' })

app/services/page_meta.js

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
const axios = require('axios'),
2+
htmlparser = require('htmlparser2'),
3+
cache = require('memory-cache'),
4+
config = require('../../config/config')
5+
6+
const CACHE_TIMEOUT = 1000 * 60 * 60 * 24
7+
8+
/**
 * Returns the first element node called `name` among `nodes`.
 *
 * @param {Array<Object>} nodes - DOM nodes as produced by htmlparser2's DomHandler
 * @param {string} name - tag name to look for
 * @returns {Object|undefined} the matching node, or undefined when absent
 */
function findTag(nodes, name) {
  return (nodes || []).find((n) => n.type === 'tag' && n.name === name)
}

/**
 * Trims raw <title> text, decodes decimal numeric entities (e.g. `&#8211;`)
 * and drops any other entity.
 *
 * Entities are matched one at a time (`[^\s;]+`), so the text between two
 * entities is kept instead of being swallowed by a greedy match.
 *
 * @param {string} raw - text content of the title node
 * @returns {string} cleaned title
 */
function cleanTitle(raw) {
  return raw.trim()
    .replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec))
    .replace(/&[^\s;]+;/g, '')
}

/**
 * Parses an HTML document and extracts the text of html > head > title.
 *
 * @param {string} html - document body
 * @returns {string|null} cleaned title, or null when the document has no usable title
 * @throws {Error} when the parser reports an error
 */
function extractTitle(html) {
  let parseError = null
  let dom = []

  // DomHandler invokes the callback synchronously from within parseComplete()
  const handler = new htmlparser.DomHandler((error, nodes) => {
    parseError = error
    dom = nodes
  })
  new htmlparser.Parser(handler).parseComplete(html)

  if (parseError) throw (parseError instanceof Error ? parseError : new Error(parseError))

  const htmlNode = findTag(dom, 'html')
  const headNode = htmlNode && findTag(htmlNode.children, 'head')
  const titleNode = headNode && findTag(headNode.children, 'title')
  const textNode = titleNode && titleNode.children && titleNode.children[0]

  // Documents without a (non-empty) <title> simply have no title
  if (!textNode || typeof textNode.data !== 'string') return null

  return cleanTitle(textNode.data)
}

/**
 * Resolves and caches the titles of remote HTML pages.
 *
 * The class is a singleton: every `new PageMetaService()` yields the same instance.
 */
class PageMetaService {
  constructor() {
    // Store and read the same static property, so that the first instance is reused
    if (!PageMetaService._instance) {
      PageMetaService._instance = this
    }
    return PageMetaService._instance
  }

  /**
   * Fetches the <title> of the page at `url`.
   *
   * A HEAD request is issued first so that non-HTML resources, and pages whose
   * declared size exceeds `config.maxHtmlSizeKb`, are never downloaded. Results
   * (including "no title") are cached for CACHE_TIMEOUT milliseconds.
   *
   * @param {string} url - address of the page
   * @returns {Promise<string|null>} the page title, or null when the page has none,
   *   is not HTML, is too large, or a lookup for it is already cached as empty
   * @throws rejects when a request fails or the document cannot be parsed
   */
  async fetchTitle(url) {
    const remember = (title) => {
      cache.put(url, title, CACHE_TIMEOUT)
      return title
    }

    const cachedTitle = cache.get(url)
    if (cachedTitle) return remember(cachedTitle)
    // Key present with an empty value: known to be untitled or lookup in progress
    if (cache.keys().includes(url)) return remember(null)

    cache.put(url, null, CACHE_TIMEOUT) // prevent redirect loops

    const requestOptions = { url: url, headers: { 'Accept': 'text/html' } }

    const head = await axios(Object.assign({ method: 'head' }, requestOptions))
    const headType = head.headers['content-type'] || ''
    // Content-Length lives in the response headers; it is absent for chunked responses
    const sizeKb = parseInt(head.headers['content-length'], 10) / 1000
    const tooLarge = !Number.isNaN(sizeKb) && sizeKb > config.maxHtmlSizeKb
    if (!headType.includes('text/html') || tooLarge) return remember(null)

    const page = await axios(Object.assign({ method: 'get' }, requestOptions))
    const pageType = page.headers['content-type'] || ''
    if (!pageType.startsWith('text/html')) return remember(null)

    // Cache the resolved title; otherwise the placeholder above would hide it
    return remember(extractTitle(page.data))
  }
}
71+
72+
module.exports = PageMetaService

0 commit comments

Comments
 (0)