Skip to content

Commit 99d00a6

Browse files
authored
fix: parse URLs correctly when they are surrounded by special characters (#961)
Fixes https://sendbird.atlassian.net/browse/CLNP-2190 and https://sendbird.atlassian.net/browse/SBISSUE-14850. A token string in a format such as `[www.sendbird.com](www.sendbird.com)` was not parsed as a URL and was rendered as plain text instead.
1 parent 7a4a467 commit 99d00a6

File tree

5 files changed

+110
-18
lines changed

5 files changed

+110
-18
lines changed

src/modules/Message/utils/tokens/__tests__/tokenizeMessage.spec.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ describe('tokenizeMessage', () => {
1616
messageText: 'Hello world https://example.com',
1717
});
1818
expect(tokens).toEqual([
19-
{ type: 'string', value: 'Hello world' },
19+
{ type: 'string', value: 'Hello world ' },
2020
{ type: 'url', value: 'https://example.com' },
2121
]);
2222
});
@@ -26,9 +26,9 @@ describe('tokenizeMessage', () => {
2626
messageText: 'Hello world https://example.com and more',
2727
});
2828
expect(tokens).toEqual([
29-
{ type: 'string', value: 'Hello world' },
29+
{ type: 'string', value: 'Hello world ' },
3030
{ type: 'url', value: 'https://example.com' },
31-
{ type: 'string', value: 'and more' },
31+
{ type: 'string', value: ' and more' },
3232
]);
3333
});
3434

@@ -48,7 +48,7 @@ describe('tokenizeMessage', () => {
4848
type: 'mention',
4949
userId: 'userA',
5050
}, {
51-
value: ' and more',
51+
value: ' and more ',
5252
type: 'string',
5353
}, {
5454
value: 'https://example.com',

src/modules/Message/utils/tokens/__tests__/tokenizeUtils.spec.ts

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,88 @@ describe('identifyUrlsAndStrings', () => {
6464
const result = identifyUrlsAndStrings(tokens);
6565
expect(result).toEqual([{
6666
type: 'string',
67-
value: 'abc',
67+
value: 'abc ',
6868
}, {
6969
type: 'url',
7070
value: 'https://www.google.com',
7171
}, {
7272
type: 'string',
73-
value: '123',
73+
value: ' 123',
74+
}]);
75+
});
76+
77+
it('should parse multiple urls with special charactors', () => {
78+
const tokens = [{
79+
type: 'undetermined',
80+
value: '[https://www.google.com](https://www.google.com)',
81+
}] as UndeterminedToken[];
82+
const result = identifyUrlsAndStrings(tokens);
83+
expect(result).toEqual([{
84+
type: 'string',
85+
value: '[',
86+
}, {
87+
type: 'url',
88+
value: 'https://www.google.com',
89+
}, {
90+
type: 'string',
91+
value: '](',
92+
}, {
93+
type: 'url',
94+
value: 'https://www.google.com',
95+
}, {
96+
type: 'string',
97+
value: ')',
7498
}]);
7599
});
100+
101+
it('should parse valid URLs correctly', () => {
102+
const validURLs = [
103+
// with protocol
104+
'http://www.example.com',
105+
'https://www.example.com',
106+
'http://example.com',
107+
'https://example.com',
108+
// without protocol
109+
'www.example.com',
110+
'example.com',
111+
// with sub paths
112+
'http://www.example.com/path/to/page.html',
113+
'https://www.example.com/path/to/page.html',
114+
'http://example.com/path/to/page.html',
115+
'https://example.com/path/to/page.html',
116+
'www.example.com/path/to/page.html',
117+
'example.com/path/to/page.html',
118+
// with query strings
119+
'http://www.example.com/path/to/page.html?query=string',
120+
'https://www.example.com/path/to/page.html?query=string',
121+
'http://example.com/path/to/page.html?query=string',
122+
'https://example.com/path/to/page.html?query=string',
123+
'www.example.com/path/to/page.html?query=string',
124+
'example.com/path/to/page.html?query=string',
125+
'https://www.amazon.com/Hacker-Playbook-Practical-Penetration-Testing/dp/1494932636/ref=sr_1_5?crid=1IKVPDXYF5NQG&keywords=hacker+guide&qid=1681333238&sprefix=hacker+guid%2Caps%2C148&sr=8-5',
126+
// with the hash property
127+
'https://example.com/path/to/page.html?query=string#hash',
128+
'https://docs.google.com/document/d/19IccwdTIwNPJ_rGtsbi2Ft8dshaH4WiCXD5pder97VE/edit#heading=h.pve9ikkfqqzz',
129+
// A subdomain has a hyphen
130+
'https://send-bird.slack.com/archives/C065N4UQ77W/p1699931368643169?thread_ts=1699925671l395019&cid-Co65N4UQ77W',
131+
// with long top-level domain
132+
'https://send.bird.business/archives/C065N4UQ77W/p1699931368643169?thread_ts=1699925671l395019&cid-Co65N4UQ77W',
133+
];
134+
validURLs.forEach((url) => {
135+
const result = identifyUrlsAndStrings([
136+
{
137+
type: 'undetermined',
138+
value: url,
139+
},
140+
]);
141+
expect(result).toEqual([
142+
{
143+
type: 'url',
144+
value: url,
145+
},
146+
]);
147+
});
148+
});
76149
});
77150

78151
describe('combineNearbyStrings', () => {
@@ -96,7 +169,7 @@ describe('combineNearbyStrings', () => {
96169
}] as Token[];
97170
const expected = [{
98171
type: 'string',
99-
value: 'abc pqr',
172+
value: 'abc pqr',
100173
}, {
101174
type: 'url',
102175
value: 'https://www.google.com',

src/modules/Message/utils/tokens/tokenize.ts

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { User } from '@sendbird/chat';
22
import { USER_MENTION_PREFIX } from '../../consts';
33
import { IdentifyMentionsType, MentionToken, Token, TOKEN_TYPES, TokenParams, UndeterminedToken } from './types';
4-
import { isUrl } from '../../../../utils';
54

65
export function getUserMentionRegex(mentionedUsers: User[], templatePrefix_: string): RegExp {
76
const templatePrefix = templatePrefix_ || USER_MENTION_PREFIX;
@@ -52,20 +51,38 @@ export function identifyMentions({
5251
}
5352

5453
export function identifyUrlsAndStrings(token: Token[]): Token[] {
54+
const URL_REG = /(?:https?:\/\/|www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.(xn--)?[a-z0-9-]{2,20}\b([-a-zA-Z0-9@:%_+[\],.~#?&/=]*[-a-zA-Z0-9@:%_+~#?&/=])*/g;
5555
const results: Token[] = token.map((token) => {
5656
if (token.type !== TOKEN_TYPES.undetermined) {
5757
return token;
5858
}
5959
const { value = '' } = token;
60-
const parts = value.split(' ');
61-
const tokens = parts.map((part) => {
62-
if (isUrl(part)) {
63-
return { value: part, type: TOKEN_TYPES.url };
64-
} else {
65-
return { value: part, type: TOKEN_TYPES.string };
66-
}
60+
61+
const matches = Array.from(value.matchAll(URL_REG));
62+
const founds = matches.map((value) => {
63+
const text = value[0];
64+
const start = value.index ?? 0;
65+
const end = start + text.length;
66+
return { text, start, end };
6767
});
68-
return tokens;
68+
69+
const items: Token[] = [{ value, type: TOKEN_TYPES.string }];
70+
let cursor = 0;
71+
founds.forEach(({ text, start, end }) => {
72+
const restText = items.pop().value as string;
73+
const head = restText.slice(0, start - cursor);
74+
const mid = text;
75+
const tail = restText.slice(end - cursor);
76+
items.push({ value: head, type: TOKEN_TYPES.string }, { value: mid, type: TOKEN_TYPES.url });
77+
if (tail.length > 0) items.push({ value: tail, type: TOKEN_TYPES.string });
78+
cursor = end;
79+
});
80+
81+
// Remove the first empty string
82+
if (items[0].value === '' && items[0].type === TOKEN_TYPES.string) {
83+
items.shift();
84+
}
85+
return items;
6986
}).flat();
7087

7188
return results;
@@ -75,7 +92,7 @@ export function combineNearbyStrings(tokens: Token[]): Token[] {
7592
const results: Token[] = tokens.reduce((acc, token) => {
7693
const lastToken = acc[acc.length - 1];
7794
if (lastToken?.type === TOKEN_TYPES.string && token.type === TOKEN_TYPES.string) {
78-
lastToken.value = `${lastToken.value} ${token.value}`;
95+
lastToken.value = `${lastToken.value}${token.value}`;
7996
return acc;
8097
}
8198
return [...acc, token];

src/ui/Word/index.scss

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
}
77

88
.sendbird-word__url {
9-
margin: 0 4px;
109
display: inline;
1110
color: inherit;
1211
word-break: break-all;

src/utils/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,9 @@ export const isMessageSentByMe = (
425425
);
426426

427427
const URL_REG = /^((http|https):\/\/)?([a-z\d-]+\.)+[a-z]{2,}(\:[0-9]{1,5})?(\/[-a-zA-Z\d%_.~+&=]*)*(\?[;&a-zA-Z\d%_.~+=-]*)?(#\S*)?$/;
428+
/** @deprecated
429+
* URL detection in message text is now handled by identifyUrlsAndStrings in src/modules/Message/utils/tokens/tokenize.ts
430+
*/
428431
export const isUrl = (text: string): boolean => URL_REG.test(text);
429432

430433
const MENTION_TAG_REG = /\@\{.*?\}/i;

0 commit comments

Comments
 (0)