Skip to content

Commit 490a0b7

Browse files
shiny-comicshiny-comic
authored and committed
Fix disappearing end of the comments with emoji
The previous code counted characters as UTF-8 characters; however, emoji are counted in UTF-16 code units (a codepoint at or above 0x10000 takes two units). This difference leads to misaligned index offsets.
1 parent 21d0d10 commit 490a0b7

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

src/invidious/videos/description.cr

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I
2121
str << cp.chr
2222
end
2323

24-
# A codepoint from the SMP counts twice
25-
copied += 1 if cp > 0xFFFF
2624
copied += 1
2725
end
2826

2927
return copied
3028
end
3129

30+
private def utf16_length(content : String) : Int32
31+
content.each_char.sum { |ch| ch.ord >= 0x10000 ? 2 : 1 }
32+
end
33+
3234
def parse_description(desc, video_id : String) : String?
3335
return "" if desc.nil?
3436

@@ -40,14 +42,10 @@ def parse_description(desc, video_id : String) : String?
4042
# Slightly faster than HTML.escape, as we're only doing one pass on
4143
# the string instead of five for the standard library
4244
return String.build do |str|
43-
copy_string(str, content.each_codepoint, content.size)
45+
copy_string(str, content.each_codepoint, utf16_length(content))
4446
end
4547
end
4648

47-
# Not everything is stored in UTF-8 on youtube's side. The SMP codepoints
48-
# (0x10000 and above) are encoded as UTF-16 surrogate pairs, which are
49-
# automatically decoded by the JSON parser. It means that we need to count
50-
# copied byte in a special manner, preventing the use of regular string copy.
5149
iter = content.each_codepoint
5250

5351
index = 0
@@ -76,7 +74,7 @@ def parse_description(desc, video_id : String) : String?
7674
end
7775

7876
# Copy the end of the string (past the last command).
79-
remaining_length = content.size - index
77+
remaining_length = utf16_length(content) - index
8078
copy_string(str, iter, remaining_length) if remaining_length > 0
8179
end
8280
end

0 commit comments

Comments
 (0)