From 50fa843eb563dd231cae46693de628d54f5b1f0f Mon Sep 17 00:00:00 2001 From: shiny-comic Date: Thu, 8 Jan 2026 21:51:33 +0900 Subject: [PATCH 1/5] Fix disappearing end of the comments with emoji The previous code used String#size, which counts Unicode codepoints, as the content length, but the index offsets are expressed in UTF-16 code units, and emoji outside the Basic Multilingual Plane take two such units. This difference led to misaligned index offsets, which cut off the end of the text. --- src/invidious/videos/description.cr | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index 1371bebb..73758bf6 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -29,6 +29,10 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I return copied end +private def utf16_length(content : String) : Int32 + content.each_char.sum { |ch| ch.ord >= 0x10000 ? 2 : 1 } +end + def parse_description(desc, video_id : String) : String? return "" if desc.nil? @@ -40,7 +44,7 @@ def parse_description(desc, video_id : String) : String? # Slightly faster than HTML.escape, as we're only doing one pass on # the string instead of five for the standard library return String.build do |str| - copy_string(str, content.each_codepoint, content.size) + copy_string(str, content.each_codepoint, utf16_length(content)) end end @@ -76,7 +80,7 @@ def parse_description(desc, video_id : String) : String? end # Copy the end of the string (past the last command). 
- remaining_length = content.size - index + remaining_length = utf16_length(content) - index copy_string(str, iter, remaining_length) if remaining_length > 0 end end From 51aab0681b201aeb36cd91001ba325c910f93be8 Mon Sep 17 00:00:00 2001 From: shiny-comic Date: Tue, 17 Feb 2026 12:04:58 +0900 Subject: [PATCH 2/5] description.cr updated Co-authored-by: Fijxu --- src/invidious/videos/description.cr | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index 73758bf6..9f36477c 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -29,10 +29,16 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I return copied end +# String#size counts codepoints, but we need the length in UTF-16 code units, +# so characters outside the BMP (two UInt16 wide) are counted twice. +# Taken from: https://github.com/crystal-lang/crystal/blob/8fa7f90c091aa3757821c04ee243c7ab5f67ac20/src/string/utf16.cr#L18-L20 private def utf16_length(content : String) : Int32 - content.each_char.sum { |ch| ch.ord >= 0x10000 ? 2 : 1 } + u16_size = 0 + content.each_char do |char| + u16_size += char.ord < 0x1_0000 ? 1 : 2 + end + u16_size end - def parse_description(desc, video_id : String) : String? return "" if desc.nil? From 698cd49529a5459a979881d1d50407f8a5edbd18 Mon Sep 17 00:00:00 2001 From: shiny-comic Date: Tue, 17 Feb 2026 13:42:08 +0900 Subject: [PATCH 3/5] Update src/invidious/videos/description.cr Co-authored-by: Fijxu --- src/invidious/videos/description.cr | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index 9f36477c..d448ba80 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -86,7 +86,8 @@ def parse_description(desc, video_id : String) : String? end # Copy the end of the string (past the last command). 
- remaining_length = utf16_length(content) - index + content_size = content.ascii_only? ? content.size : utf16_length(content) + remaining_length = content_size - index copy_string(str, iter, remaining_length) if remaining_length > 0 end end From 44a5ea5eeb32a7ab24f0f0a73a201695f3597dd2 Mon Sep 17 00:00:00 2001 From: shiny-comic Date: Tue, 17 Feb 2026 13:42:19 +0900 Subject: [PATCH 4/5] Update src/invidious/videos/description.cr Co-authored-by: Fijxu --- src/invidious/videos/description.cr | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index d448ba80..e422b6eb 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -50,7 +50,8 @@ def parse_description(desc, video_id : String) : String? # Slightly faster than HTML.escape, as we're only doing one pass on # the string instead of five for the standard library return String.build do |str| - copy_string(str, content.each_codepoint, utf16_length(content)) + content_size = content.ascii_only? ? content.size : utf16_length(content) + copy_string(str, content.each_codepoint, content_size) end end From 8ae9899cc83dbbd72aa8a56ecd0908b06773017e Mon Sep 17 00:00:00 2001 From: shiny-comic Date: Tue, 17 Feb 2026 13:42:29 +0900 Subject: [PATCH 5/5] Update src/invidious/videos/description.cr Co-authored-by: Fijxu --- src/invidious/videos/description.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr index e422b6eb..ef6b90b5 100644 --- a/src/invidious/videos/description.cr +++ b/src/invidious/videos/description.cr @@ -39,6 +39,7 @@ private def utf16_length(content : String) : Int32 end u16_size end + def parse_description(desc, video_id : String) : String? return "" if desc.nil?