Compare commits

..

No commits in common. "bb14f794969f62582917a39c2dd57bf92fa146a7" and "cc03610325f8c49b14216f2913a8a3148b48e3a2" have entirely different histories.

14 changed files with 149 additions and 314 deletions

View File

@ -38,10 +38,11 @@ jobs:
matrix: matrix:
stable: [true] stable: [true]
crystal: crystal:
- 1.4.1
- 1.5.1
- 1.6.2 - 1.6.2
- 1.7.3 - 1.7.3
- 1.8.2 - 1.8.2
- 1.9.2
include: include:
- crystal: nightly - crystal: nightly
stable: false stable: false
@ -52,7 +53,7 @@ jobs:
submodules: true submodules: true
- name: Install Crystal - name: Install Crystal
uses: crystal-lang/install-crystal@v1.8.0 uses: crystal-lang/install-crystal@v1.7.0
with: with:
crystal: ${{ matrix.crystal }} crystal: ${{ matrix.crystal }}

View File

@ -25,9 +25,9 @@ jobs:
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Install Crystal - name: Install Crystal
uses: crystal-lang/install-crystal@v1.8.0 uses: crystal-lang/install-crystal@v1.6.0
with: with:
crystal: 1.9.2 crystal: 1.5.0
- name: Run lint - name: Run lint
run: | run: |
@ -77,3 +77,4 @@ jobs:
tags: quay.io/invidious/invidious:${{ github.sha }}-arm64,quay.io/invidious/invidious:latest-arm64 tags: quay.io/invidious/invidious:${{ github.sha }}-arm64,quay.io/invidious/invidious:latest-arm64
build-args: | build-args: |
"release=1" "release=1"

View File

@ -14,7 +14,7 @@ jobs:
with: with:
repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token: ${{ secrets.GITHUB_TOKEN }}
days-before-stale: 365 days-before-stale: 365
days-before-pr-stale: 90 days-before-pr-stale: 45 # PRs should be active. Anything that hasn't had activity in more than 45 days should be considered abandoned.
days-before-close: 30 days-before-close: 30
exempt-pr-labels: blocked exempt-pr-labels: blocked
stale-issue-message: 'This issue has been automatically marked as stale and will be closed in 30 days because it has not had recent activity and is much likely outdated. If you think this issue is still relevant and applicable, you just have to post a comment and it will be unmarked.' stale-issue-message: 'This issue has been automatically marked as stale and will be closed in 30 days because it has not had recent activity and is much likely outdated. If you think this issue is still relevant and applicable, you just have to post a comment and it will be unmarked.'

View File

@ -161,19 +161,6 @@ https_only: false
#force_resolve: #force_resolve:
##
## Use Innertube's transcripts API instead of timedtext for closed captions
##
## Useful for larger instances as InnerTube is **not ratelimited**. See https://github.com/iv-org/invidious/issues/2567
##
## Subtitle experience may differ slightly on Invidious.
##
## Accepted values: true, false
## Default: false
##
# use_innertube_for_captions: false
# ----------------------------- # -----------------------------
# Logging # Logging
# ----------------------------- # -----------------------------

View File

@ -127,9 +127,6 @@ class Config
# Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`) # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
property pool_size : Int32 = 100 property pool_size : Int32 = 100
# Use Innertube's transcripts API instead of timedtext for closed captions
property use_innertube_for_captions : Bool = false
# Saved cookies in "name1=value1; name2=value2..." format # Saved cookies in "name1=value1; name2=value2..." format
@[YAML::Field(converter: Preferences::StringToCookies)] @[YAML::Field(converter: Preferences::StringToCookies)]
property cookies : HTTP::Cookies = HTTP::Cookies.new property cookies : HTTP::Cookies = HTTP::Cookies.new

View File

@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage
getter full_videos : Array(Hash(String, JSON::Any)) getter full_videos : Array(Hash(String, JSON::Any))
getter video_streams : Array(Hash(String, JSON::Any)) getter video_streams : Array(Hash(String, JSON::Any))
getter audio_streams : Array(Hash(String, JSON::Any)) getter audio_streams : Array(Hash(String, JSON::Any))
getter captions : Array(Invidious::Videos::Captions::Metadata) getter captions : Array(Invidious::Videos::Caption)
def initialize( def initialize(
@full_videos, @full_videos,

View File

@ -89,7 +89,6 @@ struct Playlist
property views : Int64 property views : Int64
property updated : Time property updated : Time
property thumbnail : String? property thumbnail : String?
property subtitle : String?
def to_json(offset, json : JSON::Builder, video_id : String? = nil) def to_json(offset, json : JSON::Builder, video_id : String? = nil)
json.object do json.object do
@ -101,7 +100,6 @@ struct Playlist
json.field "author", self.author json.field "author", self.author
json.field "authorId", self.ucid json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}" json.field "authorUrl", "/channel/#{self.ucid}"
json.field "subtitle", self.subtitle
json.field "authorThumbnails" do json.field "authorThumbnails" do
json.array do json.array do
@ -358,8 +356,6 @@ def fetch_playlist(plid : String)
updated = Time.utc updated = Time.utc
video_count = 0 video_count = 0
subtitle = extract_text(initial_data.dig?("header", "playlistHeaderRenderer", "subtitle"))
playlist_info["stats"]?.try &.as_a.each do |stat| playlist_info["stats"]?.try &.as_a.each do |stat|
text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s
next if !text next if !text
@ -401,7 +397,6 @@ def fetch_playlist(plid : String)
views: views, views: views,
updated: updated, updated: updated,
thumbnail: thumbnail, thumbnail: thumbnail,
subtitle: subtitle,
}) })
end end

View File

@ -87,78 +87,70 @@ module Invidious::Routes::API::V1::Videos
caption = caption[0] caption = caption[0]
end end
if CONFIG.use_innertube_for_captions url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target
params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated)
initial_data = YoutubeAPI.get_transcript(params)
webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code) # Auto-generated captions often have cues that aren't aligned properly with the video,
else # as well as some other markup that makes it cumbersome, so we try to fix that here
# Timedtext API handling if caption.name.includes? "auto-generated"
url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target caption_xml = YT_POOL.client &.get(url).body
# Auto-generated captions often have cues that aren't aligned properly with the video, if caption_xml.starts_with?("<?xml")
# as well as some other markup that makes it cumbersome, so we try to fix that here webvtt = caption.timedtext_to_vtt(caption_xml, tlang)
if caption.name.includes? "auto-generated" else
caption_xml = YT_POOL.client &.get(url).body caption_xml = XML.parse(caption_xml)
if caption_xml.starts_with?("<?xml") webvtt = String.build do |str|
webvtt = caption.timedtext_to_vtt(caption_xml, tlang) str << <<-END_VTT
else WEBVTT
caption_xml = XML.parse(caption_xml) Kind: captions
Language: #{tlang || caption.language_code}
webvtt = String.build do |str|
str << <<-END_VTT
WEBVTT
Kind: captions
Language: #{tlang || caption.language_code}
END_VTT END_VTT
caption_nodes = caption_xml.xpath_nodes("//transcript/text") caption_nodes = caption_xml.xpath_nodes("//transcript/text")
caption_nodes.each_with_index do |node, i| caption_nodes.each_with_index do |node, i|
start_time = node["start"].to_f.seconds start_time = node["start"].to_f.seconds
duration = node["dur"]?.try &.to_f.seconds duration = node["dur"]?.try &.to_f.seconds
duration ||= start_time duration ||= start_time
if caption_nodes.size > i + 1 if caption_nodes.size > i + 1
end_time = caption_nodes[i + 1]["start"].to_f.seconds end_time = caption_nodes[i + 1]["start"].to_f.seconds
else else
end_time = start_time + duration end_time = start_time + duration
end
start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
text = HTML.unescape(node.content)
text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
text = text.gsub(/<\/font>/, "")
if md = text.match(/(?<name>.*) : (?<text>.*)/)
text = "<v #{md["name"]}>#{md["text"]}</v>"
end
str << <<-END_CUE
#{start_time} --> #{end_time}
#{text}
END_CUE
end end
start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
text = HTML.unescape(node.content)
text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
text = text.gsub(/<\/font>/, "")
if md = text.match(/(?<name>.*) : (?<text>.*)/)
text = "<v #{md["name"]}>#{md["text"]}</v>"
end
str << <<-END_CUE
#{start_time} --> #{end_time}
#{text}
END_CUE
end end
end end
end
else
# Some captions have "align:[start/end]" and "position:[num]%"
# attributes. Those are causing issues with VideoJS, which is unable
# to properly align the captions on the video, so we remove them.
#
# See: https://github.com/iv-org/invidious/issues/2391
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
if webvtt.starts_with?("<?xml")
webvtt = caption.timedtext_to_vtt(webvtt)
else else
# Some captions have "align:[start/end]" and "position:[num]%"
# attributes. Those are causing issues with VideoJS, which is unable
# to properly align the captions on the video, so we remove them.
#
# See: https://github.com/iv-org/invidious/issues/2391
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
if webvtt.starts_with?("<?xml") .gsub(/([0-9:.]{12} --> [0-9:.]{12}).+/, "\\1")
webvtt = caption.timedtext_to_vtt(webvtt)
else
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
.gsub(/([0-9:.]{12} --> [0-9:.]{12}).+/, "\\1")
end
end end
end end

View File

@ -24,7 +24,7 @@ struct Video
property updated : Time property updated : Time
@[DB::Field(ignore: true)] @[DB::Field(ignore: true)]
@captions = [] of Invidious::Videos::Captions::Metadata @captions = [] of Invidious::Videos::Caption
@[DB::Field(ignore: true)] @[DB::Field(ignore: true)]
property adaptive_fmts : Array(Hash(String, JSON::Any))? property adaptive_fmts : Array(Hash(String, JSON::Any))?
@ -215,9 +215,9 @@ struct Video
keywords.includes? "YouTube Red" keywords.includes? "YouTube Red"
end end
def captions : Array(Invidious::Videos::Captions::Metadata) def captions : Array(Invidious::Videos::Caption)
if @captions.empty? && @info.has_key?("captions") if @captions.empty? && @info.has_key?("captions")
@captions = Invidious::Videos::Captions::Metadata.from_yt_json(info["captions"]) @captions = Invidious::Videos::Caption.from_yt_json(info["captions"])
end end
return @captions return @captions

View File

@ -1,106 +1,100 @@
require "json" require "json"
module Invidious::Videos module Invidious::Videos
module Captions struct Caption
struct Metadata property name : String
property name : String property language_code : String
property language_code : String property base_url : String
property base_url : String
property auto_generated : Bool def initialize(@name, @language_code, @base_url)
end
def initialize(@name, @language_code, @base_url, @auto_generated) # Parse the JSON structure from Youtube
def self.from_yt_json(container : JSON::Any) : Array(Caption)
caption_tracks = container
.dig?("playerCaptionsTracklistRenderer", "captionTracks")
.try &.as_a
captions_list = [] of Caption
return captions_list if caption_tracks.nil?
caption_tracks.each do |caption|
name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"]
name = name.to_s.split(" - ")[0]
language_code = caption["languageCode"].to_s
base_url = caption["baseUrl"].to_s
captions_list << Caption.new(name, language_code, base_url)
end end
# Parse the JSON structure from Youtube return captions_list
def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata) end
caption_tracks = container
.dig?("playerCaptionsTracklistRenderer", "captionTracks")
.try &.as_a
captions_list = [] of Captions::Metadata def timedtext_to_vtt(timedtext : String, tlang = nil) : String
return captions_list if caption_tracks.nil? # In the future, we could just directly work with the url. This is more of a POC
cues = [] of XML::Node
tree = XML.parse(timedtext)
tree = tree.children.first
caption_tracks.each do |caption| tree.children.each do |item|
name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] if item.name == "body"
name = name.to_s.split(" - ")[0] item.children.each do |cue|
if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n")
language_code = caption["languageCode"].to_s cues << cue
base_url = caption["baseUrl"].to_s
auto_generated = (caption["kind"]? == "asr")
captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated)
end
return captions_list
end
def timedtext_to_vtt(timedtext : String, tlang = nil) : String
# In the future, we could just directly work with the url. This is more of a POC
cues = [] of XML::Node
tree = XML.parse(timedtext)
tree = tree.children.first
tree.children.each do |item|
if item.name == "body"
item.children.each do |cue|
if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n")
cues << cue
end
end end
break
end end
break
end end
result = String.build do |result|
result << <<-END_VTT
WEBVTT
Kind: captions
Language: #{tlang || @language_code}
END_VTT
result << "\n\n"
cues.each_with_index do |node, i|
start_time = node["t"].to_f.milliseconds
duration = node["d"]?.try &.to_f.milliseconds
duration ||= start_time
if cues.size > i + 1
end_time = cues[i + 1]["t"].to_f.milliseconds
else
end_time = start_time + duration
end
# start_time
result << start_time.hours.to_s.rjust(2, '0')
result << ':' << start_time.minutes.to_s.rjust(2, '0')
result << ':' << start_time.seconds.to_s.rjust(2, '0')
result << '.' << start_time.milliseconds.to_s.rjust(3, '0')
result << " --> "
# end_time
result << end_time.hours.to_s.rjust(2, '0')
result << ':' << end_time.minutes.to_s.rjust(2, '0')
result << ':' << end_time.seconds.to_s.rjust(2, '0')
result << '.' << end_time.milliseconds.to_s.rjust(3, '0')
result << "\n"
node.children.each do |s|
result << s.content
end
result << "\n"
result << "\n"
end
end
return result
end end
result = String.build do |result|
result << <<-END_VTT
WEBVTT
Kind: captions
Language: #{tlang || @language_code}
END_VTT
result << "\n\n"
cues.each_with_index do |node, i|
start_time = node["t"].to_f.milliseconds
duration = node["d"]?.try &.to_f.milliseconds
duration ||= start_time
if cues.size > i + 1
end_time = cues[i + 1]["t"].to_f.milliseconds
else
end_time = start_time + duration
end
# start_time
result << start_time.hours.to_s.rjust(2, '0')
result << ':' << start_time.minutes.to_s.rjust(2, '0')
result << ':' << start_time.seconds.to_s.rjust(2, '0')
result << '.' << start_time.milliseconds.to_s.rjust(3, '0')
result << " --> "
# end_time
result << end_time.hours.to_s.rjust(2, '0')
result << ':' << end_time.minutes.to_s.rjust(2, '0')
result << ':' << end_time.seconds.to_s.rjust(2, '0')
result << '.' << end_time.milliseconds.to_s.rjust(3, '0')
result << "\n"
node.children.each do |s|
result << s.content
end
result << "\n"
result << "\n"
end
end
return result
end end
# List of all caption languages available on Youtube. # List of all caption languages available on Youtube.

View File

@ -1,103 +0,0 @@
module Invidious::Videos
# Namespace for methods primarily relating to Transcripts
module Transcript
record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String
# Builds the encoded `params` string for a transcript request.
#
# The video ID, the caption kind ("asr" when *auto_generated* is true,
# an empty string otherwise) and the *language_code* are placed into a
# Protodec object description, which is then serialized, Base64 (URL-safe)
# encoded and finally form-encoded.
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
  track_kind = if auto_generated
                 "asr"
               else
                 ""
               end

  description = {
    "1:0:string" => video_id,
    "2:base64"   => {
      "1:string" => track_kind,
      "2:string" => language_code,
      "3:string" => "",
    },
    "3:varint" => 1_i64,
    "5:string" => "engagement-panel-searchable-transcript-search-panel",
    "6:varint" => 1_i64,
    "7:varint" => 1_i64,
    "8:varint" => 1_i64,
  }

  # Each step feeds the result of the previous one.
  casted = Protodec::Any.cast_json(description)
  serialized = Protodec::Any.from_json(casted)
  encoded = Base64.urlsafe_encode(serialized)

  URI.encode_www_form(encoded)
end
# Renders a transcript response as a WebVTT document.
#
# *initial_data* is passed to `parse`, which turns it into a list of
# `TranscriptLine`s. *target_language* is written into the `Language:`
# field of the WebVTT header.
#
# Returns the complete WebVTT text: the header followed by one cue
# (timing line, text, blank line) per transcript line.
def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String
  # Convert into array of TranscriptLine
  lines = self.parse(initial_data)

  # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt()
  String.build do |io|
    io << <<-END_VTT
    WEBVTT
    Kind: captions
    Language: #{target_language}
    END_VTT
    io << "\n\n"

    lines.each do |line|
      self.append_vtt_timestamp(io, line.start_ms)
      io << " --> "
      self.append_vtt_timestamp(io, line.end_ms)
      io << "\n"

      io << line.line
      io << "\n"
      io << "\n"
    end
  end
end

# Appends *span* to *io* formatted as a WebVTT timestamp (`hh:mm:ss.ttt`).
#
# The hour field uses the span's total hours rather than `Time::Span#hours`,
# which only holds the hours-of-day component and would wrap around for
# cues located 24 hours or more into a video.
private def self.append_vtt_timestamp(io : IO, span : Time::Span) : Nil
  io << span.total_hours.to_i.to_s.rjust(2, '0')
  io << ':' << span.minutes.to_s.rjust(2, '0')
  io << ':' << span.seconds.to_s.rjust(2, '0')
  io << '.' << span.milliseconds.to_s.rjust(3, '0')
end
# Extracts the transcript segments from *initial_data* and returns them as
# an array of `TranscriptLine`, in the order they appear in the response.
#
# Raises if the expected path to `initialSegments` is missing, or if a
# segment lacks one of the keys read below.
private def self.parse(initial_data : Hash(String, JSON::Any))
  segments = initial_data.dig(
    "actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
    "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer",
    "initialSegments"
  ).as_a

  segments.compact_map do |entry|
    # Transcript section headers are not a part of the captions,
    # so they can safely be skipped.
    next if entry.as_h.has_key?("transcriptSectionHeaderRenderer")

    segment = entry["transcriptSegmentRenderer"]

    # Timings are stored as strings holding a number of milliseconds.
    starts_at = segment["startMs"].as_s.to_i.millisecond
    ends_at = segment["endMs"].as_s.to_i.millisecond
    snippet = extract_text(segment["snippet"]) || ""

    TranscriptLine.new(starts_at, ends_at, snippet)
  end
end
end
end

View File

@ -70,12 +70,7 @@
</b> </b>
<% else %> <% else %>
<b> <b>
<% if !author.empty? %> <a href="/channel/<%= playlist.ucid %>"><%= author %></a> |
<a href="/channel/<%= playlist.ucid %>"><%= author %></a> |
<% elsif !playlist.subtitle.nil? %>
<% subtitle = playlist.subtitle || "" %>
<span><%= HTML.escape(subtitle[0..subtitle.rindex(" • ") || subtitle.size]) %></span> |
<% end %>
<%= translate_count(locale, "generic_videos_count", playlist.video_count) %> | <%= translate_count(locale, "generic_videos_count", playlist.video_count) %> |
<%= translate(locale, "Updated `x` ago", recode_date(playlist.updated, locale)) %> <%= translate(locale, "Updated `x` ago", recode_date(playlist.updated, locale)) %>
</b> </b>

View File

@ -89,7 +89,7 @@
<label for="captions[0]"><%= translate(locale, "preferences_captions_label") %></label> <label for="captions[0]"><%= translate(locale, "preferences_captions_label") %></label>
<% preferences.captions.each_with_index do |caption, index| %> <% preferences.captions.each_with_index do |caption, index| %>
<select class="pure-u-1-6" name="captions[<%= index %>]" id="captions[<%= index %>]"> <select class="pure-u-1-6" name="captions[<%= index %>]" id="captions[<%= index %>]">
<% Invidious::Videos::Captions::LANGUAGES.each do |option| %> <% Invidious::Videos::Caption::LANGUAGES.each do |option| %>
<option value="<%= option %>" <% if preferences.captions[index] == option %> selected <% end %>><%= translate(locale, option.blank? ? "none" : option) %></option> <option value="<%= option %>" <% if preferences.captions[index] == option %> selected <% end %>><%= translate(locale, option.blank? ? "none" : option) %></option>
<% end %> <% end %>
</select> </select>

View File

@ -557,30 +557,6 @@ module YoutubeAPI
return self._post_json("/youtubei/v1/search", data, client_config) return self._post_json("/youtubei/v1/search", data, client_config)
end end
####################################################################
# get_transcript(params, client_config?)
#
# Requests the youtubei/v1/get_transcript endpoint with the required headers
# and POST data in order to get a JSON reply.
#
# The requested data is a specially encoded protobuf string that denotes the specific language requested.
#
# An optional ClientConfig parameter can be passed, too (see
# `struct ClientConfig` above for more details).
#
def get_transcript(params : String, client_config : ClientConfig? = nil) : Hash(String, JSON::Any)
  # The POST body carries the client context built from the (optional)
  # client config, plus the caller-supplied encoded params string.
  self._post_json(
    "/youtubei/v1/get_transcript",
    {
      "context" => self.make_context(client_config),
      "params"  => params,
    },
    client_config
  )
end
#################################################################### ####################################################################
# _post_json(endpoint, data, client_config?) # _post_json(endpoint, data, client_config?)
# #