invidious/src/invidious/channels.cr
syeopite 1b569bbc99
Add support for channel home pages + gen. improvements
This commit adds support for channel home pages and all of the
categories within them. However, the frontend code is a mess and thus
needs to be refactored soon, though that would likely require a rework
of items.ecr

This commit also comes with some general cleanups and improvements.

Before this commit, channel brand URLs were only supported on the
videos page (now the home page). Handling has been improved to cover
all channel URLs.

The category_type and auxiliary_data properties have also been removed
from the Category struct. The former was never used, and the latter
allowed arbitrary data to be added to the struct, which presented
documentation issues.

Since the auxiliary_data variable was mainly used to store values from
the browse_endpoint in order to create URLs, it's much simpler to just
get the URL from the webCommandMetadata instead.

As a result of this change the browse_endpoint_data attribute of
Category has also been removed.
2021-06-24 08:52:48 -07:00
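
For instance, instead of rebuilding a category URL from stored
browse_endpoint values, the URL can be read straight off the renderer's
webCommandMetadata. A rough sketch, assuming renderer is a JSON::Any
node (the actual call site lives in the extractor code, not in this file):

    url = renderer.dig?("navigationEndpoint", "commandMetadata", "webCommandMetadata", "url").try &.as_s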

1112 lines
41 KiB
Crystal

struct InvidiousChannel
  include DB::Serializable

  property id : String
  property author : String
  property updated : Time
  property deleted : Bool
  property subscribed : Time?
end
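
# One row of the `channel_videos` table: the per-video data Invidious keeps
# for subscription feeds, serialized as JSON ("shortVideo") and as Atom XML below.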
struct ChannelVideo
  include DB::Serializable

  property id : String
  property title : String
  property published : Time
  property updated : Time
  property ucid : String
  property author : String
  property length_seconds : Int32 = 0
  property live_now : Bool = false
  property premiere_timestamp : Time? = nil
  property views : Int64? = nil
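
  # Serializes this video as a "shortVideo" JSON object, written through
  # the given JSON::Builder.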
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "type", "shortVideo"

      json.field "title", self.title
      json.field "videoId", self.id
      json.field "videoThumbnails" do
        generate_thumbnails(json, self.id)
      end

      json.field "lengthSeconds", self.length_seconds

      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"
      json.field "published", self.published.to_unix
      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))

      json.field "viewCount", self.views
    end
  end

  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |json|
        to_json(locale, json)
      end
    end
  end
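
  # Renders this video as an Atom <entry> element for channel feeds.
  # `query_params` is mutated to include the video id ("v").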
  def to_xml(locale, query_params, xml : XML::Builder)
    query_params["v"] = self.id

    xml.element("entry") do
      xml.element("id") { xml.text "yt:video:#{self.id}" }
      xml.element("yt:videoId") { xml.text self.id }
      xml.element("yt:channelId") { xml.text self.ucid }
      xml.element("title") { xml.text self.title }
      xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")

      xml.element("author") do
        xml.element("name") { xml.text self.author }
        xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
      end

      xml.element("content", type: "xhtml") do
        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
          xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
            xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
          end
        end
      end

      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
      xml.element("updated") { xml.text self.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") }

      xml.element("media:group") do
        xml.element("media:title") { xml.text self.title }
        xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
          width: "320", height: "180")
      end
    end
  end

  def to_xml(locale, xml : XML::Builder | Nil = nil)
    if xml
      to_xml(locale, xml)
    else
      XML.build do |xml|
        to_xml(locale, xml)
      end
    end
  end
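
  # Expands at compile time to a tuple of every instance variable, in
  # declaration order, matching the column order of `channel_videos`.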
  def to_tuple
    {% begin %}
      {
        {{*@type.instance_vars.map { |var| var.name }}}
      }
    {% end %}
  end
end

struct AboutRelatedChannel
  include DB::Serializable

  property ucid : String
  property author : String
  property author_url : String
  property author_thumbnail : String
end

# TODO: Refactor into either SearchChannel or InvidiousChannel
struct AboutChannel
  include DB::Serializable

  property ucid : String
  property author : String
  property auto_generated : Bool
  property author_url : String
  property author_thumbnail : String
  property banner : String?
  property description_html : String
  property paid : Bool
  property country : String
  property total_views : Int64
  property sub_count : Int32
  property joined : Time
  property is_family_friendly : Bool
  property allowed_regions : Array(String)
  property related_channels : Array(AboutRelatedChannel)
  property tabs : Hash(String, Tuple(Int32, String)) # TabName => {TabIndex, browseEndpoint params}
  property links : Array(Tuple(String, String, String))
end

class ChannelRedirect < Exception
  property channel_id : String

  def initialize(@channel_id)
  end
end
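
# Refreshes many channels at once, running up to `max_threads` fetches
# concurrently in their own fibers. Returns the ids that refreshed
# successfully; channels that raised during the fetch are dropped.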
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
  finished_channel = Channel(String | Nil).new

  spawn do
    active_threads = 0
    active_channel = Channel(Nil).new

    channels.each do |ucid|
      if active_threads >= max_threads
        active_channel.receive
        active_threads -= 1
      end

      active_threads += 1
      spawn do
        begin
          get_channel(ucid, db, refresh, pull_all_videos)
          finished_channel.send(ucid)
        rescue ex
          finished_channel.send(nil)
        ensure
          active_channel.send(nil)
        end
      end
    end
  end

  final = [] of String
  channels.size.times do
    if ucid = finished_channel.receive
      final << ucid
    end
  end

  return final
end
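
# Looks a channel up in the `channels` table, fetching (and upserting) a
# fresh copy from YouTube when it is missing or more than 10 minutes stale.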
def get_channel(id, db, refresh = true, pull_all_videos = true)
  if channel = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)
    if refresh && Time.utc - channel.updated > 10.minutes
      channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
      channel_array = channel.to_a
      args = arg_array(channel_array)

      db.exec("INSERT INTO channels VALUES (#{args}) \
        ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: channel_array)
    end
  else
    channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
    channel_array = channel.to_a
    args = arg_array(channel_array)

    db.exec("INSERT INTO channels VALUES (#{args})", args: channel_array)
  end

  return channel
end
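
# Downloads a channel's RSS feed (plus pages of its videos tab when
# `pull_all_videos` is set), upserts every video into `channel_videos`,
# and queues notifications for subscribers of newly inserted videos.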
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
  LOGGER.debug("fetch_channel: #{ucid}")
  LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}, locale = #{locale}")

  LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
  rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
  LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
  rss = XML.parse_html(rss)

  author = rss.xpath_node(%q(//feed/title))
  if !author
    raise InfoException.new("Deleted or invalid channel")
  end
  author = author.content

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  if author.ends_with?(" - Topic") ||
     {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
    auto_generated = true
  end

  LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")

  page = 1

  LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
  initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
  videos = extract_videos(initial_data, author, ucid)

  LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
  rss.xpath_nodes("//feed/entry").each do |entry|
    video_id = entry.xpath_node("videoid").not_nil!.content
    title = entry.xpath_node("title").not_nil!.content
    published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
    updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
    author = entry.xpath_node("author/name").not_nil!.content
    ucid = entry.xpath_node("channelid").not_nil!.content
    views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
    views ||= 0_i64

    channel_video = videos.select { |video| video.id == video_id }[0]?

    length_seconds = channel_video.try &.length_seconds
    length_seconds ||= 0

    live_now = channel_video.try &.live_now
    live_now ||= false

    premiere_timestamp = channel_video.try &.premiere_timestamp

    video = ChannelVideo.new({
      id:                 video_id,
      title:              title,
      published:          published,
      updated:            Time.utc,
      ucid:               ucid,
      author:             author,
      length_seconds:     length_seconds,
      live_now:           live_now,
      premiere_timestamp: premiere_timestamp,
      views:              views,
    })

    LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updating or inserting video")

    # We don't include the 'premiere_timestamp' here because channel pages don't include them,
    # meaning the above timestamp is always null
    was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
      ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
      updated = $4, ucid = $5, author = $6, length_seconds = $7, \
      live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)

    if was_insert
      LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Inserted, updating subscriptions")
      db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
        feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid)
    else
      LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updated")
    end
  end

  if pull_all_videos
    page += 1

    ids = [] of String

    loop do
      initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
      videos = extract_videos(initial_data, author, ucid)

      count = videos.size
      videos = videos.map { |video| ChannelVideo.new({
        id:                 video.id,
        title:              video.title,
        published:          video.published,
        updated:            Time.utc,
        ucid:               video.ucid,
        author:             video.author,
        length_seconds:     video.length_seconds,
        live_now:           video.live_now,
        premiere_timestamp: video.premiere_timestamp,
        views:              video.views,
      }) }

      videos.each do |video|
        ids << video.id

        # We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
        # and since they don't provide a published date here we can safely ignore them.
        if Time.utc - video.published > 1.minute
          was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
            ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
            updated = $4, ucid = $5, author = $6, length_seconds = $7, \
            live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)

          db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
            feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) if was_insert
        end
      end

      break if count < 25
      page += 1
    end
  end

  channel = InvidiousChannel.new({
    id:         ucid,
    author:     author,
    updated:    Time.utc,
    deleted:    false,
    subscribed: nil,
  })

  return channel
end
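
# Fetches the channel's home tab and returns its sections: an optional
# channel trailer (as a full Video) followed by each home page Category.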
def fetch_channel_home(ucid, channel)
  initial_data = request_youtube_api_browse(ucid, channel.tabs["home"][1])
  items = extract_items(initial_data, channel.author, channel.ucid)

  # Channel trailer needs some slight special handling
  home_tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
  trailer = home_tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["channelVideoPlayerRenderer"]?

  home_sections = [] of (Category | Video)

  if trailer
    trailer = get_video(trailer["videoId"].as_s, PG_DB)
    home_sections << trailer
  end

  items.each do |category|
    if category.is_a? Category
      home_sections << category
    end
  end

  return home_sections
end
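
# Returns {items, continuation} for a channel's playlists tab. The first
# page is scraped from the HTML listing; subsequent pages go through the
# browse API using the continuation token.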
def fetch_channel_playlists(ucid, author, continuation, sort_by)
  if continuation
    response_json = request_youtube_api_browse(continuation)
    continuationItems = response_json["onResponseReceivedActions"]?
      .try &.[0]["appendContinuationItemsAction"]["continuationItems"]

    return [] of SearchItem, nil if !continuationItems

    items = [] of SearchItem
    continuationItems.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
      extract_item(item, author, ucid).try { |t| items << t }
    }

    continuation = continuationItems.as_a.last["continuationItemRenderer"]?
      .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
  else
    url = "/channel/#{ucid}/playlists?flow=list&view=1"

    case sort_by
    when "last", "last_added"
      #
    when "oldest", "oldest_created"
      url += "&sort=da"
    when "newest", "newest_created"
      url += "&sort=dd"
    else nil # Ignore
    end

    response = YT_POOL.client &.get(url)
    initial_data = extract_initial_data(response.body)
    return [] of SearchItem, nil if !initial_data

    items = extract_items(initial_data, author, ucid)
    continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
  end

  return items, continuation
end
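
# Fetches the featured-channels ("channels") tab. Returns an array of
# Category structs plus a continuation token for paging through a shelf.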
def fetch_channel_featured_channels(ucid, tab_data, view = nil, shelf_id = nil, continuation = nil, query_title = nil) : {Array(Category), (String | Nil)}
  if continuation.is_a?(String)
    initial_data = request_youtube_api_browse(continuation)
    items = extract_items(initial_data)
    continuation_token = fetch_continuation_token(initial_data)

    return [Category.new({
      title:            query_title.not_nil!, # If continuation contents are requested, the query_title has to be passed along.
      contents:         items,
      description_html: "",
      url:              nil,
      badges:           nil,
    })], continuation_token
  else
    url = nil

    if view && shelf_id
      url = "/channel/#{ucid}/channels?view=#{view}&shelf_id=#{shelf_id}"
      params = produce_featured_channel_browse_param(view.to_i64, shelf_id.to_i64)
      initial_data = request_youtube_api_browse(ucid, params)
      continuation_token = fetch_continuation_token(initial_data)
    else
      initial_data = request_youtube_api_browse(ucid, tab_data[1])
      continuation_token = nil
    end

    channels_tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
    submenu = channels_tab["content"]["sectionListRenderer"]["subMenu"]?

    # There's no submenu data if the channel doesn't feature any channels.
    if !submenu
      return {[] of Category, continuation_token}
    end

    submenu_data = submenu["channelSubMenuRenderer"]["contentTypeSubMenuItems"]
    items = extract_items(initial_data)
    fallback_title = submenu_data.as_a.select(&.["selected"].as_bool)[0]["title"].as_s

    # Although extract_items parses everything into the right structs, we still
    # have to fill in the title attribute (if missing), since YouTube doesn't
    # return it when a full category is requested.
    category_array = [] of Category
    items.each do |category|
      # Tell the compiler that the result from extract_items has to be an array of Categories
      if !category.is_a?(Category)
        next
      end

      category_array << Category.new({
        title:            category.title.empty? ? fallback_title : category.title,
        contents:         category.contents,
        description_html: category.description_html,
        url:              category.url,
        badges:           nil,
      })
    end

    # If no categories have been parsed, it means we're requesting a single one
    # rather than the initial preview. The frontend still needs a Category,
    # however, so we'll create one.
    if category_array.empty?
      category_array << Category.new({
        title:            fallback_title,
        contents:         items,
        description_html: "",
        url:              url,
        badges:           nil,
      })
    end

    return category_array, continuation_token
  end
end
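
# Encodes the (view, shelf_id) pair of a featured-channels shelf into the
# protobuf `params` blob expected by the browse API.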
def produce_featured_channel_browse_param(view : Int64, shelf_id : Int64)
  object = {
    "2:string"  => "channels",
    "4:varint"  => view,
    "14:varint" => shelf_id,
  }

  browse_params = object.try { |i| Protodec::Any.cast_json(object) }
    .try { |i| Protodec::Any.from_json(i) }
    .try { |i| Base64.urlsafe_encode(i) }
    .try { |i| URI.encode_www_form(i) }

  return browse_params
end
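
# Builds the protobuf continuation token for a channel's videos tab.
# Regular channels page by number, auto-generated channels page by a
# month offset, and the v2 format encodes a 30-video offset; the sort
# order is encoded as a varint.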
def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:base64" => {
        "2:string"  => "videos",
        "6:varint"  => 2_i64,
        "7:varint"  => 1_i64,
        "12:varint" => 1_i64,
        "13:string" => "",
        "23:varint" => 0_i64,
      },
    },
  }

  if !v2
    if auto_generated
      seed = Time.unix(1525757349)
      until seed >= Time.utc
        seed += 1.month
      end
      timestamp = seed - (page - 1).months

      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
    else
      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
    end
  else
    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
    object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
      "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
        "1:varint" => 30_i64 * (page - 1),
      }))),
    })))
  end

  case sort_by
  when "newest"
  when "popular"
    object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
  when "oldest"
    object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
  else nil # Ignore
  end

  object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
  object["80226972:embedded"].delete("3:base64")

  continuation = object.try { |i| Protodec::Any.cast_json(object) }
    .try { |i| Protodec::Any.from_json(i) }
    .try { |i| Base64.urlsafe_encode(i) }
    .try { |i| URI.encode_www_form(i) }

  return continuation
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
  return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end

# NOTE: DEPRECATED
# Reason -> Unstable
# The Protobuf object must be provided with the id of the last playlist on the current "page"
# in order to fetch the next one accurately
# (if the id isn't included, entries shift around erratically between pages,
# leading to repetitions and skips)
#
# Since it's impossible to produce the appropriate Protobuf without an id being provided by the user,
# it's better to stick to the continuation tokens provided by the first request and onward
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:base64" => {
        "2:string"  => "playlists",
        "6:varint"  => 2_i64,
        "7:varint"  => 1_i64,
        "12:varint" => 1_i64,
        "13:string" => "",
        "23:varint" => 0_i64,
      },
    },
  }

  if cursor
    cursor = Base64.urlsafe_encode(cursor, false) if !auto_generated
    object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = cursor
  end

  if auto_generated
    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x32_i64
  else
    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 1_i64
    case sort
    when "oldest", "oldest_created"
      object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 2_i64
    when "newest", "newest_created"
      object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 3_i64
    when "last", "last_added"
      object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 4_i64
    else nil # Ignore
    end
  end

  object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
  object["80226972:embedded"].delete("3:base64")

  continuation = object.try { |i| Protodec::Any.cast_json(object) }
    .try { |i| Protodec::Any.from_json(i) }
    .try { |i| Base64.urlsafe_encode(i) }
    .try { |i| URI.encode_www_form(i) }

  return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
# TODO: Add "sort_by"
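# Fetches a channel's community tab (or one continuation page of it) and
# renders the posts as Invidious's comments JSON; with format == "html"
# the result is additionally templated into HTML.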
def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
  response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en")
  if response.status_code != 200
    response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en")
  end

  if response.status_code != 200
    raise InfoException.new("This channel does not exist.")
  end

  ucid = response.body.match(/https:\/\/www.youtube.com\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/).not_nil!["ucid"]

  if !continuation || continuation.empty?
    initial_data = extract_initial_data(response.body)
    body = initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select { |tab| tab["tabRenderer"]?.try &.["selected"].as_bool.== true }[0]?

    if !body
      raise InfoException.new("Could not extract community tab.")
    end

    body = body["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]
  else
    continuation = produce_channel_community_continuation(ucid, continuation)

    headers = HTTP::Headers.new
    headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]

    session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
    post_req = {
      session_token: session_token,
    }

    response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
    body = JSON.parse(response.body)

    body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
           body["response"]["continuationContents"]["backstageCommentsContinuation"]?

    if !body
      raise InfoException.new("Could not extract continuation.")
    end
  end

  continuation = body["continuations"]?.try &.[0]["nextContinuationData"]["continuation"].as_s
  posts = body["contents"].as_a

  if message = posts[0]["messageRenderer"]?
    error_message = (message["text"]["simpleText"]? ||
                     message["text"]["runs"]?.try &.[0]?.try &.["text"]?)
      .try &.as_s || ""
    raise InfoException.new(error_message)
  end

  response = JSON.build do |json|
    json.object do
      json.field "authorId", ucid
      json.field "comments" do
        json.array do
          posts.each do |post|
            comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? ||
                       post["backstageCommentsContinuation"]?

            post = post["backstagePostThreadRenderer"]?.try &.["post"]["backstagePostRenderer"]? ||
                   post["commentThreadRenderer"]?.try &.["comment"]["commentRenderer"]?

            next if !post

            content_html = post["contentText"]?.try { |t| parse_content(t) } || ""
            author = post["authorText"]?.try &.["simpleText"]? || ""

            json.object do
              json.field "author", author
              json.field "authorThumbnails" do
                json.array do
                  qualities = {32, 48, 76, 100, 176, 512}
                  author_thumbnail = post["authorThumbnail"]["thumbnails"].as_a[0]["url"].as_s

                  qualities.each do |quality|
                    json.object do
                      json.field "url", author_thumbnail.gsub(/s\d+-/, "s#{quality}-")
                      json.field "width", quality
                      json.field "height", quality
                    end
                  end
                end
              end

              if post["authorEndpoint"]?
                json.field "authorId", post["authorEndpoint"]["browseEndpoint"]["browseId"]
                json.field "authorUrl", post["authorEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
              else
                json.field "authorId", ""
                json.field "authorUrl", ""
              end

              published_text = post["publishedTimeText"]["runs"][0]["text"].as_s
              published = decode_date(published_text.rchop(" (edited)"))

              if published_text.includes?(" (edited)")
                json.field "isEdited", true
              else
                json.field "isEdited", false
              end

              like_count = post["actionButtons"]["commentActionButtonsRenderer"]["likeButton"]["toggleButtonRenderer"]["accessibilityData"]["accessibilityData"]["label"]
                .try &.as_s.gsub(/\D/, "").to_i? || 0

              json.field "content", html_to_content(content_html)
              json.field "contentHtml", content_html

              json.field "published", published.to_unix
              json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

              json.field "likeCount", like_count
              json.field "commentId", post["postId"]? || post["commentId"]? || ""
              json.field "authorIsChannelOwner", post["authorEndpoint"]["browseEndpoint"]["browseId"] == ucid

              if attachment = post["backstageAttachment"]?
                json.field "attachment" do
                  json.object do
                    case attachment.as_h
                    when .has_key?("videoRenderer")
                      attachment = attachment["videoRenderer"]
                      json.field "type", "video"

                      if !attachment["videoId"]?
                        error_message = (attachment["title"]["simpleText"]? ||
                                         attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?)

                        json.field "error", error_message
                      else
                        video_id = attachment["videoId"].as_s

                        video_title = attachment["title"]["simpleText"]? || attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?
                        json.field "title", video_title
                        json.field "videoId", video_id
                        json.field "videoThumbnails" do
                          generate_thumbnails(json, video_id)
                        end

                        json.field "lengthSeconds", decode_length_seconds(attachment["lengthText"]["simpleText"].as_s)

                        author_info = attachment["ownerText"]["runs"][0].as_h

                        json.field "author", author_info["text"].as_s
                        json.field "authorId", author_info["navigationEndpoint"]["browseEndpoint"]["browseId"]
                        json.field "authorUrl", author_info["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]

                        # TODO: json.field "authorThumbnails", "channelThumbnailSupportedRenderers"
                        # TODO: json.field "authorVerified", "ownerBadges"

                        published = decode_date(attachment["publishedTimeText"]["simpleText"].as_s)

                        json.field "published", published.to_unix
                        json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

                        view_count = attachment["viewCountText"]?.try &.["simpleText"].as_s.gsub(/\D/, "").to_i64? || 0_i64

                        json.field "viewCount", view_count
                        json.field "viewCountText", translate(locale, "`x` views", number_to_short_text(view_count))
                      end
                    when .has_key?("backstageImageRenderer")
                      attachment = attachment["backstageImageRenderer"]
                      json.field "type", "image"

                      json.field "imageThumbnails" do
                        json.array do
                          thumbnail = attachment["image"]["thumbnails"][0].as_h
                          width = thumbnail["width"].as_i
                          height = thumbnail["height"].as_i
                          aspect_ratio = (width.to_f / height.to_f)
                          url = thumbnail["url"].as_s.gsub(/=w\d+-h\d+(-p)?(-nd)?(-df)?(-rwa)?/, "=s640")

                          qualities = {320, 560, 640, 1280, 2000}

                          qualities.each do |quality|
                            json.object do
                              json.field "url", url.gsub(/=s\d+/, "=s#{quality}")
                              json.field "width", quality
                              json.field "height", (quality / aspect_ratio).ceil.to_i
                            end
                          end
                        end
                      end
                    # TODO
                    # when .has_key?("pollRenderer")
                    #   attachment = attachment["pollRenderer"]
                    #   json.field "type", "poll"
                    else
                      json.field "type", "unknown"
                      json.field "error", "Unrecognized attachment type."
                    end
                  end
                end
              end

              if comments && (reply_count = (comments["backstageCommentsRenderer"]["moreText"]["simpleText"]? ||
                                             comments["backstageCommentsRenderer"]["moreText"]["runs"]?.try &.[0]?.try &.["text"]?)
                               .try &.as_s.gsub(/\D/, "").to_i?)
                continuation = comments["backstageCommentsRenderer"]["continuations"]?.try &.as_a[0]["nextContinuationData"]["continuation"].as_s
                continuation ||= ""

                json.field "replies" do
                  json.object do
                    json.field "replyCount", reply_count
                    json.field "continuation", extract_channel_community_cursor(continuation)
                  end
                end
              end
            end
          end
        end
      end

      if body["continuations"]?
        continuation = body["continuations"][0]["nextContinuationData"]["continuation"].as_s
        json.field "continuation", extract_channel_community_cursor(continuation)
      end
    end
  end

  if format == "html"
    response = JSON.parse(response)
    content_html = template_youtube_comments(response, locale, thin_mode)

    response = JSON.build do |json|
      json.object do
        json.field "contentHtml", content_html
      end
    end
  end

  return response
end
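
# Packs a ucid and raw cursor into the protobuf continuation token used
# by the community endpoint.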
def produce_channel_community_continuation(ucid, cursor)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
      "3:string" => cursor || "",
    },
  }

  continuation = object.try { |i| Protodec::Any.cast_json(object) }
    .try { |i| Protodec::Any.from_json(i) }
    .try { |i| Base64.urlsafe_encode(i) }
    .try { |i| URI.encode_www_form(i) }

  return continuation
end
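
# Inverse of produce_channel_community_continuation: unpacks a community
# continuation token back into the bare cursor handed to clients.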
def extract_channel_community_cursor(continuation)
  object = URI.decode_www_form(continuation)
    .try { |i| Base64.decode(i) }
    .try { |i| IO::Memory.new(i) }
    .try { |i| Protodec::Any.parse(i) }
    .try { |i| i["80226972:0:embedded"]["3:1:base64"].as_h }

  if object["53:2:embedded"]?.try &.["3:0:embedded"]?
    object["53:2:embedded"]["3:0:embedded"]["2:0:string"] = object["53:2:embedded"]["3:0:embedded"]
      .try { |i| i["2:0:base64"].as_h }
      .try { |i| Protodec::Any.cast_json(i) }
      .try { |i| Protodec::Any.from_json(i) }
      .try { |i| Base64.urlsafe_encode(i, padding: false) }

    object["53:2:embedded"]["3:0:embedded"].as_h.delete("2:0:base64")
  end

  cursor = Protodec::Any.cast_json(object)
    .try { |i| Protodec::Any.from_json(i) }
    .try { |i| Base64.urlsafe_encode(i) }

  cursor
end
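
# Scrapes a channel's about page into an AboutChannel, following any
# redirects and handling both regular and auto-generated channels.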
def get_about_info(ucid, locale)
  result = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
  if result.status_code != 200
    result = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
  end

  if md = result.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
    raise ChannelRedirect.new(channel_id: md["ucid"])
  end

  if result.status_code != 200
    raise InfoException.new("This channel does not exist.")
  end

  about = XML.parse_html(result.body)
  if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
    raise InfoException.new("This channel does not exist.")
  end

  initdata = extract_initial_data(result.body)
  if initdata.empty?
    error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
    error_message ||= translate(locale, "Could not get channel info.")
    raise InfoException.new(error_message)
  end

  if browse_endpoint = initdata["onResponseReceivedActions"]?.try &.[0]?.try &.["navigateAction"]?.try &.["endpoint"]?.try &.["browseEndpoint"]?
    raise ChannelRedirect.new(channel_id: browse_endpoint["browseId"].to_s)
  end

  auto_generated = false
  # Check for special auto generated gaming channels
  if !initdata.has_key?("metadata")
    auto_generated = true
  end

  if auto_generated
    author = initdata["header"]["interactiveTabbedHeaderRenderer"]["title"]["simpleText"].as_s
    author_url = initdata["microformat"]["microformatDataRenderer"]["urlCanonical"].as_s
    author_thumbnail = initdata["header"]["interactiveTabbedHeaderRenderer"]["boxArt"]["thumbnails"][0]["url"].as_s

    # Raises a KeyError on failure.
    banners = initdata["header"]["interactiveTabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
    banner = banners.try &.[-1]?.try &.["url"].as_s?

    description = initdata["header"]["interactiveTabbedHeaderRenderer"]["description"]["simpleText"].as_s
    description_html = HTML.escape(description).gsub("\n", "<br>")

    paid = false
    is_family_friendly = initdata["microformat"]["microformatDataRenderer"]["familySafe"].as_bool
    allowed_regions = initdata["microformat"]["microformatDataRenderer"]["availableCountries"].as_a.map { |a| a.as_s }
    related_channels = [] of AboutRelatedChannel
  else
    author = initdata["metadata"]["channelMetadataRenderer"]["title"].as_s
    author_url = initdata["metadata"]["channelMetadataRenderer"]["channelUrl"].as_s
    author_thumbnail = initdata["metadata"]["channelMetadataRenderer"]["avatar"]["thumbnails"][0]["url"].as_s

    ucid = initdata["metadata"]["channelMetadataRenderer"]["externalId"].as_s

    # Raises a KeyError on failure.
    banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
    banner = banners.try &.[-1]?.try &.["url"].as_s?

    # if banner.includes? "channels/c4/default_banner"
    #   banner = nil
    # end

    description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || ""
    description_html = HTML.escape(description).gsub("\n", "<br>")

    paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
    is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
    allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")

    related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"]
      .["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]?
      .try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node|
      renderer = node["miniChannelRenderer"]?

      related_id = renderer.try &.["channelId"]?.try &.as_s?
      related_id ||= ""

      related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s?
      related_title ||= ""

      related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]?
        .try &.["url"]?.try &.as_s?
      related_author_url ||= ""

      related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a?
      related_author_thumbnails ||= [] of JSON::Any

      related_author_thumbnail = ""
      if related_author_thumbnails.size > 0
        related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s?
        related_author_thumbnail ||= ""
      end

      AboutRelatedChannel.new({
        ucid:             related_id,
        author:           related_title,
        author_url:       related_author_url,
        author_thumbnail: related_author_thumbnail,
      })
    end

    related_channels ||= [] of AboutRelatedChannel
  end

  country = ""
  total_views = 0_i64
  joined = Time.unix(0)

  tabs = {} of String => Tuple(Int32, String) # TabName => {TabIndex, browseEndpoint params}

  links = [] of {String, String, String}

  tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
  tab_names = [] of String
  tab_data = [] of Tuple(Int32, String)

  if !tabs_json.nil?
    # Retrieve information from the tabs array. The index we are looking for varies between channels.
    tabs_json.each_with_index do |node, i|
      # Try to find the about section, which is located in only one of the tabs.
      channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
        .try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
        .try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?

      if !channel_about_meta.nil?
        total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64

        # The joined-date text is split into several substrings. The reduce joins those strings before parsing the date.
        joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s }
          .try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)

        # External link parsing
        channel_about_meta["primaryLinks"]?.try &.as_a.each do |link|
          link_title = link["title"]["simpleText"].as_s
          link_url = URI.parse(link["navigationEndpoint"]["urlEndpoint"]["url"].to_s)
          link_icon_url = link["icon"]?.try &.["thumbnails"][0]["url"].to_s || ""

          if {"m.youtube.com", "www.youtube.com", "youtu.be"}.includes? link_url.host
            if link_url.path == "/redirect"
              link_url = HTTP::Params.parse(link_url.query.not_nil!)["q"]
            else
              link_url = link_url.request_target.to_s
            end
          else
            link_url = link_url.to_s
          end

          links << {link_title, link_url, link_icon_url}
        end

        country = channel_about_meta["country"]?.try &.["simpleText"].as_s || ""

        # Normal auto-generated channels
        # https://support.google.com/youtube/answer/2579942
        # For auto-generated channels, channel_about_meta only has
        # ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
        if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
           (channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
          auto_generated = true
        end
      end

      if node["tabRenderer"]?
        tab_names << node["tabRenderer"]["title"].as_s.downcase
        tab_data << {i, node["tabRenderer"]["endpoint"]["browseEndpoint"]["params"].as_s}
      end
    end

    tabs = Hash.zip(tab_names, tab_data)
  end

  sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
    .try { |text| short_text_to_number(text.split(" ")[0]) } || 0

  AboutChannel.new({
    ucid:               ucid,
    author:             author,
    auto_generated:     auto_generated,
    author_url:         author_url,
    author_thumbnail:   author_thumbnail,
    banner:             banner,
    description_html:   description_html,
    paid:               paid,
    country:            country,
    total_views:        total_views,
    sub_count:          sub_count,
    joined:             joined,
    is_family_friendly: is_family_friendly,
    allowed_regions:    allowed_regions,
    related_channels:   related_channels,
    tabs:               tabs,
    links:              links,
  })
end
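
# Requests one page of a channel's videos tab through the browse API,
# using the v2 continuation format.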
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
  continuation = produce_channel_videos_continuation(ucid, page,
    auto_generated: auto_generated, sort_by: sort_by, v2: true)

  return request_youtube_api_browse(continuation)
end

def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  videos = [] of SearchVideo

  2.times do |i|
    initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
    videos.concat extract_videos(initial_data, author, ucid)
  end

  return videos.size, videos
end
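
# Returns the videos on the first page of a channel's videos tab.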
def get_latest_videos(ucid)
  initial_data = get_channel_videos_response(ucid)
  author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s

  return extract_videos(initial_data, author, ucid)
end