mirror of
				https://github.com/iv-org/invidious.git
				synced 2025-10-24 09:48:31 -05:00 
			
		
		
		
	extractors: Add continuation token parser
This commit is contained in:
		
							parent
							
								
									bdc51cd20f
								
							
						
					
					
						commit
						ce7db8d2cb
					
				| @ -4,7 +4,7 @@ Spectator.describe Invidious::Hashtag do | |||||||
|   it "parses richItemRenderer containers (test 1)" do |   it "parses richItemRenderer containers (test 1)" do | ||||||
|     # Enable mock |     # Enable mock | ||||||
|     test_content = load_mock("hashtag/martingarrix_page1") |     test_content = load_mock("hashtag/martingarrix_page1") | ||||||
|     videos = extract_items(test_content) |     videos, _ = extract_items(test_content) | ||||||
| 
 | 
 | ||||||
|     expect(typeof(videos)).to eq(Array(SearchItem)) |     expect(typeof(videos)).to eq(Array(SearchItem)) | ||||||
|     expect(videos.size).to eq(60) |     expect(videos.size).to eq(60) | ||||||
| @ -57,7 +57,7 @@ Spectator.describe Invidious::Hashtag do | |||||||
|   it "parses richItemRenderer containers (test 2)" do |   it "parses richItemRenderer containers (test 2)" do | ||||||
|     # Enable mock |     # Enable mock | ||||||
|     test_content = load_mock("hashtag/martingarrix_page2") |     test_content = load_mock("hashtag/martingarrix_page2") | ||||||
|     videos = extract_items(test_content) |     videos, _ = extract_items(test_content) | ||||||
| 
 | 
 | ||||||
|     expect(typeof(videos)).to eq(Array(SearchItem)) |     expect(typeof(videos)).to eq(Array(SearchItem)) | ||||||
|     expect(videos.size).to eq(60) |     expect(videos.size).to eq(60) | ||||||
|  | |||||||
| @ -1,18 +1,7 @@ | |||||||
| def fetch_channel_playlists(ucid, author, continuation, sort_by) | def fetch_channel_playlists(ucid, author, continuation, sort_by) | ||||||
|   if continuation |   if continuation | ||||||
|     response_json = YoutubeAPI.browse(continuation) |     response_json = YoutubeAPI.browse(continuation) | ||||||
|     continuation_items = response_json["onResponseReceivedActions"]? |     items, continuation = extract_items(response_json, author, ucid) | ||||||
|       .try &.[0]["appendContinuationItemsAction"]["continuationItems"] |  | ||||||
| 
 |  | ||||||
|     return [] of SearchItem, nil if !continuation_items |  | ||||||
| 
 |  | ||||||
|     items = [] of SearchItem |  | ||||||
|     continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item| |  | ||||||
|       parse_item(item, author, ucid).try { |t| items << t } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     continuation = continuation_items.as_a.last["continuationItemRenderer"]? |  | ||||||
|       .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s |  | ||||||
|   else |   else | ||||||
|     url = "/channel/#{ucid}/playlists?flow=list&view=1" |     url = "/channel/#{ucid}/playlists?flow=list&view=1" | ||||||
| 
 | 
 | ||||||
| @ -30,8 +19,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by) | |||||||
|     initial_data = extract_initial_data(response.body) |     initial_data = extract_initial_data(response.body) | ||||||
|     return [] of SearchItem, nil if !initial_data |     return [] of SearchItem, nil if !initial_data | ||||||
| 
 | 
 | ||||||
|     items = extract_items(initial_data, author, ucid) |     items, continuation = extract_items(initial_data, author, ucid) | ||||||
|     continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]? |  | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   return items, continuation |   return items, continuation | ||||||
|  | |||||||
| @ -8,7 +8,8 @@ module Invidious::Hashtag | |||||||
|     client_config = YoutubeAPI::ClientConfig.new(region: region) |     client_config = YoutubeAPI::ClientConfig.new(region: region) | ||||||
|     response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config) |     response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config) | ||||||
| 
 | 
 | ||||||
|     return extract_items(response) |     items, _ = extract_items(response) | ||||||
|  |     return items | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def generate_continuation(hashtag : String, cursor : Int) |   def generate_continuation(hashtag : String, cursor : Int) | ||||||
|  | |||||||
| @ -265,4 +265,11 @@ class Category | |||||||
|   end |   end | ||||||
| end | end | ||||||
| 
 | 
 | ||||||
|  | struct Continuation | ||||||
|  |   getter token | ||||||
|  | 
 | ||||||
|  |   def initialize(@token : String) | ||||||
|  |   end | ||||||
|  | end | ||||||
|  | 
 | ||||||
| alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category | alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category | ||||||
|  | |||||||
| @ -9,7 +9,8 @@ module Invidious::Search | |||||||
|       client_config = YoutubeAPI::ClientConfig.new(region: query.region) |       client_config = YoutubeAPI::ClientConfig.new(region: query.region) | ||||||
|       initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config) |       initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config) | ||||||
| 
 | 
 | ||||||
|       return extract_items(initial_data) |       items, _ = extract_items(initial_data) | ||||||
|  |       return items | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|     # Search a youtube channel |     # Search a youtube channel | ||||||
| @ -30,16 +31,7 @@ module Invidious::Search | |||||||
|       continuation = produce_channel_search_continuation(ucid, query.text, query.page) |       continuation = produce_channel_search_continuation(ucid, query.text, query.page) | ||||||
|       response_json = YoutubeAPI.browse(continuation) |       response_json = YoutubeAPI.browse(continuation) | ||||||
| 
 | 
 | ||||||
|       continuation_items = response_json["onResponseReceivedActions"]? |       items, _ = extract_items(response_json, "", ucid) | ||||||
|         .try &.[0]["appendContinuationItemsAction"]["continuationItems"] |  | ||||||
| 
 |  | ||||||
|       return [] of SearchItem if !continuation_items |  | ||||||
| 
 |  | ||||||
|       items = [] of SearchItem |  | ||||||
|       continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item| |  | ||||||
|         parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t } |  | ||||||
|       end |  | ||||||
| 
 |  | ||||||
|       return items |       return items | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -7,7 +7,7 @@ require "../helpers/serialized_yt_data" | |||||||
| private ITEM_CONTAINER_EXTRACTOR = { | private ITEM_CONTAINER_EXTRACTOR = { | ||||||
|   Extractors::YouTubeTabs, |   Extractors::YouTubeTabs, | ||||||
|   Extractors::SearchResults, |   Extractors::SearchResults, | ||||||
|   Extractors::Continuation, |   Extractors::ContinuationContent, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| private ITEM_PARSERS = { | private ITEM_PARSERS = { | ||||||
| @ -18,6 +18,7 @@ private ITEM_PARSERS = { | |||||||
|   Parsers::CategoryRendererParser, |   Parsers::CategoryRendererParser, | ||||||
|   Parsers::RichItemRendererParser, |   Parsers::RichItemRendererParser, | ||||||
|   Parsers::ReelItemRendererParser, |   Parsers::ReelItemRendererParser, | ||||||
|  |   Parsers::ContinuationItemRendererParser, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| private alias InitialData = Hash(String, JSON::Any) | private alias InitialData = Hash(String, JSON::Any) | ||||||
| @ -347,14 +348,9 @@ private module Parsers | |||||||
|         content_container = item_contents["contents"] |         content_container = item_contents["contents"] | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       raw_contents = content_container["items"]?.try &.as_a |       content_container["items"]?.try &.as_a.each do |item| | ||||||
|       if !raw_contents.nil? |         result = parse_item(item, author_fallback.name, author_fallback.id) | ||||||
|         raw_contents.each do |item| |         contents << result if result.is_a?(SearchItem) | ||||||
|           result = parse_item(item) |  | ||||||
|           if !result.nil? |  | ||||||
|             contents << result |  | ||||||
|           end |  | ||||||
|         end |  | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       Category.new({ |       Category.new({ | ||||||
| @ -477,6 +473,35 @@ private module Parsers | |||||||
|       return {{@type.name}} |       return {{@type.name}} | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
|  | 
 | ||||||
|  |   # Parses an InnerTube continuationItemRenderer into a Continuation. | ||||||
|  |   # Returns nil when the given object isn't a continuationItemRenderer. | ||||||
|  |   # | ||||||
|  |   # continuationItemRenderer contains various metadata used to load more | ||||||
|  |   # content (i.e. when the user scrolls down). The interesting bit is the | ||||||
|  |   # protobuf object known as the "continuation token". Previously, those | ||||||
|  |   # were generated from scratch, but recent (as of 11/2022) YouTube changes | ||||||
|  |   # are forcing us to extract them from replies. | ||||||
|  |   # | ||||||
|  |   module ContinuationItemRendererParser | ||||||
|  |     def self.process(item : JSON::Any, author_fallback : AuthorFallback) | ||||||
|  |       if item_contents = item["continuationItemRenderer"]? | ||||||
|  |         return self.parse(item_contents) | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     private def self.parse(item_contents) | ||||||
|  |       token = item_contents | ||||||
|  |         .dig?("continuationEndpoint", "continuationCommand", "token") | ||||||
|  |         .try &.as_s | ||||||
|  | 
 | ||||||
|  |       return Continuation.new(token) if token | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     def self.parser_name | ||||||
|  |       return {{@type.name}} | ||||||
|  |     end | ||||||
|  |   end | ||||||
| end | end | ||||||
| 
 | 
 | ||||||
| # The following are the extractors for extracting an array of items from | # The following are the extractors for extracting an array of items from | ||||||
| @ -746,13 +771,18 @@ def extract_items( | |||||||
|   initial_data : InitialData, |   initial_data : InitialData, | ||||||
|   author_fallback : String? = nil, |   author_fallback : String? = nil, | ||||||
|   author_id_fallback : String? = nil |   author_id_fallback : String? = nil | ||||||
| ) : Array(SearchItem) | ) : {Array(SearchItem), String?} | ||||||
|   items = [] of SearchItem |   items = [] of SearchItem | ||||||
|  |   continuation = nil | ||||||
| 
 | 
 | ||||||
|   extract_items(initial_data) do |item| |   extract_items(initial_data) do |item| | ||||||
|     parsed = parse_item(item, author_fallback, author_id_fallback) |     parsed = parse_item(item, author_fallback, author_id_fallback) | ||||||
|     items << parsed if !parsed.nil? | 
 | ||||||
|  |     case parsed | ||||||
|  |     when .is_a?(Continuation) then continuation = parsed.token | ||||||
|  |     when .is_a?(SearchItem)   then items << parsed | ||||||
|  |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   return items |   return items, continuation | ||||||
| end | end | ||||||
|  | |||||||
| @ -68,10 +68,10 @@ rescue ex | |||||||
|   return false |   return false | ||||||
| end | end | ||||||
| 
 | 
 | ||||||
| def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) | def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) : Array(SearchVideo) | ||||||
|   extracted = extract_items(initial_data, author_fallback, author_id_fallback) |   extracted, _ = extract_items(initial_data, author_fallback, author_id_fallback) | ||||||
| 
 | 
 | ||||||
|   target = [] of SearchItem |   target = [] of (SearchItem | Continuation) | ||||||
|   extracted.each do |i| |   extracted.each do |i| | ||||||
|     if i.is_a?(Category) |     if i.is_a?(Category) | ||||||
|       i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } |       i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } | ||||||
| @ -79,28 +79,11 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str | |||||||
|       target << i |       target << i | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
|   return target.select(SearchVideo).map(&.as(SearchVideo)) | 
 | ||||||
|  |   return target.select(SearchVideo) | ||||||
| end | end | ||||||
| 
 | 
 | ||||||
| def extract_selected_tab(tabs) | def extract_selected_tab(tabs) | ||||||
|   # Extract the selected tab from the array of tabs Youtube returns |   # Extract the selected tab from the array of tabs Youtube returns | ||||||
|   return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"] |   return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"] | ||||||
| end | end | ||||||
| 
 |  | ||||||
| def fetch_continuation_token(items : Array(JSON::Any)) |  | ||||||
|   # Fetches the continuation token from an array of items |  | ||||||
|   return items.last["continuationItemRenderer"]? |  | ||||||
|     .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s |  | ||||||
| end |  | ||||||
| 
 |  | ||||||
| def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) |  | ||||||
|   # Fetches the continuation token from initial data |  | ||||||
|   if initial_data["onResponseReceivedActions"]? |  | ||||||
|     continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] |  | ||||||
|   else |  | ||||||
|     tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) |  | ||||||
|     continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] |  | ||||||
|   end |  | ||||||
| 
 |  | ||||||
|   return fetch_continuation_token(continuation_items.as_a) |  | ||||||
| end |  | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user