mirror of
				https://github.com/iv-org/invidious.git
				synced 2025-10-23 01:08:30 -05:00 
			
		
		
		
	Refactor search extractor
This commit is contained in:
		
							parent
							
								
									e768e1e277
								
							
						
					
					
						commit
						9f9cc1ffb5
					
				| @ -5167,7 +5167,7 @@ get "/vi/:id/:name" do |env| | |||||||
|   end |   end | ||||||
| end | end | ||||||
| 
 | 
 | ||||||
| # Undocumented, creates anonymous playlist with specified 'video_ids' | # Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos | ||||||
| get "/watch_videos" do |env| | get "/watch_videos" do |env| | ||||||
|   client = make_client(YT_URL) |   client = make_client(YT_URL) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -387,14 +387,15 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by) | |||||||
| 
 | 
 | ||||||
|     html = XML.parse_html(json["content_html"].as_s) |     html = XML.parse_html(json["content_html"].as_s) | ||||||
|     nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) |     nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) | ||||||
|   else |   elsif auto_generated | ||||||
|     url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list" |     url = "/channel/#{ucid}" | ||||||
| 
 | 
 | ||||||
|     if auto_generated |     response = client.get(url) | ||||||
|       url += "&view=50" |     html = XML.parse_html(response.body) | ||||||
|  | 
 | ||||||
|  |     nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")])) | ||||||
|   else |   else | ||||||
|       url += "&view=1" |     url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list&view=1" | ||||||
|     end |  | ||||||
| 
 | 
 | ||||||
|     case sort_by |     case sort_by | ||||||
|     when "last", "last_added" |     when "last", "last_added" | ||||||
|  | |||||||
| @ -442,47 +442,20 @@ def extract_items(nodeset, ucid = nil, author_name = nil) | |||||||
|     else |     else | ||||||
|       id = id.lchop("/watch?v=") |       id = id.lchop("/watch?v=") | ||||||
| 
 | 
 | ||||||
|       metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li)) |       metadata = node.xpath_node(%q(.//div[contains(@class,"yt-lockup-meta")]/ul)) | ||||||
| 
 | 
 | ||||||
|       begin |       published = metadata.try &.xpath_node(%q(.//li[contains(text(), " ago")])).try { |node| decode_date(node.content.sub(/^[a-zA-Z]+ /, "")) } | ||||||
|         published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts ")) |       published ||= metadata.try &.xpath_node(%q(.//span[@data-timestamp])).try { |node| Time.unix(node["data-timestamp"].to_i64) } | ||||||
|       rescue ex |  | ||||||
|       end |  | ||||||
|       begin |  | ||||||
|         published ||= Time.unix(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64) |  | ||||||
|       rescue ex |  | ||||||
|       end |  | ||||||
|       published ||= Time.utc |       published ||= Time.utc | ||||||
| 
 | 
 | ||||||
|       begin |       view_count = metadata.try &.xpath_node(%q(.//li[contains(text(), " views")])).try &.content.gsub(/\D/, "").to_i64? | ||||||
|         view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64? |  | ||||||
|       rescue ex |  | ||||||
|       end |  | ||||||
|       begin |  | ||||||
|         view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64? |  | ||||||
|       rescue ex |  | ||||||
|       end |  | ||||||
|       view_count ||= 0_i64 |       view_count ||= 0_i64 | ||||||
| 
 | 
 | ||||||
|       length_seconds = node.xpath_node(%q(.//span[@class="video-time"])) |       length_seconds = node.xpath_node(%q(.//span[@class="video-time"])).try { |node| decode_length_seconds(node.content) } | ||||||
|       if length_seconds |       length_seconds ||= -1 | ||||||
|         length_seconds = decode_length_seconds(length_seconds.content) |  | ||||||
|       else |  | ||||||
|         length_seconds = -1 |  | ||||||
|       end |  | ||||||
| 
 | 
 | ||||||
|       live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) |       live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) ? true : false | ||||||
|       if live_now |       premium = node.xpath_node(%q(.//span[text()="Premium"])) ? true : false | ||||||
|         live_now = true |  | ||||||
|       else |  | ||||||
|         live_now = false |  | ||||||
|       end |  | ||||||
| 
 |  | ||||||
|       if node.xpath_node(%q(.//span[text()="Premium"])) |  | ||||||
|         premium = true |  | ||||||
|       else |  | ||||||
|         premium = false |  | ||||||
|       end |  | ||||||
| 
 | 
 | ||||||
|       if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")])) |       if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")])) | ||||||
|         paid = false |         paid = false | ||||||
| @ -520,26 +493,18 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil) | |||||||
| 
 | 
 | ||||||
|   nodeset.each do |shelf| |   nodeset.each do |shelf| | ||||||
|     shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")])) |     shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")])) | ||||||
|  |     next if !shelf_anchor | ||||||
| 
 | 
 | ||||||
|     if !shelf_anchor |     title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])).try &.content.strip | ||||||
|       next |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])) |  | ||||||
|     if title |  | ||||||
|       title = title.content.strip |  | ||||||
|     end |  | ||||||
|     title ||= "" |     title ||= "" | ||||||
| 
 | 
 | ||||||
|     id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"] |     id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"] | ||||||
|     if !id |     next if !id | ||||||
|       next |  | ||||||
|     end |  | ||||||
| 
 | 
 | ||||||
|     is_playlist = false |     shelf_is_playlist = false | ||||||
|     videos = [] of SearchPlaylistVideo |     videos = [] of SearchPlaylistVideo | ||||||
| 
 | 
 | ||||||
|     shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list")]/li)).each do |child_node| |     shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list") or contains(@class, "expanded-shelf-content-list")]/li)).each do |child_node| | ||||||
|       type = child_node.xpath_node(%q(./div)) |       type = child_node.xpath_node(%q(./div)) | ||||||
|       if !type |       if !type | ||||||
|         next |         next | ||||||
| @ -547,7 +512,7 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil) | |||||||
| 
 | 
 | ||||||
|       case type["class"] |       case type["class"] | ||||||
|       when .includes? "yt-lockup-video" |       when .includes? "yt-lockup-video" | ||||||
|         is_playlist = true |         shelf_is_playlist = true | ||||||
| 
 | 
 | ||||||
|         anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a)) |         anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a)) | ||||||
|         if anchor |         if anchor | ||||||
| @ -588,19 +553,42 @@ def extract_shelf_items(nodeset, ucid = nil, author_name = nil) | |||||||
|         end |         end | ||||||
|         video_count ||= 50 |         video_count ||= 50 | ||||||
| 
 | 
 | ||||||
|  |         videos = [] of SearchPlaylistVideo | ||||||
|  |         child_node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video| | ||||||
|  |           anchor = video.xpath_node(%q(.//a)) | ||||||
|  |           if anchor | ||||||
|  |             video_title = anchor.content.strip | ||||||
|  |             id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"] | ||||||
|  |           end | ||||||
|  |           video_title ||= "" | ||||||
|  |           id ||= "" | ||||||
|  | 
 | ||||||
|  |           anchor = video.xpath_node(%q(.//span/span)) | ||||||
|  |           if anchor | ||||||
|  |             length_seconds = decode_length_seconds(anchor.content) | ||||||
|  |           end | ||||||
|  |           length_seconds ||= 0 | ||||||
|  | 
 | ||||||
|  |           videos << SearchPlaylistVideo.new( | ||||||
|  |             video_title, | ||||||
|  |             id, | ||||||
|  |             length_seconds | ||||||
|  |           ) | ||||||
|  |         end | ||||||
|  | 
 | ||||||
|         items << SearchPlaylist.new( |         items << SearchPlaylist.new( | ||||||
|           playlist_title, |           playlist_title, | ||||||
|           plid, |           plid, | ||||||
|           author_name, |           author_name, | ||||||
|           ucid, |           ucid, | ||||||
|           video_count, |           video_count, | ||||||
|           Array(SearchPlaylistVideo).new, |           videos, | ||||||
|           playlist_thumbnail |           playlist_thumbnail | ||||||
|         ) |         ) | ||||||
|       end |       end | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|     if is_playlist |     if shelf_is_playlist | ||||||
|       plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"] |       plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"] | ||||||
| 
 | 
 | ||||||
|       items << SearchPlaylist.new( |       items << SearchPlaylist.new( | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user