[SoundCloud] Detect whether there are any more search results

ShareASmile · TobiGr · ShareASmile · commit c23fa7c124a2 · 2024-06-13T13:43:41.000+05:30
Previously, the extractor always assumed that there was an infinite number of search results, which caused problems. When a search had only a few results, clients such as NewPipe would keep trying to load more results without receiving any, causing a flood of requests and ultimately resulting in reCaptcha requests.

Co-Authored-By: Tobi <17365767+tobigr@users.noreply.github.com>
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java
@@ -1,14 +1,9 @@
 package org.schabi.newpipe.extractor.services.soundcloud;
 
-import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
-import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
-import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
-
 import com.grack.nanojson.JsonArray;
 import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
-
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -28,6 +23,7 @@
 import org.schabi.newpipe.extractor.utils.Parser.RegexException;
 import org.schabi.newpipe.extractor.utils.Utils;
 
+import javax.annotation.Nonnull;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -38,7 +34,9 @@
 import java.util.List;
 import java.util.Map;
 
-import javax.annotation.Nonnull;
+import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
+import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
+import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
 
 public final class SoundcloudParsingHelper {
     private static String clientId;
@@ -201,6 +199,7 @@ public static String getUsersFromApiMinItems(final int minItems,
      *
      * @return the next streams url, empty if don't have
      */
+    @Nonnull
     public static String getUsersFromApi(final ChannelInfoItemsCollector collector,
                                          final String apiUrl) throws IOException,
             ReCaptchaException, ParsingException {
@@ -222,17 +221,7 @@ public static String getUsersFromApi(final ChannelInfoItemsCollector collector,
             }
         }
 
-        String nextPageUrl;
-        try {
-            nextPageUrl = responseObject.getString("next_href");
-            if (!nextPageUrl.contains("client_id=")) {
-                nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId();
-            }
-        } catch (final Exception ignored) {
-            nextPageUrl = "";
-        }
-
-        return nextPageUrl;
+        return getNextPageUrl(responseObject);
     }
 
     /**
@@ -262,6 +251,7 @@ public static String getStreamsFromApiMinItems(final int minItems,
      *
      * @return the next streams url, empty if don't have
      */
+    @Nonnull
     public static String getStreamsFromApi(final StreamInfoItemsCollector collector,
                                            final String apiUrl,
                                            final boolean charts) throws IOException,
@@ -289,17 +279,21 @@ public static String getStreamsFromApi(final StreamInfoItemsCollector collector,
             }
         }
 
-        String nextPageUrl;
+        return getNextPageUrl(responseObject);
+    }
+
+    @Nonnull
+    private static String getNextPageUrl(@Nonnull final JsonObject response) {
         try {
-            nextPageUrl = responseObject.getString("next_href");
+            String nextPageUrl = response.getString("next_href");
             if (!nextPageUrl.contains("client_id=")) {
                 nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId();
             }
+            return nextPageUrl;
         } catch (final Exception ignored) {
-            nextPageUrl = "";
+            return "";
         }
 
-        return nextPageUrl;
     }
 
     public static String getStreamsFromApi(final StreamInfoItemsCollector collector,
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
@@ -15,6 +15,7 @@
 import org.schabi.newpipe.extractor.downloader.Response;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
 
 import java.io.IOException;
@@ -33,22 +34,7 @@ public SoundcloudCommentsExtractor(final StreamingService service,
     @Override
     public InfoItemsPage<CommentsInfoItem> getInitialPage() throws ExtractionException,
             IOException {
-        final Downloader downloader = NewPipe.getDownloader();
-        final Response response = downloader.get(getUrl());
-
-        final JsonObject json;
-        try {
-            json = JsonParser.object().from(response.responseBody());
-        } catch (final JsonParserException e) {
-            throw new ParsingException("Could not parse json", e);
-        }
-
-        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
-                getServiceId());
-
-        collectStreamsFrom(collector, json.getArray("collection"));
-
-        return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
+        return getPage(getUrl());
     }
 
     @Override
@@ -57,9 +43,14 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
         if (page == null || isNullOrEmpty(page.getUrl())) {
             throw new IllegalArgumentException("Page doesn't contain an URL");
         }
+        return getPage(page.getUrl());
+    }
 
+    @Nonnull
+    private InfoItemsPage<CommentsInfoItem> getPage(@Nonnull final String url)
+            throws ParsingException, IOException, ReCaptchaException {
         final Downloader downloader = NewPipe.getDownloader();
-        final Response response = downloader.get(page.getUrl());
+        final Response response = downloader.get(url);
 
         final JsonObject json;
         try {
@@ -73,7 +64,7 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
 
         collectStreamsFrom(collector, json.getArray("collection"));
 
-        return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
+        return new InfoItemsPage<>(collector, new Page(json.getString("next_href", null)));
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java
@@ -33,7 +33,9 @@
 import javax.annotation.Nonnull;
 
 public class SoundcloudSearchExtractor extends SearchExtractor {
-    private JsonArray initialSearchCollection;
+    private JsonObject initialSearchObject;
+    private static final String COLLECTION = "collection";
+    private static final String TOTAL_RESULTS = "total_results";
 
     public SoundcloudSearchExtractor(final StreamingService service,
                                      final SearchQueryHandler linkHandler) {
@@ -60,9 +62,15 @@ public List<MetaInfo> getMetaInfo() {
     @Nonnull
     @Override
     public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
-        return new InfoItemsPage<>(
-                collectItems(initialSearchCollection),
-                getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE));
+        if (initialSearchObject.getInt(TOTAL_RESULTS) > ITEMS_PER_PAGE) {
+            return new InfoItemsPage<>(
+                    collectItems(initialSearchObject.getArray(COLLECTION)),
+                    getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE));
+        } else {
+            return new InfoItemsPage<>(
+                    collectItems(initialSearchObject.getArray(COLLECTION)), null);
+        }
+
     }
 
     @Override
@@ -74,17 +82,23 @@ public InfoItemsPage<InfoItem> getPage(final Page page) throws IOException,
 
         final Downloader dl = getDownloader();
         final JsonArray searchCollection;
+        final int totalResults;
         try {
             final String response = dl.get(page.getUrl(), getExtractorLocalization())
                     .responseBody();
-            searchCollection = JsonParser.object().from(response).getArray("collection");
+            final JsonObject result = JsonParser.object().from(response);
+            searchCollection = result.getArray(COLLECTION);
+            totalResults = result.getInt(TOTAL_RESULTS);
         } catch (final JsonParserException e) {
             throw new ParsingException("Could not parse json response", e);
         }
 
-        return new InfoItemsPage<>(collectItems(searchCollection),
-                getNextPageFromCurrentUrl(page.getUrl(),
-                        currentOffset -> currentOffset + ITEMS_PER_PAGE));
+        if (getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE < totalResults) {
+            return new InfoItemsPage<>(collectItems(searchCollection),
+                    getNextPageFromCurrentUrl(page.getUrl(),
+                            currentOffset -> currentOffset + ITEMS_PER_PAGE));
+        }
+        return new InfoItemsPage<>(collectItems(searchCollection), null);
     }
 
     @Override
@@ -94,12 +108,12 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException
         final String url = getUrl();
         try {
             final String response = dl.get(url, getExtractorLocalization()).responseBody();
-            initialSearchCollection = JsonParser.object().from(response).getArray("collection");
+            initialSearchObject = JsonParser.object().from(response);
         } catch (final JsonParserException e) {
             throw new ParsingException("Could not parse json response", e);
         }
 
-        if (initialSearchCollection.isEmpty()) {
+        if (initialSearchObject.getArray(COLLECTION).isEmpty()) {
             throw new SearchExtractor.NothingFoundException("Nothing found");
         }
     }
@@ -133,13 +147,20 @@ private InfoItemsCollector<InfoItem, InfoItemExtractor> collectItems(
 
     private Page getNextPageFromCurrentUrl(final String currentUrl,
                                            final IntUnaryOperator newPageOffsetCalculator)
-            throws MalformedURLException, UnsupportedEncodingException {
-        final int currentPageOffset = Integer.parseInt(
-                    Parser.compatParseMap(new URL(currentUrl).getQuery()).get("offset"));
+            throws ParsingException {
+        final int currentPageOffset = getOffsetFromUrl(currentUrl);
 
         return new Page(
                 currentUrl.replace(
                         "&offset=" + currentPageOffset,
                         "&offset=" + newPageOffsetCalculator.applyAsInt(currentPageOffset)));
     }
+
+    private int getOffsetFromUrl(final String url) throws ParsingException {
+        try {
+            return Integer.parseInt(Parser.compatParseMap(new URL(url).getQuery()).get("offset"));
+        } catch (MalformedURLException | UnsupportedEncodingException e) {
+            throw new ParsingException("Could not get offset from page URL", e);
+        }
+    }
 }
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java
@@ -1,5 +1,6 @@
 package org.schabi.newpipe.extractor.services.soundcloud.search;
 
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
 import static org.schabi.newpipe.extractor.services.DefaultTests.assertNoDuplicatedItems;
@@ -181,4 +182,27 @@ void testIsVerified() throws IOException, ExtractionException {
             assertTrue(verified);
         }
     }
+
+    public static class NoNextPage extends DefaultSearchExtractorTest {
+
+        private static SearchExtractor extractor;
+        private static final String QUERY = "Dan at hor#berlgbd";
+
+        @BeforeAll
+        public static void setUp() throws Exception {
+            NewPipe.init(DownloaderTestImpl.getInstance());
+            extractor = SoundCloud.getSearchExtractor(QUERY);
+            extractor.fetchPage();
+        }
+
+        @Override public boolean expectedHasMoreItems() { return false; }
+        @Override public SearchExtractor extractor() throws Exception { return extractor; }
+        @Override public StreamingService expectedService() throws Exception { return SoundCloud; }
+        @Override public String expectedName() throws Exception { return QUERY; }
+        @Override public String expectedId() throws Exception { return QUERY; }
+        @Override public String expectedUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); }
+        @Override public String expectedOriginalUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); }
+        @Override public String expectedSearchString() { return QUERY; }
+        @Nullable @Override public String expectedSearchSuggestion() { return null; }
+    }
 }