Skip to content

Commit c23fa7c

Browse files
ShareASmile and TobiGr committed
[SoundCloud] Detect whether there are any more search results
Previously, the extractor always assumed that there was an infinite number of search results, which caused problems. When searching for something with only a few results, clients such as NewPipe would repeatedly try to load more results without receiving any, generating a large number of requests and ultimately triggering reCAPTCHA challenges. Co-Authored-By: Tobi <17365767+tobigr@users.noreply.github.com>
1 parent 9b58c4e commit c23fa7c

4 files changed

Lines changed: 82 additions & 52 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
11
package org.schabi.newpipe.extractor.services.soundcloud;
22

3-
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
4-
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
5-
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
6-
73
import com.grack.nanojson.JsonArray;
84
import com.grack.nanojson.JsonObject;
95
import com.grack.nanojson.JsonParser;
106
import com.grack.nanojson.JsonParserException;
11-
127
import org.jsoup.Jsoup;
138
import org.jsoup.nodes.Document;
149
import org.jsoup.nodes.Element;
@@ -28,6 +23,7 @@
2823
import org.schabi.newpipe.extractor.utils.Parser.RegexException;
2924
import org.schabi.newpipe.extractor.utils.Utils;
3025

26+
import javax.annotation.Nonnull;
3127
import java.io.IOException;
3228
import java.net.MalformedURLException;
3329
import java.net.URL;
@@ -38,7 +34,9 @@
3834
import java.util.List;
3935
import java.util.Map;
4036

41-
import javax.annotation.Nonnull;
37+
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
38+
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
39+
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
4240

4341
public final class SoundcloudParsingHelper {
4442
private static String clientId;
@@ -201,6 +199,7 @@ public static String getUsersFromApiMinItems(final int minItems,
201199
*
202200
* @return the URL of the next page of users, or an empty string if there is none
203201
*/
202+
@Nonnull
204203
public static String getUsersFromApi(final ChannelInfoItemsCollector collector,
205204
final String apiUrl) throws IOException,
206205
ReCaptchaException, ParsingException {
@@ -222,17 +221,7 @@ public static String getUsersFromApi(final ChannelInfoItemsCollector collector,
222221
}
223222
}
224223

225-
String nextPageUrl;
226-
try {
227-
nextPageUrl = responseObject.getString("next_href");
228-
if (!nextPageUrl.contains("client_id=")) {
229-
nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId();
230-
}
231-
} catch (final Exception ignored) {
232-
nextPageUrl = "";
233-
}
234-
235-
return nextPageUrl;
224+
return getNextPageUrl(responseObject);
236225
}
237226

238227
/**
@@ -262,6 +251,7 @@ public static String getStreamsFromApiMinItems(final int minItems,
262251
*
263252
* @return the URL of the next page of streams, or an empty string if there is none
264253
*/
254+
@Nonnull
265255
public static String getStreamsFromApi(final StreamInfoItemsCollector collector,
266256
final String apiUrl,
267257
final boolean charts) throws IOException,
@@ -289,17 +279,21 @@ public static String getStreamsFromApi(final StreamInfoItemsCollector collector,
289279
}
290280
}
291281

292-
String nextPageUrl;
282+
return getNextPageUrl(responseObject);
283+
}
284+
285+
@Nonnull
286+
private static String getNextPageUrl(@Nonnull final JsonObject response) {
293287
try {
294-
nextPageUrl = responseObject.getString("next_href");
288+
String nextPageUrl = response.getString("next_href");
295289
if (!nextPageUrl.contains("client_id=")) {
296290
nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId();
297291
}
292+
return nextPageUrl;
298293
} catch (final Exception ignored) {
299-
nextPageUrl = "";
294+
return "";
300295
}
301296

302-
return nextPageUrl;
303297
}
304298

305299
public static String getStreamsFromApi(final StreamInfoItemsCollector collector,

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.schabi.newpipe.extractor.downloader.Response;
1616
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
1717
import org.schabi.newpipe.extractor.exceptions.ParsingException;
18+
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
1819
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
1920

2021
import java.io.IOException;
@@ -33,22 +34,7 @@ public SoundcloudCommentsExtractor(final StreamingService service,
3334
@Override
3435
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws ExtractionException,
3536
IOException {
36-
final Downloader downloader = NewPipe.getDownloader();
37-
final Response response = downloader.get(getUrl());
38-
39-
final JsonObject json;
40-
try {
41-
json = JsonParser.object().from(response.responseBody());
42-
} catch (final JsonParserException e) {
43-
throw new ParsingException("Could not parse json", e);
44-
}
45-
46-
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
47-
getServiceId());
48-
49-
collectStreamsFrom(collector, json.getArray("collection"));
50-
51-
return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
37+
return getPage(getUrl());
5238
}
5339

5440
@Override
@@ -57,9 +43,14 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
5743
if (page == null || isNullOrEmpty(page.getUrl())) {
5844
throw new IllegalArgumentException("Page doesn't contain an URL");
5945
}
46+
return getPage(page.getUrl());
47+
}
6048

49+
@Nonnull
50+
private InfoItemsPage<CommentsInfoItem> getPage(@Nonnull final String url)
51+
throws ParsingException, IOException, ReCaptchaException {
6152
final Downloader downloader = NewPipe.getDownloader();
62-
final Response response = downloader.get(page.getUrl());
53+
final Response response = downloader.get(url);
6354

6455
final JsonObject json;
6556
try {
@@ -73,7 +64,7 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
7364

7465
collectStreamsFrom(collector, json.getArray("collection"));
7566

76-
return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
67+
return new InfoItemsPage<>(collector, new Page(json.getString("next_href", null)));
7768
}
7869

7970
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
import javax.annotation.Nonnull;
3434

3535
public class SoundcloudSearchExtractor extends SearchExtractor {
36-
private JsonArray initialSearchCollection;
36+
private JsonObject initialSearchObject;
37+
private static final String COLLECTION = "collection";
38+
private static final String TOTAL_RESULTS = "total_results";
3739

3840
public SoundcloudSearchExtractor(final StreamingService service,
3941
final SearchQueryHandler linkHandler) {
@@ -60,9 +62,15 @@ public List<MetaInfo> getMetaInfo() {
6062
@Nonnull
6163
@Override
6264
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
63-
return new InfoItemsPage<>(
64-
collectItems(initialSearchCollection),
65-
getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE));
65+
if (initialSearchObject.getInt(TOTAL_RESULTS) > ITEMS_PER_PAGE) {
66+
return new InfoItemsPage<>(
67+
collectItems(initialSearchObject.getArray(COLLECTION)),
68+
getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE));
69+
} else {
70+
return new InfoItemsPage<>(
71+
collectItems(initialSearchObject.getArray(COLLECTION)), null);
72+
}
73+
6674
}
6775

6876
@Override
@@ -74,17 +82,23 @@ public InfoItemsPage<InfoItem> getPage(final Page page) throws IOException,
7482

7583
final Downloader dl = getDownloader();
7684
final JsonArray searchCollection;
85+
final int totalResults;
7786
try {
7887
final String response = dl.get(page.getUrl(), getExtractorLocalization())
7988
.responseBody();
80-
searchCollection = JsonParser.object().from(response).getArray("collection");
89+
final JsonObject result = JsonParser.object().from(response);
90+
searchCollection = result.getArray(COLLECTION);
91+
totalResults = result.getInt(TOTAL_RESULTS);
8192
} catch (final JsonParserException e) {
8293
throw new ParsingException("Could not parse json response", e);
8394
}
8495

85-
return new InfoItemsPage<>(collectItems(searchCollection),
86-
getNextPageFromCurrentUrl(page.getUrl(),
87-
currentOffset -> currentOffset + ITEMS_PER_PAGE));
96+
if (getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE < totalResults) {
97+
return new InfoItemsPage<>(collectItems(searchCollection),
98+
getNextPageFromCurrentUrl(page.getUrl(),
99+
currentOffset -> currentOffset + ITEMS_PER_PAGE));
100+
}
101+
return new InfoItemsPage<>(collectItems(searchCollection), null);
88102
}
89103

90104
@Override
@@ -94,12 +108,12 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException
94108
final String url = getUrl();
95109
try {
96110
final String response = dl.get(url, getExtractorLocalization()).responseBody();
97-
initialSearchCollection = JsonParser.object().from(response).getArray("collection");
111+
initialSearchObject = JsonParser.object().from(response);
98112
} catch (final JsonParserException e) {
99113
throw new ParsingException("Could not parse json response", e);
100114
}
101115

102-
if (initialSearchCollection.isEmpty()) {
116+
if (initialSearchObject.getArray(COLLECTION).isEmpty()) {
103117
throw new SearchExtractor.NothingFoundException("Nothing found");
104118
}
105119
}
@@ -133,13 +147,20 @@ private InfoItemsCollector<InfoItem, InfoItemExtractor> collectItems(
133147

134148
private Page getNextPageFromCurrentUrl(final String currentUrl,
135149
final IntUnaryOperator newPageOffsetCalculator)
136-
throws MalformedURLException, UnsupportedEncodingException {
137-
final int currentPageOffset = Integer.parseInt(
138-
Parser.compatParseMap(new URL(currentUrl).getQuery()).get("offset"));
150+
throws ParsingException {
151+
final int currentPageOffset = getOffsetFromUrl(currentUrl);
139152

140153
return new Page(
141154
currentUrl.replace(
142155
"&offset=" + currentPageOffset,
143156
"&offset=" + newPageOffsetCalculator.applyAsInt(currentPageOffset)));
144157
}
158+
159+
private int getOffsetFromUrl(final String url) throws ParsingException {
160+
try {
161+
return Integer.parseInt(Parser.compatParseMap(new URL(url).getQuery()).get("offset"));
162+
} catch (MalformedURLException | UnsupportedEncodingException e) {
163+
throw new ParsingException("Could not get offset from page URL", e);
164+
}
165+
}
145166
}

extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.schabi.newpipe.extractor.services.soundcloud.search;
22

3+
import static org.junit.jupiter.api.Assertions.assertFalse;
34
import static org.junit.jupiter.api.Assertions.assertTrue;
45
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
56
import static org.schabi.newpipe.extractor.services.DefaultTests.assertNoDuplicatedItems;
@@ -181,4 +182,27 @@ void testIsVerified() throws IOException, ExtractionException {
181182
assertTrue(verified);
182183
}
183184
}
185+
186+
public static class NoNextPage extends DefaultSearchExtractorTest {
187+
188+
private static SearchExtractor extractor;
189+
private static final String QUERY = "Dan at hor#berlgbd";
190+
191+
@BeforeAll
192+
public static void setUp() throws Exception {
193+
NewPipe.init(DownloaderTestImpl.getInstance());
194+
extractor = SoundCloud.getSearchExtractor(QUERY);
195+
extractor.fetchPage();
196+
}
197+
198+
@Override public boolean expectedHasMoreItems() { return false; }
199+
@Override public SearchExtractor extractor() throws Exception { return extractor; }
200+
@Override public StreamingService expectedService() throws Exception { return SoundCloud; }
201+
@Override public String expectedName() throws Exception { return QUERY; }
202+
@Override public String expectedId() throws Exception { return QUERY; }
203+
@Override public String expectedUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); }
204+
@Override public String expectedOriginalUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); }
205+
@Override public String expectedSearchString() { return QUERY; }
206+
@Nullable @Override public String expectedSearchSuggestion() { return null; }
207+
}
184208
}

0 commit comments

Comments
 (0)