Skip to content

Commit 25613c2

Browse files
committed
Address requested changes
1 parent b2783f1 commit 25613c2

2 files changed

Lines changed: 128 additions & 51 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 48 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.net.URL;
2424
import java.net.URLDecoder;
2525
import java.nio.charset.StandardCharsets;
26-
import java.security.SecureRandom;
2726
import java.time.LocalDate;
2827
import java.time.OffsetDateTime;
2928
import java.time.ZoneOffset;
@@ -65,6 +64,11 @@ private YoutubeParsingHelper() {
6564
public static final String CPN = "cpn";
6665
public static final String VIDEO_ID = "videoId";
6766

67+
/**
68+
* Seed that will be used for video tests, in order to mock video requests.
69+
*/
70+
private static final long SEED_FOR_VIDEOS_TESTS = 3000;
71+
6872
private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00";
6973
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
7074

@@ -82,13 +86,17 @@ private YoutubeParsingHelper() {
8286
private static boolean keyAndVersionExtracted = false;
8387
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
8488
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
89+
8590
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
8691
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
8792
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
8893
"client.version=([0-9\\.]+)"};
8994
private static final String[] INNERTUBE_API_KEY_REGEXES =
9095
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
9196
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
97+
private static final String[] INITIAL_DATA_REGEXES =
98+
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
99+
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
92100
private static final String INNERTUBE_CLIENT_NAME_REGEX =
93101
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
94102

@@ -98,13 +106,24 @@ private YoutubeParsingHelper() {
98106
private static Random numberGenerator = new Random();
99107

100108
/**
101-
* <code>PENDING+</code> means that the user did not yet submit their choices.
109+
* {@code PENDING+} means that the user did not yet submit their choices.
110+
*
111+
* <p>
102112
* Therefore, YouTube & Google should not track the user, because they did not give consent.
113+
* </p>
114+
*
115+
* <p>
103116
* The three digits at the end can be random, but are required.
117+
* </p>
104118
*/
105119
private static final String CONSENT_COOKIE_VALUE = "PENDING+";
120+
106121
/**
107-
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com
122+
* YouTube {@code CONSENT} cookie.
123+
*
124+
* <p>
125+
* Should prevent redirect to {@code consent.youtube.com}.
126+
* </p>
108127
*/
109128
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
110129

@@ -312,17 +331,10 @@ public static String extractVideoIdFromMixId(@Nonnull final String playlistId)
312331
}
313332
}
314333

315-
public static JsonObject getInitialData(final String html) throws ParsingException {
334+
private static JsonObject getInitialData(final String html) throws ParsingException {
316335
try {
317-
try {
318-
final String initialData = Parser.matchGroup1(
319-
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
320-
return JsonParser.object().from(initialData);
321-
} catch (final Parser.RegexException e) {
322-
final String initialData = Parser.matchGroup1(
323-
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
324-
return JsonParser.object().from(initialData);
325-
}
336+
return JsonParser.object().from(getStringResultFromRegexArray(html,
337+
INITIAL_DATA_REGEXES, 1));
326338
} catch (final JsonParserException | Parser.RegexException e) {
327339
throw new ParsingException("Could not get ytInitialData", e);
328340
}
@@ -445,7 +457,7 @@ private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
445457
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
446458
} catch (final Parser.RegexException e) {
447459
throw new ParsingException(
448-
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
460+
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page", e);
449461
}
450462
keyAndVersionExtracted = true;
451463
}
@@ -609,8 +621,7 @@ public static String[] getYoutubeMusicKey()
609621
final String response = getDownloader().get(url, headers).responseBody();
610622
musicClientVersion = getStringResultFromRegexArray(response,
611623
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
612-
musicKey = getStringResultFromRegexArray(response,
613-
INNERTUBE_API_KEY_REGEXES, 1);
624+
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
614625
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
615626
} catch (final Exception e) {
616627
final String url = "https://music.youtube.com/";
@@ -696,10 +707,11 @@ public static String getUrlFromNavigationEndpoint(@Nonnull final JsonObject navi
696707
}
697708

698709
/**
699-
* Get the text from a JSON object that has either a simpleText or a runs array.
710+
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
711+
* array.
700712
*
701713
* @param textObject JSON object to get the text from
702-
* @param html whether to return HTML, by parsing the navigationEndpoint
714+
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
703715
* @return text in the JSON object or {@code null}
704716
*/
705717
@Nullable
@@ -1343,15 +1355,7 @@ public static String unescapeDocument(@Nonnull final String doc) {
13431355
*/
13441356
@Nonnull
13451357
public static String generateContentPlaybackNonce() {
1346-
final SecureRandom random = new SecureRandom();
1347-
final StringBuilder stringBuilder = new StringBuilder();
1348-
1349-
for (int i = 0; i < 16; i++) {
1350-
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
1351-
(random.nextInt(128) + 1) & 63));
1352-
}
1353-
1354-
return stringBuilder.toString();
1358+
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 16);
13551359
}
13561360

13571361
/**
@@ -1367,14 +1371,23 @@ public static String generateContentPlaybackNonce() {
13671371
*/
13681372
@Nonnull
13691373
public static String generateTParameter() {
1370-
final SecureRandom random = new SecureRandom();
1371-
final StringBuilder stringBuilder = new StringBuilder();
1372-
1373-
for (int i = 0; i < 12; i++) {
1374-
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
1375-
(random.nextInt(128) + 1) & 63));
1376-
}
1374+
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 12);
1375+
}
13771376

1378-
return stringBuilder.toString();
1377+
/**
1378+
* Set the seed for video tests.
1379+
*
1380+
* <p>
1381+
* This seed will be used to generate the same {@code t} and {@code cpn} values between
1382+
* different executions of tests so mocks can be used for stream tests.
1383+
* </p>
1384+
*
1385+
* <p>
1386+
* This method will call {@link Utils#setSecureRandomSeed(long)} with the
1387+
* {@link #SEED_FOR_VIDEOS_TESTS value}.
1388+
* </p>
1389+
*/
1390+
public static void setSeedForVideoTests() {
1391+
Utils.setSecureRandomSeed(SEED_FOR_VIDEOS_TESTS);
13791392
}
13801393
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java

Lines changed: 80 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.net.MalformedURLException;
99
import java.net.URL;
1010
import java.net.URLDecoder;
11+
import java.security.SecureRandom;
1112
import java.util.*;
1213
import java.util.regex.Pattern;
1314

@@ -19,16 +20,23 @@ public class Utils {
1920
public static final String EMPTY_STRING = "";
2021
private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\.");
2122
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
23+
private static final SecureRandom random = new SecureRandom();
2224

2325
private Utils() {
2426
// no instance
2527
}
2628

2729
/**
28-
* Remove all non-digit characters from a string.<p>
29-
* Examples:<p>
30-
* <ul><li>1 234 567 views -&gt; 1234567</li>
31-
* <li>$31,133.124 -&gt; 31133124</li></ul>
30+
* Remove all non-digit characters from a string.
31+
*
32+
* <p>
33+
* Examples:
34+
* </p>
35+
*
36+
* <ul>
37+
* <li>1 234 567 views -&gt; 1234567</li>
38+
* <li>$31,133.124 -&gt; 31133124</li>
39+
* </ul>
3240
*
3341
* @param toRemove string to remove non-digit chars
3442
* @return a string that contains only digits
@@ -39,8 +47,12 @@ public static String removeNonDigitCharacters(@Nonnull final String toRemove) {
3947
}
4048

4149
/**
42-
* <p>Convert a mixed number word to a long.</p>
43-
* <p>Examples:</p>
50+
* Convert a mixed number word to a long.
51+
*
52+
* <p>
53+
* Examples:
54+
* </p>
55+
*
4456
* <ul>
4557
* <li>123 -&gt; 123</li>
4658
* <li>1.23K -&gt; 1230</li>
@@ -49,16 +61,18 @@ public static String removeNonDigitCharacters(@Nonnull final String toRemove) {
4961
*
5062
* @param numberWord string to be converted to a long
5163
* @return a long
52-
* @throws NumberFormatException
53-
* @throws ParsingException
64+
* @throws NumberFormatException if the string does not contain a parsable double
65+
* by {@link Double#parseDouble(String)}
66+
* @throws ParsingException if a number could not be found in the string provided
5467
*/
5568
public static long mixedNumberWordToLong(final String numberWord)
5669
throws NumberFormatException, ParsingException {
5770
String multiplier = "";
5871
try {
5972
multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2);
60-
} catch (ParsingException ignored) {
73+
} catch (final ParsingException ignored) {
6174
}
75+
6276
final double count = Double.parseDouble(Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)",
6377
numberWord).replace(",", "."));
6478
switch (multiplier.toUpperCase()) {
@@ -107,11 +121,15 @@ public static String replaceHttpWithHttps(final String url) {
107121

108122
/**
109123
* Get the value of a URL-query by name.
110-
* If a url-query is give multiple times, only the value of the first query is returned
124+
*
125+
* <p>
126+
* If a URL query is given multiple times, only the value of the first query is returned.
127+
* </p>
111128
*
112129
* @param url the url to be used
113130
* @param parameterName the pattern that will be used to check the url
114-
* @return a string that contains the value of the query parameter or null if nothing was found
131+
* @return a string that contains the value of the query parameter or {@code null} if nothing
132+
* was found
115133
*/
116134
@Nullable
117135
public static String getQueryValue(@Nonnull final URL url,
@@ -149,11 +167,14 @@ public static String getQueryValue(@Nonnull final URL url,
149167
}
150168

151169
/**
152-
* converts a string to a URL-Object.
153-
* defaults to HTTP if no protocol is given
170+
* Convert a string to a {@link URL URL object}.
171+
*
172+
* <p>
173+
* Defaults to HTTP if no protocol is given.
174+
* </p>
154175
*
155176
* @param url the string to be converted to a URL-Object
156-
* @return a URL-Object containing the url
177+
* @return a {@link URL URL object} containing the url
157178
*/
158179
@Nonnull
159180
public static URL stringToURL(final String url) throws MalformedURLException {
@@ -245,11 +266,12 @@ public static boolean isNullOrEmpty(final String str) {
245266
}
246267

247268
/**
248-
* Checks if a collection is null or empty.
269+
* Check if a collection is null or empty.
249270
*
250271
* <p>
251272
* This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s.
252273
* </p>
274+
*
253275
* @param collection the collection on which check if it's null or empty
254276
* @return whether the collection is null or empty
255277
*/
@@ -258,11 +280,12 @@ public static boolean isNullOrEmpty(final Collection<?> collection) {
258280
}
259281

260282
/**
261-
* Checks if a {@link Map map} is null or empty.
283+
* Check if a {@link Map map} is null or empty.
262284
*
263285
* <p>
264286
* This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s.
265287
* </p>
288+
*
266289
* @param map the {@link Map map} on which check if it's null or empty
267290
* @return whether the {@link Map map} is null or empty
268291
*/
@@ -386,6 +409,7 @@ public static String getStringResultFromRegexArray(@Nonnull final String input,
386409
} catch (final Parser.RegexException ignored) {
387410
}
388411
}
412+
389413
if (result == null) {
390414
throw new Parser.RegexException("No regex matched the input on group " + group);
391415
}
@@ -419,9 +443,49 @@ public static String getStringResultFromRegexArray(@Nonnull final String input,
419443
} catch (final Parser.RegexException ignored) {
420444
}
421445
}
446+
422447
if (result == null) {
423448
throw new Parser.RegexException("No regex matched the input on group " + group);
424449
}
425450
return result;
426451
}
452+
453+
/**
454+
* Generate a random string using the secure random device {@link #random}.
455+
*
456+
* <p>
457+
* {@link #setSecureRandomSeed(long)} might be useful when mocking tests.
458+
* </p>
459+
*
460+
* @param alphabet the characters' alphabet to use
461+
* @param length the length of the returned string
462+
* @return a random string of the requested length made of only characters from the provided
463+
* alphabet
464+
*/
465+
@Nonnull
466+
public static String randomStringFromAlphabet(final String alphabet, final int length) {
467+
final StringBuilder stringBuilder = new StringBuilder();
468+
for (int i = 0; i < length; ++i) {
469+
stringBuilder.append(alphabet.charAt(random.nextInt(alphabet.length())));
470+
}
471+
return stringBuilder.toString();
472+
}
473+
474+
/**
475+
* Seed the secure random device used for {@link #randomStringFromAlphabet(String, int)}.
476+
*
477+
* <p>
478+
* Use this in tests so that they can be mocked as the same random numbers are always
479+
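* generated. NOTE(review): {@link SecureRandom#setSeed(long)} supplements, rather than replaces, the existing seed, so reproducibility may depend on the underlying provider; confirm.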
480+
* </p>
481+
*
482+
* <p>
483+
* This is not intended to be used outside of tests.
484+
* </p>
485+
*
486+
* @param seed the seed to pass to {@link SecureRandom#setSeed(long)}
487+
*/
488+
public static void setSecureRandomSeed(final long seed) {
489+
random.setSeed(seed);
490+
}
427491
}

0 commit comments

Comments
 (0)