Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
package org.schabi.newpipe.extractor.services.youtube;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCachedUrlIfNeeded;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObjectOrThrow;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.isGoogleURL;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;

import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import javax.annotation.Nonnull;

/**
 * Static helpers which turn the renderers found inside {@code itemSectionRenderer} contents
 * ({@code infoPanelContentRenderer}, {@code clarificationRenderer} and
 * {@code emergencyOneboxRenderer}) into {@link MetaInfo} objects.
 */
public final class YoutubeMetaInfoHelper {

    private YoutubeMetaInfoHelper() {
        // utility class: only static members, never instantiated
    }

    /**
     * Collects the meta info of every supported renderer found in the given contents.
     *
     * <p>Entries which are not JSON objects, and entries without an
     * {@code itemSectionRenderer}, are skipped.</p>
     *
     * @param contents the array whose {@code itemSectionRenderer} entries are inspected
     * @return the extracted meta info, in encounter order; empty if nothing was found
     * @throws ParsingException if a supported renderer is present but could not be parsed
     */
    @Nonnull
    public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
            throws ParsingException {
        final List<MetaInfo> metaInfo = new ArrayList<>();
        for (final Object content : contents) {
            // same guard as getEmergencyOneboxRenderer: ignore anything that is not an object
            if (!(content instanceof JsonObject)) {
                continue;
            }
            final JsonObject resultObject = (JsonObject) content;
            if (!resultObject.has("itemSectionRenderer")) {
                continue;
            }

            for (final Object sectionContentObject
                    : resultObject.getObject("itemSectionRenderer").getArray("contents")) {
                if (!(sectionContentObject instanceof JsonObject)) {
                    continue;
                }
                final JsonObject sectionContent = (JsonObject) sectionContentObject;

                // the checks are independent: one section content may carry several renderers
                if (sectionContent.has("infoPanelContentRenderer")) {
                    metaInfo.add(getInfoPanelContent(
                            sectionContent.getObject("infoPanelContentRenderer")));
                }
                if (sectionContent.has("clarificationRenderer")) {
                    metaInfo.add(getClarificationRenderer(
                            sectionContent.getObject("clarificationRenderer")));
                }
                if (sectionContent.has("emergencyOneboxRenderer")) {
                    getEmergencyOneboxRenderer(
                            sectionContent.getObject("emergencyOneboxRenderer"),
                            metaInfo::add);
                }
            }
        }
        return metaInfo;
    }

    /**
     * Builds a {@link MetaInfo} from an {@code infoPanelContentRenderer}.
     *
     * <p>The texts of the {@code paragraphs} are joined with {@code <br>} into an HTML
     * description. If a {@code sourceEndpoint} is present, its URL and the text of
     * {@code inlineSource} are added as link.</p>
     *
     * @param infoPanelContentRenderer the renderer to read from
     * @return the populated meta info
     * @throws ParsingException if the source URL or its link text could not be extracted
     */
    @Nonnull
    private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
            throws ParsingException {
        final MetaInfo metaInfo = new MetaInfo();
        final StringBuilder sb = new StringBuilder();
        for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
            if (!(paragraph instanceof JsonObject)) {
                continue;
            }
            final String paragraphText = getTextFromObject((JsonObject) paragraph);
            if (paragraphText == null) {
                // appending null to a StringBuilder would insert the literal text "null"
                continue;
            }
            if (sb.length() != 0) {
                sb.append("<br>");
            }
            sb.append(paragraphText);
        }
        metaInfo.setContent(new Description(sb.toString(), Description.HTML));

        if (infoPanelContentRenderer.has("sourceEndpoint")) {
            final String metaInfoLinkUrl = getUrlFromNavigationEndpoint(
                    infoPanelContentRenderer.getObject("sourceEndpoint"));
            try {
                metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
                        metaInfoLinkUrl))));
            } catch (final NullPointerException | MalformedURLException e) {
                throw new ParsingException("Could not get metadata info URL", e);
            }

            metaInfo.addUrlText(getLinkTextOrThrow(
                    infoPanelContentRenderer.getObject("inlineSource")));
        }

        return metaInfo;
    }

    /**
     * Builds a {@link MetaInfo} from a {@code clarificationRenderer}.
     *
     * <p>{@code contentTitle} and {@code text} are mandatory. The link of the
     * {@code actionButton} and the secondary link ({@code secondaryEndpoint} together with
     * {@code secondarySource}) are added when present.</p>
     *
     * @param clarificationRenderer the renderer to read from
     * @return the populated meta info
     * @throws ParsingException if the title, the text, a URL or the action button's link text
     *                          could not be extracted
     */
    @Nonnull
    private static MetaInfo getClarificationRenderer(
            @Nonnull final JsonObject clarificationRenderer) throws ParsingException {
        final MetaInfo metaInfo = new MetaInfo();

        final String title = getTextFromObject(clarificationRenderer
                .getObject("contentTitle"));
        final String text = getTextFromObject(clarificationRenderer
                .getObject("text"));
        if (title == null || text == null) {
            throw new ParsingException("Could not extract clarification renderer content");
        }
        metaInfo.setTitle(title);
        metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));

        if (clarificationRenderer.has("actionButton")) {
            final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
                    .getObject("buttonRenderer");
            try {
                final String url = getUrlFromNavigationEndpoint(actionButton
                        .getObject("command"));
                metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url))));
            } catch (final NullPointerException | MalformedURLException e) {
                throw new ParsingException("Could not get metadata info URL", e);
            }

            metaInfo.addUrlText(getLinkTextOrThrow(actionButton.getObject("text")));
        }

        if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer
                .has("secondarySource")) {
            final String url = getUrlFromNavigationEndpoint(clarificationRenderer
                    .getObject("secondaryEndpoint"));
            // Ignore Google URLs, because those point to a Google search about "Covid-19"
            if (url != null && !isGoogleURL(url)) {
                try {
                    metaInfo.addUrl(new URL(url));
                    final String description = getTextFromObject(clarificationRenderer
                            .getObject("secondarySource"));
                    // fall back to the URL itself when the source has no readable text
                    metaInfo.addUrlText(description == null ? url : description);
                } catch (final MalformedURLException e) {
                    throw new ParsingException("Could not get metadata info secondary URL", e);
                }
            }
        }

        return metaInfo;
    }

    /**
     * Builds one {@link MetaInfo} per {@code singleActionEmergencySupportRenderer} found among
     * the values of an {@code emergencyOneboxRenderer} and hands each of them to the consumer.
     *
     * @param emergencyOneboxRenderer the renderer to read from
     * @param addMetaInfo             receives every successfully built meta info
     * @throws ParsingException if no support renderer was found, or if a text or the URL of a
     *                          support renderer could not be extracted
     */
    private static void getEmergencyOneboxRenderer(
            @Nonnull final JsonObject emergencyOneboxRenderer,
            final Consumer<MetaInfo> addMetaInfo
    ) throws ParsingException {
        final List<JsonObject> supportRenderers = emergencyOneboxRenderer.values()
                .stream()
                .filter(o -> o instanceof JsonObject
                        && ((JsonObject) o).has("singleActionEmergencySupportRenderer"))
                .map(o -> ((JsonObject) o).getObject("singleActionEmergencySupportRenderer"))
                .collect(Collectors.toList());

        if (supportRenderers.isEmpty()) {
            throw new ParsingException("Could not extract any meta info from emergency renderer");
        }

        for (final JsonObject r : supportRenderers) {
            final MetaInfo metaInfo = new MetaInfo();

            // usually an encouragement like "We are with you"
            final String title = getTextFromObjectOrThrow(r.getObject("title"), "title");
            // usually a phone number
            final String action = getTextFromObjectOrThrow(r.getObject("actionText"), "action");
            // usually details about the phone number
            final String details = getTextFromObjectOrThrow(r.getObject("detailsText"), "details");
            // usually the name of an association
            final String urlText = getTextFromObjectOrThrow(r.getObject("navigationText"),
                    "urlText");

            metaInfo.setTitle(title);
            metaInfo.setContent(new Description(details + "\n" + action, Description.PLAIN_TEXT));
            metaInfo.addUrlText(urlText);

            // usually the webpage of the association
            final String url = getUrlFromNavigationEndpoint(r.getObject("navigationEndpoint"));
            if (url == null) {
                throw new ParsingException("Could not extract emergency renderer url");
            }

            try {
                metaInfo.addUrl(new URL(replaceHttpWithHttps(url)));
            } catch (final MalformedURLException e) {
                throw new ParsingException("Could not parse emergency renderer url", e);
            }

            addMetaInfo.accept(metaInfo);
        }
    }

    /**
     * Extracts the text of a link and rejects missing or empty results.
     *
     * @param linkTextObject the text object of the link
     * @return the non-empty link text
     * @throws ParsingException if the text is {@code null} or empty
     */
    @Nonnull
    private static String getLinkTextOrThrow(final JsonObject linkTextObject)
            throws ParsingException {
        final String linkText = getTextFromObject(linkTextObject);
        if (isNullOrEmpty(linkText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        return linkText;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;
import org.jsoup.nodes.Entities;

import org.jsoup.nodes.Entities;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
Expand All @@ -47,7 +46,6 @@
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.stream.AudioTrackType;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
Expand All @@ -62,12 +60,10 @@
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
Expand Down Expand Up @@ -262,7 +258,7 @@ private YoutubeParsingHelper() {

private static boolean consentAccepted = false;

private static boolean isGoogleURL(final String url) {
public static boolean isGoogleURL(final String url) {
final String cachedUrl = extractCachedUrlIfNeeded(url);
try {
final URL u = new URL(cachedUrl);
Expand Down Expand Up @@ -1080,6 +1076,16 @@ public static String getAttributedDescription(
.replaceAll(" {2}", " &nbsp;");
}

/**
 * Variant of {@link #getTextFromObject(JsonObject)} which never returns {@code null}.
 *
 * @param textObject the JSON object to extract the text from
 * @param error      short label appended to the exception message to identify what was
 *                   being extracted
 * @return the extracted text
 * @throws ParsingException if no text could be extracted from {@code textObject}
 */
@Nonnull
public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
        throws ParsingException {
    final String text = getTextFromObject(textObject);
    if (text != null) {
        return text;
    }
    throw new ParsingException("Could not extract text: " + error);
}

@Nullable
public static String getTextFromObject(final JsonObject textObject) {
return getTextFromObject(textObject, false);
Expand Down Expand Up @@ -1648,120 +1654,6 @@ public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
}
}

/**
 * Collects the meta info of the {@code infoPanelContentRenderer} and
 * {@code clarificationRenderer} entries found in the {@code itemSectionRenderer}s of the
 * given contents.
 *
 * @param contents the array whose {@code itemSectionRenderer} entries are inspected
 * @return the extracted meta info, in encounter order; empty if nothing was found
 * @throws ParsingException if one of the renderers could not be parsed
 */
@Nonnull
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
        throws ParsingException {
    final List<MetaInfo> result = new ArrayList<>();
    for (final Object entry : contents) {
        final JsonObject section = (JsonObject) entry;
        if (!section.has("itemSectionRenderer")) {
            continue;
        }

        final JsonArray sectionContents =
                section.getObject("itemSectionRenderer").getArray("contents");
        for (final Object item : sectionContents) {
            final JsonObject renderers = (JsonObject) item;

            // both checks are independent: a single item may carry both renderers
            if (renderers.has("infoPanelContentRenderer")) {
                final JsonObject infoPanel = renderers.getObject("infoPanelContentRenderer");
                result.add(getInfoPanelContent(infoPanel));
            }
            if (renderers.has("clarificationRenderer")) {
                final JsonObject clarification = renderers.getObject("clarificationRenderer");
                result.add(getClarificationRendererContent(clarification));
            }
        }
    }
    return result;
}

/**
 * Builds a {@link MetaInfo} from an {@code infoPanelContentRenderer}.
 *
 * <p>The texts of the {@code paragraphs} are joined with {@code <br>} into an HTML
 * description; paragraphs without extractable text are skipped. If a
 * {@code sourceEndpoint} is present, its URL and the text of {@code inlineSource} are added
 * as link.</p>
 *
 * @param infoPanelContentRenderer the renderer to read from
 * @return the populated meta info
 * @throws ParsingException if the source URL or its link text could not be extracted
 */
@Nonnull
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
        throws ParsingException {
    final MetaInfo metaInfo = new MetaInfo();
    final StringBuilder sb = new StringBuilder();
    for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
        final String paragraphText =
                YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph);
        if (paragraphText == null) {
            // appending null to a StringBuilder would insert the literal text "null"
            continue;
        }
        if (sb.length() != 0) {
            sb.append("<br>");
        }
        sb.append(paragraphText);
    }
    metaInfo.setContent(new Description(sb.toString(), Description.HTML));
    if (infoPanelContentRenderer.has("sourceEndpoint")) {
        final String metaInfoLinkUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
                infoPanelContentRenderer.getObject("sourceEndpoint"));
        try {
            metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
                    metaInfoLinkUrl))));
        } catch (final NullPointerException | MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
                infoPanelContentRenderer.getObject("inlineSource"));
        if (isNullOrEmpty(metaInfoLinkText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        metaInfo.addUrlText(metaInfoLinkText);
    }

    return metaInfo;
}

/**
 * Builds a {@link MetaInfo} from a {@code clarificationRenderer}.
 *
 * <p>{@code contentTitle} and {@code text} are mandatory. The link of the
 * {@code actionButton} and the secondary link ({@code secondaryEndpoint} together with
 * {@code secondarySource}) are added when present.</p>
 *
 * @param clarificationRenderer the renderer to read from
 * @return the populated meta info
 * @throws ParsingException if the title, the text, a URL or the action button's link text
 *                          could not be extracted
 */
@Nonnull
private static MetaInfo getClarificationRendererContent(
        @Nonnull final JsonObject clarificationRenderer) throws ParsingException {
    final MetaInfo info = new MetaInfo();

    final JsonObject titleObject = clarificationRenderer.getObject("contentTitle");
    final String heading = YoutubeParsingHelper.getTextFromObject(titleObject);
    final JsonObject bodyObject = clarificationRenderer.getObject("text");
    final String body = YoutubeParsingHelper.getTextFromObject(bodyObject);
    if (heading == null || body == null) {
        throw new ParsingException("Could not extract clarification renderer content");
    }
    info.setTitle(heading);
    info.setContent(new Description(body, Description.PLAIN_TEXT));

    if (clarificationRenderer.has("actionButton")) {
        final JsonObject button = clarificationRenderer
                .getObject("actionButton")
                .getObject("buttonRenderer");
        try {
            final String buttonUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
                    button.getObject("command"));
            final String resolvedUrl =
                    Objects.requireNonNull(extractCachedUrlIfNeeded(buttonUrl));
            info.addUrl(new URL(resolvedUrl));
        } catch (final NullPointerException | MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String buttonLabel = YoutubeParsingHelper.getTextFromObject(
                button.getObject("text"));
        if (isNullOrEmpty(buttonLabel)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        info.addUrlText(buttonLabel);
    }

    final boolean hasSecondaryLink = clarificationRenderer.has("secondaryEndpoint")
            && clarificationRenderer.has("secondarySource");
    if (hasSecondaryLink) {
        final String secondaryUrl = getUrlFromNavigationEndpoint(
                clarificationRenderer.getObject("secondaryEndpoint"));
        // Ignore Google URLs, because those point to a Google search about "Covid-19"
        if (secondaryUrl != null && !isGoogleURL(secondaryUrl)) {
            try {
                info.addUrl(new URL(secondaryUrl));
                final String sourceLabel = getTextFromObject(
                        clarificationRenderer.getObject("secondarySource"));
                if (sourceLabel == null) {
                    // no readable label: show the URL itself
                    info.addUrlText(secondaryUrl);
                } else {
                    info.addUrlText(sourceLabel);
                }
            } catch (final MalformedURLException e) {
                throw new ParsingException("Could not get metadata info secondary URL", e);
            }
        }
    }

    return info;
}

/**
* Sometimes, YouTube provides URLs which use Google's cache. They look like
* {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import java.io.IOException;
Expand Down Expand Up @@ -151,7 +151,7 @@ public boolean isCorrectedSearch() {
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
return YoutubeParsingHelper.getMetaInfo(
return YoutubeMetaInfoHelper.getMetaInfo(
initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents")
Expand Down
Loading