package com.zqc.news.html;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

/* loaded from: classes.dex */
/**
 * Downloads a news page with Jsoup and extracts the article content and a
 * representative image URL from it.
 *
 * <p>Both selectors may hold several alternative CSS selectors separated by
 * {@code |}; the alternatives are tried in order and the first one that
 * yields a result wins.
 *
 * <p>Call {@link #init()} once before reading {@link #getContent()} or
 * {@link #getImageUrl()}; both return {@code null} until then.
 */
public class NewsPageScraper {
    /** Connection/read timeout handed to Jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 20000;
    /** Separator between alternative selectors inside one selector string. */
    private static final String SELECTOR_DELIMITER = "|";
    private static final String USER_AGENT = "Mozilla/5.0 (X11; Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0";
    /** Pages whose URL starts with this prefix are parsed with the Google-specific rules. */
    private static final String GOOGLE_URL_PREFIX = "http://www.google.com";
    /** Pages whose URL starts with this prefix are fetched through the Google mobile view. */
    private static final String FEEDSPORTAL_URL_PREFIX = "http://straitstimes.com.feedsportal.com";

    private String content;
    private final String contentSelector;
    private Document document;
    private final String imagePrefix;
    private final String imageSelector;
    private String imageUrl;
    private String pageUrl;

    /**
     * @param str    URL of the page to scrape
     * @param strArr selectors: index 0 is the content selector, index 1 the
     *               image selector; a missing or {@code null} entry simply
     *               disables the corresponding extraction
     * @param str2   prefix prepended to image sources that are not absolute
     *               http(s) URLs; may be {@code null}
     */
    public NewsPageScraper(String str, String[] strArr, String str2) {
        this.pageUrl = str;
        // Tolerate short or absent selector arrays instead of failing in the constructor.
        this.contentSelector = (strArr != null && strArr.length > 0) ? strArr[0] : null;
        this.imageSelector = (strArr != null && strArr.length > 1) ? strArr[1] : null;
        this.imagePrefix = str2;
    }

    /**
     * Builds the {@code http://www.google.com/gwt/x} URL that wraps the given page URL.
     *
     * <p>NOTE(review): {@code str} is placed into the query as-is, so a
     * {@code &} inside it would be read as a parameter separator by the
     * receiving server - confirm whether source URLs can carry a query string.
     */
    private String getMobileOptimizedUrl(String str) throws URISyntaxException, MalformedURLException {
        String query = "u=" + str + "&noimg=1";
        return new URI("http", "www.google.com", "/gwt/x", query, null).toASCIIString();
    }

    /** Returns true when the page being parsed is served from the Google URL prefix. */
    private boolean isGooglePage() {
        return this.pageUrl.startsWith(GOOGLE_URL_PREFIX);
    }

    /** Returns the content produced by the first matching content selector, or null. */
    private String readContent() {
        if (this.document == null || this.contentSelector == null) {
            return null;
        }
        StringTokenizer selectors = new StringTokenizer(this.contentSelector, SELECTOR_DELIMITER);
        while (selectors.hasMoreTokens()) {
            String text = retrieveContentText(selectors.nextToken());
            if (text != null) {
                return text;
            }
        }
        return null;
    }

    /** Returns the image URL produced by the first matching image selector, or null. */
    private String readImageUrl() {
        if (this.document == null || this.imageSelector == null) {
            return null;
        }
        StringTokenizer selectors = new StringTokenizer(this.imageSelector, SELECTOR_DELIMITER);
        while (selectors.hasMoreTokens()) {
            String url = retrieveImageUrl(selectors.nextToken());
            if (url != null) {
                return url;
            }
        }
        return null;
    }

    /**
     * Collects the content of every element matched by {@code str}.
     *
     * @return the collected markup, or null when nothing was selected or collected
     */
    private String retrieveContentText(String str) {
        Elements matches = this.document.select(str);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        boolean googlePage = isGooglePage();
        StringBuilder html = new StringBuilder();
        for (Element element : matches) {
            if (googlePage) {
                appendDirectText(element, html);
            } else if (!hasExcludedChild(element)) {
                // Appending the element uses its toString(), i.e. its HTML rendering.
                html.append(element);
            }
        }
        return html.length() == 0 ? null : html.toString();
    }

    /**
     * Appends each direct text node of {@code element}, followed by two line
     * breaks, and stops at the first "NEXT STORY:" / "Most Popular" marker.
     */
    private void appendDirectText(Element element, StringBuilder out) {
        for (Node node : element.childNodes()) {
            if (!(node instanceof TextNode)) {
                continue;
            }
            String text = ((TextNode) node).getWholeText().trim();
            if (text.equals("NEXT STORY:") || text.equals("Most Popular")) {
                // Everything after the marker is ignored.
                return;
            }
            out.append(text.replace("\n", "").replace("\r", ""));
            out.append("<br />");
            out.append("<br />");
        }
    }

    /** Returns true when a direct child of {@code element} is a script, noscript or img tag. */
    private boolean hasExcludedChild(Element element) {
        for (Element child : element.children()) {
            String name = child.tag().getName();
            if (name.equals("script") || name.equals("noscript") || name.equals("img")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves the image URL for the elements matched by {@code str}.
     *
     * @return the image URL, or null when no selected element has a {@code src}
     *         attribute or the image is rejected
     */
    private String retrieveImageUrl(String str) {
        Elements matches = this.document.select(str);
        // When several selected elements carry a src attribute, the last one wins.
        Attributes attributes = null;
        for (Element element : matches) {
            if (element.hasAttr("src")) {
                attributes = element.attributes();
            }
        }
        if (attributes == null) {
            return null;
        }
        // On Google pages a 640x40 image is rejected (presumably a banner - confirm).
        if (isGooglePage()
                && "640".equals(attributes.get("width"))
                && "40".equals(attributes.get("height"))) {
            return null;
        }
        String src = attributes.get("src");
        if (src == null) {
            return null;
        }
        if (isAbsoluteHttpUrl(src) || this.imagePrefix == null) {
            return src;
        }
        return this.imagePrefix + src;
    }

    /** Returns true for sources that already start with an http or https scheme. */
    private static boolean isAbsoluteHttpUrl(String url) {
        return url.startsWith("http://") || url.startsWith("https://");
    }

    /** @return the extracted content, or null if {@link #init()} has not run or nothing matched */
    public String getContent() {
        return this.content;
    }

    /** @return the extracted image URL, or null if {@link #init()} has not run or nothing matched */
    public String getImageUrl() {
        return this.imageUrl;
    }

    /**
     * Fetches the page and extracts content and image URL.
     *
     * <p>URLs under the feedsportal prefix are first rewritten to the Google
     * mobile view, and the rewritten URL replaces the stored page URL.
     *
     * @throws IOException        if Jsoup fails to fetch the page
     * @throws URISyntaxException if the rewritten URL cannot be built
     */
    public void init() throws IOException, URISyntaxException {
        if (this.pageUrl.startsWith(FEEDSPORTAL_URL_PREFIX)) {
            this.pageUrl = getMobileOptimizedUrl(this.pageUrl);
        }
        this.document = Jsoup.connect(this.pageUrl)
                .ignoreContentType(true)
                .timeout(TIMEOUT_MILLIS)
                .userAgent(USER_AGENT)
                .get();
        this.content = readContent();
        this.imageUrl = readImageUrl();
    }
}
