/*
 * Decompiled with CFR 0.152.
 */
package com.lucidworks.connector.plugins.web.fetcher.http.client;

import com.google.inject.Inject;
import com.lucidworks.connector.plugins.web.WebConnectorException;
import com.lucidworks.connector.plugins.web.config.WebConfig;
import com.lucidworks.connector.plugins.web.fetcher.JsEvaluator;
import com.lucidworks.connector.plugins.web.fetcher.http.RobotsTxt;
import com.lucidworks.connector.plugins.web.fetcher.http.client.LinkValidator;
import com.lucidworks.connector.plugins.web.fetcher.http.client.RequestExecutor;
import com.lucidworks.connector.plugins.web.fetcher.http.client.WebClientResponse;
import com.lucidworks.connector.plugins.web.fetcher.http.login.CredentialsWrapper;
import com.lucidworks.connector.plugins.web.fetcher.http.login.WebCookieStore;
import com.lucidworks.connector.plugins.web.fetcher.http.tokenauth.AccessTokenRequester;
import com.lucidworks.connector.plugins.web.util.DateUtil;
import com.lucidworks.connector.plugins.web.util.HttpClientUtil;
import com.lucidworks.fusion.connector.plugin.api.fetcher.type.content.FetchInput;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * HTTP fetcher used by the web connector.
 *
 * <p>For each requested URL this class prepares an {@link HttpGet} (logins, cookies,
 * conditional headers, configured extra headers), optionally applies the robots.txt delay,
 * executes the request through {@link HttpClientUtil}, and converts the response into a
 * {@link WebClientResponse}. Responses whose MIME type is HTML or one of
 * {@link #XML_MIMETYPES} are parsed with jsoup and inspected for canonical-link and
 * meta-refresh redirects; every other MIME type is returned as a raw byte stream.
 */
public class WebClient {
    private static final Logger logger = LoggerFactory.getLogger(WebClient.class);

    /** Per-thread date formatter used for the {@code If-Modified-Since} header. */
    private static final ThreadLocal<SimpleDateFormat> HTTP_DATE_FORMATTER = new ThreadLocal<SimpleDateFormat>(){

        @Override
        protected SimpleDateFormat initialValue() {
            return DateUtil.getRFC2822DateFormatter();
        }
    };

    /**
     * Per-thread matcher for the {@code content} attribute of a meta refresh tag.
     * Group 1 holds the target URL; it is {@code null} when the content is only a delay.
     */
    private static final ThreadLocal<Matcher> META_EQUIV_REFRESH_REGEX = new ThreadLocal<Matcher>(){

        @Override
        protected Matcher initialValue() {
            return Pattern.compile("(?si)\\d+;\\s*url=(.+)|\\d+", Pattern.CASE_INSENSITIVE).matcher("");
        }
    };

    /** MIME types that are parsed with jsoup's XML parser instead of the HTML parser. */
    public static final Set<String> XML_MIMETYPES = new HashSet<String>(){
        {
            this.add("text/xml");
            this.add("application/xml");
            this.add("application/xhtml+xml");
            this.add("application/atom+xml");
            this.add("application/rss+xml");
            this.add("application/rdf+xml");
        }
    };

    /** MIME types whose body is parsed into a {@link Document}. */
    private static final Set<String> PARSEABLE_MIME_TYPES = new HashSet<String>(){
        {
            this.add("text/html");
            this.addAll(XML_MIMETYPES);
        }
    };
    private static final String META_EQUIV_REFRESH_SELECTOR = "head meta[http-equiv=refresh]";

    /** Metadata key under which the content length of a discarded (oversized) document is stored. */
    public static final String LENGTH_L = "length_l";

    private final Boolean jsEnabledAuth;
    private final Boolean maintainCookies;
    private final WebConfig config;
    private final CloseableHttpClient httpClient;
    private final CredentialsWrapper credentialsWrapper;
    private final WebCookieStore sharedCookieStore;
    private final RobotsTxt robotsTXT;
    private final Integer timeout;
    private final Boolean infiniteTimeout;
    private final LinkValidator linkValidator;
    private final Boolean followCanonicalTags;
    private final Integer canonicalTagsRedirectLimit;
    private final Boolean diagnosticMode;
    private final Boolean respectMetaEquivRedirects;
    private final String defaultCharSet;
    private final RequestExecutor exec;
    private final AccessTokenRequester tokenRequester;
    private final JsEvaluator jsEvaluator;

    /**
     * Creates a client and caches the configuration values it consults on every fetch.
     *
     * @param config             connector configuration
     * @param httpClient         HTTP client used for all requests; closed by {@link #close()}
     * @param credentialsWrapper source of form / smart-form / HTTP login settings
     * @param tokenRequester     optional bearer-token provider; {@code null} disables token auth
     * @param robotsTXT          robots.txt helper used to apply crawl delays
     * @param linkValidator      validator applied to every redirect target
     * @param executor           executor wrapper handed to {@link HttpClientUtil}; shut down by {@link #close()}
     * @param jsEvaluator        JavaScript evaluator; closed by {@link #close()}
     * @param sharedCookieStore  cookie store shared with the login helpers
     */
    @Inject
    public WebClient(WebConfig config, CloseableHttpClient httpClient, CredentialsWrapper credentialsWrapper, @Nullable AccessTokenRequester tokenRequester, RobotsTxt robotsTXT, LinkValidator linkValidator, RequestExecutor executor, JsEvaluator jsEvaluator, WebCookieStore sharedCookieStore) {
        this.config = config;
        this.httpClient = httpClient;
        this.credentialsWrapper = credentialsWrapper;
        this.tokenRequester = tokenRequester;
        this.robotsTXT = robotsTXT;
        this.jsEnabledAuth = config.properties().javascriptEvaluationConfig().jsEnabledAuth();
        this.maintainCookies = config.properties().crawlAuthenticationConfig().maintainCookies();
        this.sharedCookieStore = sharedCookieStore;
        this.infiniteTimeout = config.properties().crawlPerformanceConfig().infiniteTimeout();
        // A timeout of 0 is passed on when the infinite-timeout option is enabled.
        this.timeout = this.infiniteTimeout.booleanValue() ? 0 : config.properties().crawlPerformanceConfig().timeoutMS();
        this.linkValidator = linkValidator;
        this.followCanonicalTags = config.properties().dedupeConfig().followCanonicalTags();
        this.canonicalTagsRedirectLimit = config.properties().dedupeConfig().canonicalTagsRedirectLimit();
        this.diagnosticMode = config.diagnosticLogging();
        this.respectMetaEquivRedirects = config.properties().linkDiscoveryConfig().respectMetaEquivRedirects();
        this.defaultCharSet = config.properties().documentParsingConfig().defaultCharSet();
        this.exec = executor;
        this.jsEvaluator = jsEvaluator;
    }

    /**
     * Fetches the URL identified by {@code input}, using the {@code "signature"} and
     * {@code "lastModified"} metadata entries (when present) for a conditional request.
     *
     * @param input fetch request; its id is the URL to retrieve
     * @return the response for the URL, or for the page it redirected to
     */
    public WebClientResponse fetch(FetchInput input) {
        String signature = input.getMetadata().getOrDefault("signature", "");
        Long lastModified = input.getMetadata().getOrDefault("lastModified", 0L);
        return this.fetchWithRedirects(input.getId(), lastModified, signature, 0);
    }

    /**
     * Fetches {@code origID} unconditionally (no {@code If-Modified-Since} / {@code If-None-Match}).
     *
     * @param origID          URL to fetch
     * @param redirectCounter number of canonical / meta-refresh redirects already followed
     * @return the response for the URL, or for the page it redirected to
     */
    public WebClientResponse fetchWithRedirects(String origID, int redirectCounter) {
        return this.fetchWithRedirects(origID, 0L, null, redirectCounter);
    }

    /**
     * Fetches {@code origID} and follows canonical-link and meta-refresh redirects found in
     * the returned document.
     *
     * @param origID          URL to fetch
     * @param lastModified    epoch milliseconds for {@code If-Modified-Since}; ignored when not positive
     * @param signature       value for {@code If-None-Match}; ignored when {@code null} or empty
     * @param redirectCounter number of canonical / meta-refresh redirects already followed
     * @return a response carrying only the status code when the status is outside 2xx,
     *         otherwise the processed response
     * @throws WebConnectorException if the request fails, times out or is interrupted
     */
    public WebClientResponse fetchWithRedirects(String origID, long lastModified, String signature, int redirectCounter) {
        HttpGet req = new HttpGet();
        try {
            HttpContext context = this.setupRequest(origID, lastModified, signature, req);
            this.applyRobotsDelay(origID);
            HttpResponse resp = this.runRequest(req, context);
            int status = resp.getStatusLine().getStatusCode();
            // Only 2xx is a success; 300 (Multiple Choices) belongs to the redirection class.
            if (status < 200 || status >= 300) {
                return new WebClientResponse(status, null, null);
            }
            return this.processSuccessfulResponse(req, resp, context, origID, redirectCounter);
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe the interruption.
            Thread.currentThread().interrupt();
            throw new WebConnectorException("Could not fetch " + origID, e);
        }
        catch (IOException | URISyntaxException | ExecutionException | TimeoutException e) {
            throw new WebConnectorException("Could not fetch " + origID, e);
        }
        finally {
            req.releaseConnection();
        }
    }

    /**
     * Decodes {@code rawContent} with {@code parseCharSet} and parses it into a document.
     *
     * @throws UnsupportedEncodingException if {@code parseCharSet} is not a supported charset name
     */
    @Nonnull
    private Document getDocument(String fetchedID, String mimeType, String parseCharSet, byte[] rawContent) throws UnsupportedEncodingException {
        return this.parseDocument(fetchedID, mimeType, parseCharSet, new String(rawContent, parseCharSet));
    }

    /**
     * Parses already-decoded markup. XML MIME types use the XML parser and xhtml escaping;
     * everything else uses the HTML parser and extended escaping.
     */
    @Nonnull
    private Document parseDocument(String fetchedID, String mimeType, String parseCharSet, String markup) {
        boolean isXml = XML_MIMETYPES.contains(mimeType);
        Parser parser = isXml ? Parser.xmlParser() : Parser.htmlParser();
        Document document = Jsoup.parse(markup, fetchedID, parser);
        document.outputSettings().charset(parseCharSet);
        document.setBaseUri(fetchedID);
        document.outputSettings().escapeMode(isXml ? Entities.EscapeMode.xhtml : Entities.EscapeMode.extended);
        return document;
    }

    private Boolean getObeyRobotsDelay() {
        return this.config.properties().crawlPerformanceConfig().obeyRobotsDelay();
    }

    /**
     * Returns the absolute target of the first {@code <meta http-equiv="refresh">} in the
     * document head, or {@code null} when there is none or when it only specifies a delay.
     */
    private String getMetaEquivRedirect(Document doc) {
        Elements elements = doc.select(META_EQUIV_REFRESH_SELECTOR);
        if (elements.isEmpty()) {
            return null;
        }
        Element e = elements.first();
        Matcher m = META_EQUIV_REFRESH_REGEX.get().reset(e.attr("content"));
        if (!m.find()) {
            return null;
        }
        String target = m.group(1);
        if (target == null) {
            // The delay-only alternative matched (e.g. content="30"): a page reload, not a redirect.
            return null;
        }
        // Stash the target in a scratch attribute so jsoup resolves it against the base URI.
        String redirect = e.attr("__url__", target).absUrl("__url__");
        if (this.diagnosticMode.booleanValue()) {
            logger.info("Meta http-equiv redirect: [ {} -> {} ]", doc.baseUri(), redirect);
        }
        return redirect;
    }

    /** Applies the robots.txt delay for {@code origID} when the obey-robots-delay option is enabled. */
    private void applyRobotsDelay(String origID) throws InterruptedException, MalformedURLException {
        if (this.getObeyRobotsDelay().booleanValue()) {
            this.robotsTXT.delay(origID, (HttpClient)this.httpClient, this.exec.get(), this.timeout);
        }
    }

    /**
     * Turns a 2xx response into a {@link WebClientResponse}: validates the final URL,
     * enforces the size limit, then dispatches on the MIME type.
     */
    private WebClientResponse processSuccessfulResponse(HttpGet req, HttpResponse resp, HttpContext context, String origID, int redirectCounter) throws IOException, InterruptedException, ExecutionException, TimeoutException, URISyntaxException {
        Map<String, Object> respMetadata = HttpClientUtil.parseResponseMetadata((HttpUriRequest)req, resp, context);
        ResponseMetadata metadata = this.extractResponseMetadata(respMetadata, resp.getStatusLine().getStatusCode());
        String validatedFetchedID = this.validateRedirection(metadata.fetchedID, origID, metadata.parseCharSet);
        WebClientResponse sizeCheckResult = this.checkContentSize(metadata, respMetadata);
        if (sizeCheckResult != null) {
            return sizeCheckResult;
        }
        if (PARSEABLE_MIME_TYPES.contains(metadata.mimeType)) {
            return this.processParseableContent(req, resp, validatedFetchedID, metadata, redirectCounter, respMetadata);
        }
        return this.processNonParseableContent(req, resp, metadata.status, respMetadata);
    }

    /** Reads the fields this class needs out of the parsed response metadata, applying configured defaults. */
    private ResponseMetadata extractResponseMetadata(Map<String, Object> respMetadata, int status) {
        String fetchedID = (String)respMetadata.get("fetchedID");
        String mimeType = respMetadata.containsKey("mimeType") ? (String)respMetadata.get("mimeType") : this.config.properties().documentParsingConfig().defaultMIMEType();
        String charSet = (String)respMetadata.getOrDefault("charSet", this.defaultCharSet);
        String parseCharSet = this.getParseCharSet(charSet);
        Long contentLength = respMetadata.containsKey("contentLength") ? (Long)respMetadata.get("contentLength") : 0L;
        return new ResponseMetadata(fetchedID, mimeType, parseCharSet, contentLength, status);
    }

    /**
     * Validates the final URL when the request ended up somewhere other than {@code origID}.
     * A difference of only a trailing slash is not treated as a redirect.
     *
     * @return the validated URL, or {@code fetchedID} unchanged when no redirect happened
     * @throws IllegalArgumentException if the redirect target is rejected by the link validator
     */
    private String validateRedirection(String fetchedID, String origID, String parseCharSet) {
        if (fetchedID.equals(origID) || this.isUrlNormalization(fetchedID, origID)) {
            return fetchedID;
        }
        String nFetchedID = this.linkValidator.validate(fetchedID, parseCharSet);
        if (null == nFetchedID) {
            throw new IllegalArgumentException("Redirect to invalid URL: " + fetchedID);
        }
        return nFetchedID;
    }

    /** Returns {@code true} when the two URLs differ only by a single trailing slash. */
    private boolean isUrlNormalization(String fetchedID, String origID) {
        return (origID + "/").equals(fetchedID) || origID.equals(fetchedID + "/");
    }

    /**
     * Enforces the configured maximum document size.
     *
     * @return a content-less response (with {@link #LENGTH_L} recorded in the metadata) when the
     *         reported content length exceeds the limit, otherwise {@code null}
     */
    private WebClientResponse checkContentSize(ResponseMetadata metadata, Map<String, Object> respMetadata) {
        int maxSizeBytes = this.config.properties().limitDocumentsConfig().maxSizeBytes();
        if (maxSizeBytes > 0 && metadata.contentLength > (long)maxSizeBytes) {
            respMetadata.put(LENGTH_L, metadata.contentLength);
            if (this.diagnosticMode.booleanValue()) {
                logger.info("Discarded Content due maxSizeBytes={}", maxSizeBytes);
            }
            return new WebClientResponse(metadata.status, respMetadata, null);
        }
        return null;
    }

    /**
     * Parses an HTML/XML body, follows in-document redirects and, when JavaScript crawling is
     * enabled, replaces the document with the JavaScript-evaluated page source.
     */
    private WebClientResponse processParseableContent(HttpGet req, HttpResponse resp, String fetchedID, ResponseMetadata metadata, int redirectCounter, Map<String, Object> originalRespMetadata) throws IOException, InterruptedException, ExecutionException, TimeoutException {
        byte[] rawContent = HttpClientUtil.readEntity((HttpUriRequest)req, resp, this.exec.get(), this.timeout, this.infiniteTimeout);
        EntityUtils.consume(resp.getEntity());
        Document document = this.getDocument(fetchedID, metadata.mimeType, metadata.parseCharSet, rawContent);
        WebClientResponse redirectResult = this.handleRedirects(document, fetchedID, metadata.parseCharSet, redirectCounter);
        if (redirectResult != null) {
            return redirectResult;
        }
        if (this.config.properties().javascriptEvaluationConfig().crawlJS().booleanValue()) {
            String pageSource = this.jsEvaluator.evaluateJs(fetchedID);
            // Parse the string directly: encoding it with the platform default charset and then
            // decoding with parseCharSet would corrupt the text whenever the two differ.
            document = this.parseDocument(fetchedID, metadata.mimeType, metadata.parseCharSet, pageSource);
        }
        Map<String, Object> respMetadata = new HashMap<String, Object>();
        respMetadata.put("fetchedID", fetchedID);
        respMetadata.put("mimeType", metadata.mimeType);
        respMetadata.put("charSet", metadata.parseCharSet);
        if (originalRespMetadata.containsKey("etag")) {
            respMetadata.put("etag", originalRespMetadata.get("etag"));
        }
        if (originalRespMetadata.containsKey("lastModified")) {
            respMetadata.put("lastModified", originalRespMetadata.get("lastModified"));
        }
        return new WebClientResponse(metadata.status, respMetadata, document);
    }

    /**
     * Tries the canonical-link redirect first and the meta-refresh redirect second.
     *
     * @return the response of the followed redirect, or {@code null} when none was followed
     */
    private WebClientResponse handleRedirects(Document document, String fetchedID, String parseCharSet, int redirectCounter) {
        WebClientResponse canonicalRedirect = this.handleCanonicalRedirect(document, fetchedID, parseCharSet, redirectCounter);
        if (canonicalRedirect != null) {
            return canonicalRedirect;
        }
        return this.handleMetaEquivRedirect(document, parseCharSet, redirectCounter);
    }

    /**
     * Follows {@code <link rel="canonical">} when enabled, within the redirect limit, and when
     * it points at a different URL. Failures are logged and the current page is kept.
     *
     * @return the response of the canonical page, or {@code null} to keep the current page
     */
    private WebClientResponse handleCanonicalRedirect(Document document, String fetchedID, String parseCharSet, int redirectCounter) {
        // Check the cheap flags before running the selector over the document.
        if (!this.followCanonicalTags.booleanValue() || this.canonicalTagsRedirectLimit < redirectCounter) {
            return null;
        }
        Elements elements = document.select("link[rel=canonical]");
        if (elements.isEmpty()) {
            return null;
        }
        String newFetchedId = elements.first().absUrl("href");
        if (StringUtils.equalsIgnoreCase(fetchedID, newFetchedId)) {
            return null;
        }
        try {
            String nRedirect = this.linkValidator.validate(newFetchedId, parseCharSet);
            if (null == nRedirect) {
                throw new IllegalArgumentException("Invalid URL found from canonical tag: " + newFetchedId);
            }
            return this.fetchWithRedirects(nRedirect, redirectCounter + 1);
        }
        catch (Exception ex) {
            // Deliberate best effort: a broken canonical link must not fail the fetch of this page.
            logger.warn("Invalid canonical link or problem fetching the page. Using current page with url {} instead. Reason: {}", (Object)fetchedID, (Object)ex);
        }
        return null;
    }

    /**
     * Follows a meta-refresh redirect when enabled and within the redirect limit
     * (the canonical-tag limit is shared with meta-refresh redirects).
     *
     * @return the response of the redirect target, or {@code null} when no redirect applies
     * @throws IllegalArgumentException if the redirect target is rejected by the link validator
     */
    private WebClientResponse handleMetaEquivRedirect(Document document, String parseCharSet, int redirectCounter) {
        if (!this.respectMetaEquivRedirects.booleanValue() || this.canonicalTagsRedirectLimit < redirectCounter) {
            return null;
        }
        String redirect = this.getMetaEquivRedirect(document);
        if (null == redirect) {
            return null;
        }
        String nRedirect = this.linkValidator.validate(redirect, parseCharSet);
        if (null == nRedirect) {
            throw new IllegalArgumentException("Redirect to invalid URL: " + redirect);
        }
        return this.fetchWithRedirects(nRedirect, redirectCounter + 1);
    }

    /** Wraps the raw body (if any) of a non-HTML/XML response in a byte stream. */
    private WebClientResponse processNonParseableContent(HttpGet req, HttpResponse resp, int status, Map<String, Object> respMetadata) throws IOException, InterruptedException, ExecutionException, TimeoutException {
        ByteArrayInputStream content = null;
        if (resp.getEntity() != null) {
            content = new ByteArrayInputStream(HttpClientUtil.readEntity((HttpUriRequest)req, resp, this.exec.get(), this.timeout, this.infiniteTimeout));
        }
        return new WebClientResponse(status, respMetadata, content);
    }

    /** Returns {@code charSet} when the JVM supports it, otherwise the configured default charset. */
    private String getParseCharSet(String charSet) {
        return null != charSet && Charset.isSupported(charSet) ? charSet : this.defaultCharSet;
    }

    /**
     * Executes the request, adding a bearer token when a token requester is configured.
     * A 401 answer triggers exactly one retry with a freshly requested token.
     */
    private HttpResponse runRequest(HttpGet req, HttpContext context) throws TimeoutException, InterruptedException, ExecutionException {
        if (this.tokenRequester != null) {
            req.addHeader(this.getAuthorizationHeader(this.tokenRequester.getAccessToken()));
        }
        HttpResponse resp = HttpClientUtil.request((HttpClient)this.httpClient, (HttpUriRequest)req, context, this.exec.get(), this.timeout, this.infiniteTimeout);
        int status = resp.getStatusLine().getStatusCode();
        if (status == 401 && this.tokenRequester != null) {
            logger.warn("Request unauthorized - trying with new access token");
            // Drain the rejected response before re-executing; HttpClient expects a response
            // entity to be consumed so the underlying connection can be released.
            EntityUtils.consumeQuietly(resp.getEntity());
            req.setHeader(this.getAuthorizationHeader(this.tokenRequester.getNewAccessToken()));
            resp = HttpClientUtil.request((HttpClient)this.httpClient, (HttpUriRequest)req, context, this.exec.get(), this.timeout, this.infiniteTimeout);
        }
        return resp;
    }

    @Nonnull
    private BasicHeader getAuthorizationHeader(String accessToken) {
        return new BasicHeader("Authorization", "Bearer " + accessToken);
    }

    /**
     * Performs any configured logins, clears the cookie store when cookies are not to be kept,
     * and fills in the request URI and headers.
     *
     * @return a fresh HTTP context for the request
     */
    private HttpContext setupRequest(String origID, long lastModified, String signature, HttpGet req) throws URISyntaxException, IOException {
        BasicHttpContext context = new BasicHttpContext();
        if (this.credentialsWrapper.hasFormLogins()) {
            this.credentialsWrapper.getFormLogins().login((HttpClient)this.httpClient);
        }
        if (this.credentialsWrapper.hasSmartFormLogins()) {
            if (this.jsEnabledAuth.booleanValue()) {
                this.jsEvaluator.authenticate();
            } else {
                this.credentialsWrapper.getSmartFormLogins().login(this.httpClient, this.sharedCookieStore);
            }
        }
        boolean keepCookies = this.maintainCookies.booleanValue() || this.credentialsWrapper.hasFormLogins() || this.credentialsWrapper.hasHttpLogins() || this.credentialsWrapper.hasSmartFormLogins();
        if (!keepCookies) {
            this.sharedCookieStore.clear();
        }
        req.setURI(URIUtils.resolve(new URI(origID), ""));
        if (lastModified > 0L) {
            req.addHeader("If-Modified-Since", HTTP_DATE_FORMATTER.get().format(new Date(lastModified)));
        }
        // fetch() defaults a missing signature to "", so an empty value must not produce a header.
        if (StringUtils.isNotEmpty(signature)) {
            req.addHeader("If-None-Match", signature);
        }
        String addedHeaders = this.config.properties().linkDiscoveryConfig().addedHeaders();
        HttpClientUtil.putAddedHeadersIntoHttpGet(req, addedHeaders);
        return context;
    }

    /**
     * Closes the HTTP client and the JavaScript evaluator and shuts down the request executor.
     * Each step is attempted even if an earlier one throws.
     *
     * @throws IOException if closing a resource fails
     */
    public void close() throws IOException {
        try {
            this.httpClient.close();
        }
        finally {
            try {
                this.jsEvaluator.close();
            }
            finally {
                this.exec.shutdown();
            }
        }
    }

    /** Immutable holder for the response attributes used while processing a successful fetch. */
    private static class ResponseMetadata {
        final String fetchedID;
        final String mimeType;
        final String parseCharSet;
        final Long contentLength;
        final int status;

        ResponseMetadata(String fetchedID, String mimeType, String parseCharSet, Long contentLength, int status) {
            this.fetchedID = fetchedID;
            this.mimeType = mimeType;
            this.parseCharSet = parseCharSet;
            this.contentLength = contentLength;
            this.status = status;
        }
    }
}

