/*
 * Decompiled with CFR 0.152.
 */
package com.lucidworks.connector.plugins.web.fetcher.http;

import com.google.inject.Inject;
import com.lucidworks.connector.plugins.web.config.CrawlIdConfig;
import com.lucidworks.connector.plugins.web.config.WebConfig;
import com.lucidworks.connector.plugins.web.fetcher.http.UserAgent;
import com.lucidworks.connector.plugins.web.util.HttpClientUtil;
import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.SimpleRobotRules;
import crawlercommons.robots.SimpleRobotRulesParser;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Fetches, caches and applies robots.txt rules on behalf of the web fetcher.
 *
 * <p>Rules are cached per origin, keyed by {@code protocol://host:port}. An origin whose
 * robots.txt declares a positive crawl-delay additionally gets a single-token
 * {@link DelayQueue} that {@link #delay} uses to space out requests to that origin.
 *
 * <p>Both caches are {@link ConcurrentMap}s and fetching is done in a {@code synchronized}
 * method, so at most one robots.txt fetch is in progress per instance at any time.
 * Entries are never evicted: whatever rule set is decided on the first lookup of an origin
 * (including the DENY-ALL fallbacks used on failure) stays in effect for the lifetime of
 * this instance.
 */
public class RobotsTxt {
    private static final Logger LOG = LoggerFactory.getLogger(RobotsTxt.class);
    private final UserAgent userAgent;
    private final String addedHeaders;
    /** Parsed (or fallback) rules per origin key. */
    private final ConcurrentMap<String, BaseRobotRules> robotRules;
    /** Crawl-delay token queues; only present for origins with a positive crawl-delay. */
    private final ConcurrentMap<String, DelayQueue<RobotsDelayed>> delayedHosts;

    /**
     * Builds the user agent and extra request headers from the connector configuration.
     *
     * @param config connector configuration supplying the crawl identity and the headers
     *               to add to the robots.txt request
     */
    @Inject
    public RobotsTxt(WebConfig config) {
        CrawlIdConfig idConfig = config.properties().crawlIdConfig();
        this.userAgent = new UserAgent(idConfig.userAgentName(), idConfig.userAgentEmail(), idConfig.userAgentWebAddr());
        this.robotRules = new ConcurrentHashMap<>();
        this.delayedHosts = new ConcurrentHashMap<>();
        this.addedHeaders = config.properties().linkDiscoveryConfig().addedHeaders();
    }

    /**
     * Tells whether the cached robots.txt rules of the URL's origin permit fetching it.
     * The robots.txt is fetched first if the origin has not been seen yet.
     *
     * @param absURL    absolute URL to check
     * @param client    HTTP client used if robots.txt has to be fetched
     * @param executor  executor used for the timed HTTP request and entity read
     * @param timeoutMS timeout in milliseconds passed to the HTTP helpers
     * @return {@code true} if the rules allow the URL
     * @throws MalformedURLException if {@code absURL} cannot be parsed as a URL
     */
    public boolean isAllowed(String absURL, HttpClient client, ExecutorService executor, int timeoutMS) throws MalformedURLException {
        URL url = new URL(absURL);
        return this.rulesFor(url, client, executor, timeoutMS).isAllowed(url.toExternalForm());
    }

    /**
     * Blocks until the crawl-delay of the URL's origin has elapsed since the previous
     * permitted request, then starts a new delay period. Returns immediately for origins
     * without a positive crawl-delay. The robots.txt is fetched first if the origin has
     * not been seen yet.
     *
     * @param absURL    absolute URL about to be fetched
     * @param client    HTTP client used if robots.txt has to be fetched
     * @param executor  executor used for the timed HTTP request and entity read
     * @param timeoutMS timeout in milliseconds passed to the HTTP helpers
     * @throws InterruptedException  if the thread is interrupted while waiting
     * @throws MalformedURLException if {@code absURL} cannot be parsed as a URL
     */
    public void delay(String absURL, HttpClient client, ExecutorService executor, int timeoutMS) throws InterruptedException, MalformedURLException {
        URL url = new URL(absURL);
        String key = RobotsTxt.getKey(url);
        BaseRobotRules rules = this.rulesFor(url, client, executor, timeoutMS);
        DelayQueue<RobotsDelayed> queue = this.delayedHosts.get(key);
        if (queue == null) {
            return;
        }
        long delay = rules.getCrawlDelay();
        LOG.info("Waiting {}ms per the crawl-delay in robots.txt for {}...", delay, key);
        // The queue holds a single token per origin: taking it blocks until the current
        // delay period is over, and re-adding it starts the next period.
        queue.take();
        queue.add(new RobotsDelayed(delay));
    }

    /** Returns the cached rules for the URL's origin, fetching robots.txt on first use. */
    private BaseRobotRules rulesFor(URL url, HttpClient client, ExecutorService executor, int timeoutMS) {
        String key = RobotsTxt.getKey(url);
        BaseRobotRules rules = this.robotRules.get(key);
        if (rules == null) {
            this.addRobotsHost(url, client, executor, timeoutMS);
            rules = this.robotRules.get(key);
        }
        return rules;
    }

    /**
     * Fetches and caches the robots.txt rules for the origin of {@code absURL}, unless
     * another thread has already done so.
     *
     * <p>Outcomes:
     * <ul>
     *   <li>HTTP 404: ALLOW-ALL;</li>
     *   <li>any other status outside 2xx/304: DENY-ALL;</li>
     *   <li>a redirected response whose MIME type is known and not {@code text/plain}: DENY-ALL;</li>
     *   <li>URL construction failure, interruption, execution failure or timeout: DENY-ALL;</li>
     *   <li>otherwise the body is parsed for this crawler's agent name, and a delay queue
     *       is registered when the parsed crawl-delay is positive.</li>
     * </ul>
     */
    private synchronized void addRobotsHost(URL absURL, HttpClient client, ExecutorService executor, int timeoutMS) {
        String key = RobotsTxt.getKey(absURL);
        if (this.robotRules.containsKey(key)) {
            // Another thread populated the entry while we waited for the monitor.
            return;
        }
        HttpGet req = new HttpGet();
        HttpClientUtil.putAddedHeadersIntoHttpGet(req, this.addedHeaders);
        try {
            LOG.info("Fetching robots.txt for {}...", key);
            req.setURI(new URL(key + "/robots.txt").toURI());
            HttpClientContext context = new HttpClientContext();
            HttpResponse resp = HttpClientUtil.request(client, (HttpUriRequest) req, (HttpContext) context, executor, timeoutMS);
            int status = resp.getStatusLine().getStatusCode();
            boolean success = (status >= 200 && status < 300) || status == 304;
            if (!success) {
                if (status == 404) {
                    LOG.warn("{} has no robots.txt, status={}, defaulting to ALLOW-ALL", key, status);
                    this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_ALL);
                } else {
                    LOG.warn("Failed to retrieve robots.txt for {}, status={}, defaulting to DENY-ALL", key, status);
                    this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
                }
                return;
            }
            String mimeType = (String) HttpClientUtil.parseResponseMetadata((HttpUriRequest) req, resp, (HttpContext) context).get("mimeType");
            Integer redirects = (Integer) context.getAttribute("redirect-count");
            boolean redirected = null != redirects && redirects > 0;
            if (null != mimeType && !mimeType.equals("text/plain") && redirected) {
                LOG.warn("Redirected to a non-parseable robots.txt MIME-type for {}: '{}'; defaulting to DENY-ALL", key, mimeType);
                this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
                return;
            }
            byte[] robotsTxt = HttpClientUtil.readEntity((HttpUriRequest) req, resp, executor, timeoutMS);
            SimpleRobotRules brr = new SimpleRobotRulesParser().parseContent(req.getURI().toString(), robotsTxt, mimeType, this.userAgent.getAgentName());
            long crawlDelay = brr.getCrawlDelay();
            if (crawlDelay > 0L) {
                // Register the delay queue BEFORE the rules: readers treat the presence of
                // rules as "origin is initialised", so the queue must already be visible
                // by then or a concurrent delay() call would skip the crawl-delay.
                DelayQueue<RobotsDelayed> dq = new DelayQueue<>();
                dq.add(new RobotsDelayed(crawlDelay));
                this.delayedHosts.put(key, dq);
            }
            this.robotRules.put(key, brr);
        }
        catch (MalformedURLException | URISyntaxException e) {
            LOG.warn("Failed to formulate a robots.txt URL for {}, defaulting to DENY-ALL", key, e);
            this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
        }
        catch (InterruptedException e) {
            // Restore the interrupt status so callers further up the stack (and the
            // executor that owns this thread) can still observe the cancellation.
            Thread.currentThread().interrupt();
            LOG.warn("Failed to fetch robots.txt for {}, defaulting to DENY-ALL", key, e);
            this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
        }
        catch (ExecutionException | TimeoutException e) {
            LOG.warn("Failed to fetch robots.txt for {}, defaulting to DENY-ALL", key, e);
            this.cacheFixedRules(key, SimpleRobotRules.RobotRulesMode.ALLOW_NONE);
        }
        finally {
            req.releaseConnection();
        }
    }

    /** Caches a blanket rule set (allow-all or allow-none) for the given origin key. */
    private void cacheFixedRules(String key, SimpleRobotRules.RobotRulesMode mode) {
        this.robotRules.put(key, new SimpleRobotRules(mode));
    }

    /**
     * Builds the cache key {@code protocol://host:port} for a URL, substituting the
     * protocol's default port when the URL does not specify one.
     */
    private static String getKey(URL url) {
        int port = url.getPort();
        if (port == -1) {
            port = url.getDefaultPort();
        }
        return url.getProtocol() + "://" + url.getHost() + ":" + port;
    }

    /**
     * Delay-queue token that becomes available a fixed number of milliseconds after it
     * was created.
     */
    private static class RobotsDelayed
    implements Delayed {
        /** Wall-clock time, in epoch milliseconds, at which this token expires. */
        private final long start;

        /**
         * @param delay milliseconds from now until the token expires
         */
        public RobotsDelayed(long delay) {
            this.start = System.currentTimeMillis() + delay;
        }

        @Override
        public long getDelay(TimeUnit unit) {
            return unit.convert(this.start - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
        }

        @Override
        public int compareTo(Delayed o) {
            if (o instanceof RobotsDelayed) {
                return Long.compare(this.start, ((RobotsDelayed) o).start);
            }
            // Foreign Delayed implementation: order by remaining delay instead of
            // failing with a ClassCastException.
            return Long.compare(this.getDelay(TimeUnit.MILLISECONDS), o.getDelay(TimeUnit.MILLISECONDS));
        }
    }
}

