/*
 * Decompiled with CFR 0.152.
 */
package com.lucidworks.connector.plugins.web.fetcher;

import com.lucidworks.connector.plugins.web.WebConnectorException;
import com.lucidworks.connector.plugins.web.config.WebConfig;
import com.lucidworks.connector.plugins.web.fetcher.CandidateEmitter;
import com.lucidworks.connector.plugins.web.fetcher.Emitter;
import com.lucidworks.connector.plugins.web.fetcher.FetchDelay;
import com.lucidworks.connector.plugins.web.fetcher.LinkRewriter;
import com.lucidworks.connector.plugins.web.fetcher.WebContentProcessor;
import com.lucidworks.connector.plugins.web.fetcher.http.client.WebClient;
import com.lucidworks.connector.plugins.web.fetcher.http.client.WebClientResponse;
import com.lucidworks.connector.plugins.web.fetcher.recrawl.RecrawlRules;
import com.lucidworks.connector.plugins.web.fetcher.sitemap.SitemapLink;
import com.lucidworks.connector.plugins.web.fetcher.sitemap.SitemapLinks;
import com.lucidworks.connector.plugins.web.fetcher.web.DataToEmit;
import com.lucidworks.connector.plugins.web.fetcher.web.DeleteDocument;
import com.lucidworks.connector.plugins.web.fetcher.web.ErrorDocument;
import com.lucidworks.connector.plugins.web.fetcher.web.NewDocument;
import com.lucidworks.connector.plugins.web.fetcher.web.SkipDocument;
import com.lucidworks.fusion.connector.plugin.api.fetcher.context.FetcherContext;
import com.lucidworks.fusion.connector.plugin.api.fetcher.result.FetchResult;
import com.lucidworks.fusion.connector.plugin.api.fetcher.result.StartResult;
import com.lucidworks.fusion.connector.plugin.api.fetcher.result.StopResult;
import com.lucidworks.fusion.connector.plugin.api.fetcher.type.content.ContentFetcher;
import com.lucidworks.fusion.connector.plugin.api.fetcher.type.content.FetchInput;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Content fetcher of the web connector.
 *
 * <p>Every {@link #fetch} call handles a single input:
 * <ul>
 *   <li>an input without an id seeds the crawl with the configured start links, the
 *       {@link #CRAWL_SITEMAP} marker and the bulk start links;</li>
 *   <li>the {@link #CRAWL_SITEMAP} id is expanded into sitemap link candidates;</li>
 *   <li>any other id is downloaded through the {@link WebClient}, its outgoing links are emitted
 *       as candidates and the {@link WebContentProcessor} result decides what is emitted for the
 *       input itself.</li>
 * </ul>
 */
public class WebFetcher implements ContentFetcher {
    private static final Logger LOG = LoggerFactory.getLogger(WebFetcher.class);

    /** Synthetic candidate id that triggers emission of the sitemap links. */
    public static final String CRAWL_SITEMAP = "_crawl_sitemap";

    /** Metadata key read to decide whether an input is treated as a sitemap URL. */
    public static final String IS_SITEMAP_URL = "_is_sitemap_url";

    private final WebConfig config;
    private final WebClient client;
    private final Emitter emitter;
    private final CandidateEmitter candidateEmitter;
    private final WebContentProcessor processor;
    private final WebConfig.Properties props;
    private final RecrawlRules recrawlRules;
    private final FetchDelay delay;
    private final SitemapLinks sitemapLinks;
    private final LinkRewriter linkRewriter;

    /** Resolved once in the constructor; an unset (null) configuration value counts as {@code false}. */
    private final boolean sitemapIncrementalCrawl;

    /**
     * Starts as {@code true} and is cleared when an input without an id is fetched.
     * NOTE(review): plain mutable field; if one instance is shared between fetch threads the
     * update has no visibility guarantee - confirm the threading model.
     */
    private boolean isRecrawl = true;

    /**
     * Creates the fetcher from its injected collaborators.
     *
     * @param config           connector configuration; its {@code properties()} are cached
     * @param client           HTTP client used to download inputs, closed in {@link #stop}
     * @param emitter          emits content, skip, delete and error results
     * @param candidateEmitter emits newly discovered candidates
     * @param processor        extracts links and turns a response into the data to emit
     * @param recrawlRules     decides whether an input is skipped on a recrawl
     * @param delay            applied before each download
     * @param sitemapLinks     source of the sitemap links
     * @param linkRewriter     applied to every extracted link before it is emitted
     */
    @Inject
    public WebFetcher(WebConfig config, WebClient client, Emitter emitter, CandidateEmitter candidateEmitter, WebContentProcessor processor, RecrawlRules recrawlRules, FetchDelay delay, SitemapLinks sitemapLinks, LinkRewriter linkRewriter) {
        this.config = config;
        this.client = client;
        this.emitter = emitter;
        this.candidateEmitter = candidateEmitter;
        this.processor = processor;
        this.props = config.properties();
        this.recrawlRules = recrawlRules;
        this.delay = delay;
        this.sitemapLinks = sitemapLinks;
        this.linkRewriter = linkRewriter;
        // Null-safe: a missing setting disables incremental sitemap crawling.
        this.sitemapIncrementalCrawl = Boolean.TRUE.equals(config.properties().recrawlRulesConfig().sitemapIncrementalCrawling());
    }

    /**
     * Logs the current JVM thread count and delegates to the inherited default implementation.
     *
     * @param context start context supplied by the framework
     * @return the result of the inherited {@code start}
     */
    @Override
    public StartResult start(ContentFetcher.StartContext context) {
        LOG.info("Threads total:{}", ManagementFactory.getThreadMXBean().getThreadCount());
        // An inherited interface default method has to be invoked through the interface name.
        return ContentFetcher.super.start(context);
    }

    /**
     * Processes one fetch input and emits the matching result and candidates.
     *
     * <p>Any exception raised while processing is logged and reported through
     * {@link Emitter#emitErrorOrDelete}; it is not propagated to the caller.
     *
     * @param fetchContext context carrying the input and receiving the emitted data
     * @return a new result created from {@code fetchContext}
     */
    @Override
    public FetchResult fetch(ContentFetcher.FetchContext fetchContext) {
        FetchInput input = fetchContext.getFetchInput();
        String id = input.getId();
        LOG.info("Processing id: {} with metadata: {} and fields: {}", id, input.getMetadata(), input.getFields());
        try {
            if (this.isInitialCrawl(input)) {
                this.isRecrawl = false;
                this.emitStartLinkCandidates(fetchContext);
                return fetchContext.newResult();
            }
            // Use the flag resolved in the constructor: re-reading the raw configuration value
            // here would throw a NullPointerException when the setting is not configured.
            if (this.sitemapUrlRemoved(input) && this.sitemapIncrementalCrawl) {
                this.emitter.emitDelete(fetchContext);
                return fetchContext.newResult();
            }
            if (this.isRecrawl && this.recrawlRules.shouldSkip(input)) {
                this.emitter.emitSkip(fetchContext);
                return fetchContext.newResult();
            }
            if (CRAWL_SITEMAP.equals(id)) {
                this.emitSitemapLinkCandidates(fetchContext, input);
                return fetchContext.newResult();
            }
            if (this.emitter.isLimitReached()) {
                LOG.info("Max items is reached. Skipping processing id: {}", id);
                return fetchContext.newResult();
            }
            this.delay.delayFetch(id);
            WebClientResponse resp = this.client.fetch(input);
            // The limit is checked again after the download; links are only emitted while below it.
            if (!this.emitter.isLimitReached()) {
                this.emitLinkCandidates(fetchContext, resp);
            }
            this.emitProcessedData(fetchContext, input, this.processor.process(resp, input));
            return fetchContext.newResult();
        }
        catch (Exception e) {
            // Log before emitting so the original failure is recorded even if emitting throws.
            LOG.error("Could not process id: {}", id, e);
            this.emitter.emitErrorOrDelete(fetchContext, input, e.getMessage());
            return fetchContext.newResult();
        }
    }

    /** Extracts the links of {@code resp}, rewrites each one and emits it as a candidate. */
    private void emitLinkCandidates(ContentFetcher.FetchContext fetchContext, WebClientResponse resp) {
        Set<String> links = this.processor.getLinks(resp);
        links.stream().map(this.linkRewriter::rewriteLink).forEach(l -> this.candidateEmitter.emitCandidate(fetchContext, (String)l));
    }

    /**
     * Emits the result matching the concrete type of {@code dataToEmit}; a value that is none of
     * the four handled types emits nothing.
     */
    private void emitProcessedData(ContentFetcher.FetchContext fetchContext, FetchInput input, DataToEmit dataToEmit) {
        if (dataToEmit instanceof NewDocument) {
            NewDocument newDoc = (NewDocument)dataToEmit;
            this.emitter.emitContent(fetchContext, input.getId(), newDoc.getContent(), newDoc.getFields(), newDoc.getMetadata());
        } else if (dataToEmit instanceof DeleteDocument) {
            this.emitter.emitDelete(fetchContext);
        } else if (dataToEmit instanceof ErrorDocument) {
            ErrorDocument error = (ErrorDocument)dataToEmit;
            this.emitter.emitErrorOrDelete(fetchContext, input, error.getMessage());
        } else if (dataToEmit instanceof SkipDocument) {
            this.emitter.emitSkip(fetchContext);
        }
    }

    /**
     * Tells whether the input is flagged as a sitemap URL in its metadata while
     * {@link SitemapLinks} no longer has a link for its id.
     */
    private boolean sitemapUrlRemoved(FetchInput input) {
        // Boolean.TRUE.equals treats a missing or non-Boolean metadata value as "not a sitemap URL".
        boolean isSitemapUrl = Boolean.TRUE.equals(input.getMetadata().get(IS_SITEMAP_URL));
        String url = input.getId();
        return isSitemapUrl && !this.sitemapLinks.hasLink(url);
    }

    /**
     * Emits one sitemap candidate per sitemap link and then emits a skip for the input itself.
     * With incremental sitemap crawling enabled the links are requested using the input's
     * {@code lastEmitted} metadata value (default {@code 0L}); otherwise all links are used.
     */
    private void emitSitemapLinkCandidates(ContentFetcher.FetchContext fetchContext, FetchInput input) {
        Collection<SitemapLink> links = this.sitemapIncrementalCrawl ? this.sitemapLinks.getLinks(input.getMetadata().getOrDefault("lastEmitted", 0L)) : this.sitemapLinks.getAllLinks().values();
        links.forEach(l -> this.candidateEmitter.emitSitemapCandidate(fetchContext, l.getUrl(), l.getParent()));
        this.emitter.emitSkip(fetchContext);
    }

    /** An input without an id is what starts a crawl. */
    private boolean isInitialCrawl(FetchInput input) {
        return !input.hasId();
    }

    /**
     * Emits the configured start links, then the {@link #CRAWL_SITEMAP} marker, then the bulk
     * start links, each as a candidate.
     */
    private void emitStartLinkCandidates(ContentFetcher.FetchContext fetchContext) {
        this.props.startLinks().forEach(s -> this.candidateEmitter.emitCandidate(fetchContext, (String)s));
        this.candidateEmitter.emitCandidate(fetchContext, CRAWL_SITEMAP);
        WebFetcher.bulkStartLinkList(this.config).forEach(s -> this.candidateEmitter.emitCandidate(fetchContext, s));
    }

    /**
     * Splits the newline-separated bulk start links of the configuration into a list.
     *
     * @param config connector configuration
     * @return the trimmed, non-blank lines of {@code bulkStartLinks()}; an empty list when that
     *         property is {@code null}
     */
    public static List<String> bulkStartLinkList(WebConfig config) {
        String bulkStartLinks = config.properties().bulkStartLinks();
        if (bulkStartLinks == null) {
            return List.of();
        }
        return Arrays.stream(bulkStartLinks.split("\n")).map(String::trim).filter(s -> !s.isBlank()).collect(Collectors.toList());
    }

    /**
     * Closes the web client and delegates to the inherited default implementation.
     *
     * @param context stop context supplied by the framework
     * @return the result of the inherited {@code stop}
     * @throws WebConnectorException if closing the client fails with an {@link IOException}
     */
    @Override
    public StopResult stop(ContentFetcher.StopContext context) {
        try {
            this.client.close();
        }
        catch (IOException e) {
            throw new WebConnectorException("Error when stopping the fetcher", e);
        }
        // An inherited interface default method has to be invoked through the interface name.
        return ContentFetcher.super.stop(context);
    }
}

