package com.metreeca.flow.xml.actions;

import com.metreeca.flow.http.Message;
import com.metreeca.flow.http.Request;
import com.metreeca.flow.http.actions.Fetch;
import com.metreeca.flow.http.actions.Parse;
import com.metreeca.flow.http.actions.Query;
import com.metreeca.flow.work.Regex;
import com.metreeca.flow.work.Xtream;
import com.metreeca.flow.xml.XPath;
import com.metreeca.flow.xml.formats.HTML;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Phaser;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.stream.Stream;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

/**
 * Site crawling action.
 *
 * <p>Maps a root URL to a stream of page URLs discovered by recursively following the {@code //a/@href} links
 * found in the HTML documents fetched starting from that root.</p>
 *
 * <p>For each candidate URL the crawler:</p>
 *
 * <ol>
 * <li>issues a {@code HEAD} request and goes on only if the {@code Content-Type} response header matches
 * {@link HTML#MIMEPattern};</li>
 * <li>fetches the page and records it as a result, under its {@code Content-Location} header if present or
 * under the requested URL otherwise;</li>
 * <li>parses the page as HTML, applies the configured {@linkplain #focus(Function) focus} function and
 * extracts its links, dropping fragment ({@code #...}) and query ({@code ?...}) parts;</li>
 * <li>schedules for crawling every link that is located under the root URL and accepted by the configured
 * {@linkplain #prune(BiPredicate) prune} predicate.</li>
 * </ol>
 *
 * <p>Configuration methods mutate this action and return it for chaining.</p>
 */
public final class Crawl implements Function<String, Stream<String>> {

    /** Number of worker threads; {@code 0} selects the number of available processors. */
    private int threads;

    /** Maps a URL to a {@code HEAD} request, used to probe the content type before downloading. */
    private final Function<String, Optional<Request>> head = new Query(request -> request.method("HEAD"));

    /** Maps a URL to the request used to download the page (default {@link Query} configuration). */
    private final Function<String, Optional<Request>> get = new Query();

    /** Parses a fetched message as an HTML document. */
    private final Function<Message<?>, Optional<Document>> parse = new Parse(new HTML());

    /** Executes requests; replaceable through {@link #fetch(Fetch)}. */
    private Fetch fetch = new Fetch();

    /** Selects the portion of a parsed page links are extracted from; by default the whole node. */
    private Function<? super Node, Optional<Node>> focus = Optional::of;

    /** Tested with {@code (root, link)}; a link is followed only if the test succeeds; by default always. */
    private BiPredicate<String, String> prune = (root, link) -> true;


    /**
     * Configures the number of worker threads used by each crawl.
     *
     * @param i the number of worker threads; {@code 0} to use as many threads as available processors
     *
     * @return this action
     *
     * @throws IllegalArgumentException if {@code i} is negative
     */
    public Crawl threads(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("negative thread count");
        }
        this.threads = i;
        return this;
    }

    /**
     * Configures the action used to execute {@code HEAD} and page requests.
     *
     * @param fetch the fetch action
     *
     * @return this action
     *
     * @throws NullPointerException if {@code fetch} is null
     */
    public Crawl fetch(Fetch fetch) {
        if (fetch == null) {
            throw new NullPointerException("null fetch");
        }
        this.fetch = fetch;
        return this;
    }

    /**
     * Configures the function applied to each parsed page before link extraction.
     *
     * @param function a function mapping the parsed page to the optional node links are to be extracted
     *                 from; pages mapped to an empty optional contribute no links
     *
     * @return this action
     *
     * @throws NullPointerException if {@code function} is null
     */
    public Crawl focus(Function<? super Node, Optional<Node>> function) {
        if (function == null) {
            throw new NullPointerException("null focus");
        }
        this.focus = function;
        return this;
    }

    /**
     * Configures the link selection predicate.
     *
     * @param biPredicate a predicate tested with the root URL of the crawl and the URL of an extracted link;
     *                    the link is followed only if the predicate returns {@code true}
     *
     * @return this action
     *
     * @throws NullPointerException if {@code biPredicate} is null
     */
    public Crawl prune(BiPredicate<String, String> biPredicate) {
        if (biPredicate == null) {
            throw new NullPointerException("null prune");
        }
        this.prune = biPredicate;
        return this;
    }


    /**
     * Crawls a site.
     *
     * <p>Blocks until the whole crawl is complete.</p>
     *
     * @param str the root URL of the site to be crawled
     *
     * @return a stream of the URLs of the pages fetched while crawling from {@code str}; empty if {@code str}
     * is null or empty
     */
    @Override
    public Stream<String> apply(String str) {
        if (str == null || str.isEmpty()) {
            return Stream.empty();
        }
        return new Crawler(str).crawl();
    }


    /**
     * Single-use crawl session rooted at a given URL.
     */
    private final class Crawler {

        private final String root;

        /** Normalized root URI, parsed once per crawl; {@code null} if the root is not a valid URI. */
        private final URI base;

        /** Visited URLs; the value turns {@code true} once a page was actually fetched under that URL. */
        private final Map<String, Boolean> pages = new ConcurrentHashMap<>();

        /** Tracks pending page tasks, so the caller can wait for the whole crawl to complete. */
        private final Phaser phaser = new Phaser();

        private final ExecutorService executor;


        private Crawler(String root) {
            this.root = root;
            this.base = normalized(root);
            this.executor = Executors.newFixedThreadPool(
                    Crawl.this.threads > 0 ? Crawl.this.threads : Runtime.getRuntime().availableProcessors()
            );
        }


        /**
         * Runs the crawl and waits for its completion.
         *
         * @return a stream of the URLs recorded as fetched pages
         */
        private Stream<String> crawl() {
            try {

                // the calling thread is a party too: the phase advances only after it arrives
                // and every scheduled page task is done

                phaser.register();

                crawl(root);

                phaser.arriveAndAwaitAdvance();

                return pages.entrySet().stream()
                        .filter(Map.Entry::getValue)
                        .map(Map.Entry::getKey);

            } finally {
                executor.shutdown();
            }
        }

        /**
         * Schedules a page for crawling, unless its URL was already seen.
         *
         * @param url the URL of the page to be crawled
         */
        private void crawl(String url) {

            if (pages.putIfAbsent(url, false) != null) {
                return; // already scheduled or visited
            }

            phaser.register();

            executor.execute(() -> {
                try {

                    visit(url);

                } finally {

                    // deregister on completion: a Phaser supports at most 65535 registered parties, so
                    // merely arriving would make register() fail once that many pages were scheduled

                    phaser.arriveAndDeregister();

                }
            });
        }

        /**
         * Fetches a page, records it and schedules its eligible links for crawling.
         *
         * @param url the URL of the page to be visited
         */
        private void visit(String url) {
            Xtream.of(url)

                    .filter(this::isHtml)

                    .optMap(Crawl.this.get)
                    .optMap(Crawl.this.fetch)

                    .peek(response -> pages.put((String) response.header("Content-Location").orElse(url), true))

                    .optMap(Crawl.this.parse)
                    .optMap(Crawl.this.focus)

                    .map(XPath::new)
                    .flatMap(xpath -> xpath.links("//a/@href"))

                    .map(Regex::new).map(regex -> regex.replace("#.*$", "")) // drop fragment
                    .map(Regex::new).map(regex -> regex.replace("\\?.*$", "")) // drop query

                    .filter(this::isInternal)
                    .filter(link -> Crawl.this.prune.test(root, link))

                    .forEach(this::crawl);
        }

        /**
         * Probes a URL with a {@code HEAD} request.
         *
         * @param url the URL to be probed
         *
         * @return {@code true} if a response is available and its {@code Content-Type} header matches
         * {@link HTML#MIMEPattern}; {@code false} otherwise
         */
        private boolean isHtml(String url) {
            return Xtream.of(url)
                    .optMap(Crawl.this.head)
                    .optMap(Crawl.this.fetch)
                    .anyMatch(response -> response.header("Content-Type")
                            .filter(HTML.MIMEPattern.asPredicate())
                            .isPresent()
                    );
        }

        /**
         * Checks if a link is located under the root URL.
         *
         * @param link the link to be checked
         *
         * @return {@code true} if the normalized link can be relativized against the normalized root URI;
         * {@code false} otherwise or if either the root or the link is not a valid URI
         */
        private boolean isInternal(String link) {

            if (base == null) {
                return false; // invalid root: no link may be considered internal
            }

            try {

                final URI target = new URI(link).normalize();

                // URI.relativize() returns its argument unchanged when it is not located under the base

                return !base.relativize(target).equals(target);

            } catch (URISyntaxException ignored) {
                return false; // malformed links are silently skipped
            }
        }

        /**
         * Parses and normalizes a URI.
         *
         * @param uri the textual URI to be parsed
         *
         * @return the normalized URI or {@code null} if {@code uri} is malformed
         */
        private URI normalized(String uri) {
            try {
                return new URI(uri).normalize();
            } catch (URISyntaxException ignored) {
                return null; // reported to callers as a missing base
            }
        }

    }

}
