package com.metreeca.flow.xml.actions;

import com.metreeca.flow.xml.XPath;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Stream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/* loaded from: input_file:com/metreeca/flow/xml/actions/Extract.class */
public final class Extract implements Function<Node, Optional<Node>> {
    private static final Collection<String> textual = new HashSet(Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6", "p", "blockquote", "pre", "ul", "ol", "dl", "li", "dt", "dd", "table", "th", "td"));
    private static final Collection<String> ignored = new HashSet(Arrays.asList("style", "script"));

    @Override // java.util.function.Function
    public Optional<Node> apply(Node node) {
        return node == null ? Optional.empty() : new XPath(node).node(".//main").or(() -> {
            return Stream.of(annotate(node)).map(XPath::new).flatMap(xPath -> {
                return xPath.nodes(".//*");
            }).max(Comparator.comparingDouble(node2 -> {
                return ((Double) get(node2, "echars", Double.valueOf(0.0d))).doubleValue();
            }));
        }).map(node2 -> {
            try {
                Document newDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
                newDocument.setDocumentURI(node2.getBaseURI());
                newDocument.appendChild(newDocument.adoptNode(node2.cloneNode(true)));
                newDocument.normalizeDocument();
                return newDocument;
            } catch (ParserConfigurationException e) {
                throw new RuntimeException(e);
            }
        });
    }

    /* JADX WARN: Multi-variable type inference failed */
    private <T extends Node> T annotate(T t) {
        if (t instanceof Document) {
            ((Document) t).normalizeDocument();
            annotate(((Document) t).getDocumentElement());
        } else if ((t instanceof Element) && !ignored.contains(t.getNodeName())) {
            double d = 0.0d;
            double d2 = 0.0d;
            int i = 0;
            int i2 = 0;
            NodeList childNodes = t.getChildNodes();
            int length = childNodes.getLength();
            for (int i3 = 0; i3 < length; i3++) {
                Node annotate = annotate(childNodes.item(i3));
                d += ((Double) get(annotate, "xchars", Double.valueOf(0.0d))).doubleValue();
                d2 += ((Double) get(annotate, "echars", Double.valueOf(0.0d))).doubleValue();
                if (annotate instanceof Element) {
                    i++;
                }
                if (textual.contains(annotate.getNodeName())) {
                    i2++;
                }
            }
            boolean z = textual.contains(t.getNodeName()) && d2 == 0.0d;
            set(t, "xchars", Double.valueOf(d));
            set(t, "echars", Double.valueOf(z ? d : (d2 * (i2 + 1)) / (i + 1)));
            ((Element) t).setAttribute("chars", String.format("%.1f/%.0f", get(t, "echars", Double.valueOf(0.0d)), get(t, "xchars", Double.valueOf(0.0d))));
        } else if (t instanceof Text) {
            double length2 = XPath.normalize(t.getTextContent()).length();
            set(t, "xchars", Double.valueOf(length2 * length2));
        }
        return t;
    }

    private <T> T get(Node node, String str, T t) {
        return (T) Optional.ofNullable(node.getUserData(str)).orElse(t);
    }

    private <T> void set(Node node, String str, T t) {
        node.setUserData(str, t, null);
    }
}
