package org.codelibs.fess.crawler.extractor.impl;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.xml.xpath.XPathEvaluationResult;
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathNodes;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.interval.IntervalController;
import org.codelibs.fess.crawler.util.XPathAPI;
import org.codelibs.nekohtml.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

/* loaded from: input_file:org/codelibs/fess/crawler/extractor/impl/HtmlExtractor.class */
/**
 * Extractor for HTML content.
 *
 * <p>The content is parsed into a DOM with a NekoHTML {@link DOMParser}; the main text is
 * selected with {@link #contentXpath} and additional metadata values are selected with the
 * XPath expressions registered through {@link #addMetadata(String, String)}.
 *
 * <p>A {@link XPathAPI} instance is cached per thread in a {@link ThreadLocal} while a document
 * is being processed and is removed again at the end of {@link #createExtractData(String)}.
 */
public class HtmlExtractor extends AbstractXmlExtractor {
    protected static final Logger logger = LogManager.getLogger(HtmlExtractor.class);

    /** Pattern returned by {@link #getEncodingPattern()}; group 1 captures the charset name of a {@code <meta>} tag. */
    protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Pattern returned by {@link #getTagPattern()}; matches a single markup tag. */
    protected Pattern htmlTagPattern = Pattern.compile("<[^>]+>");

    /** Parser features applied in {@link #getDomParser()}; a value enables the feature only if it equals "true" (case-insensitive). */
    protected Map<String, String> featureMap = new HashMap<>();

    /** Parser properties applied in {@link #getDomParser()}. */
    protected Map<String, String> propertyMap = new HashMap<>();

    /** XPath expression selecting the nodes whose text becomes the extracted content. */
    protected String contentXpath = "//BODY";

    /** Metadata name to XPath expression; each expression is evaluated against the parsed document. */
    protected Map<String, String> metadataXpathMap = new HashMap<>();

    /** Per-thread XPath helper, created lazily by {@link #getXPathAPI()}. */
    private final ThreadLocal<XPathAPI> xpathAPI = new ThreadLocal<>();

    /**
     * Parses the given HTML text and builds the extraction result.
     *
     * <p>The strings selected by {@link #contentXpath} are joined with a single space to form
     * the content, and every entry of {@link #metadataXpathMap} is evaluated and stored under
     * its key. If parsing or extraction throws an {@link Exception}, a warning is logged and the
     * result of {@code extractString(str)} (inherited) is used as the content instead.
     *
     * @param str the HTML text to parse
     * @return the extracted data, never {@code null}
     */
    @Override // org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor
    protected ExtractData createExtractData(String str) {
        // Parser construction stays outside the try block: a configuration error must
        // propagate as CrawlerSystemException instead of triggering the plain-text fallback.
        DOMParser domParser = getDomParser();
        try {
            // try-with-resources guarantees the reader is closed even when parse() fails.
            try (StringReader reader = new StringReader(str)) {
                domParser.parse(new InputSource(reader));
            }
            Document document = domParser.getDocument();
            try {
                String content = StreamUtil.stream(getStringsByXPath(document, this.contentXpath))
                        .get(stream -> stream.collect(Collectors.joining(" ")));
                ExtractData extractData = new ExtractData(content);
                for (Map.Entry<String, String> entry : this.metadataXpathMap.entrySet()) {
                    extractData.putValues(entry.getKey(), getStringsByXPath(document, entry.getValue()));
                }
                return extractData;
            } finally {
                // Drop the cached XPathAPI so it does not outlive this document on the thread.
                this.xpathAPI.remove();
            }
        } catch (Exception e) {
            logger.warn("Failed to parse the content.", e);
            return new ExtractData(extractString(str));
        }
    }

    /**
     * Evaluates an XPath expression against the document and converts the result to strings.
     *
     * <ul>
     * <li>BOOLEAN and NUMBER results yield one element with the value's {@code toString()}.</li>
     * <li>STRING results yield one element with the trimmed string.</li>
     * <li>NODESET results yield the text content of each node, in order.</li>
     * <li>NODE results yield one element with the node's text content.</li>
     * <li>Any other result type yields one element with {@code toString()} of the value,
     * or an empty string when the value is {@code null}.</li>
     * </ul>
     *
     * @param document the parsed document to query
     * @param str the XPath expression
     * @return the selected strings; an empty array if the evaluation throws {@link XPathException}
     */
    protected String[] getStringsByXPath(Document document, String str) {
        try {
            XPathEvaluationResult<?> eval = getXPathAPI().eval(document, str);
            switch (eval.type()) {
                case BOOLEAN:
                    return new String[] { ((Boolean) eval.value()).toString() };
                case NUMBER:
                    return new String[] { ((Number) eval.value()).toString() };
                case STRING:
                    return new String[] { ((String) eval.value()).trim() };
                case NODESET: {
                    XPathNodes nodes = (XPathNodes) eval.value();
                    String[] texts = new String[nodes.size()];
                    for (int i = 0; i < texts.length; i++) {
                        texts[i] = nodes.get(i).getTextContent();
                    }
                    return texts;
                }
                case NODE:
                    return new String[] { ((Node) eval.value()).getTextContent() };
                default: {
                    Object value = eval.value();
                    return new String[] { value == null ? "" : value.toString() };
                }
            }
        } catch (XPathException e) {
            logger.warn("Failed to parse the content by {}", str, e);
            return StringUtil.EMPTY_STRINGS;
        }
    }

    /**
     * Creates a new parser configured with {@link #featureMap} and {@link #propertyMap}.
     *
     * @return a newly created, configured parser
     * @throws CrawlerSystemException if setting a feature or property fails
     */
    protected DOMParser getDomParser() {
        DOMParser parser = new DOMParser();
        try {
            for (Map.Entry<String, String> feature : this.featureMap.entrySet()) {
                parser.setFeature(feature.getKey(), "true".equalsIgnoreCase(feature.getValue()));
            }
            for (Map.Entry<String, String> property : this.propertyMap.entrySet()) {
                parser.setProperty(property.getKey(), property.getValue());
            }
            return parser;
        } catch (Exception e) {
            throw new CrawlerSystemException("Invalid parser configuration.", e);
        }
    }

    /**
     * Returns the XPath helper bound to the current thread, creating and caching it on first use.
     *
     * @return the current thread's XPath helper
     */
    protected XPathAPI getXPathAPI() {
        XPathAPI api = this.xpathAPI.get();
        if (api == null) {
            api = new XPathAPI();
            this.xpathAPI.set(api);
        }
        return api;
    }

    /**
     * Registers an XPath expression whose result is stored as metadata.
     *
     * @param str the metadata name (key in the extracted data)
     * @param str2 the XPath expression evaluated against the parsed document
     */
    public void addMetadata(String str, String str2) {
        this.metadataXpathMap.put(str, str2);
    }

    /**
     * @return {@link #metaCharsetPattern}
     */
    @Override // org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor
    protected Pattern getEncodingPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * @return {@link #htmlTagPattern}
     */
    @Override // org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor
    protected Pattern getTagPattern() {
        return this.htmlTagPattern;
    }

    /**
     * @return the pattern used to find the charset declared in a {@code <meta>} tag
     */
    public Pattern getMetaCharsetPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * @param pattern the pattern used to find the charset declared in a {@code <meta>} tag
     */
    public void setMetaCharsetPattern(Pattern pattern) {
        this.metaCharsetPattern = pattern;
    }

    /**
     * @return the pattern matching a markup tag
     */
    public Pattern getHtmlTagPattern() {
        return this.htmlTagPattern;
    }

    /**
     * @param pattern the pattern matching a markup tag
     */
    public void setHtmlTagPattern(Pattern pattern) {
        this.htmlTagPattern = pattern;
    }

    /**
     * @return the parser feature map (the live instance, not a copy)
     */
    public Map<String, String> getFeatureMap() {
        return this.featureMap;
    }

    /**
     * @param map the parser features to apply to newly created parsers
     */
    public void setFeatureMap(Map<String, String> map) {
        this.featureMap = map;
    }

    /**
     * @return the parser property map (the live instance, not a copy)
     */
    public Map<String, String> getPropertyMap() {
        return this.propertyMap;
    }

    /**
     * @param map the parser properties to apply to newly created parsers
     */
    public void setPropertyMap(Map<String, String> map) {
        this.propertyMap = map;
    }
}
