package org.codelibs.fess.crawler.extractor.impl;

import jakarta.annotation.PostConstruct;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.commons.lang3.SystemUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.SecureContentHandler;
import org.codelibs.core.beans.util.BeanUtil;
import org.codelibs.core.io.CloseableUtil;
import org.codelibs.core.io.CopyUtil;
import org.codelibs.core.io.FileUtil;
import org.codelibs.core.io.PropertiesUtil;
import org.codelibs.core.io.ReaderUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.Constants;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.ExtractException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.util.TextUtil;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/codelibs/fess/crawler/extractor/impl/TikaExtractor.class */
public class TikaExtractor extends PasswordBasedExtractor {
    /** Logger for this extractor; also used by the inner {@code TikaDetectParser}. */
    private static final Logger logger = LogManager.getLogger(TikaExtractor.class);

    /** Key constant {@code "tika.tesseract.config"}. NOTE(review): consumer not visible in this part of the file. */
    public static final String TIKA_TESSERACT_CONFIG = "tika.tesseract.config";

    /** Key constant {@code "tika.pdf.config"}. NOTE(review): consumer not visible in this part of the file. */
    public static final String TIKA_PDF_CONFIG = "tika.pdf.config";

    /**
     * Parameter key read from the params map in {@code getText}; normalization stays enabled unless the
     * mapped value equals {@code Constants.FALSE} (ignoring case).
     */
    public static final String NORMALIZE_TEXT = "normalize_text";

    /** Key constant {@code "fess.file.password"}. NOTE(review): consumer not visible in this part of the file. */
    private static final String FILE_PASSWORD = "fess.file.password";

    /**
     * Tika configuration. {@code init()} resolves it from the crawler container component
     * {@code "tikaConfig"} when possible and otherwise falls back to {@code TikaConfig.getDefaultConfig()}.
     */
    protected TikaConfig tikaConfig;

    /** Charset name used by {@code getText} to decode the captured stdout/stderr bytes before logging them. */
    protected String outputEncoding = Constants.UTF_8;

    /** When {@code true}, {@code getText} reads the input as plain text if every Tika attempt yields blank content. */
    protected boolean readAsTextIfFailed = false;

    /** Value handed to {@code SecureContentHandler.setMaximumCompressionRatio} by {@code TikaDetectParser}. */
    protected long maxCompressionRatio = 100;

    /** Value handed to {@code SecureContentHandler.setOutputThreshold} by {@code TikaDetectParser}. */
    protected long maxUncompressionSize = 1000000;

    /** NOTE(review): usage not visible in this part of the file; presumably an initial buffer capacity - confirm. */
    protected int initialBufferSize = 10000;

    /** NOTE(review): usage not visible in this part of the file - confirm semantics against the text normalization code. */
    protected boolean replaceDuplication = false;

    /**
     * Code points U+0020 (space), U+00A0 (no-break space), U+3000 (ideographic space) and
     * U+FFFD (replacement character). NOTE(review): usage not visible in this part of the file.
     */
    protected int[] spaceChars = {32, 160, 12288, 65533};

    /** 1048576 = 1024 * 1024. NOTE(review): usage not visible in this part of the file; presumably a byte threshold - confirm. */
    protected int memorySize = 1048576;

    /** NOTE(review): usage not visible in this part of the file; -1 presumably means "no limit" - confirm. */
    protected int maxAlphanumTermSize = -1;

    /** NOTE(review): usage not visible in this part of the file; -1 presumably means "no limit" - confirm. */
    protected int maxSymbolTermSize = -1;

    /** Tesseract OCR configurations by string key. NOTE(review): population and lookup are not visible here. */
    private final Map<String, TesseractOCRConfig> tesseractOCRConfigMap = new ConcurrentHashMap<>();

    /** PDF parser configurations by string key. NOTE(review): population and lookup are not visible here. */
    private final Map<String, PDFParserConfig> pdfParserConfigMap = new ConcurrentHashMap<>();

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Single-method callback that is given a {@link Writer} and may write to it.
     *
     * <p>Unlike {@code java.util.function.Consumer}, the callback is allowed to throw the checked
     * exceptions listed on {@link #accept(Writer)}.</p>
     */
    @FunctionalInterface
    /* loaded from: input_file:org/codelibs/fess/crawler/extractor/impl/TikaExtractor$ContentWriter.class */
    public interface ContentWriter {
        /**
         * Performs this operation against the given writer.
         *
         * @param writer the writer handed to the callback
         * @throws IOException declared for I/O failures raised by the callback
         * @throws TikaException declared for Tika failures raised by the callback
         * @throws SAXException declared for SAX failures raised by the callback
         */
        void accept(Writer writer) throws IOException, TikaException, SAXException;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/codelibs/fess/crawler/extractor/impl/TikaExtractor$TikaDetectParser.class */
    /**
     * Composite parser that runs media type detection before delegating to the parser obtained from a
     * {@link TikaConfig}.
     *
     * <p>Each {@code parse} call records the detected type in the metadata, wraps the caller's handler in a
     * {@link SecureContentHandler} configured with the enclosing extractor's {@code maxCompressionRatio}
     * and {@code maxUncompressionSize}, and makes sure the parse context carries an
     * {@link EmbeddedDocumentExtractor}.</p>
     */
    public class TikaDetectParser extends CompositeParser {
        private static final long serialVersionUID = 1L;

        /** Detector obtained from the Tika configuration given at construction time. */
        private final Detector detector;

        /**
         * Creates a parser from the Tika configuration held by the given extractor.
         *
         * @param tikaExtractor extractor whose {@code tikaConfig} is used
         */
        public TikaDetectParser(TikaExtractor tikaExtractor) {
            this(tikaExtractor.tikaConfig);
        }

        /**
         * Creates a parser from the media type registry, parser and detector of the given configuration.
         *
         * @param tikaConfig Tika configuration to take the registry, parser and detector from
         */
        public TikaDetectParser(TikaConfig tikaConfig) {
            super(tikaConfig.getMediaTypeRegistry(), new Parser[] { tikaConfig.getParser() });
            detector = tikaConfig.getDetector();
        }

        /**
         * Detects the media type of the stream, stores it under {@code "Content-Type"} in the metadata and
         * then delegates to the superclass parse with a size-limiting handler.
         *
         * @param inputStream raw document stream
         * @param contentHandler handler receiving the parsed content
         * @param metadata metadata used for detection and updated with the detected type
         * @param parseContext parse context; receives a parser and an embedded document extractor if missing
         * @throws IOException propagated from detection or parsing
         * @throws SAXException propagated from parsing
         * @throws TikaException propagated from parsing
         */
        public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext)
                throws IOException, SAXException, TikaException {
            final TemporaryResources tmp = new TemporaryResources();
            try {
                final TikaInputStream stream = TikaInputStream.get(inputStream, tmp, metadata);

                final MediaType mediaType = detector.detect(stream, metadata);
                metadata.set("Content-Type", mediaType.toString());

                final SecureContentHandler guardedHandler = new SecureContentHandler(contentHandler, stream);
                guardedHandler.setMaximumCompressionRatio(TikaExtractor.this.maxCompressionRatio);
                guardedHandler.setOutputThreshold(TikaExtractor.this.maxUncompressionSize);

                registerEmbeddedExtractorIfAbsent(parseContext);

                if (TikaExtractor.logger.isDebugEnabled()) {
                    TikaExtractor.logger.debug("type: {}, metadata: {}, maxCompressionRatio: {}, maxUncompressionSize: {}", mediaType,
                            metadata, TikaExtractor.this.maxCompressionRatio, TikaExtractor.this.maxUncompressionSize);
                }

                try {
                    super.parse(stream, guardedHandler, metadata, parseContext);
                } catch (final SAXException e) {
                    // Give the secure handler the first chance to rethrow if it was the origin of the failure.
                    guardedHandler.throwIfCauseOf(e);
                    throw e;
                }
            } finally {
                // Always release whatever the TikaInputStream registered with the temporary resources.
                tmp.dispose();
            }
        }

        /**
         * Registers a {@link ParsingEmbeddedDocumentExtractor} in the context when no embedded document
         * extractor is present, first registering this parser if the context has no parser either.
         */
        private void registerEmbeddedExtractorIfAbsent(final ParseContext parseContext) {
            if (parseContext.get(EmbeddedDocumentExtractor.class) != null) {
                return;
            }
            if (parseContext.get(Parser.class) == null) {
                parseContext.set(Parser.class, this);
            }
            parseContext.set(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(parseContext));
        }
    }

    /**
     * Resolves the Tika configuration after construction.
     *
     * <p>If no configuration has been set and a crawler container is available, the component named
     * {@code "tikaConfig"} is looked up; any exception from that lookup is logged at debug level and
     * ignored. When the configuration is still missing, {@code TikaConfig.getDefaultConfig()} is used.
     * The supported media types are logged when debug logging is enabled.</p>
     */
    @PostConstruct
    public void init() {
        final boolean lookupInContainer = tikaConfig == null && crawlerContainer != null;
        if (lookupInContainer) {
            try {
                tikaConfig = (TikaConfig) crawlerContainer.getComponent("tikaConfig");
            } catch (final Exception e) {
                logger.debug("tikaConfig component is not found.", e);
            }
        }

        if (tikaConfig == null) {
            tikaConfig = TikaConfig.getDefaultConfig();
        }

        if (logger.isDebugEnabled()) {
            final ParseContext emptyContext = new ParseContext();
            logger.debug("supportedTypes: {}", tikaConfig.getParser().getSupportedTypes(emptyContext));
        }
    }

    /**
     * Extracts text from the stream by delegating to the three-argument overload with no callback.
     *
     * @param inputStream stream to extract from
     * @param map extraction parameters; passed through unchanged
     * @return the result of the three-argument {@code getText}
     */
    @Override // org.codelibs.fess.crawler.extractor.Extractor
    public ExtractData getText(InputStream inputStream, Map<String, String> map) {
        final BiConsumer<ExtractData, InputStream> noCallback = null;
        return getText(inputStream, map, noCallback);
    }

    /* JADX WARN: Failed to calculate best type for var: r14v2 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r14v2 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Failed to calculate best type for var: r15v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r15v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Failed to calculate best type for var: r16v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r16v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Failed to calculate best type for var: r17v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r17v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Finally extract failed */
    /* JADX WARN: Multi-variable type inference failed. Error: java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.applyWithWiderIgnSame(TypeUpdate.java:70)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.applyResolvedVars(TypeSearch.java:100)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.run(TypeSearch.java:76)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.runMultiVariableSearch(FixTypesVisitor.java:116)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Not initialized variable reg: 14, insn: 0x050a: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r14 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) A[TRY_LEAVE], block:B:191:0x050a */
    /* JADX WARN: Not initialized variable reg: 15, insn: 0x054d: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r15 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:203:0x054d */
    /* JADX WARN: Not initialized variable reg: 16, insn: 0x0526: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r16 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:193:0x0526 */
    /* JADX WARN: Not initialized variable reg: 17, insn: 0x057a: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r17 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:208:0x057a */
    protected ExtractData getText(InputStream inputStream, Map<String, String> map, BiConsumer<ExtractData, InputStream> biConsumer) {
        File createTempFile;
        PrintStream printStream;
        PrintStream printStream2;
        ByteArrayOutputStream byteArrayOutputStream;
        ByteArrayOutputStream byteArrayOutputStream2;
        Extractor extractor;
        long j;
        String str;
        String[] values;
        if (inputStream == null) {
            throw new CrawlerSystemException("The inputstream is null.");
        }
        boolean z = inputStream instanceof ByteArrayInputStream;
        if (z) {
            inputStream.mark(0);
            createTempFile = null;
        } else {
            try {
                createTempFile = File.createTempFile("tikaExtractor-", ".out");
            } catch (IOException e) {
                throw new ExtractException("Could not create a temp file.", e);
            }
        }
        try {
            try {
                try {
                    PrintStream printStream3 = System.out;
                    ByteArrayOutputStream byteArrayOutputStream3 = new ByteArrayOutputStream();
                    System.setOut(new PrintStream((OutputStream) byteArrayOutputStream3, true));
                    PrintStream printStream4 = System.err;
                    ByteArrayOutputStream byteArrayOutputStream4 = new ByteArrayOutputStream();
                    System.setErr(new PrintStream((OutputStream) byteArrayOutputStream4, true));
                    if (map == null) {
                        str = null;
                    } else {
                        try {
                            str = map.get("resourceName");
                        } catch (TikaException e2) {
                            if (e2.getMessage().indexOf("bomb") >= 0) {
                                throw new ExtractException("Zip bomb detected.", e2);
                            }
                            if (!(e2.getCause() instanceof SAXException) || (extractor = (Extractor) this.crawlerContainer.getComponent("xmlExtractor")) == null) {
                                throw e2;
                            }
                            j = null;
                            try {
                                if (z) {
                                    inputStream.reset();
                                    j = inputStream;
                                } else {
                                    j = new FileInputStream(createTempFile);
                                }
                                ExtractData text = extractor.getText(j, map);
                                CloseableUtil.closeQuietly(j);
                                if (printStream3 != null) {
                                    try {
                                        System.setOut(printStream3);
                                    } catch (Exception e3) {
                                        logger.warn("Failed to set originalOutStream.", e3);
                                    }
                                }
                                if (printStream4 != null) {
                                    try {
                                        System.setErr(printStream4);
                                    } catch (Exception e4) {
                                        logger.warn("Failed to set originalErrStream.", e4);
                                    }
                                }
                                try {
                                    if (logger.isInfoEnabled()) {
                                        byte[] byteArray = byteArrayOutputStream3.toByteArray();
                                        if (byteArray.length != 0) {
                                            logger.info(new String(byteArray, this.outputEncoding));
                                        }
                                    }
                                    if (logger.isWarnEnabled()) {
                                        byte[] byteArray2 = byteArrayOutputStream4.toByteArray();
                                        if (byteArray2.length != 0) {
                                            logger.warn(new String(byteArray2, this.outputEncoding));
                                        }
                                    }
                                } catch (Exception e5) {
                                }
                                FileUtil.deleteInBackground(createTempFile);
                                return text;
                            } finally {
                                CloseableUtil.closeQuietly(j);
                            }
                        }
                    }
                    String str2 = str;
                    String str3 = map == null ? null : map.get("Content-Type");
                    String str4 = map == null ? null : map.get("Content-Encoding");
                    boolean z2 = map == null ? true : !Constants.FALSE.equalsIgnoreCase(map.get(NORMALIZE_TEXT));
                    String password = getPassword(map);
                    Metadata createMetadata = createMetadata(str2, str3, str4, password);
                    TikaDetectParser tikaDetectParser = new TikaDetectParser(this);
                    ParseContext createParseContext = createParseContext(tikaDetectParser, map);
                    File file = createTempFile;
                    String content = getContent(writer -> {
                        InputStream fileInputStream;
                        if (z) {
                            fileInputStream = inputStream;
                        } else {
                            try {
                                FileOutputStream fileOutputStream = new FileOutputStream(file);
                                try {
                                    CopyUtil.copy(inputStream, fileOutputStream);
                                    fileOutputStream.close();
                                    fileInputStream = new FileInputStream(file);
                                } finally {
                                }
                            } catch (Throwable th) {
                                CloseableUtil.closeQuietly((Closeable) null);
                                throw th;
                            }
                        }
                        tikaDetectParser.parse(fileInputStream, new BodyContentHandler(writer), createMetadata, createParseContext);
                        CloseableUtil.closeQuietly(fileInputStream);
                    }, str4, z2);
                    if (StringUtil.isBlank(content)) {
                        if (str2 != null) {
                            if (logger.isDebugEnabled()) {
                                logger.debug("retry without a resource name: {}", str2);
                            }
                            Metadata createMetadata2 = createMetadata(null, str3, str4, password);
                            File file2 = createTempFile;
                            content = getContent(writer2 -> {
                                InputStream inputStream2 = null;
                                try {
                                    if (z) {
                                        inputStream.reset();
                                        inputStream2 = inputStream;
                                    } else {
                                        inputStream2 = new FileInputStream(file2);
                                    }
                                    tikaDetectParser.parse(inputStream2, new BodyContentHandler(writer2), createMetadata2, createParseContext);
                                    CloseableUtil.closeQuietly(inputStream2);
                                } catch (Throwable th) {
                                    CloseableUtil.closeQuietly(inputStream2);
                                    throw th;
                                }
                            }, str4, z2);
                        }
                        if (StringUtil.isBlank(content) && str3 != null) {
                            if (logger.isDebugEnabled()) {
                                logger.debug("retry without a content type: {}", str3);
                            }
                            Metadata createMetadata3 = createMetadata(null, null, str4, password);
                            File file3 = createTempFile;
                            content = getContent(writer3 -> {
                                InputStream inputStream2 = null;
                                try {
                                    if (z) {
                                        inputStream.reset();
                                        inputStream2 = inputStream;
                                    } else {
                                        inputStream2 = new FileInputStream(file3);
                                    }
                                    tikaDetectParser.parse(inputStream2, new BodyContentHandler(writer3), createMetadata3, createParseContext);
                                    CloseableUtil.closeQuietly(inputStream2);
                                } catch (Throwable th) {
                                    CloseableUtil.closeQuietly(inputStream2);
                                    throw th;
                                }
                            }, str4, z2);
                        }
                        if (StringUtil.isBlank(content)) {
                            ArrayList arrayList = new ArrayList();
                            for (String str5 : createMetadata.names()) {
                                String lowerCase = str5.toLowerCase(Locale.ROOT);
                                if ((lowerCase.contains("comment") || lowerCase.contains("text")) && (values = createMetadata.getValues(str5)) != null) {
                                    Collections.addAll(arrayList, values);
                                }
                            }
                            if (!arrayList.isEmpty()) {
                                content = (String) arrayList.stream().filter(StringUtil::isNotBlank).collect(Collectors.joining(" "));
                            }
                        }
                        if (this.readAsTextIfFailed && StringUtil.isBlank(content)) {
                            if (logger.isDebugEnabled()) {
                                logger.debug("read the content as a text.");
                            }
                            if (str4 == null) {
                                str4 = Constants.UTF_8;
                            }
                            String str6 = str4;
                            File file4 = createTempFile;
                            content = getContent(writer4 -> {
                                BufferedReader bufferedReader = null;
                                try {
                                    try {
                                        if (z) {
                                            inputStream.reset();
                                            bufferedReader = new BufferedReader(new InputStreamReader(inputStream, str6));
                                        } else {
                                            bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file4), str6));
                                        }
                                        while (true) {
                                            String readLine = bufferedReader.readLine();
                                            if (readLine == null) {
                                                CloseableUtil.closeQuietly(bufferedReader);
                                                return;
                                            }
                                            writer4.write(readLine);
                                        }
                                    } catch (Exception e6) {
                                        logger.warn("Could not read " + (file4 != null ? file4.getAbsolutePath() : "a byte stream"), e6);
                                        CloseableUtil.closeQuietly(bufferedReader);
                                    }
                                } catch (Throwable th) {
                                    CloseableUtil.closeQuietly(bufferedReader);
                                    throw th;
                                }
                            }, str4, z2);
                        }
                    }
                    ExtractData extractData = new ExtractData(content);
                    j = z ? ((ByteArrayInputStream) inputStream).available() : createTempFile.length();
                    extractData.putValue("Content-Length", Long.toString(r2));
                    String[] names = createMetadata.names();
                    Arrays.sort(names);
                    for (String str7 : names) {
                        extractData.putValues(str7, createMetadata.getValues(str7));
                    }
                    if (biConsumer != null) {
                        InputStream inputStream2 = null;
                        try {
                            if (z) {
                                inputStream.reset();
                                inputStream2 = inputStream;
                            } else {
                                inputStream2 = new FileInputStream(createTempFile);
                            }
                            biConsumer.accept(extractData, inputStream2);
                            CloseableUtil.closeQuietly(inputStream2);
                        } catch (Throwable th) {
                            throw th;
                        }
                    }
                    if (logger.isDebugEnabled()) {
                        logger.debug("Result: metadata: {}", createMetadata);
                    }
                    if (printStream3 != null) {
                        try {
                            System.setOut(printStream3);
                        } catch (Exception e6) {
                            logger.warn("Failed to set originalOutStream.", e6);
                        }
                    }
                    if (printStream4 != null) {
                        try {
                            System.setErr(printStream4);
                        } catch (Exception e7) {
                            logger.warn("Failed to set originalErrStream.", e7);
                        }
                    }
                    try {
                        if (logger.isInfoEnabled()) {
                            byte[] byteArray3 = byteArrayOutputStream3.toByteArray();
                            if (byteArray3.length != 0) {
                                logger.info(new String(byteArray3, this.outputEncoding));
                            }
                        }
                        if (logger.isWarnEnabled()) {
                            byte[] byteArray4 = byteArrayOutputStream4.toByteArray();
                            if (byteArray4.length != 0) {
                                logger.warn(new String(byteArray4, this.outputEncoding));
                            }
                        }
                    } catch (Exception e8) {
                    }
                    return extractData;
                } catch (Throwable th2) {
                    if (printStream != 0) {
                        try {
                            System.setOut(printStream);
                        } catch (Exception e9) {
                            logger.warn("Failed to set originalOutStream.", e9);
                        }
                    }
                    if (printStream2 != 0) {
                        try {
                            System.setErr(printStream2);
                        } catch (Exception e10) {
                            logger.warn("Failed to set originalErrStream.", e10);
                        }
                    }
                    try {
                        if (logger.isInfoEnabled()) {
                            byte[] byteArray5 = byteArrayOutputStream2.toByteArray();
                            if (byteArray5.length != 0) {
                                logger.info(new String(byteArray5, this.outputEncoding));
                            }
                        }
                        if (logger.isWarnEnabled()) {
                            byte[] byteArray6 = byteArrayOutputStream.toByteArray();
                            if (byteArray6.length != 0) {
                                logger.warn(new String(byteArray6, this.outputEncoding));
                            }
                        }
                    } catch (Exception e11) {
                    }
                    throw th2;
                }
            } finally {
                FileUtil.deleteInBackground(createTempFile);
            }
        } catch (Exception e12) {
            throw new ExtractException("Could not extract a content.", e12);
        }
    }

    /**
     * Builds the Tika {@link ParseContext} handed to the parser for one extraction.
     * <p>
     * The context always carries the given parser and a {@link PasswordProvider} that answers
     * with the {@code FILE_PASSWORD} entry of the metadata it is asked about. A
     * {@link TesseractOCRConfig} and/or a {@link PDFParserConfig} is added when {@code map}
     * holds a non-blank value under {@code TIKA_TESSERACT_CONFIG} / {@code TIKA_PDF_CONFIG}.
     * That value is passed to {@code PropertiesUtil.load}, and the resulting config object is
     * kept in the matching per-instance map under the same value so it is built only on a miss.
     *
     * @param parser the parser registered in the context under {@code Parser.class}
     * @param map    extraction parameters; may be {@code null}, in which case no OCR/PDF config is set
     * @return the populated parse context
     */
    protected ParseContext createParseContext(Parser parser, Map<String, String> map) {
        final ParseContext context = new ParseContext();
        context.set(Parser.class, parser);

        final String ocrConfigKey = map == null ? null : map.get(TIKA_TESSERACT_CONFIG);
        if (StringUtil.isNotBlank(ocrConfigKey)) {
            TesseractOCRConfig ocrConfig = this.tesseractOCRConfigMap.get(ocrConfigKey);
            if (ocrConfig == null) {
                final Map<String, String> settings = loadPropertiesAsStringMap(ocrConfigKey);
                ocrConfig = new TesseractOCRConfig();
                BeanUtil.copyMapToBean(settings, ocrConfig);
                this.tesseractOCRConfigMap.put(ocrConfigKey, ocrConfig);
            }
            context.set(TesseractOCRConfig.class, ocrConfig);
        }

        final String pdfConfigKey = map == null ? null : map.get(TIKA_PDF_CONFIG);
        if (StringUtil.isNotBlank(pdfConfigKey)) {
            PDFParserConfig pdfConfig = this.pdfParserConfigMap.get(pdfConfigKey);
            if (pdfConfig == null) {
                final Map<String, String> settings = loadPropertiesAsStringMap(pdfConfigKey);
                pdfConfig = new PDFParserConfig();
                BeanUtil.copyMapToBean(settings, pdfConfig);
                this.pdfParserConfigMap.put(pdfConfigKey, pdfConfig);
            }
            context.set(PDFParserConfig.class, pdfConfig);
        }

        context.set(PasswordProvider.class, metadata -> metadata.get(FILE_PASSWORD));
        return context;
    }

    /**
     * Loads the properties identified by {@code location} via {@code PropertiesUtil.load}
     * and returns them as a map of string keys to string values.
     */
    private static Map<String, String> loadPropertiesAsStringMap(String location) {
        final Properties loaded = new Properties();
        PropertiesUtil.load(loaded, location);
        return loaded.entrySet().stream()
                .collect(Collectors.toMap(e -> (String) e.getKey(), e -> (String) e.getValue()));
    }

    /**
     * Opens a stream over whatever has been written to the given deferred output stream.
     *
     * @param deferredFileOutputStream the buffer to read back
     * @return a {@link ByteArrayInputStream} over {@code getData()} while the buffer is in memory,
     *         otherwise a buffered {@link FileInputStream} over {@code getFile()}
     * @throws IOException if the backing file cannot be opened
     */
    protected InputStream getContentStream(DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        if (deferredFileOutputStream.isInMemory()) {
            return new ByteArrayInputStream(deferredFileOutputStream.getData());
        }
        return new BufferedInputStream(new FileInputStream(deferredFileOutputStream.getFile()));
    }

    /**
     * Runs {@code contentWriter} against a temporary buffer and returns what it wrote as a string.
     * <p>
     * Output is collected in a {@link DeferredFileOutputStream} created with {@code memorySize}
     * as its threshold and a {@code tika}/{@code .tmp} temporary file in the JVM temp directory.
     * The buffered bytes are then read back with the same encoding and either normalized or
     * returned as-is.
     * <p>
     * Any file the buffer spilled to is scheduled for deletion on every exit path, including
     * failures while writing or reading.
     *
     * @param contentWriter callback that writes the extracted text to the supplied writer
     * @param str           encoding used to write and re-read the buffer; {@code null} selects
     *                      {@code Constants.UTF_8}
     * @param z             {@code true} to run the text through {@code TextUtil.normalizeText}
     *                      with this extractor's buffer/term-size/duplication/space-char settings,
     *                      {@code false} to return it via {@code ReaderUtil.readText}
     * @return the text produced by {@code contentWriter}
     * @throws TikaException    rethrown unchanged when raised while producing or reading the content
     * @throws ExtractException (unchecked) wrapping any other exception
     */
    protected String getContent(ContentWriter contentWriter, String str, boolean z) throws TikaException {
        final String encoding = str == null ? Constants.UTF_8 : str;
        File spillFile = null;
        try (DeferredFileOutputStream buffer =
                new DeferredFileOutputStream(this.memorySize, "tika", ".tmp", SystemUtils.getJavaIoTmpDir())) {
            try {
                final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(buffer, encoding));
                contentWriter.accept(writer);
                writer.flush();
            } finally {
                // Remember the spill file even if writing failed part-way, so the outer
                // finally can still clean it up.
                if (!buffer.isInMemory()) {
                    spillFile = buffer.getFile();
                }
            }

            try (InputStreamReader reader = new InputStreamReader(getContentStream(buffer), encoding)) {
                if (z) {
                    return TextUtil.normalizeText(reader)
                            .initialCapacity(this.initialBufferSize)
                            .maxAlphanumTermSize(this.maxAlphanumTermSize)
                            .maxSymbolTermSize(this.maxSymbolTermSize)
                            .duplicateTermRemoved(this.replaceDuplication)
                            .spaceChars(this.spaceChars)
                            .execute();
                }
                return ReaderUtil.readText(reader);
            }
        } catch (TikaException e) {
            throw e;
        } catch (Exception e) {
            throw new ExtractException("Failed to read a content.", e);
        } finally {
            // Runs after the reader and the buffer have been closed. spillFile is null when
            // everything stayed in memory, which the pre-existing success path already passed here.
            FileUtil.deleteInBackground(spillFile);
        }
    }

    /**
     * Creates the Tika {@link Metadata} seeded with what is already known about the input.
     *
     * @param str  resource name, stored under {@code "resourceName"} when not empty
     * @param str2 content type, stored under {@code "Content-Type"} when not blank
     * @param str3 content encoding, stored under {@code "Content-Encoding"} when not blank
     * @param str4 password, added under {@code FILE_PASSWORD} when not {@code null}
     * @return the new metadata object
     */
    protected Metadata createMetadata(String str, String str2, String str3, String str4) {
        final Metadata meta = new Metadata();

        // Resource name is only checked for emptiness; the other two must be non-blank.
        if (StringUtil.isNotEmpty(str)) {
            meta.set("resourceName", str);
        }
        if (StringUtil.isNotBlank(str2)) {
            meta.set("Content-Type", str2);
        }
        if (StringUtil.isNotBlank(str3)) {
            meta.set("Content-Encoding", str3);
        }

        // Any non-null password is recorded, including an empty one.
        if (str4 != null) {
            meta.add(FILE_PASSWORD, str4);
        }

        if (logger.isDebugEnabled()) {
            logger.debug("metadata: {}", meta);
        }
        return meta;
    }

    /**
     * Sets the charset name used to decode buffered output bytes into strings before they are
     * logged (it is passed to {@code new String(bytes, outputEncoding)} in the extraction method).
     *
     * @param str the charset name
     */
    public void setOutputEncoding(String str) {
        this.outputEncoding = str;
    }

    /**
     * Sets whether the extraction method falls back to reading the input line by line as plain
     * text when the content obtained so far is blank.
     *
     * @param z {@code true} to enable the plain-text fallback
     */
    public void setReadAsTextIfFailed(boolean z) {
        this.readAsTextIfFailed = z;
    }

    /**
     * Sets the {@code maxCompressionRatio} field.
     * <p>
     * NOTE(review): the place where this value is consumed is not in this section of the file;
     * presumably it is forwarded to Tika's {@code SecureContentHandler} - confirm.
     *
     * @param j the new value
     */
    public void setMaxCompressionRatio(long j) {
        this.maxCompressionRatio = j;
    }

    /**
     * Sets the {@code maxUncompressionSize} field.
     * <p>
     * NOTE(review): the place where this value is consumed is not in this section of the file;
     * presumably it is forwarded to Tika's {@code SecureContentHandler} - confirm.
     *
     * @param j the new value
     */
    public void setMaxUncompressionSize(long j) {
        this.maxUncompressionSize = j;
    }

    /**
     * Sets the value passed as {@code initialCapacity} to {@code TextUtil.normalizeText} when
     * {@code getContent} normalizes text.
     *
     * @param i the initial capacity
     */
    public void setInitialBufferSize(int i) {
        this.initialBufferSize = i;
    }

    /**
     * Sets the flag passed as {@code duplicateTermRemoved} to {@code TextUtil.normalizeText} when
     * {@code getContent} normalizes text.
     *
     * @param z the flag value
     */
    public void setReplaceDuplication(boolean z) {
        this.replaceDuplication = z;
    }

    /**
     * Sets the threshold given to the {@code DeferredFileOutputStream} that {@code getContent}
     * uses to buffer extracted text.
     *
     * @param i the threshold passed to the stream's constructor
     */
    public void setMemorySize(int i) {
        this.memorySize = i;
    }

    /**
     * Sets the value passed as {@code maxAlphanumTermSize} to {@code TextUtil.normalizeText} when
     * {@code getContent} normalizes text.
     *
     * @param i the maximum alphanumeric term size
     */
    public void setMaxAlphanumTermSize(int i) {
        this.maxAlphanumTermSize = i;
    }

    /**
     * Sets the value passed as {@code maxSymbolTermSize} to {@code TextUtil.normalizeText} when
     * {@code getContent} normalizes text.
     *
     * @param i the maximum symbol term size
     */
    public void setMaxSymbolTermSize(int i) {
        this.maxSymbolTermSize = i;
    }

    /**
     * Sets the array passed as {@code spaceChars} to {@code TextUtil.normalizeText} when
     * {@code getContent} normalizes text.
     * <p>
     * The array is stored by reference, not copied.
     * NOTE(review): the elements are presumably character code points - confirm against
     * {@code TextUtil}.
     *
     * @param iArr the space characters
     */
    public void setSpaceChars(int[] iArr) {
        this.spaceChars = iArr;
    }

    /**
     * Sets the {@link TikaConfig} held by this extractor.
     * <p>
     * NOTE(review): where the configuration is consumed is not visible in this section of the
     * file.
     *
     * @param tikaConfig the Tika configuration
     */
    public void setTikaConfig(TikaConfig tikaConfig) {
        this.tikaConfig = tikaConfig;
    }
}
