package org.codelibs.fess.crawler.extractor;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.Map;
import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.io.CopyUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.container.CrawlerContainer;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.ExtractException;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.helper.ContentLengthHelper;
import org.codelibs.fess.crawler.helper.MimeTypeHelper;

/* loaded from: input_file:org/codelibs/fess/crawler/extractor/ExtractorBuilder.class */
/**
 * Fluent builder that buffers an input stream, picks an {@link Extractor} for it and
 * runs the extraction.
 *
 * <p>The content of the input stream is first copied into a
 * {@link DeferredFileOutputStream}; {@code cacheFileSize} is passed to it as the
 * threshold, and a temporary file with prefix {@code "fess-extractor-"} and suffix
 * {@code ".out"} is requested for content beyond that threshold. Any such temporary
 * file is removed before {@link #extract()} returns or throws.</p>
 */
public class ExtractorBuilder {
    private static final Logger logger = LogManager.getLogger(ExtractorBuilder.class);

    /** Source of the content to extract; it is read by {@link #extract()}. */
    private final InputStream in;

    /** Parameters handed unchanged to {@link Extractor#getText(InputStream, Map)}. */
    private final Map<String, String> params;

    /** Container used to look up the factory, helpers and the default extractor. */
    private final CrawlerContainer crawlerContainer;

    /** MIME type supplied by the caller; may be blank, in which case it is detected. */
    private String mimeType;

    /** File name passed to the MIME type helper during detection. */
    private String filename;

    /** Threshold passed to the {@link DeferredFileOutputStream} that buffers the content. */
    private int cacheFileSize = 1000000;

    /** Component name of the extractor used when no extractor matches the MIME type. */
    private String extractorName = "tikaExtractor";

    /** Maximum accepted content length; a negative value means "ask the contentLengthHelper". */
    private long maxContentLength = -1;

    /**
     * Creates a builder for the given content.
     *
     * @param crawlerContainer container used to resolve components by name
     * @param inputStream content to extract
     * @param map parameters forwarded to the extractor
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public ExtractorBuilder(CrawlerContainer crawlerContainer, InputStream inputStream, Map<String, String> map) {
        this.crawlerContainer = crawlerContainer;
        this.in = inputStream;
        this.params = map;
    }

    /**
     * Sets the MIME type used to select the extractor.
     *
     * @param str MIME type, or a blank value to rely on detection
     * @return this builder
     */
    public ExtractorBuilder mimeType(String str) {
        this.mimeType = str;
        return this;
    }

    /**
     * Sets the file name given to the MIME type helper when the type is detected.
     *
     * @param str file name
     * @return this builder
     */
    public ExtractorBuilder filename(String str) {
        this.filename = str;
        return this;
    }

    /**
     * Sets the component name of the fallback extractor.
     *
     * @param str component name looked up in the crawler container
     * @return this builder
     */
    public ExtractorBuilder extractorName(String str) {
        this.extractorName = str;
        return this;
    }

    /**
     * Sets the maximum accepted content length.
     *
     * @param j maximum length; a negative value defers to the contentLengthHelper
     * @return this builder
     */
    public ExtractorBuilder maxContentLength(long j) {
        this.maxContentLength = j;
        return this;
    }

    /**
     * Sets the threshold passed to the buffering {@link DeferredFileOutputStream}.
     *
     * @param i threshold value
     * @return this builder
     */
    public ExtractorBuilder cacheFileSize(int i) {
        this.cacheFileSize = i;
        return this;
    }

    /**
     * Buffers the input, selects an extractor and extracts the content.
     *
     * <p>If the buffered content is empty, an {@link ExtractData} with an empty string is
     * returned without calling the extractor.</p>
     *
     * @return the extracted data
     * @throws MaxLengthExceededException if the buffered content is longer than the maximum length
     * @throws CrawlingAccessException rethrown unchanged when raised during extraction
     * @throws ExtractException wrapping any other {@link Exception}
     */
    public ExtractData extract() {
        final ExtractorFactory extractorFactory = (ExtractorFactory) this.crawlerContainer.getComponent("extractorFactory");
        // Kept outside the try-with-resources so the finally block can still reach the
        // stream (and its temp file) after the resource has been closed.
        DeferredFileOutputStream buffered = null;
        try {
            try (DeferredFileOutputStream out =
                    new DeferredFileOutputStream(this.cacheFileSize, "fess-extractor-", ".out", (File) null)) {
                buffered = out;
                CopyUtil.copy(this.in, out);
                out.flush();

                final Extractor extractor = resolveExtractor(extractorFactory, out);

                if (this.maxContentLength < 0) {
                    this.maxContentLength =
                            ((ContentLengthHelper) this.crawlerContainer.getComponent("contentLengthHelper")).getMaxLength(this.mimeType);
                }
                final long contentLength = getContentLength(out);
                if (contentLength > this.maxContentLength) {
                    throw new MaxLengthExceededException("Content length (" + contentLength
                            + " bytes) exceeds the maximum allowed length (" + this.maxContentLength + " bytes).");
                }
                if (contentLength == 0) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("The content length is 0.");
                    }
                    return new ExtractData("");
                }

                try (InputStream contentInputStream = getContentInputStream(out)) {
                    return extractor.getText(contentInputStream, this.params);
                }
            } finally {
                // Runs after the stream is closed, on success and on every failure path.
                deleteTempFile(buffered);
            }
        } catch (CrawlingAccessException e) {
            throw e;
        } catch (Exception e) {
            throw new ExtractException("Failed to extract data.", e);
        }
    }

    /**
     * Chooses the extractor: by the configured MIME type, then by the detected MIME type,
     * and finally the component named by {@code extractorName}.
     */
    private Extractor resolveExtractor(ExtractorFactory extractorFactory, DeferredFileOutputStream out) throws IOException {
        Extractor extractor = StringUtil.isBlank(this.mimeType) ? null : extractorFactory.getExtractor(this.mimeType);
        if (extractor != null) {
            if (logger.isDebugEnabled()) {
                logger.debug("Using {} for MIME type {}", extractor.getClass().getName(), this.mimeType);
            }
            return extractor;
        }

        final String detectedMimeType = getMimeType(out);
        extractor = extractorFactory.getExtractor(detectedMimeType);
        if (extractor != null) {
            if (logger.isDebugEnabled()) {
                logger.debug("Using {} for detected MIME type {}, not {}", extractor.getClass().getName(), detectedMimeType,
                        this.mimeType);
            }
            return extractor;
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Using default extractor {} for MIME type {}", this.extractorName, this.mimeType);
        }
        return (Extractor) this.crawlerContainer.getComponent(this.extractorName);
    }

    /** Deletes the file backing the stream, if the content was written to one; failures are logged. */
    private static void deleteTempFile(DeferredFileOutputStream out) {
        if (out == null || out.isInMemory()) {
            return;
        }
        final File file = out.getFile();
        if (file == null) {
            return;
        }
        try {
            Files.delete(file.toPath());
        } catch (IOException e) {
            logger.warn("Failed to delete {}.", file.getAbsolutePath(), e);
        }
    }

    /**
     * Detects the MIME type of the buffered content using the {@code mimeTypeHelper}
     * component and the configured file name.
     *
     * @param deferredFileOutputStream stream holding the buffered content
     * @return the content type reported by the helper
     * @throws IOException if the buffered content cannot be opened or closed
     */
    protected String getMimeType(DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        final MimeTypeHelper mimeTypeHelper = (MimeTypeHelper) this.crawlerContainer.getComponent("mimeTypeHelper");
        try (InputStream contentInputStream = getContentInputStream(deferredFileOutputStream)) {
            return mimeTypeHelper.getContentType(contentInputStream, this.filename);
        }
    }

    /**
     * Opens a new stream over the buffered content, reading from memory or from the
     * backing file depending on where the content is held.
     *
     * @param deferredFileOutputStream stream holding the buffered content
     * @return a new input stream; the caller is responsible for closing it
     * @throws IOException if the backing file cannot be opened
     */
    protected InputStream getContentInputStream(DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        if (deferredFileOutputStream.isInMemory()) {
            return new ByteArrayInputStream(deferredFileOutputStream.getData());
        }
        return new FileInputStream(deferredFileOutputStream.getFile());
    }

    /**
     * Returns the size of the buffered content.
     *
     * @param deferredFileOutputStream stream holding the buffered content
     * @return length of the in-memory data, or the length of the backing file
     * @throws IOException declared for overriding implementations
     */
    protected long getContentLength(DeferredFileOutputStream deferredFileOutputStream) throws IOException {
        if (deferredFileOutputStream.isInMemory()) {
            return deferredFileOutputStream.getData().length;
        }
        return deferredFileOutputStream.getFile().length();
    }
}
