package org.codelibs.fess.ds.csv;

import com.orangesignal.csv.CsvConfig;
import com.orangesignal.csv.CsvReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.io.CloseableUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.ds.AbstractDataStore;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.exception.DataStoreException;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.util.ComponentUtil;

/* loaded from: input_file:org/codelibs/fess/ds/csv/CsvDataStore.class */
public class CsvDataStore extends AbstractDataStore {
    private static final Logger logger = LogManager.getLogger(CsvDataStore.class);

    // ---- Parameter keys consumed by buildCsvConfig(...) ----

    /** Key of the escape character; the first character of the value is used. */
    protected static final String ESCAPE_CHARACTER_PARAM = "escape_character";
    /** Key of the quote character; the first character of the value is used. */
    protected static final String QUOTE_CHARACTER_PARAM = "quote_character";
    /** Key of the separator; the value is Java-unescaped and its first character is used. */
    protected static final String SEPARATOR_CHARACTER_PARAM = "separator_character";
    /** Key of the number of lines to skip; the value is parsed as an int. */
    protected static final String SKIP_LINES_PARAM = "skip_lines";
    /** Key of a regular expression; it is compiled into a single ignore-line pattern. */
    protected static final String IGNORE_LINE_PATTERNS_PARAM = "ignore_line_patterns";
    /** Key of the boolean flag passed to {@code CsvConfig.setIgnoreEmptyLines}. */
    protected static final String IGNORE_EMPTY_LINES_PARAM = "ignore_empty_lines";
    /** Key of the boolean flag passed to {@code CsvConfig.setIgnoreTrailingWhitespaces}. */
    protected static final String IGNORE_TRAILING_WHITESPACES_PARAM = "ignore_trailing_whitespaces";
    /** Key of the boolean flag passed to {@code CsvConfig.setIgnoreLeadingWhitespaces}. */
    protected static final String IGNORE_LEADING_WHITESPACES_PARAM = "ignore_leading_whitespaces";
    /** Key of the string passed to {@code CsvConfig.setNullString}. */
    protected static final String NULL_STRING_PARAM = "null_string";
    /** Key of the string passed to {@code CsvConfig.setBreakString}. */
    protected static final String BREAK_STRING_PARAM = "break_string";
    /** Key of the boolean flag passed to {@code CsvConfig.setEscapeDisabled}. */
    protected static final String ESCAPE_DISABLED_PARAM = "escape_disabled";
    /** Key of the boolean flag passed to {@code CsvConfig.setQuoteDisabled}. */
    protected static final String QUOTE_DISABLED_PARAM = "quote_disabled";

    // ---- Parameter keys that select and decode the input files ----

    /** Key of the charset name used to read CSV files; "UTF-8" is used when blank. */
    protected static final String CSV_FILE_ENCODING_PARAM = "file_encoding";
    /** Key of a comma-separated list of CSV file paths; takes precedence over {@link #CSV_DIRS_PARAM}. */
    protected static final String CSV_FILES_PARAM = "files";
    /** Key of a comma-separated list of directories; consulted only when {@link #CSV_FILES_PARAM} is blank. */
    protected static final String CSV_DIRS_PARAM = "directories";
    /** Key of the boolean flag telling whether the first row of each file holds column names. */
    protected static final String HAS_HEADER_LINE_PARAM = "has_header_line";

    /** Prefix of the positional column keys: processCsv stores each column under "cell" plus its 1-based index. */
    protected static final String CELL_PREFIX = "cell";

    /**
     * File-name suffixes accepted by isCsvFile. Names are lower-cased before
     * matching, so entries only match when they are lower-case themselves.
     */
    public String[] csvFileSuffixs = {".csv", ".tsv"};

    /**
     * Returns the name of this data store.
     *
     * @return the simple name of the runtime class of this instance
     */
    protected String getName() {
        final Class<?> runtimeType = this.getClass();
        return runtimeType.getSimpleName();
    }

    /**
     * Resolves the CSV files to crawl from the data store parameters.
     *
     * <p>The {@code files} parameter (comma-separated file paths) is used when it
     * is not blank; otherwise the {@code directories} parameter (comma-separated
     * directory paths) is scanned. Only entries accepted by
     * {@link #isCsvFile(File, String, DataStoreParams)} are returned. Files found
     * in a directory are ordered by ascending last-modified time.
     *
     * @param dataStoreParams the data store parameters
     * @return the matching files, possibly empty
     * @throws DataStoreException if both {@code files} and {@code directories} are blank
     */
    protected List<File> getCsvFileList(DataStoreParams dataStoreParams) {
        final List<File> csvFiles = new ArrayList<>();
        String value = dataStoreParams.getAsString(CSV_FILES_PARAM);
        if (StringUtil.isBlank(value)) {
            value = dataStoreParams.getAsString(CSV_DIRS_PARAM);
            if (StringUtil.isBlank(value)) {
                throw new DataStoreException("files and directories are blank.");
            }
            logger.info("{}={}", CSV_DIRS_PARAM, value);
            for (final String path : value.split(",")) {
                final File dir = new File(path);
                if (dir.isDirectory()) {
                    StreamUtil.stream(dir.listFiles()).of(stream -> stream
                            .filter(f -> isCsvFile(f.getParentFile(), f.getName(), dataStoreParams))
                            // Long.compare instead of casting the difference to int: the cast
                            // truncates and can flip the sign for timestamps far apart.
                            .sorted((f1, f2) -> Long.compare(f1.lastModified(), f2.lastModified()))
                            .forEach(csvFiles::add));
                } else {
                    logger.warn("{} is not a directory.", path);
                }
            }
        } else {
            logger.info("{}={}", CSV_FILES_PARAM, value);
            for (final String path : value.split(",")) {
                final File candidate = new File(path);
                if (candidate.isFile() && isCsvFile(candidate.getParentFile(), candidate.getName(), dataStoreParams)) {
                    csvFiles.add(candidate);
                } else {
                    logger.warn("{} is not found.", path);
                }
            }
        }
        if (csvFiles.isEmpty() && logger.isDebugEnabled()) {
            logger.debug("No csv files in {}", value);
        }
        return csvFiles;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether a file name ends with one of the configured CSV suffixes.
     * The name is lower-cased with {@link Locale#ROOT} before it is compared.
     *
     * @param file the parent directory of the candidate (not used by this implementation)
     * @param str the file name to test
     * @param dataStoreParams the data store parameters (not used by this implementation)
     * @return {@code true} if the lower-cased name ends with an entry of {@code csvFileSuffixs}
     */
    public boolean isCsvFile(File file, String str, DataStoreParams dataStoreParams) {
        final String normalizedName = str.toLowerCase(Locale.ROOT);
        final String[] suffixes = this.csvFileSuffixs;
        for (int idx = 0; idx < suffixes.length; idx++) {
            if (normalizedName.endsWith(suffixes[idx])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the charset name used to read CSV files.
     *
     * @param dataStoreParams the data store parameters
     * @return the value of the {@code file_encoding} parameter, or {@code "UTF-8"} when it is blank
     */
    protected String getCsvFileEncoding(DataStoreParams dataStoreParams) {
        final String configured = dataStoreParams.getAsString(CSV_FILE_ENCODING_PARAM);
        if (StringUtil.isBlank(configured)) {
            return "UTF-8";
        }
        return configured;
    }

    /**
     * Tells whether the first row of each CSV file holds column names.
     *
     * @param dataStoreParams the data store parameters
     * @return {@code true} only when the {@code has_header_line} parameter is
     *         not blank and equals "true" ignoring case
     */
    protected boolean hasHeaderLine(DataStoreParams dataStoreParams) {
        final String flag = dataStoreParams.getAsString(HAS_HEADER_LINE_PARAM);
        // Boolean.parseBoolean never throws, so no exception handling is needed here.
        return StringUtil.isNotBlank(flag) && Boolean.parseBoolean(flag);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Crawls every CSV file selected by the parameters and hands each row to
     * {@link #processCsv}. Logs a warning and does nothing when no file matches.
     *
     * @param dataConfig the data store configuration being crawled
     * @param indexUpdateCallback the callback that receives each generated document
     * @param dataStoreParams the data store parameters
     * @param map field name to script expression
     * @param map2 default field values copied into every document
     */
    public void storeData(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, DataStoreParams dataStoreParams, Map<String, String> map, Map<String, Object> map2) {
        final long readInterval = getReadInterval(dataStoreParams);
        final List<File> targets = getCsvFileList(dataStoreParams);
        if (!targets.isEmpty()) {
            final String encoding = getCsvFileEncoding(dataStoreParams);
            final boolean headerLine = hasHeaderLine(dataStoreParams);
            final CsvConfig csvConfig = buildCsvConfig(dataStoreParams);
            for (final File target : targets) {
                processCsv(dataConfig, indexUpdateCallback, dataStoreParams, map, map2, csvConfig, target, readInterval, encoding, headerLine);
            }
        } else {
            logger.warn("No CSV file.");
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Reads one CSV file row by row and stores one document per non-empty row.
     *
     * <p>For each row, the column values are exposed to the field scripts under
     * {@code cell1}, {@code cell2}, ... and, when a header row was read, also under
     * the non-blank header names. The scripts in {@code map} are evaluated against
     * those values and the results are stored through {@code indexUpdateCallback}.
     * Rows whose columns are all blank are discarded.
     *
     * <p>A failure while handling a single row is logged, recorded through
     * {@link FailureUrlService} and does not stop the file, unless it is a
     * {@link DataStoreCrawlingException} that reports {@code aborted()}, in which
     * case reading stops after that row. The whole row pipeline (including script
     * evaluation and the store call) runs inside the guarded region so that these
     * handlers actually see the failures they are written for, and the stats key is
     * completed exactly once per row in {@code finally}.
     *
     * @param dataConfig the data store configuration being crawled
     * @param indexUpdateCallback the callback that receives each generated document
     * @param dataStoreParams the data store parameters; the current stats key is put into it for every row
     * @param map field name to script expression
     * @param map2 default field values copied into every document
     * @param csvConfig the CSV parser configuration
     * @param file the CSV file to read
     * @param j milliseconds to sleep after each stored row; no sleep when not positive
     * @param str the charset name used to decode the file
     * @param z whether the first row is a header row
     * @throws DataStoreException if the file cannot be opened or read
     */
    public void processCsv(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, DataStoreParams dataStoreParams, Map<String, String> map, Map<String, Object> map2, CsvConfig csvConfig, File file, long j, String str, boolean z) {
        logger.info("Loading {}", file.getAbsolutePath());
        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
        final String scriptType = getScriptType(dataStoreParams);
        CsvReader csvReader = null;
        try {
            csvReader = new CsvReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), str)), csvConfig);
            final List<String> headers = z ? csvReader.readValues() : null;
            boolean keepReading = true;
            List<String> row;
            // The row is read before the flags are checked, matching the original evaluation order.
            while ((row = csvReader.readValues()) != null && keepReading && this.alive) {
                final CrawlerStatsHelper.StatsKeyObject statsKey =
                        new CrawlerStatsHelper.StatsKeyObject(file.getAbsolutePath() + "#" + csvReader.getLineNumber());
                dataStoreParams.put("crawler.stats.key", statsKey);
                final Map<String, Object> dataMap = new HashMap<>(map2);
                final Map<String, Object> resultMap = new LinkedHashMap<>();
                boolean stored = false;
                try {
                    crawlerStatsHelper.begin(statsKey);
                    resultMap.putAll(dataStoreParams.asMap());
                    resultMap.put("csvfile", file.getAbsolutePath());
                    resultMap.put("csvfilename", file.getName());
                    resultMap.put("crawlingConfig", dataConfig);

                    boolean foundValues = false;
                    for (int i = 0; i < row.size(); i++) {
                        String cell = row.get(i);
                        if (cell == null) {
                            cell = "";
                        }
                        if (StringUtil.isNotBlank(cell)) {
                            foundValues = true;
                        }
                        if (headers != null && headers.size() > i) {
                            final String header = headers.get(i);
                            if (StringUtil.isNotBlank(header)) {
                                resultMap.put(header, cell);
                            }
                        }
                        resultMap.put(CELL_PREFIX + (i + 1), cell);
                    }
                    if (!foundValues) {
                        logger.debug("No data in line: {}", resultMap);
                        crawlerStatsHelper.discard(statsKey);
                        continue; // done(statsKey) still runs in finally
                    }

                    crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.PREPARED);
                    if (logger.isDebugEnabled()) {
                        for (final Map.Entry<String, Object> entry : resultMap.entrySet()) {
                            logger.debug("{}={}", entry.getKey(), entry.getValue());
                        }
                    }

                    // Scripts can reach the document under construction through crawlingContext.doc.
                    final Map<String, Object> crawlingContext = new HashMap<>();
                    crawlingContext.put("doc", dataMap);
                    resultMap.put("crawlingContext", crawlingContext);
                    for (final Map.Entry<String, String> entry : map.entrySet()) {
                        final Object converted = convertValue(scriptType, entry.getValue(), resultMap);
                        if (converted != null) {
                            dataMap.put(entry.getKey(), converted);
                        }
                    }

                    crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.EVALUATED);
                    if (logger.isDebugEnabled()) {
                        for (final Map.Entry<String, Object> entry : dataMap.entrySet()) {
                            logger.debug("{}={}", entry.getKey(), entry.getValue());
                        }
                    }

                    final Object url = dataMap.get("url");
                    if (url instanceof String) {
                        statsKey.setUrl((String) url);
                    }

                    indexUpdateCallback.store(dataStoreParams, dataMap);
                    crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.FINISHED);
                    stored = true;
                } catch (CrawlingAccessException e) {
                    logger.warn("Crawling Access Exception at : {}", dataMap, e);

                    // Report the last cause of a multi-cause failure.
                    Throwable target = e;
                    if (target instanceof MultipleCrawlingAccessException) {
                        final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
                        if (causes.length > 0) {
                            target = causes[causes.length - 1];
                        }
                    }

                    final Throwable cause = target.getCause();
                    final String errorName =
                            cause != null ? cause.getClass().getCanonicalName() : target.getClass().getCanonicalName();

                    final String failureUrl;
                    if (target instanceof DataStoreCrawlingException) {
                        final DataStoreCrawlingException dce = (DataStoreCrawlingException) target;
                        failureUrl = dce.getUrl();
                        if (dce.aborted()) {
                            keepReading = false;
                        }
                    } else {
                        failureUrl = file.getAbsolutePath() + ":" + csvReader.getLineNumber();
                    }
                    ((FailureUrlService) ComponentUtil.getComponent(FailureUrlService.class)).store(dataConfig, errorName, failureUrl, target);
                    crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.ACCESS_EXCEPTION);
                } catch (Throwable t) {
                    logger.warn("Crawling Access Exception at : {}", dataMap, t);
                    ((FailureUrlService) ComponentUtil.getComponent(FailureUrlService.class)).store(dataConfig,
                            t.getClass().getCanonicalName(), file.getAbsolutePath() + ":" + csvReader.getLineNumber(), t);
                    crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.EXCEPTION);
                } finally {
                    crawlerStatsHelper.done(statsKey);
                }

                // Throttle only after a row that was actually stored.
                if (stored && j > 0) {
                    sleep(j);
                }
            }
        } catch (Exception e) {
            throw new DataStoreException("Failed to crawl data when reading csv file.", e);
        } finally {
            CloseableUtil.closeQuietly(csvReader);
        }
    }

    /**
     * Builds the CSV parser configuration from the data store parameters.
     *
     * <p>Every option is applied only when its parameter is present and not blank.
     * An option whose value cannot be applied (for example a non-numeric
     * {@code skip_lines} or an invalid {@code ignore_line_patterns} regular
     * expression) is logged as a warning and left at the {@link CsvConfig} default;
     * the remaining options are still applied.
     *
     * @param dataStoreParams the data store parameters
     * @return a new {@link CsvConfig}
     */
    protected CsvConfig buildCsvConfig(DataStoreParams dataStoreParams) {
        final CsvConfig csvConfig = new CsvConfig();
        // The separator is Java-unescaped so that values such as "\t" can be configured.
        applyParam(dataStoreParams, SEPARATOR_CHARACTER_PARAM,
                v -> csvConfig.setSeparator(StringEscapeUtils.unescapeJava(v).charAt(0)));
        applyParam(dataStoreParams, QUOTE_CHARACTER_PARAM, v -> csvConfig.setQuote(v.charAt(0)));
        applyParam(dataStoreParams, ESCAPE_CHARACTER_PARAM, v -> csvConfig.setEscape(v.charAt(0)));
        applyParam(dataStoreParams, QUOTE_DISABLED_PARAM, v -> csvConfig.setQuoteDisabled(Boolean.parseBoolean(v)));
        applyParam(dataStoreParams, ESCAPE_DISABLED_PARAM, v -> csvConfig.setEscapeDisabled(Boolean.parseBoolean(v)));
        applyParam(dataStoreParams, BREAK_STRING_PARAM, v -> csvConfig.setBreakString(v));
        applyParam(dataStoreParams, NULL_STRING_PARAM, v -> csvConfig.setNullString(v));
        applyParam(dataStoreParams, IGNORE_LEADING_WHITESPACES_PARAM,
                v -> csvConfig.setIgnoreLeadingWhitespaces(Boolean.parseBoolean(v)));
        applyParam(dataStoreParams, IGNORE_TRAILING_WHITESPACES_PARAM,
                v -> csvConfig.setIgnoreTrailingWhitespaces(Boolean.parseBoolean(v)));
        applyParam(dataStoreParams, IGNORE_EMPTY_LINES_PARAM, v -> csvConfig.setIgnoreEmptyLines(Boolean.parseBoolean(v)));
        applyParam(dataStoreParams, IGNORE_LINE_PATTERNS_PARAM,
                v -> csvConfig.setIgnoreLinePatterns(new Pattern[] { Pattern.compile(v) }));
        applyParam(dataStoreParams, SKIP_LINES_PARAM, v -> csvConfig.setSkipLines(Integer.parseInt(v)));
        return csvConfig;
    }

    /**
     * Passes the value of {@code key} to {@code setter} when the parameter is
     * present and not blank; a failure inside the setter is logged and swallowed.
     */
    private void applyParam(DataStoreParams dataStoreParams, String key, Consumer<String> setter) {
        if (!dataStoreParams.containsKey(key)) {
            return;
        }
        final String value = dataStoreParams.getAsString(key);
        if (StringUtil.isBlank(value)) {
            return;
        }
        try {
            setter.accept(value);
        } catch (Exception e) {
            logger.warn("Failed to load {}", key, e);
        }
    }
}
