package org.codelibs.fess.ds.wikipedia;

import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.ds.AbstractDataStore;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.ds.wikipedia.exception.ParserStoppedException;
import org.codelibs.fess.ds.wikipedia.support.WikiXMLSAXParser;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.exception.DataStoreException;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.util.ComponentUtil;

/* loaded from: input_file:org/codelibs/fess/ds/wikipedia/WikipediaDataStore.class */
/**
 * Data store that reads a Wikipedia XML dump and hands every page to the
 * index update callback as one document.
 *
 * <p>Recognised data store parameters (all optional):</p>
 * <ul>
 * <li>{@code url} - location of the dump; defaults to the latest English articles dump</li>
 * <li>{@code limit} - stop after this many pages; {@code 0} or less means no limit</li>
 * <li>{@code total_entity_size_limit} - value passed to the SAX parser wrapper</li>
 * <li>{@code max_digest_length} - maximum width used to abbreviate the page text into {@code digest}</li>
 * </ul>
 */
public class WikipediaDataStore extends AbstractDataStore {
    private static final Logger logger = LogManager.getLogger(WikipediaDataStore.class);
    private static final String DEFAULT_WIKIPEDIA_URL = "http://download.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2";

    /**
     * Returns the name of this data store.
     *
     * @return the simple class name of the runtime class
     */
    protected String getName() {
        return getClass().getSimpleName();
    }

    /**
     * Parses the configured Wikipedia dump and stores one document per page.
     *
     * <p>For every page a parameter map is built from the data store parameters plus
     * the page fields ({@code id}, {@code title}, {@code content}, {@code encodedTitle},
     * {@code digest}, {@code format}, {@code model}, {@code timestamp}). Each entry of
     * {@code map} is then evaluated against that parameter map and every non-null
     * result is put into the document, which starts as a copy of {@code map2}.</p>
     *
     * <p>A failure while processing a page is logged and recorded through
     * {@link FailureUrlService}; parsing then continues with the next page, unless the
     * failure is a {@link DataStoreCrawlingException} flagged as aborted, in which case
     * parsing stops. Parsing also stops once {@code limit} pages have been handled.</p>
     *
     * @param dataConfig configuration of this data store; its id prefixes the stats key
     * @param indexUpdateCallback receiver of the generated documents
     * @param dataStoreParams data store parameters, see the class description
     * @param map document field name to script template, evaluated per page
     * @param map2 default values copied into every document
     */
    protected void storeData(DataConfig dataConfig, IndexUpdateCallback indexUpdateCallback, DataStoreParams dataStoreParams, Map<String, String> map, Map<String, Object> map2) {
        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
        final long readInterval = getReadInterval(dataStoreParams);
        final URL wikipediaUrl = getWikipediaUrl(dataStoreParams);
        final int limit = Integer.parseInt(dataStoreParams.getAsString("limit", "0"));
        final int totalEntitySizeLimit = Integer.parseInt(dataStoreParams.getAsString("total_entity_size_limit", "100000000"));
        final int maxDigestLength = Integer.parseInt(dataStoreParams.getAsString("max_digest_length", "100"));
        final String scriptType = getScriptType(dataStoreParams);
        logger.info("url: {}", wikipediaUrl);

        final AtomicInteger counter = new AtomicInteger();
        final WikiXMLSAXParser parser = new WikiXMLSAXParser(wikipediaUrl);
        parser.setTotalEntitySizeLimit(totalEntitySizeLimit);
        parser.setPageCallback(wikiPage -> {
            final CrawlerStatsHelper.StatsKeyObject statsKey =
                    new CrawlerStatsHelper.StatsKeyObject(dataConfig.getId() + "#" + wikiPage.getId());
            dataStoreParams.put("crawler.stats.key", statsKey);
            final Map<String, Object> dataMap = new HashMap<>(map2);
            final Map<String, Object> resultMap = new LinkedHashMap<>();
            try {
                crawlerStatsHelper.begin(statsKey);
                resultMap.putAll(dataStoreParams.asMap());
                final String title = stripTitle(wikiPage.getTitle());
                final String text = wikiPage.getText();
                resultMap.put("id", wikiPage.getId());
                resultMap.put("title", title);
                resultMap.put("content", text);
                resultMap.put("encodedTitle", URLEncoder.encode(title, "UTF-8"));
                resultMap.put("digest", StringUtils.abbreviate(text, maxDigestLength));
                resultMap.put("format", wikiPage.getFormat());
                resultMap.put("model", wikiPage.getModel());
                resultMap.put("timestamp", wikiPage.getTimestamp());
                crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.PREPARED);
                if (logger.isDebugEnabled()) {
                    for (final Map.Entry<String, Object> entry : resultMap.entrySet()) {
                        logger.debug("{}={}", entry.getKey(), entry.getValue());
                    }
                }

                // Expose the document under construction to the scripts.
                final Map<String, Object> crawlingContext = new HashMap<>();
                crawlingContext.put("doc", dataMap);
                resultMap.put("crawlingContext", crawlingContext);
                for (final Map.Entry<String, String> entry : map.entrySet()) {
                    final Object convertValue = convertValue(scriptType, entry.getValue(), resultMap);
                    if (convertValue != null) {
                        dataMap.put(entry.getKey(), convertValue);
                    }
                }
                crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.EVALUATED);
                if (logger.isDebugEnabled()) {
                    for (final Map.Entry<String, Object> entry : dataMap.entrySet()) {
                        logger.debug("{}={}", entry.getKey(), entry.getValue());
                    }
                }

                final Object url = dataMap.get("url");
                if (url instanceof String) {
                    statsKey.setUrl((String) url);
                }
                indexUpdateCallback.store(dataStoreParams, dataMap);
                crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.FINISHED);
            } catch (final CrawlingAccessException e) {
                // Must precede the Throwable handler: otherwise access exceptions are
                // swallowed generically and an aborted crawl never stops the parser.
                logger.warn("Crawling Access Exception at : {}", dataMap, e);
                Throwable target = e;
                if (target instanceof MultipleCrawlingAccessException) {
                    final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
                    if (causes.length > 0) {
                        // Report the last collected cause.
                        target = causes[causes.length - 1];
                    }
                }
                final Throwable cause = target.getCause();
                final String errorName = cause != null ? cause.getClass().getCanonicalName() : target.getClass().getCanonicalName();
                if ((target instanceof DataStoreCrawlingException) && ((DataStoreCrawlingException) target).aborted()) {
                    throw new ParserStoppedException(wikiPage.getId());
                }
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, errorName, wikiPage.getId(), target);
                crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.ACCESS_EXCEPTION);
            } catch (final Throwable t) {
                logger.warn("Crawling Access Exception at : {}", dataMap, t);
                final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
                failureUrlService.store(dataConfig, t.getClass().getCanonicalName(), wikiPage.getId(), t);
                if (readInterval > 0) {
                    sleep(readInterval);
                }
                crawlerStatsHelper.record(statsKey, CrawlerStatsHelper.StatsAction.EXCEPTION);
            } finally {
                // Exactly one done() per page, whichever path was taken.
                crawlerStatsHelper.done(statsKey);
            }

            if (limit > 0 && counter.incrementAndGet() >= limit) {
                logger.info("Wikipedia crawler is stopped. ({} > {})", counter.get(), limit);
                throw new ParserStoppedException(wikiPage.getId());
            }
        });

        try {
            parser.parse();
        } catch (final ParserStoppedException e) {
            // Thrown by the page callback to end parsing early; not an error.
            logger.debug("Wikipedia crawler is stopped at {}", e.getMessage(), e);
        }
    }

    /**
     * Resolves the dump location from the {@code url} parameter.
     *
     * @param dataStoreParams data store parameters
     * @return the configured URL, or the default dump URL when none is configured
     * @throws DataStoreException if the configured value is not a valid URL
     */
    private URL getWikipediaUrl(DataStoreParams dataStoreParams) {
        try {
            return new URL(dataStoreParams.getAsString("url", DEFAULT_WIKIPEDIA_URL));
        } catch (final MalformedURLException e) {
            throw new DataStoreException("Could not parse Wikipedia URL.", e);
        }
    }

    /**
     * Removes trailing spaces and line feeds from a page title.
     *
     * @param str the raw title
     * @return the title without trailing {@code ' '} and {@code '\n'} characters
     */
    private String stripTitle(String str) {
        final StringBuilder sb = new StringBuilder();
        sb.append(str);
        int end = sb.length();
        while (end > 0 && (sb.charAt(end - 1) == '\n' || sb.charAt(end - 1) == ' ')) {
            end--;
        }
        sb.setLength(end);
        return sb.toString();
    }
}
