package org.codelibs.fess.crawler.helper;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.Constants;
import org.codelibs.fess.crawler.entity.SitemapFile;
import org.codelibs.fess.crawler.entity.SitemapSet;
import org.codelibs.fess.crawler.entity.SitemapUrl;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.SitemapsException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/codelibs/fess/crawler/helper/SitemapsHelper.class */
public class SitemapsHelper {
    private static final Logger logger = LogManager.getLogger(SitemapsHelper.class);
    protected int preloadSize = 512;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/codelibs/fess/crawler/helper/SitemapsHelper$XmlSitemapsHandler.class */
    /**
     * SAX handler that turns the {@code <url>} entries of an XML sitemap into
     * {@link SitemapUrl} objects collected in a {@link SitemapSet} whose type is
     * {@code SitemapSet.URLSET}.
     *
     * <p>Elements are matched on the third SAX name argument (the qualified
     * name). Text of {@code loc}, {@code lastmod}, {@code changefreq} and
     * {@code priority} elements is accumulated, trimmed and stored on the
     * {@code <url>} entry currently being built. Such an element appearing
     * outside of any {@code <url>} element is ignored.</p>
     */
    public static class XmlSitemapsHandler extends DefaultHandler {
        private static final String PRIORITY_ELEMENT = "priority";
        private static final String CHANGEFREQ_ELEMENT = "changefreq";
        private static final String LASTMOD_ELEMENT = "lastmod";
        private static final String LOC_ELEMENT = "loc";
        private static final String URL_ELEMENT = "url";

        /** Result being built; created in {@link #startDocument()}. */
        private SitemapSet sitemapSet;

        /** Entry for the {@code <url>} element currently open, or {@code null} outside of one. */
        private SitemapUrl sitemapUrl;

        /** Text collector for the tracked child element currently open, or {@code null}. */
        private StringBuilder buf;

        protected XmlSitemapsHandler() {
        }

        /**
         * Starts a fresh, empty result set of type {@code SitemapSet.URLSET}.
         */
        @Override
        public void startDocument() {
            this.sitemapSet = new SitemapSet();
            this.sitemapSet.setType(SitemapSet.URLSET);
        }

        /**
         * Opens a new entry on {@code <url>} and starts collecting text on any
         * tracked child element.
         *
         * @param str namespace URI (unused)
         * @param str2 local name (unused)
         * @param str3 qualified element name used for matching
         * @param attributes element attributes (unused)
         */
        @Override
        public void startElement(String str, String str2, String str3, Attributes attributes) {
            if (URL_ELEMENT.equals(str3)) {
                this.sitemapUrl = new SitemapUrl();
            } else if (isTextElement(str3)) {
                this.buf = new StringBuilder();
            }
        }

        /**
         * Appends character data to the current text collector, if one is active.
         *
         * @param cArr character buffer supplied by the parser
         * @param i offset of the first character
         * @param i2 number of characters
         */
        @Override
        public void characters(char[] cArr, int i, int i2) {
            if (this.buf != null) {
                // Append straight from the parser buffer; no temporary String needed.
                this.buf.append(cArr, i, i2);
            }
        }

        /**
         * Closes the current entry on {@code </url>}, or stores the collected
         * text on the current entry when a tracked child element ends.
         *
         * @param str namespace URI (unused)
         * @param str2 local name (unused)
         * @param str3 qualified element name used for matching
         */
        @Override
        public void endElement(String str, String str2, String str3) {
            if (URL_ELEMENT.equals(str3)) {
                if (this.sitemapUrl != null) {
                    this.sitemapSet.addSitemap(this.sitemapUrl);
                }
                this.sitemapUrl = null;
                return;
            }
            // Untracked elements must leave an active collector untouched.
            if (this.buf == null || !isTextElement(str3)) {
                return;
            }
            final String value = this.buf.toString().trim();
            this.buf = null;
            if (this.sitemapUrl == null) {
                // Tracked element outside of <url>: there is no entry to attach
                // the value to, so drop it instead of failing the whole parse.
                return;
            }
            if (LOC_ELEMENT.equals(str3)) {
                this.sitemapUrl.setLoc(value);
            } else if (LASTMOD_ELEMENT.equals(str3)) {
                this.sitemapUrl.setLastmod(value);
            } else if (CHANGEFREQ_ELEMENT.equals(str3)) {
                this.sitemapUrl.setChangefreq(value);
            } else {
                // Only PRIORITY_ELEMENT remains among the tracked names.
                this.sitemapUrl.setPriority(value);
            }
        }

        @Override
        public void endDocument() {
        }

        /**
         * Returns the set built so far.
         *
         * @return the result set, or {@code null} if {@link #startDocument()} has not been called
         */
        public SitemapSet getSitemapSet() {
            return this.sitemapSet;
        }

        /** Tells whether the element's text content is copied onto the current entry. */
        private static boolean isTextElement(final String name) {
            return LOC_ELEMENT.equals(name) || LASTMOD_ELEMENT.equals(name) || CHANGEFREQ_ELEMENT.equals(name)
                    || PRIORITY_ELEMENT.equals(name);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/codelibs/fess/crawler/helper/SitemapsHelper$XmlSitemapsIndexHandler.class */
    /**
     * SAX handler that turns the {@code <sitemap>} entries of a sitemap index
     * into {@link SitemapFile} objects collected in a {@link SitemapSet} whose
     * type is {@code SitemapSet.INDEX}.
     *
     * <p>Elements are matched on the third SAX name argument (the qualified
     * name). Text of {@code loc} and {@code lastmod} elements is accumulated,
     * trimmed and stored on the {@code <sitemap>} entry currently being built.
     * Such an element appearing outside of any {@code <sitemap>} element is
     * ignored.</p>
     */
    public static class XmlSitemapsIndexHandler extends DefaultHandler {
        private static final String SITEMAP_ELEMENT = "sitemap";
        private static final String LOC_ELEMENT = "loc";
        private static final String LASTMOD_ELEMENT = "lastmod";

        /** Result being built; created in {@link #startDocument()}. */
        private SitemapSet sitemapSet;

        /** Entry for the {@code <sitemap>} element currently open, or {@code null} outside of one. */
        private SitemapFile sitemapFile;

        /** Text collector for the tracked child element currently open, or {@code null}. */
        private StringBuilder buf;

        protected XmlSitemapsIndexHandler() {
        }

        /**
         * Starts a fresh, empty result set of type {@code SitemapSet.INDEX}.
         */
        @Override
        public void startDocument() {
            this.sitemapSet = new SitemapSet();
            this.sitemapSet.setType(SitemapSet.INDEX);
        }

        /**
         * Opens a new entry on {@code <sitemap>} and starts collecting text on
         * {@code loc} / {@code lastmod}.
         *
         * @param str namespace URI (unused)
         * @param str2 local name (unused)
         * @param str3 qualified element name used for matching
         * @param attributes element attributes (unused)
         */
        @Override
        public void startElement(String str, String str2, String str3, Attributes attributes) {
            if (SITEMAP_ELEMENT.equals(str3)) {
                this.sitemapFile = new SitemapFile();
            } else if (isTextElement(str3)) {
                this.buf = new StringBuilder();
            }
        }

        /**
         * Appends character data to the current text collector, if one is active.
         *
         * @param cArr character buffer supplied by the parser
         * @param i offset of the first character
         * @param i2 number of characters
         */
        @Override
        public void characters(char[] cArr, int i, int i2) {
            if (this.buf != null) {
                // Append straight from the parser buffer; no temporary String needed.
                this.buf.append(cArr, i, i2);
            }
        }

        /**
         * Closes the current entry on {@code </sitemap>}, or stores the
         * collected text on the current entry when {@code loc} / {@code lastmod}
         * ends.
         *
         * @param str namespace URI (unused)
         * @param str2 local name (unused)
         * @param str3 qualified element name used for matching
         */
        @Override
        public void endElement(String str, String str2, String str3) {
            if (SITEMAP_ELEMENT.equals(str3)) {
                if (this.sitemapFile != null) {
                    this.sitemapSet.addSitemap(this.sitemapFile);
                }
                this.sitemapFile = null;
                return;
            }
            // Untracked elements must leave an active collector untouched.
            if (this.buf == null || !isTextElement(str3)) {
                return;
            }
            final String value = this.buf.toString().trim();
            this.buf = null;
            if (this.sitemapFile == null) {
                // Tracked element outside of <sitemap>: there is no entry to
                // attach the value to, so drop it instead of failing the parse.
                return;
            }
            if (LOC_ELEMENT.equals(str3)) {
                this.sitemapFile.setLoc(value);
            } else {
                // Only LASTMOD_ELEMENT remains among the tracked names.
                this.sitemapFile.setLastmod(value);
            }
        }

        @Override
        public void endDocument() {
        }

        /**
         * Returns the set built so far.
         *
         * @return the result set, or {@code null} if {@link #startDocument()} has not been called
         */
        public SitemapSet getSitemapSet() {
            return this.sitemapSet;
        }

        /** Tells whether the element's text content is copied onto the current entry. */
        private static boolean isTextElement(final String name) {
            return LOC_ELEMENT.equals(name) || LASTMOD_ELEMENT.equals(name);
        }
    }

    /**
     * Checks whether the stream looks like sitemap content.
     *
     * <p>Delegates to {@link #isValid(InputStream, boolean)} with the flag set
     * to {@code true}. The stream is read from during the check.</p>
     *
     * @param inputStream stream to inspect
     * @return {@code true} if the content is recognized as a sitemap
     */
    public boolean isValid(final InputStream inputStream) {
        final boolean topLevel = true;
        return isValid(inputStream, topLevel);
    }

    /**
     * Checks whether the stream looks like sitemap content by inspecting up to
     * {@code preloadSize} leading bytes.
     *
     * <p>The content is accepted when the preloaded text contains
     * {@code <urlset} or {@code <sitemapindex}, or starts with {@code http://}
     * or {@code https://}. Otherwise the stream is rewound and the same check
     * is applied to its gzip-decompressed content. Any exception raised along
     * the way (including the stream not being gzip data) yields {@code false}
     * and is logged at debug level.</p>
     *
     * @param inputStream stream to inspect; it is wrapped and read from
     * @param z passed as {@code true} by the public entry point and
     *        {@code false} for the nested gzip attempt; this implementation
     *        does not consult it
     * @return {@code true} if the content is recognized as a sitemap
     */
    protected boolean isValid(InputStream inputStream, boolean z) {
        BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
        bufferedInputStream.mark(this.preloadSize);
        byte[] bArr = new byte[this.preloadSize];
        try {
            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the preload window is full or the stream ends.
            int length = 0;
            while (length < bArr.length) {
                int count = bufferedInputStream.read(bArr, length, bArr.length - length);
                if (count <= 0) {
                    if (count == -1 && length == 0) {
                        // Nothing at all to inspect.
                        return false;
                    }
                    break;
                }
                length += count;
            }
            // Decode only what was actually read, not the zero-filled tail.
            String str = new String(bArr, 0, length, Constants.UTF_8);
            if (str.indexOf("<urlset") >= 0 || str.indexOf("<sitemapindex") >= 0 || str.startsWith("http://") || str.startsWith("https://")) {
                return true;
            }
            // Not recognizable as plain text/XML: rewind and try it as gzip.
            bufferedInputStream.reset();
            return isValid(new GZIPInputStream(bufferedInputStream), false);
        } catch (Exception e) {
            if (logger.isDebugEnabled()) {
                logger.debug("Failed to validate a file.", e);
            }
            return false;
        }
    }

    /**
     * Parses sitemap content from the stream.
     *
     * <p>Delegates to {@link #parse(InputStream, boolean)} with the flag set
     * to {@code true}.</p>
     *
     * @param inputStream stream holding the sitemap content
     * @return the parsed sitemap set
     */
    public SitemapSet parse(final InputStream inputStream) {
        final boolean topLevel = true;
        return parse(inputStream, topLevel);
    }

    /**
     * Detects the sitemap flavour from up to {@code preloadSize} leading bytes
     * and dispatches to the matching parser.
     *
     * <ul>
     * <li>preloaded text contains {@code <urlset}: {@link #parseXmlSitemaps(InputStream)}</li>
     * <li>preloaded text contains {@code <sitemapindex}: {@link #parseXmlSitemapsIndex(InputStream)}</li>
     * <li>preloaded text starts with {@code http://} or {@code https://}: {@link #parseTextSitemaps(InputStream)}</li>
     * <li>otherwise the stream is rewound and parsed again as gzip-compressed content</li>
     * </ul>
     *
     * @param inputStream stream holding the sitemap content; it is wrapped and read from
     * @param z passed as {@code true} by the public entry point and
     *        {@code false} for the nested gzip attempt; this implementation
     *        does not consult it
     * @return the parsed sitemap set
     * @throws CrawlingAccessException if the stream is empty, or wrapping any
     *         other exception raised while detecting or parsing; an existing
     *         {@code CrawlingAccessException} is rethrown unchanged
     */
    protected SitemapSet parse(InputStream inputStream, boolean z) {
        BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
        bufferedInputStream.mark(this.preloadSize);
        byte[] bArr = new byte[this.preloadSize];
        // Kept outside the try block so the failure message can show what was seen.
        String preloadData = "";
        try {
            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the preload window is full or the stream ends.
            int length = 0;
            while (length < bArr.length) {
                int count = bufferedInputStream.read(bArr, length, bArr.length - length);
                if (count <= 0) {
                    if (count == -1 && length == 0) {
                        throw new CrawlingAccessException("No sitemaps data.");
                    }
                    break;
                }
                length += count;
            }
            // Decode only what was actually read, not the zero-filled tail.
            preloadData = new String(bArr, 0, length, Constants.UTF_8);
            // Every branch rewinds first so the chosen parser sees the whole content.
            if (preloadData.indexOf("<urlset") >= 0) {
                bufferedInputStream.reset();
                return parseXmlSitemaps(bufferedInputStream);
            }
            if (preloadData.indexOf("<sitemapindex") >= 0) {
                bufferedInputStream.reset();
                return parseXmlSitemapsIndex(bufferedInputStream);
            }
            if (preloadData.startsWith("http://") || preloadData.startsWith("https://")) {
                bufferedInputStream.reset();
                return parseTextSitemaps(bufferedInputStream);
            }
            // Not recognizable as plain text/XML: try it as gzip.
            bufferedInputStream.reset();
            return parse(new GZIPInputStream(bufferedInputStream), false);
        } catch (CrawlingAccessException e) {
            throw e;
        } catch (Exception e2) {
            throw new CrawlingAccessException("Could not parse Sitemaps: " + preloadData, e2);
        }
    }

    /**
     * Parses a plain-text sitemap: one URL per line, read as UTF-8.
     *
     * <p>Each line is trimmed; lines that are blank or do not start with
     * {@code http://} or {@code https://} are skipped. The reader wrapped
     * around the stream is not closed here.</p>
     *
     * @param inputStream stream holding the text sitemap
     * @return a set of type {@code SitemapSet.URLSET} with one entry per accepted line
     * @throws SitemapsException wrapping any exception raised while reading
     */
    protected SitemapSet parseTextSitemaps(InputStream inputStream) {
        final SitemapSet result = new SitemapSet();
        result.setType(SitemapSet.URLSET);
        try {
            final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, Constants.UTF_8));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                final String candidate = line.trim();
                if (!StringUtil.isNotBlank(candidate)) {
                    continue;
                }
                if (!candidate.startsWith("http://") && !candidate.startsWith("https://")) {
                    continue;
                }
                final SitemapUrl entry = new SitemapUrl();
                entry.setLoc(candidate);
                result.addSitemap(entry);
            }
            return result;
        } catch (Exception e) {
            throw new SitemapsException("Could not parse Text Sitemaps.", e);
        }
    }

    /**
     * Parses an XML sitemap ({@code <urlset>} document) with a SAX parser
     * feeding an {@link XmlSitemapsHandler}.
     *
     * <p>Before parsing, the factory features named by
     * {@code Constants.FEATURE_SECURE_PROCESSING},
     * {@code Constants.FEATURE_EXTERNAL_GENERAL_ENTITIES} and
     * {@code Constants.FEATURE_EXTERNAL_PARAMETER_ENTITIES} are set, and
     * {@link #disableExternalResources(SAXParser)} is applied to the parser.</p>
     *
     * @param inputStream stream holding the XML sitemap
     * @return the set collected by the handler
     * @throws SitemapsException wrapping any exception raised while configuring the parser or parsing
     */
    protected SitemapSet parseXmlSitemaps(InputStream inputStream) {
        final XmlSitemapsHandler handler = new XmlSitemapsHandler();
        try {
            final SAXParserFactory factory = SAXParserFactory.newInstance();
            // Harden the factory first: secure processing on, external entities off.
            factory.setFeature(Constants.FEATURE_SECURE_PROCESSING, true);
            factory.setFeature(Constants.FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
            factory.setFeature(Constants.FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);

            final SAXParser parser = factory.newSAXParser();
            disableExternalResources(parser);
            parser.parse(inputStream, handler);
        } catch (Exception e) {
            throw new SitemapsException("Could not parse XML Sitemaps.", e);
        }
        return handler.getSitemapSet();
    }

    /**
     * Best-effort restriction of external DTD and external schema access on the
     * given parser, done by setting the two JAXP access properties to the empty
     * string.
     *
     * <p>Each property is attempted independently, so a parser that rejects one
     * of them still gets the other applied. Failures are logged at debug level
     * and otherwise ignored.</p>
     *
     * @param sAXParser parser to configure
     * @throws SAXNotRecognizedException declared on the signature; this
     *         implementation catches and logs property failures instead
     * @throws SAXNotSupportedException declared on the signature; this
     *         implementation catches and logs property failures instead
     */
    protected void disableExternalResources(SAXParser sAXParser) throws SAXNotRecognizedException, SAXNotSupportedException {
        final String[] properties = { "http://javax.xml.XMLConstants/property/accessExternalDTD",
                "http://javax.xml.XMLConstants/property/accessExternalSchema" };
        for (final String property : properties) {
            try {
                sAXParser.setProperty(property, "");
            } catch (Exception e) {
                // Keep going: one unsupported property must not prevent the next from being set.
                if (logger.isDebugEnabled()) {
                    logger.debug("Failed to set a property: {}", property, e);
                }
            }
        }
    }

    /**
     * Parses an XML sitemap index ({@code <sitemapindex>} document) with a SAX
     * parser feeding an {@link XmlSitemapsIndexHandler}.
     *
     * <p>Before parsing, the factory features named by
     * {@code Constants.FEATURE_SECURE_PROCESSING},
     * {@code Constants.FEATURE_EXTERNAL_GENERAL_ENTITIES} and
     * {@code Constants.FEATURE_EXTERNAL_PARAMETER_ENTITIES} are set, and
     * {@link #disableExternalResources(SAXParser)} is applied to the parser.</p>
     *
     * @param inputStream stream holding the XML sitemap index
     * @return the set collected by the handler
     * @throws SitemapsException wrapping any exception raised while configuring the parser or parsing
     */
    protected SitemapSet parseXmlSitemapsIndex(InputStream inputStream) {
        final XmlSitemapsIndexHandler handler = new XmlSitemapsIndexHandler();
        try {
            final SAXParserFactory factory = SAXParserFactory.newInstance();
            // Harden the factory first: secure processing on, external entities off.
            factory.setFeature(Constants.FEATURE_SECURE_PROCESSING, true);
            factory.setFeature(Constants.FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
            factory.setFeature(Constants.FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);

            final SAXParser parser = factory.newSAXParser();
            disableExternalResources(parser);
            parser.parse(inputStream, handler);
        } catch (Exception e) {
            throw new SitemapsException("Could not parse XML Sitemaps Index.", e);
        }
        return handler.getSitemapSet();
    }

    /**
     * Sets how many leading bytes {@code isValid} and {@code parse} read ahead
     * when detecting the sitemap format. The value is also used as the stream
     * mark limit and the preload buffer size.
     *
     * @param i number of bytes to preload
     */
    public void setPreloadSize(final int i) {
        preloadSize = i;
    }
}
