package org.codelibs.fess.crawler.client.http;

import jakarta.annotation.Resource;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.security.KeyStore;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.net.ssl.SSLContext;
import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.commons.lang3.SystemUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScheme;
import org.apache.http.auth.AuthSchemeProvider;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.CookieStore;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Lookup;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.DnsResolver;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.conn.HttpConnectionFactory;
import org.apache.http.conn.SchemePortResolver;
import org.apache.http.conn.routing.HttpRoutePlanner;
import org.apache.http.conn.socket.LayeredConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.util.PublicSuffixMatcher;
import org.apache.http.conn.util.PublicSuffixMatcherLoader;
import org.apache.http.cookie.Cookie;
import org.apache.http.cookie.CookieSpecProvider;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.DefaultProxyRoutePlanner;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.DefaultCookieSpecProvider;
import org.apache.http.impl.cookie.IgnoreSpecProvider;
import org.apache.http.impl.cookie.NetscapeDraftSpecProvider;
import org.apache.http.impl.cookie.RFC6265CookieSpecProvider;
import org.apache.http.message.BasicHeader;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.beans.BeanDesc;
import org.codelibs.core.beans.factory.BeanDescFactory;
import org.codelibs.core.io.CloseableUtil;
import org.codelibs.core.io.CopyUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.Pair;
import org.codelibs.core.timer.TimeoutManager;
import org.codelibs.core.timer.TimeoutTask;
import org.codelibs.fess.crawler.Constants;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.client.AbstractCrawlerClient;
import org.codelibs.fess.crawler.client.AccessTimeoutTarget;
import org.codelibs.fess.crawler.client.http.conn.IdnDnsResolver;
import org.codelibs.fess.crawler.client.http.form.FormScheme;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.RobotsTxt;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.helper.ContentLengthHelper;
import org.codelibs.fess.crawler.helper.MimeTypeHelper;
import org.codelibs.fess.crawler.helper.RobotsTxtHelper;
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;

/* loaded from: input_file:org/codelibs/fess/crawler/client/http/HcHttpClient.class */
public class HcHttpClient extends AbstractCrawlerClient {
    // ---------------------------------------------------------------------
    // Init-parameter keys. Most are looked up through getInitParameter(...)
    // in init(), buildConnectionManager() and buildSSLSocketFactory().
    // NOTE(review): the PROXY_* keys are not read in the code visible here;
    // presumably they are consumed by buildRoutePlanner() - confirm.
    // ---------------------------------------------------------------------
    public static final String CONNECTION_TIMEOUT_PROPERTY = "connectionTimeout";
    public static final String SO_TIMEOUT_PROPERTY = "soTimeout";
    public static final String PROXY_HOST_PROPERTY = "proxyHost";
    public static final String PROXY_PORT_PROPERTY = "proxyPort";
    public static final String PROXY_AUTH_SCHEME_PROPERTY = "proxyAuthScheme";
    public static final String PROXY_CREDENTIALS_PROPERTY = "proxyCredentials";
    public static final String USER_AGENT_PROPERTY = "userAgent";
    public static final String ROBOTS_TXT_ENABLED_PROPERTY = "robotsTxtEnabled";
    public static final String AUTHENTICATIONS_PROPERTY = "webAuthentications";
    public static final String REQUEST_HEADERS_PROPERTY = "requestHeaders";
    public static final String REDIRECTS_ENABLED = "redirectsEnabled";
    public static final String COOKIES_PROPERTY = "cookies";
    public static final String AUTH_SCHEME_PROVIDERS_PROPERTY = "authSchemeProviders";
    public static final String IGNORE_SSL_CERTIFICATE_PROPERTY = "ignoreSslCertificate";
    public static final String DEFAULT_MAX_CONNECTION_PER_ROUTE_PROPERTY = "defaultMaxConnectionPerRoute";
    public static final String MAX_TOTAL_CONNECTION_PROPERTY = "maxTotalConnection";
    public static final String TIME_TO_LIVE_TIME_UNIT_PROPERTY = "timeToLiveTimeUnit";
    public static final String TIME_TO_LIVE_PROPERTY = "timeToLive";
    private static final Logger logger = LogManager.getLogger(HcHttpClient.class);

    /** Optional helper that parses robots.txt; robots.txt handling is skipped when it is null or disabled. */
    @Resource
    protected RobotsTxtHelper robotsTxtHelper;

    /** Optional helper that supplies the maximum accepted content length per mime type. */
    @Resource
    protected ContentLengthHelper contentLengthHelper;

    /** Helper used to detect a content type when the response does not provide a usable one. */
    @Resource
    protected MimeTypeHelper mimeTypeHelper;

    /** The built client; null until init() completes and again after close(). */
    protected volatile CloseableHttpClient httpClient;

    /** Handle of the periodic connection monitor registered in init() and cancelled in close(). */
    private TimeoutTask connectionMonitorTask;

    /** Default connect timeout used when the init parameter is absent; null leaves the library default. */
    protected Integer connectionTimeout;
    protected Integer maxTotalConnections;
    protected Integer maxConnectionsPerRoute;

    /** Default socket timeout used when the init parameter is absent; null leaves the library default. */
    protected Integer soTimeout;

    /** Cookie spec name applied to the request config when non-null. */
    protected String cookieSpec;
    protected String proxyHost;
    protected Integer proxyPort;
    protected Credentials proxyCredentials;

    /** Connection manager created by buildConnectionManager() and shut down in close(). */
    protected HttpClientConnectionManager clientConnectionManager;

    /** Default value for the AUTH_SCHEME_PROVIDERS_PROPERTY init parameter. */
    protected Map<String, AuthSchemeProvider> authSchemeProviderMap;
    protected HttpRoutePlanner routePlanner;

    /** When non-null, returned as-is by buildCookieSpecRegistry(). */
    protected Lookup<CookieSpecProvider> cookieSpecRegistry;

    /** When non-null, returned as-is by buildSSLSocketFactory(). */
    protected LayeredConnectionSocketFactory sslSocketFactory;

    /** Headers added to every outgoing request (page fetches, robots.txt and form authentication). */
    private final List<Header> requestHeaderList = new ArrayList<>();

    /** Bean properties applied reflectively to the built client in init(). */
    private final Map<String, Object> httpClientPropertyMap = new HashMap<>();
    protected String userAgent = "Crawler";
    protected HttpClientContext httpClientContext = HttpClientContext.create();
    protected AuthScheme proxyAuthScheme = new BasicScheme();

    /** Mime type used when neither the response header nor detection yields one. */
    protected String defaultMimeType = "application/octet-stream";
    protected CookieStore cookieStore = new BasicCookieStore();
    protected DnsResolver dnsResolver = new IdnDnsResolver();

    /** Period passed to TimeoutManager for the connection monitor (presumably seconds - confirm). */
    protected int connectionCheckInterval = 5;

    /** Idle threshold handed to HcConnectionMonitorTarget (presumably milliseconds - confirm). */
    protected long idleConnectionTimeout = 60000;
    protected Pattern redirectHttpStatusPattern = Pattern.compile("[3][0-9][0-9]");

    /** Whether Disallow rules from robots.txt are added as URL-filter excludes. */
    protected boolean useRobotsTxtDisallows = true;

    /** Whether Allow rules from robots.txt are added as URL-filter includes. */
    protected boolean useRobotsTxtAllows = true;
    protected CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
    protected AuthCache authCache = new BasicAuthCache();

    /** Default for the REDIRECTS_ENABLED init parameter. */
    protected boolean redirectsEnabled = false;

    /** Date patterns handed to the default cookie spec provider. */
    protected String[] cookieDatePatterns = {"EEE, dd MMM yyyy HH:mm:ss zzz", "EEE, dd-MMM-yy HH:mm:ss zzz", "EEE MMM d HH:mm:ss yyyy", ""};

    /**
     * Builds the underlying {@link CloseableHttpClient} from the configured
     * init parameters and field defaults. Does nothing when a client already
     * exists.
     *
     * <p>Steps, in order: robots.txt switch, timeouts, user agent, route
     * planner, credentials (form-based schemes are collected and executed
     * after the client is built), request headers, redirect and cookie
     * settings, connection manager plus its monitor task, client build,
     * reflective client properties, and finally the form-based logins.
     * The {@code httpClient} field is assigned last.</p>
     */
    @Override // org.codelibs.fess.crawler.client.AbstractCrawlerClient
    public synchronized void init() {
        if (this.httpClient != null) {
            return;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Initializing {}", HcHttpClient.class.getName());
        }
        super.init();

        // robots.txt
        final Boolean robotsTxtEnabled = (Boolean) getInitParameter(ROBOTS_TXT_ENABLED_PROPERTY, Boolean.TRUE, Boolean.class);
        if (this.robotsTxtHelper != null) {
            this.robotsTxtHelper.setEnabled(robotsTxtEnabled.booleanValue());
        }

        final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
        final HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();

        // timeouts
        final Integer connectTimeoutParam = (Integer) getInitParameter(CONNECTION_TIMEOUT_PROPERTY, this.connectionTimeout, Integer.class);
        if (connectTimeoutParam != null) {
            requestConfigBuilder.setConnectTimeout(connectTimeoutParam.intValue());
        }
        final Integer soTimeoutParam = (Integer) getInitParameter(SO_TIMEOUT_PROPERTY, this.soTimeout, Integer.class);
        if (soTimeoutParam != null) {
            requestConfigBuilder.setSocketTimeout(soTimeoutParam.intValue());
        }

        // auth scheme providers
        // NOTE(review): the providers are registered on this builder, but the
        // resulting registry is never handed to httpClientBuilder in this
        // method, so they currently have no effect. Confirm whether that is
        // intended before wiring it in (replacing the default registry would
        // drop the built-in schemes).
        final RegistryBuilder<AuthSchemeProvider> authSchemeProviderBuilder = RegistryBuilder.create();
        @SuppressWarnings("unchecked") // the init parameter is declared as a raw Map
        final Map<String, AuthSchemeProvider> providerMap =
                (Map<String, AuthSchemeProvider>) getInitParameter(AUTH_SCHEME_PROVIDERS_PROPERTY, this.authSchemeProviderMap, Map.class);
        if (providerMap != null) {
            for (final Map.Entry<String, AuthSchemeProvider> entry : providerMap.entrySet()) {
                authSchemeProviderBuilder.register(entry.getKey(), entry.getValue());
            }
        }

        // user agent
        this.userAgent = (String) getInitParameter(USER_AGENT_PROPERTY, this.userAgent, String.class);
        if (StringUtil.isNotBlank(this.userAgent)) {
            httpClientBuilder.setUserAgent(this.userAgent);
        }

        // proxy / routing
        final HttpRoutePlanner planner = buildRoutePlanner();
        if (planner != null) {
            httpClientBuilder.setRoutePlanner(planner);
        }

        // authentication: form-based schemes need a working client, so they
        // are only collected here and executed once the client is built.
        final Authentication[] authentications =
                (Authentication[]) getInitParameter(AUTHENTICATIONS_PROPERTY, new Authentication[0], Authentication[].class);
        final List<Pair<FormScheme, Credentials>> formSchemeList = new ArrayList<>();
        for (final Authentication authentication : authentications) {
            final AuthScheme authScheme = authentication.getAuthScheme();
            if (authScheme instanceof FormScheme) {
                formSchemeList.add(new Pair<>((FormScheme) authScheme, authentication.getCredentials()));
            } else {
                final AuthScope authScope = authentication.getAuthScope();
                this.credentialsProvider.setCredentials(authScope, authentication.getCredentials());
                if (authScope.getHost() != null && authScheme != null) {
                    this.authCache.put(new HttpHost(authScope.getHost(), authScope.getPort()), authScheme);
                }
            }
        }
        this.httpClientContext.setAuthCache(this.authCache);
        this.httpClientContext.setCredentialsProvider(this.credentialsProvider);

        // request headers
        final RequestHeader[] requestHeaders =
                (RequestHeader[]) getInitParameter(REQUEST_HEADERS_PROPERTY, new RequestHeader[0], RequestHeader[].class);
        for (final RequestHeader requestHeader : requestHeaders) {
            if (requestHeader.isValid()) {
                this.requestHeaderList.add(new BasicHeader(requestHeader.getName(), requestHeader.getValue()));
            }
        }

        // redirects
        final Boolean followRedirects = (Boolean) getInitParameter(REDIRECTS_ENABLED, Boolean.valueOf(this.redirectsEnabled), Boolean.class);
        requestConfigBuilder.setRedirectsEnabled(followRedirects.booleanValue());

        // cookies
        if (this.cookieSpec != null) {
            requestConfigBuilder.setCookieSpec(this.cookieSpec);
        }
        httpClientBuilder.setDefaultCookieStore(this.cookieStore);
        if (this.cookieStore != null) {
            final Cookie[] cookies = (Cookie[]) getInitParameter(COOKIES_PROPERTY, new Cookie[0], Cookie[].class);
            for (final Cookie cookie : cookies) {
                this.cookieStore.addCookie(cookie);
            }
        }
        final Lookup<CookieSpecProvider> cookieSpecs = buildCookieSpecRegistry();
        if (cookieSpecs != null) {
            httpClientBuilder.setDefaultCookieSpecRegistry(cookieSpecs);
        }

        // connection manager and its periodic monitor
        this.clientConnectionManager = buildConnectionManager(httpClientBuilder);
        this.connectionMonitorTask = TimeoutManager.getInstance().addTimeoutTarget(
                new HcConnectionMonitorTarget(this.clientConnectionManager, this.idleConnectionTimeout), this.connectionCheckInterval, true);

        final CloseableHttpClient closeableHttpClient = httpClientBuilder.setDnsResolver(this.dnsResolver)
                .setConnectionManager(this.clientConnectionManager)
                .setDefaultRequestConfig(requestConfigBuilder.build())
                .build();

        // extra bean properties requested through addHttpClientProperty(...)
        if (!this.httpClientPropertyMap.isEmpty()) {
            final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(closeableHttpClient.getClass());
            for (final Map.Entry<String, Object> property : this.httpClientPropertyMap.entrySet()) {
                final String propertyName = property.getKey();
                if (beanDesc.hasPropertyDesc(propertyName)) {
                    beanDesc.getPropertyDesc(propertyName).setValue(closeableHttpClient, property.getValue());
                } else {
                    logger.warn("DefaultHttpClient does not have {}.", propertyName);
                }
            }
        }

        // form-based logins: a failure is logged and does not abort init
        for (final Pair<FormScheme, Credentials> formAuth : formSchemeList) {
            final FormScheme formScheme = formAuth.getFirst();
            formScheme.authenticate(formAuth.getSecond(), (request, responseHandler) -> {
                for (final Header header : this.requestHeaderList) {
                    request.addHeader(header);
                }
                HttpEntity httpEntity = null;
                try {
                    final CloseableHttpResponse response = closeableHttpClient.execute(request, new BasicHttpContext(this.httpClientContext));
                    httpEntity = response.getEntity();
                    responseHandler.accept(response, httpEntity);
                } catch (final Exception e) {
                    request.abort();
                    logger.warn("Failed to authenticate on " + formScheme, e);
                } finally {
                    // always drain the entity so the pooled connection is released
                    EntityUtils.consumeQuietly(httpEntity);
                }
            });
        }

        this.httpClient = closeableHttpClient;
    }

    /**
     * Creates the pooling connection manager used by the client.
     *
     * <p>Registers a plain socket factory for {@code http} and the factory
     * from {@link #buildSSLSocketFactory(HttpClientBuilder)} for
     * {@code https}, then applies the time-to-live and pool-size init
     * parameters.</p>
     *
     * @param httpClientBuilder builder passed on to the SSL factory setup
     * @return a configured {@link PoolingHttpClientConnectionManager}
     */
    protected HttpClientConnectionManager buildConnectionManager(HttpClientBuilder httpClientBuilder) {
        Registry socketFactoryRegistry = RegistryBuilder.create()
                .register("http", PlainConnectionSocketFactory.getSocketFactory())
                .register("https", buildSSLSocketFactory(httpClientBuilder))
                .build();

        Long timeToLive = (Long) getInitParameter(TIME_TO_LIVE_PROPERTY, 5L, Long.class);
        String timeUnitName = (String) getInitParameter(TIME_TO_LIVE_TIME_UNIT_PROPERTY, "MINUTES", String.class);
        TimeUnit timeUnit = TimeUnit.valueOf(timeUnitName);
        // NOTE(review): the default pool size is taken from an HTTP-status-named
        // constant; this looks like a decompiler substitution for a literal - confirm.
        Integer maxTotal = (Integer) getInitParameter(MAX_TOTAL_CONNECTION_PROPERTY, Integer.valueOf(Constants.OK_STATUS_CODE), Integer.class);
        Integer maxPerRoute = (Integer) getInitParameter(DEFAULT_MAX_CONNECTION_PER_ROUTE_PROPERTY, 20, Integer.class);

        PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry,
                (HttpConnectionFactory) null, (SchemePortResolver) null, this.dnsResolver, timeToLive.longValue(), timeUnit);
        connectionManager.setMaxTotal(maxTotal.intValue());
        connectionManager.setDefaultMaxPerRoute(maxPerRoute.intValue());
        return connectionManager;
    }

    /**
     * Chooses the socket factory for {@code https} connections.
     *
     * <p>An explicitly configured {@code sslSocketFactory} wins. Otherwise,
     * when the {@code ignoreSslCertificate} init parameter is true, a factory
     * is built that trusts every certificate chain and skips hostname
     * verification, and the same SSL context is set on the given builder.
     * If that setup fails, or the parameter is false, the library default
     * factory is returned.</p>
     *
     * @param httpClientBuilder builder that receives the permissive SSL context
     * @return the socket factory to register for {@code https}
     */
    protected LayeredConnectionSocketFactory buildSSLSocketFactory(HttpClientBuilder httpClientBuilder) {
        if (this.sslSocketFactory != null) {
            return this.sslSocketFactory;
        }

        boolean ignoreCertificate = ((Boolean) getInitParameter(IGNORE_SSL_CERTIFICATE_PROPERTY, false, Boolean.class)).booleanValue();
        if (!ignoreCertificate) {
            return SSLConnectionSocketFactory.getSocketFactory();
        }

        try {
            // trust strategy that accepts any chain
            SSLContext trustAllContext = new SSLContextBuilder().loadTrustMaterial((KeyStore) null, (chain, authType) -> true).build();
            httpClientBuilder.setSSLContext(trustAllContext);
            return new SSLConnectionSocketFactory(trustAllContext, NoopHostnameVerifier.INSTANCE);
        } catch (Exception e) {
            logger.warn("Failed to create TrustSelfSignedStrategy.", e);
            return SSLConnectionSocketFactory.getSocketFactory();
        }
    }

    /**
     * Returns the cookie spec registry for the client.
     *
     * <p>A preconfigured {@code cookieSpecRegistry} is returned unchanged.
     * Otherwise a registry is built that maps the spec names
     * {@code default}, {@code best-match} and {@code compatibility} to a
     * default provider using {@code cookieDatePatterns}, {@code standard}
     * and {@code standard-strict} to RFC 6265 providers (relaxed / strict),
     * plus {@code netscape} and {@code ignoreCookies}.</p>
     *
     * @return the registry to install on the client builder
     */
    protected Lookup<CookieSpecProvider> buildCookieSpecRegistry() {
        if (this.cookieSpecRegistry != null) {
            return this.cookieSpecRegistry;
        }
        final PublicSuffixMatcher publicSuffixMatcher = PublicSuffixMatcherLoader.getDefault();
        final CookieSpecProvider defaultProvider = new DefaultCookieSpecProvider(DefaultCookieSpecProvider.CompatibilityLevel.DEFAULT,
                publicSuffixMatcher, this.cookieDatePatterns, false);
        final CookieSpecProvider relaxedStandardProvider =
                new RFC6265CookieSpecProvider(RFC6265CookieSpecProvider.CompatibilityLevel.RELAXED, publicSuffixMatcher);
        final CookieSpecProvider strictStandardProvider =
                new RFC6265CookieSpecProvider(RFC6265CookieSpecProvider.CompatibilityLevel.STRICT, publicSuffixMatcher);
        // The explicit type witness is required: without it the chained call
        // infers RegistryBuilder<Object> and the result is not a
        // Lookup<CookieSpecProvider>.
        return RegistryBuilder.<CookieSpecProvider>create()
                .register("default", defaultProvider)
                .register("best-match", defaultProvider)
                .register("compatibility", defaultProvider)
                .register("standard", relaxedStandardProvider)
                .register("standard-strict", strictStandardProvider)
                .register("netscape", new NetscapeDraftSpecProvider())
                .register("ignoreCookies", new IgnoreSpecProvider())
                .build();
    }

    /**
     * Releases the HTTP client and everything created for it by init().
     * Does nothing when no client has been built.
     *
     * <p>Cancels the connection monitor task, closes the client (a failure
     * is logged, not thrown), clears the {@code httpClient} field and shuts
     * down the connection manager.</p>
     */
    @Override // org.codelibs.fess.crawler.client.CrawlerClient, java.lang.AutoCloseable
    public void close() {
        if (this.httpClient != null) {
            if (logger.isDebugEnabled()) {
                logger.debug("Closing HcHttpClient...");
            }
            if (this.connectionMonitorTask != null) {
                this.connectionMonitorTask.cancel();
            }
            try {
                this.httpClient.close();
            } catch (IOException closeFailure) {
                logger.error("Failed to close httpClient.", closeFailure);
            }
            // clearing the field lets a later init() build a fresh client
            this.httpClient = null;
            if (this.clientConnectionManager != null) {
                this.clientConnectionManager.shutdown();
            }
        }
    }

    /**
     * Records a bean property to be applied to the HTTP client when init()
     * builds it. Entries with a blank name or a null value are ignored.
     *
     * @param str property name
     * @param obj property value
     */
    public void addHttpClientProperty(String str, Object obj) {
        boolean usable = StringUtil.isNotBlank(str) && obj != null;
        if (usable) {
            this.httpClientPropertyMap.put(str, obj);
        }
    }

    /**
     * Fetches {@code /robots.txt} for the site hosting the given URL and
     * applies it to the current crawler context.
     *
     * <p>Nothing happens when the robots.txt helper is missing or disabled,
     * when there is no crawler context, or when the robots.txt URL has
     * already been recorded in the context. Otherwise the file is requested
     * with the configured request headers; on HTTP 200 its sitemaps are added
     * to the context and the Disallow / Allow rules of the directive matching
     * {@code userAgent} are converted to regexes and added as URL-filter
     * excludes / includes (subject to {@code useRobotsTxtDisallows} and
     * {@code useRobotsTxtAllows}).</p>
     *
     * @param str the URL about to be crawled
     * @throws CrawlerSystemException if {@code str} is blank, or rethrown
     *         unchanged when raised while processing (this includes
     *         MaxLengthExceededException if that type extends it - not
     *         visible here)
     * @throws CrawlingAccessException if any other failure occurs while
     *         fetching or parsing robots.txt
     */
    protected void processRobotsTxt(String str) {
        if (StringUtil.isBlank(str)) {
            throw new CrawlerSystemException("url is null or empty.");
        }
        if (this.robotsTxtHelper == null || !this.robotsTxtHelper.isEnabled()) {
            return;
        }
        final CrawlerContext crawlerContext = CrawlingParameterUtil.getCrawlerContext();
        if (crawlerContext == null) {
            return;
        }

        // scheme://host[:port] part of the URL (the whole URL when it has no path)
        final int pathStart = str.indexOf('/', str.indexOf("://") + 3);
        final String hostUrl = pathStart >= 0 ? str.substring(0, pathStart) : str;
        final String robotsTxtUrl = hostUrl + "/robots.txt";

        if (crawlerContext.getRobotsTxtUrlSet().contains(robotsTxtUrl)) {
            if (logger.isDebugEnabled()) {
                logger.debug("{} is already visited.", robotsTxtUrl);
            }
            return;
        }
        if (logger.isInfoEnabled()) {
            logger.info("Checking URL: {}", robotsTxtUrl);
        }
        // recorded before the request so a failing robots.txt is not retried
        crawlerContext.getRobotsTxtUrlSet().add(robotsTxtUrl);

        final HttpGet httpGet = new HttpGet(robotsTxtUrl);
        for (final Header header : this.requestHeaderList) {
            httpGet.addHeader(header);
        }

        HttpEntity httpEntity = null;
        try {
            final HttpResponse response = executeHttpClient(httpGet);
            httpEntity = response.getEntity();
            if (response.getStatusLine().getStatusCode() == 200) {
                // refuse oversized files before reading the body
                final Header contentLengthHeader = response.getFirstHeader("Content-Length");
                if (contentLengthHeader != null && this.contentLengthHelper != null) {
                    final long contentLength = Long.parseLong(contentLengthHeader.getValue());
                    final long maxLength = this.contentLengthHelper.getMaxLength("text/plain");
                    if (contentLength > maxLength) {
                        throw new MaxLengthExceededException("The content length (" + contentLength + " byte) is over " + maxLength
                                + " byte. The url is " + robotsTxtUrl);
                    }
                }

                final RobotsTxt robotsTxt = httpEntity != null ? this.robotsTxtHelper.parse(httpEntity.getContent()) : null;
                if (robotsTxt != null) {
                    final String[] sitemaps = robotsTxt.getSitemaps();
                    if (sitemaps.length > 0) {
                        crawlerContext.addSitemaps(sitemaps);
                    }
                    final RobotsTxt.Directive directive = robotsTxt.getMatchedDirective(this.userAgent);
                    if (directive != null) {
                        if (this.useRobotsTxtDisallows) {
                            for (final String disallow : directive.getDisallows()) {
                                if (StringUtil.isNotBlank(disallow)) {
                                    final String excludePattern = hostUrl + convertRobotsTxtPatternToRegex(disallow);
                                    crawlerContext.getUrlFilter().addExclude(excludePattern);
                                    if (logger.isInfoEnabled()) {
                                        logger.info("Excluded URL: {}", excludePattern);
                                    }
                                }
                            }
                        }
                        if (this.useRobotsTxtAllows) {
                            for (final String allow : directive.getAllows()) {
                                if (StringUtil.isNotBlank(allow)) {
                                    final String includePattern = hostUrl + convertRobotsTxtPatternToRegex(allow);
                                    crawlerContext.getUrlFilter().addInclude(includePattern);
                                    if (logger.isInfoEnabled()) {
                                        logger.info("Included URL: {}", includePattern);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        } catch (final CrawlerSystemException e) {
            httpGet.abort();
            throw e;
        } catch (final Exception e) {
            httpGet.abort();
            throw new CrawlingAccessException("Could not process " + robotsTxtUrl + ". ", e);
        } finally {
            // drain the entity on every path so the pooled connection is released
            EntityUtils.consumeQuietly(httpEntity);
        }
    }

    /**
     * Converts a robots.txt path pattern into a regular expression.
     *
     * <p>{@code *} becomes {@code .*} and a trailing {@code $} is kept as the
     * end-of-URL anchor. Every other regex metacharacter (including a
     * {@code $} that is not the last character) is escaped so that it matches
     * literally. A pattern that does not start with {@code /} is prefixed
     * with {@code .*}, and an unanchored pattern is suffixed with {@code .*}
     * so that it acts as a prefix match.</p>
     *
     * @param str the robots.txt pattern; must not be empty
     * @return the equivalent regular expression
     * @throws StringIndexOutOfBoundsException if {@code str} is empty
     */
    protected String convertRobotsTxtPatternToRegex(String str) {
        // Only a '$' in last position is the robots.txt end anchor.
        final boolean anchored = str.endsWith("$");
        final String body = anchored ? str.substring(0, str.length() - 1) : str;

        final StringBuilder buf = new StringBuilder(body.length() + 8);
        for (int i = 0; i < body.length(); i++) {
            final char c = body.charAt(i);
            if (c == '*') {
                buf.append(".*");
            } else if ("\\.?+()[]{}|^$".indexOf(c) >= 0) {
                // literal character that would otherwise be regex syntax
                buf.append('\\').append(c);
            } else {
                buf.append(c);
            }
        }
        if (anchored) {
            buf.append('$');
        }

        String regex = buf.toString();
        if (regex.charAt(0) != '/') {
            regex = ".*" + regex;
        }
        if (!regex.endsWith("$") && !regex.endsWith(".*")) {
            regex = regex + ".*";
        }
        // collapse the doubled wildcard the steps above can produce
        return regex.replace(".*.*", ".*");
    }

    /**
     * Fetches the given URL with an HTTP GET.
     *
     * @param str the URL to request
     * @return the response data produced by {@link #doHttpMethod(String, HttpUriRequest)}
     * @throws CrawlingAccessException if an {@link IllegalArgumentException}
     *         is raised while building or executing the request
     */
    @Override // org.codelibs.fess.crawler.client.AbstractCrawlerClient
    public ResponseData doGet(String str) {
        ResponseData responseData;
        try {
            HttpGet getRequest = new HttpGet(str);
            responseData = doHttpMethod(str, getRequest);
        } catch (IllegalArgumentException invalidUrl) {
            throw new CrawlingAccessException("The url may not be valid: " + str, invalidUrl);
        }
        return responseData;
    }

    /**
     * Requests the given URL with an HTTP HEAD.
     *
     * @param str the URL to request
     * @return the response data produced by {@link #doHttpMethod(String, HttpUriRequest)}
     * @throws CrawlingAccessException if an {@link IllegalArgumentException}
     *         is raised while building or executing the request
     */
    @Override // org.codelibs.fess.crawler.client.AbstractCrawlerClient
    public ResponseData doHead(String str) {
        ResponseData responseData;
        try {
            HttpHead headRequest = new HttpHead(str);
            responseData = doHttpMethod(str, headRequest);
        } catch (IllegalArgumentException invalidUrl) {
            throw new CrawlingAccessException("The url may not be valid: " + str, invalidUrl);
        }
        return responseData;
    }

    /**
     * Executes the given request, initializing the client first if needed.
     *
     * <p>When {@code accessTimeout} is set, a timeout target bound to the
     * calling thread is registered for the duration of the call and is
     * stopped and cancelled afterwards, whether the call succeeds or
     * throws.</p>
     *
     * @param str the URL being accessed
     * @param httpUriRequest the prepared request
     * @return the response data from {@code processHttpMethod}
     */
    public ResponseData doHttpMethod(String str, HttpUriRequest httpUriRequest) {
        if (this.httpClient == null) {
            init();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Accessing {}", str);
        }

        AccessTimeoutTarget timeoutTarget = null;
        TimeoutTask watchdog = null;
        if (this.accessTimeout != null) {
            timeoutTarget = new AccessTimeoutTarget(Thread.currentThread());
            watchdog = TimeoutManager.getInstance().addTimeoutTarget(timeoutTarget, this.accessTimeout.intValue(), false);
        }

        try {
            return processHttpMethod(str, httpUriRequest);
        } finally {
            // the watchdog exists exactly when the target does
            if (timeoutTarget != null) {
                timeoutTarget.stop();
                if (!watchdog.isCanceled()) {
                    watchdog.cancel();
                }
            }
        }
    }

    protected ResponseData processHttpMethod(String str, HttpUriRequest httpUriRequest) {
        Date parseLastModifiedDate;
        try {
            processRobotsTxt(str);
        } catch (CrawlingAccessException e) {
            if (logger.isInfoEnabled()) {
                StringBuilder sb = new StringBuilder(100);
                sb.append(e.getMessage());
                if (e.getCause() != null) {
                    sb.append(e.getCause().getMessage());
                }
                logger.info(sb.toString());
            } else if (logger.isDebugEnabled()) {
                logger.debug("Crawling Access Exception at {}", str, e);
            }
        }
        Iterator<Header> it = this.requestHeaderList.iterator();
        while (it.hasNext()) {
            httpUriRequest.addHeader(it.next());
        }
        ResponseData responseData = new ResponseData();
        try {
            try {
                try {
                    try {
                        try {
                            try {
                                HttpResponse executeHttpClient = executeHttpClient(httpUriRequest);
                                HttpEntity entity = executeHttpClient.getEntity();
                                int statusCode = executeHttpClient.getStatusLine().getStatusCode();
                                if (isRedirectHttpStatus(statusCode)) {
                                    Header firstHeader = executeHttpClient.getFirstHeader("location");
                                    if (firstHeader == null) {
                                        throw new CrawlingAccessException("Invalid redirect location at " + str);
                                    }
                                    responseData.setRedirectLocation(firstHeader.getValue().startsWith("/") ? constructRedirectLocation(str, firstHeader.getValue()) : firstHeader.getValue());
                                    EntityUtils.consumeQuietly(entity);
                                    return responseData;
                                }
                                String str2 = null;
                                Header firstHeader2 = executeHttpClient.getFirstHeader("Content-Type");
                                if (firstHeader2 != null) {
                                    str2 = firstHeader2.getValue();
                                    int indexOf = str2.indexOf(59);
                                    if (indexOf > 0) {
                                        str2 = str2.substring(0, indexOf);
                                        if ("application/octet-stream".equals(str2)) {
                                            str2 = null;
                                        }
                                    }
                                }
                                long j = 0;
                                String str3 = Constants.UTF_8;
                                if (entity == null) {
                                    responseData.setResponseBody(new byte[0]);
                                    if (str2 == null) {
                                        str2 = this.defaultMimeType;
                                    }
                                } else {
                                    InputStream content = entity.getContent();
                                    DeferredFileOutputStream deferredFileOutputStream = DeferredFileOutputStream.builder().setThreshold((int) this.maxCachedContentSize).setPrefix("crawler-HcHttpClient-").setSuffix(".out").setDirectory(SystemUtils.getJavaIoTmpDir()).get();
                                    try {
                                        CopyUtil.copy(content, deferredFileOutputStream);
                                        deferredFileOutputStream.flush();
                                        if (deferredFileOutputStream.isInMemory()) {
                                            responseData.setResponseBody(deferredFileOutputStream.getData());
                                            j = deferredFileOutputStream.getData().length;
                                            if (str2 == null) {
                                                try {
                                                    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(deferredFileOutputStream.getData());
                                                    try {
                                                        str2 = this.mimeTypeHelper.getContentType(byteArrayInputStream, str);
                                                        byteArrayInputStream.close();
                                                    } catch (Throwable th) {
                                                        try {
                                                            byteArrayInputStream.close();
                                                        } catch (Throwable th2) {
                                                            th.addSuppressed(th2);
                                                        }
                                                        throw th;
                                                    }
                                                } catch (Exception e2) {
                                                    logger.debug("Failed to detect mime-type.", e2);
                                                    str2 = this.defaultMimeType;
                                                }
                                            }
                                        } else {
                                            File file = deferredFileOutputStream.getFile();
                                            responseData.setResponseBody(file, true);
                                            j = file.length();
                                            if (str2 == null) {
                                                try {
                                                    FileInputStream fileInputStream = new FileInputStream(file);
                                                    try {
                                                        str2 = this.mimeTypeHelper.getContentType(fileInputStream, str);
                                                        fileInputStream.close();
                                                    } catch (Throwable th3) {
                                                        try {
                                                            fileInputStream.close();
                                                        } catch (Throwable th4) {
                                                            th3.addSuppressed(th4);
                                                        }
                                                        throw th3;
                                                    }
                                                } catch (Exception e3) {
                                                    logger.debug("Failed to detect mime-type.", e3);
                                                    str2 = this.defaultMimeType;
                                                }
                                            }
                                        }
                                        if (deferredFileOutputStream != null) {
                                            deferredFileOutputStream.close();
                                        }
                                        Header contentEncoding = entity.getContentEncoding();
                                        if (contentEncoding != null) {
                                            str3 = contentEncoding.getValue();
                                        }
                                    } catch (Throwable th5) {
                                        if (deferredFileOutputStream != null) {
                                            try {
                                                deferredFileOutputStream.close();
                                            } catch (Throwable th6) {
                                                th5.addSuppressed(th6);
                                            }
                                        }
                                        throw th5;
                                    }
                                }
                                if (this.contentLengthHelper != null) {
                                    long maxLength = this.contentLengthHelper.getMaxLength(str2);
                                    if (j > maxLength) {
                                        MaxLengthExceededException maxLengthExceededException = new MaxLengthExceededException("The content length (" + j + " byte) is over " + maxLengthExceededException + " byte. The url is " + maxLength);
                                        throw maxLengthExceededException;
                                    }
                                }
                                responseData.setUrl(str);
                                responseData.setCharSet(str3);
                                if (httpUriRequest instanceof HttpHead) {
                                    responseData.setMethod(Constants.HEAD_METHOD);
                                } else {
                                    responseData.setMethod(Constants.GET_METHOD);
                                }
                                responseData.setHttpStatusCode(statusCode);
                                for (Header header : executeHttpClient.getAllHeaders()) {
                                    responseData.addMetaData(header.getName(), header.getValue());
                                }
                                responseData.setMimeType(str2);
                                Header firstHeader3 = executeHttpClient.getFirstHeader("Content-Length");
                                if (firstHeader3 == null) {
                                    responseData.setContentLength(j);
                                } else {
                                    try {
                                        responseData.setContentLength(Long.parseLong(firstHeader3.getValue()));
                                    } catch (Exception e4) {
                                        responseData.setContentLength(j);
                                    }
                                }
                                checkMaxContentLength(responseData);
                                Header firstHeader4 = executeHttpClient.getFirstHeader("Last-Modified");
                                if (firstHeader4 != null) {
                                    String value = firstHeader4.getValue();
                                    if (StringUtil.isNotBlank(value) && (parseLastModifiedDate = parseLastModifiedDate(value)) != null) {
                                        responseData.setLastModified(parseLastModifiedDate);
                                    }
                                }
                                EntityUtils.consumeQuietly(entity);
                                return responseData;
                            } catch (UnknownHostException e5) {
                                closeResources(httpUriRequest, responseData);
                                throw new CrawlingAccessException("Unknown host(" + e5.getMessage() + "): " + str, e5);
                            }
                        } catch (Exception e6) {
                            closeResources(httpUriRequest, responseData);
                            throw new CrawlerSystemException("Failed to access " + str, e6);
                        }
                    } catch (CrawlerSystemException e7) {
                        closeResources(httpUriRequest, responseData);
                        throw e7;
                    }
                } catch (Throwable th7) {
                    EntityUtils.consumeQuietly(null);
                    throw th7;
                }
            } catch (ConnectException e8) {
                closeResources(httpUriRequest, responseData);
                throw new CrawlingAccessException("Connection time out(" + e8.getMessage() + "): " + str, e8);
            } catch (IOException e9) {
                closeResources(httpUriRequest, responseData);
                throw new CrawlingAccessException("I/O exception(" + e9.getMessage() + "): " + str, e9);
            }
        } catch (NoRouteToHostException e10) {
            closeResources(httpUriRequest, responseData);
            throw new CrawlingAccessException("No route to host(" + e10.getMessage() + "): " + str, e10);
        } catch (SocketException e11) {
            closeResources(httpUriRequest, responseData);
            throw new CrawlingAccessException("Socket exception(" + e11.getMessage() + "): " + str, e11);
        }
    }

    /**
     * Releases what a failed request still holds: closes the response data
     * quietly, then aborts the HTTP request.
     *
     * @param httpUriRequest the request to abort; dereferenced without a null check
     * @param responseData the response data handed to {@code CloseableUtil.closeQuietly}
     */
    protected void closeResources(HttpUriRequest httpUriRequest, ResponseData responseData) {
        // Close first, abort second; NOTE(review): whether the order matters depends on
        // how ResponseData holds the response stream — confirm before reordering.
        CloseableUtil.closeQuietly(responseData);
        httpUriRequest.abort();
    }

    /**
     * Tells whether an HTTP status code counts as a redirect.
     *
     * The code is rendered as decimal text and must match the configured
     * {@code redirectHttpStatusPattern} in full.
     *
     * @param i the HTTP status code
     * @return {@code true} if the whole decimal representation matches the pattern
     */
    protected boolean isRedirectHttpStatus(int i) {
        final String statusText = String.valueOf(i);
        return this.redirectHttpStatusPattern.matcher(statusText).matches();
    }

    /**
     * Runs the request on the underlying HTTP client.
     *
     * Each call gets its own {@code BasicHttpContext} constructed from the
     * shared {@code httpClientContext}.
     *
     * @param httpUriRequest the request to execute
     * @return the response returned by the client
     * @throws IOException if the client fails to execute the request
     */
    protected HttpResponse executeHttpClient(HttpUriRequest httpUriRequest) throws IOException {
        final BasicHttpContext requestContext = new BasicHttpContext(this.httpClientContext);
        return this.httpClient.execute(httpUriRequest, requestContext);
    }

    /**
     * Parses the value of a {@code Last-Modified} header.
     *
     * The value is parsed with the pattern {@code "EEE, d MMM yyyy HH:mm:ss Z"}
     * using English month and day names.
     *
     * @param str the header value; may be {@code null}
     * @return the parsed date, or {@code null} when {@code str} is {@code null}
     *         or does not match the pattern
     */
    protected Date parseLastModifiedDate(String str) {
        if (str == null) {
            // SimpleDateFormat.parse(null) would throw NullPointerException; this
            // method reports "no usable date" as null, so do the same here.
            return null;
        }
        try {
            // A fresh formatter per call: SimpleDateFormat instances are not thread-safe.
            return new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH).parse(str);
        } catch (ParseException e) {
            // An unparseable header is treated as absent rather than as an error.
            return null;
        }
    }

    /**
     * Provides the route planner for the HTTP client.
     *
     * An explicitly configured {@code routePlanner} is returned as is. Otherwise
     * the proxy host and port are looked up through {@code getInitParameter}
     * (with the {@code proxyHost} / {@code proxyPort} fields passed as defaults).
     * If either is missing, no planner is built. When proxy credentials are
     * available they are registered on the credentials provider for the proxy's
     * host and port, and a configured proxy auth scheme is put into the auth
     * cache for the proxy host.
     *
     * @return the configured planner, a new proxy route planner, or {@code null}
     *         when no proxy host/port is available
     */
    protected HttpRoutePlanner buildRoutePlanner() {
        if (this.routePlanner != null) {
            return this.routePlanner;
        }

        final String host = (String) getInitParameter(PROXY_HOST_PROPERTY, this.proxyHost, String.class);
        final Integer port = (Integer) getInitParameter(PROXY_PORT_PROPERTY, this.proxyPort, Integer.class);
        final boolean proxyConfigured = host != null && port != null;
        if (!proxyConfigured) {
            return null;
        }

        final HttpHost proxy = new HttpHost(host, port.intValue());
        final HttpRoutePlanner planner = new DefaultProxyRoutePlanner(proxy);

        final Credentials proxyLogin =
                (Credentials) getInitParameter(PROXY_CREDENTIALS_PROPERTY, this.proxyCredentials, Credentials.class);
        if (proxyLogin == null) {
            // Proxy without authentication: nothing to register.
            return planner;
        }

        final AuthScope proxyScope = new AuthScope(host, port.intValue());
        this.credentialsProvider.setCredentials(proxyScope, proxyLogin);

        final AuthScheme scheme =
                (AuthScheme) getInitParameter(PROXY_AUTH_SCHEME_PROPERTY, this.proxyAuthScheme, AuthScheme.class);
        if (scheme != null) {
            this.authCache.put(proxy, scheme);
        }
        return planner;
    }

    /**
     * Builds the absolute redirect target from the requested URL and a redirect
     * location.
     *
     * Spaces in the location are replaced with {@code %20} before it is resolved
     * against the requested URL. The result is normalized and returned in ASCII
     * form. When {@code StringUtil.isNotEmpty(str2)} is false, the normalized
     * requested URL itself is returned.
     *
     * @param str the URL that was requested (base for resolution)
     * @param str2 the redirect location, absolute or relative; may be empty
     * @return the normalized, ASCII-encoded redirect URL
     * @throws CrawlingAccessException if either the requested URL or the
     *         location is not a syntactically valid URI
     */
    protected static String constructRedirectLocation(String str, String str2) {
        try {
            URI uri = new URI(str);
            if (StringUtil.isNotEmpty(str2)) {
                // Parse the location with the URI constructor instead of resolve(String):
                // resolve(String) goes through URI.create, which reports a malformed
                // value as an unchecked IllegalArgumentException and would bypass the
                // catch below. This way both inputs fail the same way.
                uri = uri.resolve(new URI(str2.replace(" ", "%20")));
            }
            return uri.normalize().toASCIIString();
        } catch (URISyntaxException e) {
            throw new CrawlingAccessException(e);
        }
    }

    /**
     * Sets the connection timeout.
     *
     * @param num the value stored in {@code connectionTimeout}
     *            (NOTE(review): unit is not visible here, presumably milliseconds — confirm)
     */
    public void setConnectionTimeout(Integer num) {
        this.connectionTimeout = num;
    }

    /**
     * Sets the maximum total number of connections.
     *
     * @param num the value stored in {@code maxTotalConnections}
     */
    public void setMaxTotalConnections(Integer num) {
        this.maxTotalConnections = num;
    }

    /**
     * Sets the maximum number of connections per route.
     *
     * @param num the value stored in {@code maxConnectionsPerRoute}
     */
    public void setMaxConnectionsPerRoute(Integer num) {
        this.maxConnectionsPerRoute = num;
    }

    /**
     * Sets the socket timeout.
     *
     * @param num the value stored in {@code soTimeout}
     *            (NOTE(review): unit is not visible here, presumably milliseconds — confirm)
     */
    public void setSoTimeout(Integer num) {
        this.soTimeout = num;
    }

    /**
     * Sets the cookie specification.
     *
     * @param str the value stored in {@code cookieSpec}
     *            (presumably a cookie spec name known to the HTTP client — confirm)
     */
    public void setCookieSpec(String str) {
        this.cookieSpec = str;
    }

    /**
     * Sets the user agent string.
     *
     * @param str the value stored in {@code userAgent}
     */
    public void setUserAgent(String str) {
        this.userAgent = str;
    }

    /**
     * Sets the proxy host.
     *
     * {@code buildRoutePlanner()} passes this field as the default when it looks
     * up the proxy host parameter.
     *
     * @param str the proxy host name stored in {@code proxyHost}
     */
    public void setProxyHost(String str) {
        this.proxyHost = str;
    }

    /**
     * Sets the proxy port.
     *
     * {@code buildRoutePlanner()} passes this field as the default when it looks
     * up the proxy port parameter.
     *
     * @param num the proxy port stored in {@code proxyPort}
     */
    public void setProxyPort(Integer num) {
        this.proxyPort = num;
    }

    /**
     * Sets the authentication scheme for the proxy.
     *
     * {@code buildRoutePlanner()} passes this field as the default when it looks
     * up the proxy auth scheme parameter.
     *
     * @param authScheme the scheme stored in {@code proxyAuthScheme}
     */
    public void setProxyAuthScheme(AuthScheme authScheme) {
        this.proxyAuthScheme = authScheme;
    }

    /**
     * Sets the credentials for the proxy.
     *
     * {@code buildRoutePlanner()} passes this field as the default when it looks
     * up the proxy credentials parameter.
     *
     * @param credentials the credentials stored in {@code proxyCredentials}
     */
    public void setProxyCredentials(Credentials credentials) {
        this.proxyCredentials = credentials;
    }

    /**
     * Sets the fallback MIME type.
     *
     * This class assigns {@code defaultMimeType} as the response MIME type when
     * MIME-type detection throws.
     *
     * @param str the MIME type stored in {@code defaultMimeType}
     */
    public void setDefaultMimeType(String str) {
        this.defaultMimeType = str;
    }

    /**
     * Sets the cookie store.
     *
     * @param cookieStore the store kept in {@code cookieStore}
     */
    public void setCookieStore(CookieStore cookieStore) {
        this.cookieStore = cookieStore;
    }

    /**
     * Sets the shared HTTP client context.
     *
     * {@code executeHttpClient} builds a new {@code BasicHttpContext} from this
     * context for every request.
     *
     * @param httpClientContext the context stored in {@code httpClientContext}
     */
    public void setHttpClientContext(HttpClientContext httpClientContext) {
        this.httpClientContext = httpClientContext;
    }

    /**
     * Sets the authentication scheme providers.
     *
     * The map is stored by reference, not copied.
     *
     * @param map the providers stored in {@code authSchemeProviderMap}
     *            (presumably keyed by scheme name — confirm)
     */
    public void setAuthSchemeProviderMap(Map<String, AuthSchemeProvider> map) {
        this.authSchemeProviderMap = map;
    }

    /**
     * Sets the connection check interval.
     *
     * @param i the value stored in {@code connectionCheckInterval}
     *          (NOTE(review): unit is not visible here — confirm)
     */
    public void setConnectionCheckInterval(int i) {
        this.connectionCheckInterval = i;
    }

    /**
     * Sets the idle connection timeout.
     *
     * @param j the value stored in {@code idleConnectionTimeout}
     *          (NOTE(review): unit is not visible here — confirm)
     */
    public void setIdleConnectionTimeout(long j) {
        this.idleConnectionTimeout = j;
    }

    /**
     * Sets the pattern that identifies redirect status codes.
     *
     * {@code isRedirectHttpStatus(int)} requires the decimal text of the status
     * code to match this pattern in full.
     *
     * @param pattern the pattern stored in {@code redirectHttpStatusPattern}
     */
    public void setRedirectHttpStatusPattern(Pattern pattern) {
        this.redirectHttpStatusPattern = pattern;
    }

    /**
     * Sets the {@code useRobotsTxtDisallows} flag.
     *
     * @param z the flag value (presumably whether robots.txt Disallow rules are
     *          applied — confirm against the robots.txt handling code)
     */
    public void setUseRobotsTxtDisallows(boolean z) {
        this.useRobotsTxtDisallows = z;
    }

    /**
     * Sets the {@code useRobotsTxtAllows} flag.
     *
     * @param z the flag value (presumably whether robots.txt Allow rules are
     *          applied — confirm against the robots.txt handling code)
     */
    public void setUseRobotsTxtAllows(boolean z) {
        this.useRobotsTxtAllows = z;
    }

    /**
     * Sets the credentials provider.
     *
     * {@code buildRoutePlanner()} registers proxy credentials on this provider
     * when proxy credentials are available.
     *
     * @param credentialsProvider the provider stored in {@code credentialsProvider}
     */
    public void setCredentialsProvider(CredentialsProvider credentialsProvider) {
        this.credentialsProvider = credentialsProvider;
    }

    /**
     * Sets the authentication cache.
     *
     * {@code buildRoutePlanner()} puts the proxy's auth scheme into this cache
     * when both proxy credentials and a proxy auth scheme are available.
     *
     * @param authCache the cache stored in {@code authCache}
     */
    public void setAuthCache(AuthCache authCache) {
        this.authCache = authCache;
    }

    /**
     * Sets an explicit route planner.
     *
     * When this is non-null, {@code buildRoutePlanner()} returns it directly and
     * does not look at the proxy settings.
     *
     * @param httpRoutePlanner the planner stored in {@code routePlanner}
     */
    public void setRoutePlanner(HttpRoutePlanner httpRoutePlanner) {
        this.routePlanner = httpRoutePlanner;
    }

    /**
     * Sets the {@code redirectsEnabled} flag.
     *
     * @param z the flag value (presumably whether the HTTP client follows
     *          redirects on its own — confirm where the flag is read)
     */
    public void setRedirectsEnabled(boolean z) {
        this.redirectsEnabled = z;
    }

    /**
     * Sets the registry of cookie spec providers.
     *
     * @param lookup the lookup stored in {@code cookieSpecRegistry}
     */
    public void setCookieSpecRegistry(Lookup<CookieSpecProvider> lookup) {
        this.cookieSpecRegistry = lookup;
    }

    /**
     * Sets the date patterns for cookies.
     *
     * The array is stored by reference, not copied.
     *
     * @param strArr the patterns stored in {@code cookieDatePatterns}
     */
    public void setCookieDatePatterns(String[] strArr) {
        this.cookieDatePatterns = strArr;
    }

    /**
     * Sets the DNS resolver.
     *
     * @param dnsResolver the resolver stored in {@code dnsResolver}
     */
    public void setDnsResolver(DnsResolver dnsResolver) {
        this.dnsResolver = dnsResolver;
    }

    /**
     * Sets the SSL socket factory.
     *
     * @param layeredConnectionSocketFactory the factory stored in {@code sslSocketFactory}
     */
    public void setSslSocketFactory(LayeredConnectionSocketFactory layeredConnectionSocketFactory) {
        this.sslSocketFactory = layeredConnectionSocketFactory;
    }
}
