package ai.philterd.phileas.services;

import ai.philterd.phileas.model.configuration.PhileasConfiguration;
import ai.philterd.phileas.model.enums.FilterType;
import ai.philterd.phileas.model.enums.MimeType;
import ai.philterd.phileas.model.filter.Filter;
import ai.philterd.phileas.model.objects.Explanation;
import ai.philterd.phileas.model.objects.PdfRedactionOptions;
import ai.philterd.phileas.model.objects.Span;
import ai.philterd.phileas.model.policy.Ignored;
import ai.philterd.phileas.model.policy.Policy;
import ai.philterd.phileas.model.policy.config.Pdf;
import ai.philterd.phileas.model.policy.graphical.BoundingBox;
import ai.philterd.phileas.model.responses.BinaryDocumentFilterResponse;
import ai.philterd.phileas.model.responses.FilterResponse;
import ai.philterd.phileas.model.serializers.PlaceholderDeserializer;
import ai.philterd.phileas.model.services.AlertService;
import ai.philterd.phileas.model.services.CacheService;
import ai.philterd.phileas.model.services.Classification;
import ai.philterd.phileas.model.services.DocumentProcessor;
import ai.philterd.phileas.model.services.FilterService;
import ai.philterd.phileas.model.services.MetricsService;
import ai.philterd.phileas.model.services.PolicyService;
import ai.philterd.phileas.model.services.PostFilter;
import ai.philterd.phileas.model.services.SplitService;
import ai.philterd.phileas.processors.unstructured.UnstructuredDocumentProcessor;
import ai.philterd.phileas.service.ai.sentiment.OpenNLPSentimentDetector;
import ai.philterd.phileas.services.alerts.DefaultAlertService;
import ai.philterd.phileas.services.disambiguation.VectorBasedSpanDisambiguationService;
import ai.philterd.phileas.services.metrics.NoOpMetricsService;
import ai.philterd.phileas.services.policies.InMemoryPolicyService;
import ai.philterd.phileas.services.policies.LocalPolicyService;
import ai.philterd.phileas.services.policies.utils.PolicyUtils;
import ai.philterd.phileas.services.postfilters.IgnoredPatternsFilter;
import ai.philterd.phileas.services.postfilters.IgnoredTermsFilter;
import ai.philterd.phileas.services.postfilters.TrailingNewLinePostFilter;
import ai.philterd.phileas.services.postfilters.TrailingPeriodPostFilter;
import ai.philterd.phileas.services.postfilters.TrailingSpacePostFilter;
import ai.philterd.phileas.services.split.SplitFactory;
import ai.philterd.services.pdf.PdfRedacter;
import ai.philterd.services.pdf.PdfTextExtractor;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:ai/philterd/phileas/services/PhileasFilterService.class */
/**
 * {@link FilterService} implementation that applies a {@link Policy} to plain text or to a PDF document.
 *
 * <p>Plain text is handed to an {@link UnstructuredDocumentProcessor}, optionally after being split into
 * pieces. PDF documents are converted to lines of text, each line is processed individually, and the
 * resulting spans are handed to a {@link PdfRedacter}.</p>
 *
 * <p>Filters built for a policy are kept in {@link #filterCache}, which is passed to the
 * {@link FilterPolicyLoader} on every call.</p>
 */
public class PhileasFilterService implements FilterService {
    private static final Logger LOGGER = LogManager.getLogger(PhileasFilterService.class);
    private final PolicyService policyService;
    private final PolicyUtils policyUtils;
    private final AlertService alertService;
    private final DocumentProcessor unstructuredDocumentProcessor;
    private final FilterPolicyLoader filterPolicyLoader;
    private final Map<String, Map<FilterType, Filter>> filterCache;

    /**
     * Creates the service with a {@link NoOpMetricsService}.
     *
     * @param phileasConfiguration the engine configuration
     * @param cacheService the cache shared by the policy, alert and disambiguation services
     * @throws IOException propagated from the delegated constructor
     */
    public PhileasFilterService(PhileasConfiguration phileasConfiguration, CacheService cacheService) throws IOException {
        this(phileasConfiguration, new NoOpMetricsService(), cacheService);
    }

    /**
     * Creates the service and wires up its collaborators.
     *
     * @param phileasConfiguration the engine configuration
     * @param metricsService the metrics sink handed to the filter loader and the document processor
     * @param cacheService the cache shared by the policy, alert and disambiguation services
     * @throws IOException declared for the collaborators constructed here
     */
    public PhileasFilterService(PhileasConfiguration phileasConfiguration, MetricsService metricsService, CacheService cacheService) throws IOException {
        LOGGER.info("Initializing Phileas engine.");
        this.filterCache = new ConcurrentHashMap<>();
        // String values in policies are deserialized through PlaceholderDeserializer.
        final Gson gson = new GsonBuilder().registerTypeAdapter(String.class, new PlaceholderDeserializer()).create();
        this.policyService = buildPolicyService(phileasConfiguration, cacheService);
        this.policyUtils = new PolicyUtils(this.policyService, gson);
        this.alertService = new DefaultAlertService(cacheService);
        this.filterPolicyLoader = new FilterPolicyLoader(this.alertService, cacheService, metricsService, new HashMap<>(), phileasConfiguration);
        this.unstructuredDocumentProcessor = new UnstructuredDocumentProcessor(metricsService, new VectorBasedSpanDisambiguationService(phileasConfiguration, cacheService));
    }

    /**
     * @return the policy service selected by the configuration at construction time
     */
    public PolicyService getPolicyService() {
        return this.policyService;
    }

    /**
     * @return the alert service created at construction time
     */
    public AlertService getAlertService() {
        return this.alertService;
    }

    /**
     * Filters plain text using the given policy.
     *
     * @param policy the policy to apply
     * @param str a value passed through to the processor and the response (presumably the context name;
     *            confirm against {@link FilterService})
     * @param str2 the document ID; when empty or {@code null} a random ID is generated
     * @param str3 the text to filter
     * @param mimeType must be {@link MimeType#TEXT_PLAIN}
     * @return the response of the processor, or the combination of the per-piece responses when the
     *         policy enables splitting and the text reaches the splitting threshold
     * @throws Exception if {@code mimeType} is not {@link MimeType#TEXT_PLAIN}, or if processing fails
     */
    public FilterResponse filter(Policy policy, String str, String str2, String str3, MimeType mimeType) throws Exception {
        // Reject unsupported input before loading filters or running any classifier.
        if (mimeType != MimeType.TEXT_PLAIN) {
            throw new Exception("Unknown mime type.");
        }

        final Map<String, String> attributes = new HashMap<>();
        final List<Filter> filters = this.filterPolicyLoader.getFiltersForPolicy(policy, this.filterCache);
        final List<PostFilter> postFilters = getPostFiltersForPolicy(policy);

        if (policy.getConfig().getAnalysis().getSentiment().isEnabled()) {
            final Classification sentiment = new OpenNLPSentimentDetector().classify(policy, str3);
            if (sentiment != null) {
                attributes.put("sentiment", sentiment.label());
                attributes.put("sentiment-confidence", String.valueOf(sentiment.confidence()));
            }
        }

        if (policy.getConfig().getAnalysis().getOffensiveness().isEnabled()) {
            // NOTE(review): offensiveness is classified with the sentiment detector - confirm this is intended.
            final Classification offensiveness = new OpenNLPSentimentDetector().classify(policy, str3);
            if (offensiveness != null) {
                attributes.put("offensiveness", offensiveness.label());
                attributes.put("offensiveness-confidence", String.valueOf(offensiveness.confidence()));
            }
        }

        final String documentId = StringUtils.isEmpty(str2) ? generateDocumentId(str, policy.getName(), str3) : str2;

        final boolean shouldSplit = policy.getConfig().getSplitting().isEnabled()
                && str3.length() >= policy.getConfig().getSplitting().getThreshold();

        if (!shouldSplit) {
            return this.unstructuredDocumentProcessor.process(policy, filters, postFilters, str, documentId, 0, str3, attributes);
        }

        // Process every piece on its own (the piece index is passed along) and stitch the responses together.
        final SplitService splitService = SplitFactory.getSplitService(policy.getConfig().getSplitting().getMethod());
        final List<String> pieces = splitService.split(str3);
        final List<FilterResponse> responses = new ArrayList<>(pieces.size());
        for (int piece = 0; piece < pieces.size(); piece++) {
            responses.add(this.unstructuredDocumentProcessor.process(policy, filters, postFilters, str, documentId, piece, pieces.get(piece), attributes));
        }
        return FilterResponse.combine(responses, str, documentId, splitService.getSeparator());
    }

    /**
     * Filters plain text using the combination of the given policies.
     *
     * @param list identifies the policies to combine (presumably policy names; see {@link PolicyUtils})
     * @param str a value passed through to the processor and the response (presumably the context name)
     * @param str2 the document ID; when empty or {@code null} a random ID is generated
     * @param str3 the text to filter
     * @param mimeType must be {@link MimeType#TEXT_PLAIN}
     * @return the filter response
     * @throws Exception if the policies cannot be combined or filtering fails
     */
    public FilterResponse filter(List<String> list, String str, String str2, String str3, MimeType mimeType) throws Exception {
        return filter(this.policyUtils.getCombinedPolicies(list), str, str2, str3, mimeType);
    }

    /**
     * Filters a PDF document using the combination of the given policies.
     *
     * @param list identifies the policies to combine (presumably policy names; see {@link PolicyUtils})
     * @param str a value passed through to the processor and the response (presumably the context name)
     * @param str2 the document ID; when empty or {@code null} a random ID is generated
     * @param bArr the bytes of the PDF document
     * @param mimeType the type of the input; must be {@link MimeType#APPLICATION_PDF}
     * @param mimeType2 passed to {@link PdfRedacter} together with the document (presumably the desired
     *                  output type)
     * @return the redacted document together with an explanation built from the spans whose offsets were
     *         shifted by the cumulative length of the preceding lines
     * @throws Exception if {@code mimeType} is not {@link MimeType#APPLICATION_PDF}, or if processing fails
     */
    public BinaryDocumentFilterResponse filter(List<String> list, String str, String str2, byte[] bArr, MimeType mimeType, MimeType mimeType2) throws Exception {
        // Reject unsupported input before combining policies or touching the document.
        if (mimeType != MimeType.APPLICATION_PDF) {
            throw new Exception("Unknown mime type.");
        }

        final Policy policy = this.policyUtils.getCombinedPolicies(list);
        final Map<String, String> attributes = new HashMap<>();

        final String documentId;
        if (StringUtils.isEmpty(str2)) {
            // The ID is randomized by a UUID anyway, so a fixed-size digest of the bytes is enough;
            // this avoids rendering the whole document as a decimal string.
            final String contentDigest = bArr == null ? "null" : DigestUtils.md5Hex(bArr);
            documentId = generateDocumentId(str, policy.getName(), contentDigest);
        } else {
            documentId = str2;
        }

        final List<String> lines = new PdfTextExtractor().getLines(bArr);
        final LinkedHashSet<Span> redactionSpans = new LinkedHashSet<>();
        final LinkedHashSet<Span> documentSpans = new LinkedHashSet<>();
        final List<Filter> filters = this.filterPolicyLoader.getFiltersForPolicy(policy, this.filterCache);
        final List<PostFilter> postFilters = getPostFiltersForPolicy(policy);

        // Number of characters in the lines already processed.
        int offset = 0;
        for (String line : lines) {
            final FilterResponse lineResponse = this.unstructuredDocumentProcessor.process(policy, filters, postFilters, str, documentId, 0, line, attributes);
            redactionSpans.addAll(lineResponse.getExplanation().appliedSpans());
            // NOTE(review): the spans shifted below are the same instances that were just added to
            // redactionSpans, so that set also observes the shifted offsets. If Span's hashCode depends
            // on its offsets, the set is mutated after insertion. Copy the spans if that is not intended.
            for (Span span : lineResponse.getExplanation().appliedSpans()) {
                span.setCharacterStart(span.getCharacterStart() + offset);
                span.setCharacterEnd(span.getCharacterEnd() + offset);
                documentSpans.add(span);
            }
            offset += line.length();
        }

        final Pdf pdf = policy.getConfig().getPdf();
        final PdfRedactionOptions redactionOptions = new PdfRedactionOptions(pdf.getDpi(), pdf.getCompressionQuality(), pdf.getScale(), pdf.getPreserveUnredactedPages());
        final List<BoundingBox> boundingBoxes = getBoundingBoxes(policy, mimeType);
        final byte[] redacted = new PdfRedacter(policy, redactionSpans, redactionOptions, boundingBoxes).process(bArr, mimeType2);

        final List<Span> spans = new ArrayList<>(documentSpans);
        return new BinaryDocumentFilterResponse(redacted, str, documentId, new Explanation(spans, spans));
    }

    /**
     * Generates a random document ID: the MD5 hex digest of a random UUID joined with the given values.
     *
     * @param context the context value supplied by the caller
     * @param policyName the name of the policy being applied
     * @param content the text of the document, or a digest standing in for it
     * @return the generated document ID
     */
    private static String generateDocumentId(String context, String policyName, String content) {
        final String documentId = DigestUtils.md5Hex(UUID.randomUUID() + "-" + context + "-" + policyName + "-" + content);
        LOGGER.debug("Generated document ID {}", documentId);
        return documentId;
    }

    /**
     * Chooses the policy service implementation: in-memory when the configured policy service is
     * {@code "memory"} (case-insensitive), otherwise the local one.
     */
    private PolicyService buildPolicyService(PhileasConfiguration phileasConfiguration, CacheService cacheService) {
        if (StringUtils.equalsIgnoreCase(phileasConfiguration.policyService(), "memory")) {
            return new InMemoryPolicyService();
        }
        return new LocalPolicyService(phileasConfiguration, cacheService);
    }

    /**
     * Returns the policy's graphical bounding boxes whose mime type matches the given one
     * (case-insensitive comparison against {@code mimeType.toString()}).
     */
    private List<BoundingBox> getBoundingBoxes(Policy policy, MimeType mimeType) {
        final List<BoundingBox> matching = new ArrayList<>();
        for (BoundingBox boundingBox : policy.getGraphical().getBoundingBoxes()) {
            if (StringUtils.equalsIgnoreCase(boundingBox.getMimeType(), mimeType.toString())) {
                matching.add(boundingBox);
            }
        }
        return matching;
    }

    /**
     * Builds the post filters for a policy, in this order: one ignored-terms filter per ignored list,
     * one ignored-patterns filter if any patterns are present, then the trailing period, space and
     * new-line filters enabled in the policy configuration.
     *
     * @throws IOException declared for the post filter constructors
     */
    private List<PostFilter> getPostFiltersForPolicy(Policy policy) throws IOException {
        final List<PostFilter> postFilters = new ArrayList<>();
        if (CollectionUtils.isNotEmpty(policy.getIgnored())) {
            for (Ignored ignored : policy.getIgnored()) {
                postFilters.add(new IgnoredTermsFilter(ignored));
            }
        }
        if (CollectionUtils.isNotEmpty(policy.getIgnoredPatterns())) {
            postFilters.add(new IgnoredPatternsFilter(policy.getIgnoredPatterns()));
        }
        if (policy.getConfig().getPostFilters().isRemoveTrailingPeriods()) {
            postFilters.add(TrailingPeriodPostFilter.getInstance());
        }
        if (policy.getConfig().getPostFilters().isRemoveTrailingSpaces()) {
            postFilters.add(TrailingSpacePostFilter.getInstance());
        }
        if (policy.getConfig().getPostFilters().isRemoveTrailingNewLines()) {
            postFilters.add(TrailingNewLinePostFilter.getInstance());
        }
        return postFilters;
    }
}
