package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import lombok.Generated;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

/* loaded from: input_file:org/deeplearning4j/nn/modelimport/keras/preprocessing/text/KerasTokenizer.class */
/**
 * Text tokenizer that turns texts into sequences of word indices or into
 * document/term matrices, and that can be restored from a tokenizer JSON file
 * (see {@link #fromJson(String)}).
 *
 * <p>Word indices start at 1. After {@link #fitOnTexts(String[])} the most
 * frequent word receives the lowest index (directly after the optional
 * out-of-vocabulary token), so that limiting the vocabulary with
 * {@code numWords} keeps the most frequent words.
 *
 * <p>Instances are mutable and perform no synchronization.
 */
public class KerasTokenizer {
    private static final String DEFAULT_FILTER = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n";
    private static final String DEFAULT_SPLIT = " ";

    /** Upper bound (exclusive) on the word indices that are kept; {@code null} means "no limit". */
    private Integer numWords;
    /** Every character of this string is replaced by {@link #split} before splitting. */
    private String filters;
    /** Whether texts are lower-cased before tokenization. */
    private boolean lower;
    /** Literal separator between tokens. */
    private String split;
    /** Whether every character is treated as a token. */
    private boolean charLevel;
    /** Token substituted for unknown or out-of-range words; {@code null} means such words are dropped. */
    private String outOfVocabularyToken;
    /** Token -> number of occurrences over all fitted texts. */
    private Map<String, Integer> wordCounts;
    /** Token -> number of fitted texts containing the token. */
    private HashMap<String, Integer> wordDocs;
    /** Token -> index (1-based). */
    private Map<String, Integer> wordIndex;
    /** Index -> token; inverse of {@link #wordIndex}. */
    private Map<Integer, String> indexWord;
    /** Index -> number of fitted documents containing that index. */
    private Map<Integer, Integer> indexDocs;
    /** Number of documents seen so far; {@code null} until the tokenizer is fitted or configured. */
    private Integer documentCount;

    /**
     * Creates an unfitted tokenizer.
     *
     * @param numWords             exclusive upper bound for kept word indices, or {@code null} for no limit
     * @param filters              characters that are replaced by {@code split} before splitting
     * @param lower                whether to lower-case texts
     * @param split                literal token separator
     * @param charLevel            whether to tokenize per character instead of per word
     * @param outOfVocabularyToken replacement token for unknown words, or {@code null} to drop them
     */
    public KerasTokenizer(Integer numWords, String filters, boolean lower, String split, boolean charLevel,
                          String outOfVocabularyToken) {
        this.wordCounts = new LinkedHashMap<>();
        this.wordDocs = new HashMap<>();
        this.wordIndex = new HashMap<>();
        this.indexWord = new HashMap<>();
        this.indexDocs = new HashMap<>();
        this.numWords = numWords;
        this.filters = filters;
        this.lower = lower;
        this.split = split;
        this.charLevel = charLevel;
        this.outOfVocabularyToken = outOfVocabularyToken;
    }

    /**
     * Creates a word-level, lower-casing tokenizer with the default filter and
     * separator and no out-of-vocabulary token.
     *
     * @param numWords exclusive upper bound for kept word indices, or {@code null} for no limit
     */
    public KerasTokenizer(Integer numWords) {
        this(numWords, DEFAULT_FILTER, true, DEFAULT_SPLIT, false, null);
    }

    /** Creates a tokenizer with all defaults and no vocabulary limit. */
    public KerasTokenizer() {
        this(null, DEFAULT_FILTER, true, DEFAULT_SPLIT, false, null);
    }

    /**
     * Restores a tokenizer from a JSON file.
     *
     * <p>The file must contain a top-level {@code "config"} object. Its
     * {@code word_counts}, {@code word_docs}, {@code word_index},
     * {@code index_word} and {@code index_docs} entries are themselves JSON
     * strings and are parsed a second time. JSON object keys are always
     * strings, so the keys of {@code index_word} and {@code index_docs} are
     * converted to {@code Integer} here; otherwise integer lookups on the
     * restored maps would never match.
     *
     * @param tokenizerJsonFilename path of the JSON file
     * @return the restored tokenizer
     * @throws IOException                        if the file cannot be read
     * @throws InvalidKerasConfigurationException if the configuration is missing or incomplete
     */
    public static KerasTokenizer fromJson(String tokenizerJsonFilename)
            throws IOException, InvalidKerasConfigurationException {
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        String json = new String(Files.readAllBytes(Paths.get(tokenizerJsonFilename)), StandardCharsets.UTF_8);
        Map<String, Object> baseConfig = KerasModelUtils.parseJsonString(json);
        Object rawConfig = baseConfig.get("config");
        if (!(rawConfig instanceof Map)) {
            throw new InvalidKerasConfigurationException("No configuration found for Keras tokenizer");
        }
        @SuppressWarnings("unchecked") // a parsed JSON object always has String keys
        Map<String, Object> config = (Map<String, Object>) rawConfig;

        Boolean lower = (Boolean) config.get("lower");
        Boolean charLevel = (Boolean) config.get("char_level");
        KerasTokenizer tokenizer = new KerasTokenizer(
                toInteger(config.get("num_words")),
                (String) config.get("filters"),
                // Fall back to the constructor defaults instead of failing on a missing flag.
                lower == null || lower,
                (String) config.get("split"),
                charLevel != null && charLevel,
                (String) config.get("oov_token"));

        tokenizer.setDocumentCount(toInteger(config.get("document_count")));
        tokenizer.setWordCounts(toCountsByWord(parseNestedMap(config, "word_counts")));
        tokenizer.setWordDocs(new HashMap<>(toCountsByWord(parseNestedMap(config, "word_docs"))));
        tokenizer.setWordIndex(toCountsByWord(parseNestedMap(config, "word_index")));

        Map<Integer, String> indexWord = new HashMap<>();
        for (Map.Entry<String, Object> entry : parseNestedMap(config, "index_word").entrySet()) {
            Object word = entry.getValue();
            indexWord.put(toInteger(entry.getKey()), word == null ? null : word.toString());
        }
        tokenizer.setIndexWord(indexWord);

        Map<Integer, Integer> indexDocs = new HashMap<>();
        for (Map.Entry<String, Object> entry : parseNestedMap(config, "index_docs").entrySet()) {
            indexDocs.put(toInteger(entry.getKey()), toInteger(entry.getValue()));
        }
        tokenizer.setIndexDocs(indexDocs);
        return tokenizer;
    }

    /** Parses the JSON string stored under {@code key}; fails with a configuration error when it is absent. */
    private static Map<String, Object> parseNestedMap(Map<String, Object> config, String key)
            throws IOException, InvalidKerasConfigurationException {
        Object raw = config.get(key);
        if (!(raw instanceof String)) {
            throw new InvalidKerasConfigurationException(
                    "Keras tokenizer configuration has no '" + key + "' entry");
        }
        return KerasModelUtils.parseJsonString((String) raw);
    }

    /** Copies a parsed JSON object into a {@code String -> Integer} map, keeping its iteration order. */
    private static Map<String, Integer> toCountsByWord(Map<String, Object> raw) {
        Map<String, Integer> converted = new LinkedHashMap<>();
        for (Map.Entry<String, Object> entry : raw.entrySet()) {
            converted.put(entry.getKey(), toInteger(entry.getValue()));
        }
        return converted;
    }

    /** Converts a parsed JSON scalar (number or numeric string) to {@code Integer}; {@code null} stays {@code null}. */
    private static Integer toInteger(Object value) {
        if (value == null) {
            return null;
        }
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return Integer.valueOf(value.toString().trim());
    }

    /**
     * Splits a text into word tokens.
     *
     * <p>The text is optionally lower-cased, every character contained in
     * {@code filters} is replaced by {@code split}, and the result is split on
     * the <em>literal</em> separator {@code split}. Empty tokens are removed.
     *
     * @param text    text to tokenize
     * @param filters characters to treat as separators; {@code null} or empty disables filtering
     * @param lower   whether to lower-case the text first
     * @param split   literal token separator
     * @return the non-empty tokens in order of appearance
     */
    public static String[] textToWordSequence(String text, String filters, boolean lower, String split) {
        // Locale.ROOT keeps the vocabulary independent of the JVM's default locale.
        String normalized = lower ? text.toLowerCase(Locale.ROOT) : text;
        if (filters != null) {
            // Iterating characters (rather than filters.split("")) avoids replace("", split),
            // which would insert the separator between every character for an empty filter.
            for (int i = 0; i < filters.length(); i++) {
                normalized = normalized.replace(String.valueOf(filters.charAt(i)), split);
            }
        }
        List<String> tokens = new ArrayList<>();
        // Quote the separator: String.split interprets its argument as a regular expression.
        for (String token : normalized.split(Pattern.quote(split))) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens.toArray(new String[0]);
    }

    /** Tokenizes one text according to this tokenizer's configuration (word or character level). */
    private String[] tokenize(String text) {
        if (!charLevel) {
            return textToWordSequence(text, filters, lower, split);
        }
        String normalized = lower ? text.toLowerCase(Locale.ROOT) : text;
        // "".split("") yields one empty token; an empty text has no characters at all.
        return normalized.isEmpty() ? new String[0] : normalized.split("");
    }

    /**
     * Updates the vocabulary statistics from the given texts and rebuilds the
     * word/index mappings. May be called repeatedly; counts accumulate.
     *
     * @param texts texts to fit on, one document per element
     */
    public void fitOnTexts(String[] texts) {
        for (String text : texts) {
            documentCount = documentCount == null ? 1 : documentCount + 1;
            String[] tokens = tokenize(text);
            for (String token : tokens) {
                wordCounts.merge(token, 1, Integer::sum);
            }
            // Each distinct token counts once per document.
            for (String token : new HashSet<>(Arrays.asList(tokens))) {
                wordDocs.merge(token, 1, Integer::sum);
            }
        }

        List<String> vocabulary = new ArrayList<>();
        if (outOfVocabularyToken != null) {
            // The out-of-vocabulary token always gets the first index.
            vocabulary.add(outOfVocabularyToken);
        }
        vocabulary.addAll(reverseSortByValues(wordCounts).keySet());

        for (int i = 0; i < vocabulary.size(); i++) {
            wordIndex.put(vocabulary.get(i), i + 1);
        }
        for (Map.Entry<String, Integer> entry : wordIndex.entrySet()) {
            indexWord.put(entry.getValue(), entry.getKey());
        }
        for (Map.Entry<String, Integer> entry : wordDocs.entrySet()) {
            indexDocs.put(wordIndex.get(entry.getKey()), entry.getValue());
        }
    }

    /**
     * Returns a copy of {@code map} whose iteration order is by descending
     * value. The sort is stable, so entries with equal values keep the
     * relative order they have in {@code map}.
     */
    private static Map<String, Integer> reverseSortByValues(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(map.entrySet());
        entries.sort((left, right) -> Integer.compare(right.getValue(), left.getValue()));
        Map<String, Integer> sorted = new LinkedHashMap<>();
        for (Map.Entry<String, Integer> entry : entries) {
            sorted.put(entry.getKey(), entry.getValue());
        }
        return sorted;
    }

    /**
     * Updates the document statistics from already-indexed sequences: the
     * document count grows by the number of sequences, and every distinct
     * index of a sequence increments its document frequency.
     *
     * @param sequences index sequences, one document per element; {@code null} sequences are skipped
     */
    public void fitOnSequences(Integer[][] sequences) {
        documentCount = (documentCount == null ? 0 : documentCount) + sequences.length;
        for (Integer[] sequence : sequences) {
            if (sequence == null) {
                continue;
            }
            for (Integer index : new HashSet<>(Arrays.asList(sequence))) {
                if (index != null) {
                    // merge() also covers indices that have not been seen before.
                    indexDocs.merge(index, 1, Integer::sum);
                }
            }
        }
    }

    /**
     * Converts texts into sequences of word indices.
     *
     * <p>A token is kept when it is in the vocabulary and, if {@code numWords}
     * is set, its index is below {@code numWords}. Any other token is replaced
     * by the index of the out-of-vocabulary token when one is configured and
     * indexed, and dropped otherwise.
     *
     * @param texts texts to convert
     * @return one index sequence per text
     */
    public Integer[][] textsToSequences(String[] texts) {
        Integer oovTokenIndex = wordIndex.get(outOfVocabularyToken);
        List<Integer[]> sequences = new ArrayList<>(texts.length);
        for (String text : texts) {
            List<Integer> indices = new ArrayList<>();
            for (String token : tokenize(text)) {
                Integer index = wordIndex.get(token);
                boolean kept = index != null && (numWords == null || index < numWords);
                if (kept) {
                    indices.add(index);
                } else if (oovTokenIndex != null) {
                    indices.add(oovTokenIndex);
                }
            }
            sequences.add(indices.toArray(new Integer[0]));
        }
        // The target must be a two-dimensional array; an Integer[] cannot hold Integer[] rows.
        return sequences.toArray(new Integer[0][]);
    }

    /**
     * Converts index sequences back into texts, joining the words of a
     * sequence with the configured separator (no trailing separator).
     *
     * <p>An index is rendered as its word when it is known and, if
     * {@code numWords} is set, below {@code numWords}. Any other index is
     * rendered as the out-of-vocabulary token when one is configured and
     * indexed, and dropped otherwise.
     *
     * @param sequences index sequences to convert
     * @return one text per sequence
     */
    public String[] sequencesToTexts(Integer[][] sequences) {
        Integer oovTokenIndex = wordIndex.get(outOfVocabularyToken);
        List<String> texts = new ArrayList<>(sequences.length);
        for (Integer[] sequence : sequences) {
            List<String> words = new ArrayList<>();
            for (Integer index : sequence) {
                String word = indexWord.get(index);
                boolean kept = word != null && (numWords == null || index < numWords);
                if (kept) {
                    words.add(word);
                } else if (oovTokenIndex != null) {
                    words.add(indexWord.get(oovTokenIndex));
                }
            }
            texts.add(String.join(split, words));
        }
        return texts.toArray(new String[0]);
    }

    /**
     * Tokenizes the texts and converts the resulting sequences into a matrix.
     *
     * @param texts texts to convert
     * @param mode  how matrix cells are computed
     * @return matrix with one row per text
     * @see #sequencesToMatrix(Integer[][], TokenizerMode)
     */
    public INDArray textsToMatrix(String[] texts, TokenizerMode mode) {
        return sequencesToMatrix(textsToSequences(texts), mode);
    }

    /**
     * Converts index sequences into a matrix with one row per sequence and one
     * column per word index. Column {@code j} of row {@code i} is derived from
     * the number of occurrences {@code c} of index {@code j} in sequence
     * {@code i}:
     * <ul>
     *   <li>{@code COUNT}: {@code c}</li>
     *   <li>{@code FREQ}: {@code c / sequence length}</li>
     *   <li>{@code BINARY}: {@code 1}</li>
     *   <li>{@code TFIDF}: {@code (1 + ln c) * ln(1 + documentCount / (1 + documentFrequency(j)))}</li>
     * </ul>
     * Indices that are not below the matrix width are ignored.
     *
     * @param sequences index sequences; {@code null} rows stay all-zero
     * @param mode      how matrix cells are computed
     * @return the matrix
     * @throws IllegalArgumentException if neither {@code numWords} nor a fitted vocabulary is
     *                                  available, or if {@code TFIDF} is requested before fitting
     */
    public INDArray sequencesToMatrix(Integer[][] sequences, TokenizerMode mode) {
        // Use a local width: writing the fallback into this.numWords would silently start
        // filtering later textsToSequences/sequencesToTexts calls.
        final int width;
        if (numWords != null) {
            width = numWords;
        } else {
            if (wordIndex.isEmpty()) {
                throw new IllegalArgumentException(
                        "Either specify numWords argument or fit Tokenizer on data first, i.e. by using fitOnTexts");
            }
            // NOTE(review): indices are 1-based, so the highest index does not fit into
            // wordIndex.size() columns (Keras uses size + 1). The width is kept as-is
            // because callers may size their networks on it - confirm before changing.
            width = wordIndex.size();
        }
        if (mode.equals(TokenizerMode.TFIDF) && documentCount == null) {
            throw new IllegalArgumentException(
                    "To use TFIDF mode you need to fit the Tokenizer instance with fitOnTexts first.");
        }

        INDArray matrix = Nd4j.zeros(new int[]{sequences.length, width});
        for (int row = 0; row < sequences.length; row++) {
            Integer[] sequence = sequences[row];
            if (sequence == null) {
                continue;
            }
            Map<Integer, Integer> occurrences = new HashMap<>();
            for (Integer index : sequence) {
                if (index < width) {
                    occurrences.merge(index, 1, Integer::sum);
                }
            }
            for (Map.Entry<Integer, Integer> entry : occurrences.entrySet()) {
                int column = entry.getKey();
                int count = entry.getValue();
                switch (mode) {
                    case COUNT:
                        matrix.put(row, column, Integer.valueOf(count));
                        break;
                    case FREQ:
                        // Floating-point division; integer division would truncate to 0.
                        matrix.put(row, column, Double.valueOf((double) count / sequence.length));
                        break;
                    case BINARY:
                        matrix.put(row, column, Integer.valueOf(1));
                        break;
                    case TFIDF:
                        int documentFrequency = indexDocs.containsKey(column) ? indexDocs.get(column) : 0;
                        double termFrequency = 1.0d + Math.log(count);
                        double inverseDocumentFrequency =
                                Math.log(1.0d + (documentCount / (1.0d + documentFrequency)));
                        matrix.put(row, column, Double.valueOf(termFrequency * inverseDocumentFrequency));
                        break;
                }
            }
        }
        return matrix;
    }

    /** @return the exclusive upper bound for kept word indices, or {@code null} if unlimited */
    @Generated
    public Integer getNumWords() {
        return this.numWords;
    }

    /** @return the characters treated as separators */
    @Generated
    public String getFilters() {
        return this.filters;
    }

    /** @return whether texts are lower-cased before tokenization */
    @Generated
    public boolean isLower() {
        return this.lower;
    }

    /** @return the token separator */
    @Generated
    public String getSplit() {
        return this.split;
    }

    /** @return whether tokenization is per character */
    @Generated
    public boolean isCharLevel() {
        return this.charLevel;
    }

    /** @return the out-of-vocabulary token, or {@code null} if none is configured */
    @Generated
    public String getOutOfVocabularyToken() {
        return this.outOfVocabularyToken;
    }

    /** @return the live token -> occurrence count map */
    @Generated
    public Map<String, Integer> getWordCounts() {
        return this.wordCounts;
    }

    /** @return the live token -> document frequency map */
    @Generated
    public HashMap<String, Integer> getWordDocs() {
        return this.wordDocs;
    }

    /** @return the live token -> index map */
    @Generated
    public Map<String, Integer> getWordIndex() {
        return this.wordIndex;
    }

    /** @return the live index -> token map */
    @Generated
    public Map<Integer, String> getIndexWord() {
        return this.indexWord;
    }

    /** @return the live index -> document frequency map */
    @Generated
    public Map<Integer, Integer> getIndexDocs() {
        return this.indexDocs;
    }

    /** @return the number of documents seen, or {@code null} if never fitted */
    @Generated
    public Integer getDocumentCount() {
        return this.documentCount;
    }

    @Generated
    public void setNumWords(Integer numWords) {
        this.numWords = numWords;
    }

    @Generated
    public void setFilters(String filters) {
        this.filters = filters;
    }

    @Generated
    public void setLower(boolean lower) {
        this.lower = lower;
    }

    @Generated
    public void setSplit(String split) {
        this.split = split;
    }

    @Generated
    public void setCharLevel(boolean charLevel) {
        this.charLevel = charLevel;
    }

    @Generated
    public void setOutOfVocabularyToken(String outOfVocabularyToken) {
        this.outOfVocabularyToken = outOfVocabularyToken;
    }

    @Generated
    public void setWordCounts(Map<String, Integer> wordCounts) {
        this.wordCounts = wordCounts;
    }

    @Generated
    public void setWordDocs(HashMap<String, Integer> wordDocs) {
        this.wordDocs = wordDocs;
    }

    @Generated
    public void setWordIndex(Map<String, Integer> wordIndex) {
        this.wordIndex = wordIndex;
    }

    @Generated
    public void setIndexWord(Map<Integer, String> indexWord) {
        this.indexWord = indexWord;
    }

    @Generated
    public void setIndexDocs(Map<Integer, Integer> indexDocs) {
        this.indexDocs = indexDocs;
    }

    @Generated
    public void setDocumentCount(Integer documentCount) {
        this.documentCount = documentCount;
    }

    /** Two tokenizers are equal when their configuration and all vocabulary maps are equal. */
    @Override
    @Generated
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof KerasTokenizer)) {
            return false;
        }
        KerasTokenizer other = (KerasTokenizer) obj;
        return other.canEqual(this)
                && isLower() == other.isLower()
                && isCharLevel() == other.isCharLevel()
                && Objects.equals(getNumWords(), other.getNumWords())
                && Objects.equals(getDocumentCount(), other.getDocumentCount())
                && Objects.equals(getFilters(), other.getFilters())
                && Objects.equals(getSplit(), other.getSplit())
                && Objects.equals(getOutOfVocabularyToken(), other.getOutOfVocabularyToken())
                && Objects.equals(getWordCounts(), other.getWordCounts())
                && Objects.equals(getWordDocs(), other.getWordDocs())
                && Objects.equals(getWordIndex(), other.getWordIndex())
                && Objects.equals(getIndexWord(), other.getIndexWord())
                && Objects.equals(getIndexDocs(), other.getIndexDocs());
    }

    /** Hook that lets subclasses opt out of equality with plain {@code KerasTokenizer} instances. */
    @Generated
    protected boolean canEqual(Object obj) {
        return obj instanceof KerasTokenizer;
    }

    @Override
    @Generated
    public int hashCode() {
        final int prime = 59;
        int result = 1;
        result = result * prime + (isLower() ? 79 : 97);
        result = result * prime + (isCharLevel() ? 79 : 97);
        result = result * prime + hashOrDefault(getNumWords());
        result = result * prime + hashOrDefault(getDocumentCount());
        result = result * prime + hashOrDefault(getFilters());
        result = result * prime + hashOrDefault(getSplit());
        result = result * prime + hashOrDefault(getOutOfVocabularyToken());
        result = result * prime + hashOrDefault(getWordCounts());
        result = result * prime + hashOrDefault(getWordDocs());
        result = result * prime + hashOrDefault(getWordIndex());
        result = result * prime + hashOrDefault(getIndexWord());
        result = result * prime + hashOrDefault(getIndexDocs());
        return result;
    }

    /** Hash contribution of one field; {@code null} contributes the fixed value 43. */
    private static int hashOrDefault(Object value) {
        return value == null ? 43 : value.hashCode();
    }

    @Override
    @Generated
    public String toString() {
        return "KerasTokenizer(numWords=" + getNumWords()
                + ", filters=" + getFilters()
                + ", lower=" + isLower()
                + ", split=" + getSplit()
                + ", charLevel=" + isCharLevel()
                + ", outOfVocabularyToken=" + getOutOfVocabularyToken()
                + ", wordCounts=" + getWordCounts()
                + ", wordDocs=" + getWordDocs()
                + ", wordIndex=" + getWordIndex()
                + ", indexWord=" + getIndexWord()
                + ", indexDocs=" + getIndexDocs()
                + ", documentCount=" + getDocumentCount() + ")";
    }
}
