package org.jabref.logic.importer.fileformat;

import com.google.common.annotations.VisibleForTesting;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.runtime.ObjectMethods;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javafx.collections.ObservableList;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.jabref.architecture.AllowedToUseApacheCommonsLang3;
import org.jabref.logic.citationkeypattern.CitationKeyGenerator;
import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
import org.jabref.logic.cleanup.URLCleanup;
import org.jabref.logic.formatter.bibtexfields.NormalizeUnicodeFormatter;
import org.jabref.logic.importer.AuthorListParser;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.FileType;
import org.jabref.logic.util.StandardFileType;
import org.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
import org.jabref.logic.xmp.XmpUtilReader;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.Date;
import org.jabref.model.entry.EntryLinkList;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@AllowedToUseApacheCommonsLang3("Fastest method to count spaces in a string")
/* loaded from: input_file:org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.class */
public class BibliographyFromPdfImporter extends Importer {
    private static final Logger LOGGER = LoggerFactory.getLogger(BibliographyFromPdfImporter.class);

    // The word "References" in any letter case; marks where the bibliography section starts.
    private static final Pattern REFERENCES = Pattern.compile("References", Pattern.CASE_INSENSITIVE);
    // One numbered reference: "[n]" (group 1) followed by its text (group 2), which runs up to the next "[" or
    // the end of the input. DOTALL lets "." span line breaks inside a reference.
    private static final Pattern REFERENCE_PATTERN = Pattern.compile("\\[(\\d+)\\](.*?)(?=\\[|$)", Pattern.DOTALL);
    // ", 2024." at the very end of the text; group 1 is the four-digit year.
    private static final Pattern YEAR_AT_END = Pattern.compile(", (\\d{4})\\.$");
    // ", 2024" anywhere; group 1 is the year, group 2 the text after it.
    private static final Pattern YEAR = Pattern.compile(", (\\d{4})(.*)");
    // ", pp. 12-34" (one or two hyphens, optional trailing dot); group 1 is the range, group 2 the text after it.
    private static final Pattern PAGES = Pattern.compile(", pp\\. (\\d+--?\\d+)\\.?(.*)");
    // ", p. 12"; group 1 is the page, group 2 the text after it.
    private static final Pattern PAGE = Pattern.compile(", p\\. (\\d+)(.*)");
    // ", ser. NAME"; group 1 is everything up to the next "." or ",", group 2 the text after it.
    private static final Pattern SERIES = Pattern.compile(", ser\\. ([^.,]+)(.*)");
    // ", Jan.-Feb. 2024"; group 1 is the first month, group 2 the number after the second month, group 3 the rest.
    private static final Pattern MONTH_RANGE_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.?)-[A-Z][a-z]{2,7}\\.? (\\d+)(.*)");
    // ", Jan. 2024"; group 1 is "month number", group 2 the text after an optional comma and space.
    private static final Pattern MONTH_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.? \\d+),? ?(.*)");
    // ", vol. 7"; group 1 is the volume, group 2 the text after it.
    private static final Pattern VOLUME = Pattern.compile(", vol\\. (\\d+)(.*)");
    // ", no. 3"; group 1 is the number, group 2 the text after it.
    private static final Pattern NO = Pattern.compile(", no\\. (\\d+)(.*)");
    // Text starting with "in " and optionally "Proc. " (group 1); group 2 is the remainder.
    private static final Pattern PROCEEDINGS_INDICATION = Pattern.compile("^in (Proc\\. )?(.*)");
    // The literal word "Workshop" (case-sensitive).
    private static final Pattern WORKSHOP = Pattern.compile("Workshop");
    // Leading authors (group 1) followed by a title in curly quotes (group 2); the comma may sit inside or
    // outside the closing quote (group 3).
    private static final Pattern AUTHORS_AND_TITLE_AT_BEGINNING = Pattern.compile("^([^“]+), “(.*?)(”,|,”) ");
    // A title in curly quotes (group 1) followed by ", " and the remaining text (group 2).
    private static final Pattern TITLE = Pattern.compile("“(.*?)”, (.*)");

    // May be null; citation keys are only generated after import when preferences are present.
    private final CitationKeyPatternPreferences citationKeyPatternPreferences;
    // Applied to every reference text before it is parsed.
    private final NormalizeUnicodeFormatter normalizeUnicodeFormatter;

    /**
     * Outcome of one attempt to move a piece of information out of the reference text and into an entry field.
     *
     * <p>Declared as a real record: the decompiled form ({@code extends Record} plus hand-written
     * {@code ObjectMethods.bootstrap} calls) is not legal Java source. The compiler generates the
     * accessors, {@code equals}, {@code hashCode} and {@code toString}.
     *
     * @param modified     {@code true} if the pattern matched and the entry field was set
     * @param newReference the reference text that is left to be parsed
     */
    public record EntryUpdateResult(boolean modified, String newReference) {
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    @VisibleForTesting
    /* loaded from: input_file:org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData.class */
    public static final class IntermediateData extends Record {
        private final String number;
        private final String reference;

        IntermediateData(String str, String str2) {
            this.number = str;
            this.reference = str2;
        }

        @Override // java.lang.Record
        public final String toString() {
            return (String) ObjectMethods.bootstrap(MethodHandles.lookup(), "toString", MethodType.methodType(String.class, IntermediateData.class), IntermediateData.class, "number;reference", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->number:Ljava/lang/String;", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->reference:Ljava/lang/String;").dynamicInvoker().invoke(this) /* invoke-custom */;
        }

        @Override // java.lang.Record
        public final int hashCode() {
            return (int) ObjectMethods.bootstrap(MethodHandles.lookup(), "hashCode", MethodType.methodType(Integer.TYPE, IntermediateData.class), IntermediateData.class, "number;reference", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->number:Ljava/lang/String;", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->reference:Ljava/lang/String;").dynamicInvoker().invoke(this) /* invoke-custom */;
        }

        @Override // java.lang.Record
        public final boolean equals(Object obj) {
            return (boolean) ObjectMethods.bootstrap(MethodHandles.lookup(), "equals", MethodType.methodType(Boolean.TYPE, IntermediateData.class, Object.class), IntermediateData.class, "number;reference", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->number:Ljava/lang/String;", "FIELD:Lorg/jabref/logic/importer/fileformat/BibliographyFromPdfImporter$IntermediateData;->reference:Ljava/lang/String;").dynamicInvoker().invoke(this, obj) /* invoke-custom */;
        }

        public String number() {
            return this.number;
        }

        public String reference() {
            return this.reference;
        }
    }

    /**
     * Creates an importer without citation key preferences; imported entries keep the citation key
     * assigned during parsing.
     */
    public BibliographyFromPdfImporter() {
        this.citationKeyPatternPreferences = null;
        this.normalizeUnicodeFormatter = new NormalizeUnicodeFormatter();
    }

    /**
     * Creates an importer that generates citation keys for the imported entries.
     *
     * @param citationKeyPatternPreferences preferences handed to the citation key generator after a file import;
     *                                      when {@code null}, no keys are generated
     */
    public BibliographyFromPdfImporter(CitationKeyPatternPreferences citationKeyPatternPreferences) {
        this.citationKeyPatternPreferences = citationKeyPatternPreferences;
        this.normalizeUnicodeFormatter = new NormalizeUnicodeFormatter();
    }

    /**
     * Checks whether the input looks like a PDF file by inspecting its first line.
     *
     * @param bufferedReader reader positioned at the start of the input
     * @return {@code true} if the first line starts with {@code %PDF}; {@code false} otherwise,
     *         including when the input is empty
     * @throws IOException if reading the first line fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader bufferedReader) throws IOException {
        String firstLine = bufferedReader.readLine();
        // readLine() returns null at end of stream, so an empty input is simply not a PDF.
        return firstLine != null && firstLine.startsWith("%PDF");
    }

    /**
     * Not supported by this importer; use {@link #importDatabase(Path)} instead.
     *
     * @param bufferedReader must not be {@code null}
     * @return never returns normally
     * @throws NullPointerException          if {@code bufferedReader} is {@code null}
     * @throws UnsupportedOperationException always, for a non-null reader
     */
    @Override
    public ParserResult importDatabase(BufferedReader bufferedReader) throws IOException {
        Objects.requireNonNull(bufferedReader);
        throw new UnsupportedOperationException("BibliographyFromPdfImporter does not support importDatabase(BufferedReader reader). Instead use importDatabase(Path filePath).");
    }

    /**
     * Returns the identifier of this importer.
     *
     * @return the fixed id string of this importer
     */
    @Override
    public String getId() {
        // The spelling is part of the identifier and is kept exactly as is.
        final String importerId = "pdfBibiliography";
        return importerId;
    }

    /**
     * Returns the name of this importer.
     *
     * @return the fixed, non-localized importer name
     */
    @Override
    public String getName() {
        final String importerName = "Bibliography from PDF";
        return importerName;
    }

    /**
     * Returns the description of this importer, looked up through {@code Localization.lang}.
     *
     * @return the result of the localization lookup for the description text
     */
    @Override
    public String getDescription() {
        // The description text has no placeholders, so an empty argument array is passed.
        final Object[] noArguments = new Object[0];
        return Localization.lang("Reads the references from the 'References' section of a PDF file.", noArguments);
    }

    /**
     * Returns the file type handled by this importer.
     *
     * @return {@code StandardFileType.PDF}
     */
    @Override
    public FileType getFileType() {
        final FileType handledType = StandardFileType.PDF;
        return handledType;
    }

    /**
     * Imports the references found in the given PDF file.
     *
     * <p>The document is loaded, the text of its references section is extracted and every numbered
     * reference is parsed into an entry. If citation key preferences were supplied at construction,
     * a citation key is then generated for each entry.
     *
     * @param path the PDF file to read
     * @return the parsed entries; or an error result if the PDF is encrypted in an unsupported way or
     *         an I/O error occurs while loading, reading or closing the document
     */
    @Override
    public ParserResult importDatabase(Path path) {
        List<BibEntry> entries;
        // try-with-resources closes the document on every path, including when extraction or parsing throws.
        try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(path)) {
            entries = getEntriesFromPDFContent(getReferencesPagesText(document));
        } catch (EncryptedPdfsNotSupportedException e) {
            return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported.", new Object[0]));
        } catch (IOException e) {
            return ParserResult.fromError(e);
        }

        ParserResult parserResult = new ParserResult(entries);
        if (this.citationKeyPatternPreferences != null) {
            CitationKeyGenerator citationKeyGenerator = new CitationKeyGenerator(parserResult.getDatabaseContext(), this.citationKeyPatternPreferences);
            parserResult.getDatabase().getEntries().forEach(citationKeyGenerator::generateAndSetKey);
        }
        return parserResult;
    }

    /**
     * Turns the text of a references section into entries.
     *
     * @param str text of the references section
     * @return one parsed entry per numbered reference found in the text, as an unmodifiable list
     */
    private List<BibEntry> getEntriesFromPDFContent(String str) {
        List<IntermediateData> rawReferences = getIntermediateData(str);
        return rawReferences.stream()
                            .map(raw -> parseReference(raw.number(), raw.reference()))
                            .toList();
    }

    /**
     * Splits the text of a references section into its numbered references.
     *
     * @param str text of the references section
     * @return for every match of {@code REFERENCE_PATTERN}, in order of appearance, the bracketed number
     *         and the reference text with line breaks replaced by single spaces and surrounding whitespace
     *         removed; empty if nothing matches
     */
    @VisibleForTesting
    static List<IntermediateData> getIntermediateData(String str) {
        // Typed list instead of a raw ArrayList, so the return needs no unchecked conversion.
        List<IntermediateData> references = new ArrayList<>();
        Matcher matcher = REFERENCE_PATTERN.matcher(str);
        while (matcher.find()) {
            String number = matcher.group(1);
            String text = matcher.group(2).replaceAll("\\r?\\n", " ").trim();
            references.add(new IntermediateData(number, text));
        }
        return references;
    }

    /**
     * Extracts the text that follows the word "References" in the document.
     *
     * <p>Page texts are collected backwards from the last page (see {@code prependToResult}). The result
     * is everything after the first match of {@code REFERENCES} in that collected text.
     *
     * @param pDDocument the loaded PDF document
     * @return the text after the first "References" match; if there is no match, the text of the last page
     * @throws IOException if text extraction fails
     */
    private String getReferencesPagesText(PDDocument pDDocument) throws IOException {
        final int lastPage = pDDocument.getNumberOfPages();
        final String collectedText = prependToResult("", pDDocument, new PDFTextStripper(), lastPage);

        final Matcher heading = REFERENCES.matcher(collectedText);
        if (!heading.find()) {
            // Without a heading, limit the result to the last page instead of returning everything collected.
            LOGGER.warn("Could not found 'References'. Returning last page only.");
            return getPageContents(pDDocument, new PDFTextStripper(), lastPage);
        }
        return collectedText.substring(heading.end());
    }

    /**
     * Tells whether the given text contains a match of {@code REFERENCES}.
     *
     * @param str text to search
     * @return {@code true} if the pattern is found anywhere in the text
     */
    private static boolean containsWordReferences(String str) {
        Matcher headingMatcher = REFERENCES.matcher(str);
        return headingMatcher.find();
    }

    /**
     * Collects page texts backwards, starting at page {@code i}, and puts each one in front of the text
     * gathered so far.
     *
     * <p>Collection stops after the first page whose text contains a match of {@code REFERENCES}, or
     * after the page number has reached zero or below.
     *
     * @param str             text already gathered; the page texts are placed before it
     * @param pDDocument      the document to read from
     * @param pDFTextStripper the stripper used for every page
     * @param i               page number to start with
     * @return the concatenated page texts followed by {@code str}
     * @throws IOException if text extraction fails
     */
    private String prependToResult(String str, PDDocument pDDocument, PDFTextStripper pDFTextStripper, int i) throws IOException {
        String gathered = str;
        int page = i;
        while (true) {
            String pageText = getPageContents(pDDocument, pDFTextStripper, page);
            gathered = pageText + gathered;
            if (containsWordReferences(pageText) || page <= 0) {
                return gathered;
            }
            page--;
        }
    }

    /**
     * Extracts the text of a single page.
     *
     * @param pDDocument      the document to read from
     * @param pDFTextStripper the stripper to use; its start and end page are both set to {@code i}
     * @param i               the page number to extract
     * @return whatever the stripper wrote for that page
     * @throws IOException if text extraction fails
     */
    private static String getPageContents(PDDocument pDDocument, PDFTextStripper pDFTextStripper, int i) throws IOException {
        StringWriter pageText = new StringWriter();
        // Restrict the stripper to exactly one page before writing.
        pDFTextStripper.setEndPage(i);
        pDFTextStripper.setStartPage(i);
        pDFTextStripper.writeText(pDDocument, pageText);
        return pageText.toString();
    }

    /**
     * Parses one numbered reference into a {@link BibEntry}.
     *
     * <p>The reference text is consumed piece by piece: each recognized part (URL, DOI, pages, series,
     * month/year, volume, number, authors, title, proceedings information) is stored in the entry and
     * removed from the working text. Whatever remains at the end becomes the journal, the book title or
     * a note, depending on what was found before. The order of the steps matters because each step works
     * on the text left over by the previous one.
     *
     * @param str  the reference number; used as the citation key
     * @param str2 the reference text without its bracketed number
     * @return the entry; its comment field always holds "[number] " followed by the unicode-normalized text
     */
    @VisibleForTesting
    BibEntry parseReference(String str, String str2) {
        String str3;
        int i;
        String str4;
        String format = this.normalizeUnicodeFormatter.format(str2);
        // Kept untouched so it can be stored in the comment field at the end.
        String str5 = "[" + str + "] " + format;
        // Every entry starts out as an article; later steps may change the type.
        BibEntry withCitationKey = new BibEntry(StandardEntryType.Article).withCitationKey(str);
        // Turns ".-" and en dashes into "-", then removes a "- " that follows a non-space character
        // (presumably to undo hyphenation at line breaks — TODO confirm).
        String replaceAll = format.replace(".-", "-").replace("–", "-").replaceAll("([^ ])- ", "$1");
        // URL: the first match is stored and every occurrence of it is removed from the text.
        Matcher matcher = URLCleanup.URL_PATTERN.matcher(replaceAll);
        if (matcher.find()) {
            String group = matcher.group();
            withCitationKey.setField(StandardField.URL, group);
            replaceAll = replaceAll.replace(group, "").trim();
            // If the text now ends with the separator constant, its last character is dropped.
            if (replaceAll.endsWith(EntryLinkList.SEPARATOR)) {
                replaceAll = replaceAll.substring(0, replaceAll.length() - 1);
            }
        }
        // DOI: everything after "doi:" (with spaces removed) is the DOI; the text is cut off before "doi:".
        int indexOf = replaceAll.indexOf("doi:");
        if (indexOf >= 0) {
            withCitationKey.setField(StandardField.DOI, replaceAll.substring(indexOf + "doi:".length()).trim().replace(" ", ""));
            replaceAll = replaceAll.substring(0, indexOf).trim();
        }
        // Page range, then single page (both write the pages field), then series.
        String str6 = updateEntryAndReferenceIfMatches(updateEntryAndReferenceIfMatches(updateEntryAndReferenceIfMatches(replaceAll, PAGES, withCitationKey, StandardField.PAGES).newReference, PAGE, withCitationKey, StandardField.PAGES).newReference, SERIES, withCitationKey, StandardField.SERIES).newReference;
        // A month range is reduced to its first month plus the number that follows, so the next step can handle it.
        Matcher matcher2 = MONTH_RANGE_AND_YEAR.matcher(str6);
        if (matcher2.find()) {
            str6 = str6.substring(0, matcher2.start()) + ", " + matcher2.group(1) + " " + matcher2.group(2) + matcher2.group(3);
        }
        // Month and year: only consumed when Date.parse accepts the matched text.
        Matcher matcher3 = MONTH_AND_YEAR.matcher(str6);
        if (matcher3.find()) {
            Optional<Date> parse = Date.parse(matcher3.group(1));
            if (parse.isPresent()) {
                Date date = parse.get();
                date.getYear().ifPresent(num -> {
                    withCitationKey.setField(StandardField.YEAR, num.toString());
                });
                date.getMonth().ifPresent(month -> {
                    withCitationKey.setField(StandardField.MONTH, month.getJabRefFormat());
                });
                String trim = str6.substring(0, matcher3.start()).trim();
                String group2 = matcher3.group(2);
                // Text after the date is re-attached with ", " (minus a leading ". "), unless it is empty or just ".".
                str6 = trim + ((group2.isEmpty() || ".".equals(group2)) ? "" : ", " + group2.replaceAll("^\\. ", ""));
            }
        }
        // A year at the very end of the text; this may overwrite a year set by the step above.
        Matcher matcher4 = YEAR_AT_END.matcher(str6);
        if (matcher4.find()) {
            withCitationKey.setField(StandardField.YEAR, matcher4.group(1));
            str6 = str6.substring(0, matcher4.start()).trim();
        }
        // Year anywhere in the text, then volume and number. z / z2 remember whether volume / number were found.
        EntryUpdateResult updateEntryAndReferenceIfMatches = updateEntryAndReferenceIfMatches(updateEntryAndReferenceIfMatches(str6, YEAR, withCitationKey, StandardField.YEAR).newReference, VOLUME, withCitationKey, StandardField.VOLUME);
        boolean z = updateEntryAndReferenceIfMatches.modified;
        EntryUpdateResult updateEntryAndReferenceIfMatches2 = updateEntryAndReferenceIfMatches(updateEntryAndReferenceIfMatches.newReference, NO, withCitationKey, StandardField.NUMBER);
        boolean z2 = updateEntryAndReferenceIfMatches2.modified;
        String str7 = updateEntryAndReferenceIfMatches2.newReference;
        // Authors followed by a quoted title at the start; otherwise only a quoted title is looked for.
        Matcher matcher5 = AUTHORS_AND_TITLE_AT_BEGINNING.matcher(str7);
        if (matcher5.find()) {
            String replaceAll2 = matcher5.group(1).replaceAll("et al\\.?", "and others");
            // Falls back to AuthorList.fixAuthorFirstNameFirst when normalizeSimply returns nothing.
            withCitationKey.setField(StandardField.AUTHOR, AuthorListParser.normalizeSimply(replaceAll2).orElseGet(() -> {
                return AuthorList.fixAuthorFirstNameFirst(replaceAll2);
            }));
            withCitationKey.setField(StandardField.TITLE, matcher5.group(2).replaceAll("et al\\.?", "and others"));
            str3 = str7.substring(matcher5.end()).trim();
        } else {
            str3 = updateEntryAndReferenceIfMatches(str7, TITLE, withCitationKey, StandardField.TITLE).newReference;
        }
        // A leading "presented at" / "to be presented at" is stripped and marks the entry as InProceedings.
        for (String str8 : List.of("presented at", "to be presented at")) {
            if (str3.startsWith(str8)) {
                str3 = str3.substring(str8.length()).trim();
                withCitationKey.setType(StandardEntryType.InProceedings);
            }
        }
        // Proceedings: either the text starts with "in ", or it mentions "Workshop" while neither volume
        // nor number was found.
        Matcher matcher6 = PROCEEDINGS_INDICATION.matcher(str3);
        Matcher matcher7 = WORKSHOP.matcher(str3);
        if (matcher6.find() || (matcher7.find() && !z && !z2)) {
            withCitationKey.setType(StandardEntryType.InProceedings);
            if (matcher6.hasMatch()) {
                // NOTE(review): the 3 presumably stands for the length of the "in " prefix — confirm.
                i = matcher6.start(2) - 3;
                String group3 = matcher6.group(1);
                // The book title keeps an optional "Proc. " prefix but not the leading "in ".
                str4 = group3 == null ? matcher6.group(2) : group3 + matcher6.group(2);
            } else {
                i = 0;
                str4 = str3;
            }
            // The whole remaining text is consumed as book title (and possibly publisher).
            str3 = "";
            // Position of the last ". " (or, failing that, the last '.') within the part of the title starting at i.
            int lastIndexOf = str4.substring(i).lastIndexOf(". ");
            if (lastIndexOf == -1) {
                lastIndexOf = str4.substring(i).lastIndexOf(46);
            }
            if (lastIndexOf > i) {
                String trim2 = str4.substring(i + lastIndexOf + 1).trim();
                // The trailing segment is taken as publisher only if it contains no "http" and at most one space.
                if (!trim2.contains("http") && StringUtils.countMatches(trim2, ' ') <= 1) {
                    str4 = str4.substring(0, i + lastIndexOf).trim();
                    if (str4.startsWith("in ")) {
                        str4 = str4.substring(3);
                    }
                    withCitationKey.setField(StandardField.PUBLISHER, trim2);
                }
            }
            withCitationKey.setField(StandardField.BOOKTITLE, str4);
        }
        // Nothing left to classify: store the original reference as comment and finish.
        if (str3.isEmpty()) {
            withCitationKey.setField(StandardField.COMMENT, str5);
            return withCitationKey;
        }
        // Classify the leftover text (without a trailing dot).
        String replaceAll3 = str3.trim().replaceAll("\\.$", "");
        if (z || z2) {
            // A volume or number was found, so the leftover is stored as the journal.
            withCitationKey.setField(StandardField.JOURNAL, replaceAll3);
        } else if (replaceAll3.contains(EntryLinkList.SEPARATOR) || replaceAll3.isEmpty()) {
            // Leftover containing the separator constant (or empty): treated as proceedings, appended to an existing book title.
            LOGGER.trace("InProceedings fallback used. Reference: {}", replaceAll3);
            withCitationKey.setType(StandardEntryType.InProceedings);
            if (withCitationKey.hasField(StandardField.BOOKTITLE)) {
                withCitationKey.setField(StandardField.BOOKTITLE, withCitationKey.getField(StandardField.BOOKTITLE).get() + " " + replaceAll3);
            } else {
                withCitationKey.setField(StandardField.BOOKTITLE, replaceAll3);
            }
        } else if (replaceAll3.endsWith(" Note") || replaceAll3.endsWith(" note")) {
            // Leftover ending in "Note"/"note": stored as note of a technical report.
            withCitationKey.setField(StandardField.NOTE, replaceAll3);
            withCitationKey.setType(StandardEntryType.TechReport);
        } else {
            LOGGER.debug("Falling back to journal even if no volume and no number was found. Reference: {}", replaceAll3);
            withCitationKey.setField(StandardField.JOURNAL, replaceAll3);
        }
        withCitationKey.setField(StandardField.COMMENT, str5);
        return withCitationKey;
    }

    /**
     * Looks for the pattern in the reference text and, on a match, moves the matched value into the entry.
     *
     * @param str      the current reference text
     * @param pattern  pattern whose group 1 is the field value and whose group 2 is the text after it
     * @param bibEntry the entry to update
     * @param field    the field that receives group 1
     * @return on a match: a modified result whose text is the trimmed part before the match followed by
     *         group 2; otherwise an unmodified result carrying {@code str} unchanged
     */
    private static EntryUpdateResult updateEntryAndReferenceIfMatches(String str, Pattern pattern, BibEntry bibEntry, Field field) {
        Matcher match = pattern.matcher(str);
        if (match.find()) {
            bibEntry.setField(field, match.group(1));
            String beforeMatch = str.substring(0, match.start()).trim();
            String afterValue = match.group(2);
            return new EntryUpdateResult(true, beforeMatch + afterValue);
        }
        return new EntryUpdateResult(false, str);
    }
}
