package org.noear.solon.ai.rag.loader;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.commonmark.node.AbstractVisitor;
import org.commonmark.node.BlockQuote;
import org.commonmark.node.Code;
import org.commonmark.node.FencedCodeBlock;
import org.commonmark.node.HardLineBreak;
import org.commonmark.node.Heading;
import org.commonmark.node.ListItem;
import org.commonmark.node.Node;
import org.commonmark.node.SoftLineBreak;
import org.commonmark.node.Text;
import org.commonmark.node.ThematicBreak;
import org.commonmark.parser.Parser;
import org.noear.solon.Utils;
import org.noear.solon.ai.rag.Document;
import org.noear.solon.core.util.SupplierEx;
import org.noear.solon.lang.Preview;

@Preview("3.1")
/* loaded from: input_file:org/noear/solon/ai/rag/loader/MarkdownLoader.class */
public class MarkdownLoader extends AbstractOptionsDocumentLoader<Options, MarkdownLoader> {
    private final SupplierEx<InputStream> source;
    private final Parser parser;

    /* loaded from: input_file:org/noear/solon/ai/rag/loader/MarkdownLoader$Options.class */
    public static class Options {
        private boolean horizontalLineAsNew;
        private boolean codeBlockAsNew;
        private boolean blockquoteAsNew;

        public Options horizontalLineAsNew(boolean z) {
            this.horizontalLineAsNew = z;
            return this;
        }

        public Options codeBlockAsNew(boolean z) {
            this.codeBlockAsNew = z;
            return this;
        }

        public Options blockquoteAsNew(boolean z) {
            this.blockquoteAsNew = z;
            return this;
        }
    }

    /* loaded from: input_file:org/noear/solon/ai/rag/loader/MarkdownLoader$SplitVisitor.class */
    static class SplitVisitor extends AbstractVisitor {
        private final List<Document> documents = new ArrayList();
        private final List<String> currentParagraphs = new ArrayList();
        private final MarkdownLoader loader;
        private Document currentDocument;

        SplitVisitor(MarkdownLoader markdownLoader) {
            this.loader = markdownLoader;
        }

        public void visit(org.commonmark.node.Document document) {
            this.currentDocument = new Document();
            super.visit(document);
        }

        public void visit(Heading heading) {
            doneAndNew();
            super.visit(heading);
        }

        public void visit(ThematicBreak thematicBreak) {
            if (((Options) this.loader.options).horizontalLineAsNew) {
                doneAndNew();
            }
            super.visit(thematicBreak);
        }

        public void visit(SoftLineBreak softLineBreak) {
            translateLineBreakToSpace();
            super.visit(softLineBreak);
        }

        public void visit(HardLineBreak hardLineBreak) {
            translateLineBreakToSpace();
            super.visit(hardLineBreak);
        }

        public void visit(ListItem listItem) {
            translateLineBreakToSpace();
            super.visit(listItem);
        }

        public void visit(BlockQuote blockQuote) {
            if (((Options) this.loader.options).blockquoteAsNew) {
                doneAndNew();
            }
            translateLineBreakToSpace();
            this.currentDocument.metadata("category", "blockquote");
            super.visit(blockQuote);
        }

        public void visit(Code code) {
            this.currentParagraphs.add(code.getLiteral());
            this.currentDocument.metadata("category", "code_inline");
            super.visit(code);
        }

        public void visit(FencedCodeBlock fencedCodeBlock) {
            if (((Options) this.loader.options).codeBlockAsNew) {
                doneAndNew();
            }
            translateLineBreakToSpace();
            this.currentParagraphs.add(fencedCodeBlock.getLiteral());
            this.currentDocument.metadata("category", "code_block");
            this.currentDocument.metadata("lang", fencedCodeBlock.getInfo());
            doneAndNew();
            super.visit(fencedCodeBlock);
        }

        public void visit(Text text) {
            Heading parent = text.getParent();
            if (parent instanceof Heading) {
                this.currentDocument.metadata("category", String.format("header_%d", Integer.valueOf(parent.getLevel())));
                this.currentDocument.metadata("title", text.getLiteral());
            } else {
                this.currentParagraphs.add(text.getLiteral());
            }
            super.visit(text);
        }

        public List<Document> extract() {
            doneAndNew();
            return this.documents;
        }

        private void doneAndNew() {
            if (!this.currentParagraphs.isEmpty()) {
                this.currentDocument.content(String.join("", this.currentParagraphs));
                this.currentDocument.metadata(this.loader.additionalMetadata);
                this.documents.add(this.currentDocument);
                this.currentParagraphs.clear();
            }
            this.currentDocument = new Document();
        }

        private void translateLineBreakToSpace() {
            if (Utils.isNotEmpty(this.currentParagraphs)) {
                this.currentParagraphs.add(" ");
            }
        }
    }

    public MarkdownLoader(byte[] bArr) {
        this((SupplierEx<InputStream>) () -> {
            return new ByteArrayInputStream(bArr);
        });
    }

    public MarkdownLoader(File file) {
        this((SupplierEx<InputStream>) () -> {
            return new FileInputStream(file);
        });
    }

    public MarkdownLoader(URL url) {
        this((SupplierEx<InputStream>) () -> {
            return url.openStream();
        });
    }

    public MarkdownLoader(SupplierEx<InputStream> supplierEx) {
        if (supplierEx == null) {
            throw new IllegalArgumentException("Source cannot be null");
        }
        this.source = supplierEx;
        this.parser = Parser.builder().build();
        this.options = new Options();
        this.additionalMetadata.put("type", "markdown");
    }

    public List<Document> load() throws IOException {
        try {
            InputStream inputStream = (InputStream) this.source.get();
            Throwable th = null;
            try {
                Node parseReader = this.parser.parseReader(new InputStreamReader(inputStream));
                SplitVisitor splitVisitor = new SplitVisitor(this);
                parseReader.accept(splitVisitor);
                List<Document> extract = splitVisitor.extract();
                if (inputStream != null) {
                    if (0 != 0) {
                        try {
                            inputStream.close();
                        } catch (Throwable th2) {
                            th.addSuppressed(th2);
                        }
                    } else {
                        inputStream.close();
                    }
                }
                return extract;
            } finally {
            }
        } catch (IOException e) {
            throw e;
        } catch (RuntimeException e2) {
            throw e2;
        } catch (Throwable th3) {
            throw new RuntimeException(th3);
        }
    }
}
