package org.netpreserve.jwarc.tools;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Iterator;
import org.netpreserve.jwarc.WarcCaptureRecord;
import org.netpreserve.jwarc.WarcDigest;
import org.netpreserve.jwarc.WarcPayload;
import org.netpreserve.jwarc.WarcReader;
import org.netpreserve.jwarc.WarcRecord;
import org.netpreserve.jwarc.WarcResponse;
import org.netpreserve.jwarc.WarcRevisit;
import org.netpreserve.jwarc.WarcWriter;
import org.netpreserve.jwarc.cdx.CdxReader;
import org.netpreserve.jwarc.cdx.CdxRecord;

/* loaded from: input_file:BOOT-INF/lib/jwarc-0.31.1.jar:org/netpreserve/jwarc/tools/DedupeTool.class */
/**
 * Command-line tool that rewrites a WARC file, replacing response records whose payload digest
 * matches a capture reported by a CDX server with revisit records, and copying every other
 * record through byte-for-byte.
 */
public class DedupeTool {
    /** File name suffixes that {@link #determineOutputPath(Path)} preserves at the end of the output name. */
    private static final String[] KNOWN_SUFFIXES = {".warc.gz", ".warc", ".arc.gz", ".arc"};

    /** Responses whose payload body is smaller than this many bytes are never deduplicated. */
    private long minimumSize = 256;

    /** Base URL of the CDX server that is queried for earlier captures. */
    private String cdxServer;

    /** When set, one line per record is printed to standard output describing what was done with it. */
    private boolean verbose;

    /**
     * Reads every record of {@code infile} and writes a deduplicated copy to {@code outfile}.
     *
     * <p>Records for which {@link #deduplicate(WarcRecord)} yields a revisit are replaced by that
     * revisit; all other records are copied unchanged as raw bytes from the input channel. The
     * output file is created if necessary and truncated if it already exists.
     *
     * @param infile  the WARC file to read
     * @param outfile the file to write the result to
     * @throws IOException if reading, writing or querying the CDX server fails; exceptions raised
     *                     while closing the files are attached as suppressed exceptions rather
     *                     than replacing the original failure
     */
    public void deduplicateWarcFile(Path infile, Path outfile) throws IOException {
        // Resources are closed in reverse order: output, reader, input.
        try (FileChannel input = FileChannel.open(infile);
             WarcReader reader = new WarcReader(input);
             FileChannel output = FileChannel.open(outfile, StandardOpenOption.WRITE,
                     StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
            // Created lazily: only needed once the first revisit record has to be written.
            WarcWriter writer = null;
            WarcRecord record = reader.next().orElse(null);
            while (record != null) {
                long position = reader.position();
                // The current record is examined before the reader is advanced.
                // NOTE(review): presumably advancing invalidates the current record's body - confirm.
                WarcRevisit revisit = deduplicate(record);
                record = reader.next().orElse(null);
                // The reader position after advancing marks the end of the record just examined.
                long length = reader.position() - position;
                if (revisit == null) {
                    if (verbose) {
                        System.out.println("Copying " + position + ":" + length);
                    }
                    transferExactly(input, position, length, output);
                } else {
                    if (verbose) {
                        System.out.println("Writing revisit for " + position + ":" + length);
                    }
                    if (writer == null) {
                        writer = new WarcWriter(output, reader.compression());
                    }
                    writer.write(revisit);
                }
            }
        }
    }

    /**
     * Copies exactly {@code length} bytes, starting at {@code position} in {@code source}, to
     * {@code destination}, repeating {@link FileChannel#transferTo} until everything is written.
     *
     * @param source      channel to read from
     * @param position    offset in {@code source} of the first byte to copy
     * @param length      number of bytes to copy
     * @param destination channel to write to
     * @throws IOException if a transfer makes no progress, or if the total number of bytes
     *                     transferred does not equal {@code length}
     */
    private static void transferExactly(FileChannel source, long position, long length, FileChannel destination) throws IOException {
        long transferred = 0;
        while (transferred < length) {
            long count = source.transferTo(position + transferred, length - transferred, destination);
            if (count <= 0) {
                // Without progress the loop would never terminate.
                throw new IOException("FileChannel.transferTo returned " + count);
            }
            transferred += count;
        }
        if (transferred != length) {
            throw new IOException("Expected to transfer " + length + " but actually transferred " + transferred);
        }
    }

    /**
     * Builds a revisit record for {@code record} if it is a response that duplicates a capture
     * known to the CDX server.
     *
     * @param record the record to examine
     * @return a revisit record referring to the matching capture, or {@code null} when the record
     *         is not a response, has no payload, has a payload body smaller than the minimum size,
     *         has no payload digest, or no matching capture was found
     * @throws IOException if reading the record or querying the CDX server fails
     */
    private WarcRevisit deduplicate(WarcRecord record) throws IOException {
        if (!(record instanceof WarcResponse)) {
            return null;
        }
        WarcResponse response = (WarcResponse) record;

        WarcPayload payload = response.payload().orElse(null);
        if (payload == null || payload.body().size() < minimumSize) {
            return null;
        }

        WarcDigest digest = response.payloadDigest().orElse(null);
        if (digest == null) {
            return null;
        }

        // Only contact the CDX server once all the cheap local checks have passed.
        CdxRecord match = findMatchingRecord(response, digest.base32());
        if (match == null) {
            return null;
        }

        return new WarcRevisit.Builder(response.target(), WarcRevisit.IDENTICAL_PAYLOAD_DIGEST_1_0)
                .date(response.date())
                .refersTo((URI) null, match.target(), match.date())
                .body(response.contentType(), response.http().serializeHeader())
                .payloadDigest(digest)
                .build();
    }

    /**
     * Queries the CDX server for captures of the target URL of {@code capture} and returns the
     * first one whose digest equals {@code digest}, ignoring case.
     *
     * <p>The query asks for exact URL matches, in reverse sort order, limited to 10 rows.
     *
     * @param capture the record whose target URL is looked up
     * @param digest  the payload digest to compare against each CDX record's digest
     * @return the first CDX record with an equal digest, or {@code null} if there is none
     * @throws IOException if the query URL is malformed or the server cannot be read
     */
    private CdxRecord findMatchingRecord(WarcCaptureRecord capture, String digest) throws IOException {
        // NOTE(review): if no CDX server has been set, the string below starts with "null" and
        // new URL(...) rejects it with a MalformedURLException - confirm whether a clearer
        // error or a "no match" result is wanted in that case.
        String query = cdxServer + "?sort=reverse&rows=10&matchType=exact&url="
                + URLEncoder.encode(capture.target(), StandardCharsets.UTF_8.name());
        try (CdxReader reader = new CdxReader(new URL(query).openStream())) {
            Iterator<CdxRecord> candidates = reader.iterator();
            while (candidates.hasNext()) {
                CdxRecord candidate = candidates.next();
                if (digest.equalsIgnoreCase(candidate.digest())) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Sets the CDX server that is queried for earlier captures.
     *
     * @param cdxServer base URL of the CDX server; the query string is appended to it directly
     */
    public void setCdxServer(String cdxServer) {
        this.cdxServer = cdxServer;
    }

    /**
     * Derives the output path for an input file, placing it in the same directory.
     *
     * <p>If the file name ends with one of the known suffixes ({@code .warc.gz}, {@code .warc},
     * {@code .arc.gz}, {@code .arc}), {@code -dedup} is inserted before that suffix, e.g.
     * {@code a.warc.gz} becomes {@code a-dedup.warc.gz}. Otherwise {@code .dedup} is appended,
     * e.g. {@code a.txt} becomes {@code a.txt.dedup}.
     *
     * @param path the input file
     * @return the path of the deduplicated file; resolved against {@code "."} when
     *         {@code path} has no parent
     */
    private static Path determineOutputPath(Path path) {
        String fileName = path.getFileName().toString();
        Path directory = path.getParent();
        if (directory == null) {
            directory = Paths.get(".");
        }
        for (String suffix : KNOWN_SUFFIXES) {
            if (fileName.endsWith(suffix)) {
                String stem = fileName.substring(0, fileName.length() - suffix.length());
                return directory.resolve(stem + "-dedup" + suffix);
            }
        }
        return directory.resolve(fileName + ".dedup");
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x00cb, code lost:
    
        switch(r10) {
            case 0: goto L29;
            case 1: goto L30;
            case 2: goto L44;
            case 3: goto L44;
            case 4: goto L33;
            case 5: goto L33;
            default: goto L45;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:28:0x00f0, code lost:
    
        r8 = r8 + 1;
        r0.setCdxServer(r5[r8]);
     */
    /* JADX WARN: Code restructure failed: missing block: B:31:0x00fd, code lost:
    
        r8 = r8 + 1;
        r0.setMinimumSize(java.lang.Long.parseLong(r5[r8]));
     */
    /* JADX WARN: Code restructure failed: missing block: B:33:0x0155, code lost:
    
        r0.verbose = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:36:0x010d, code lost:
    
        java.lang.System.out.println("Usage: jwarc dedupe [options] [warc-files...]");
        java.lang.System.out.println();
        java.lang.System.out.println("Options:");
        java.lang.System.out.println("      --cdx-server URL      De-deduplicate against a remote CDX server");
        java.lang.System.out.println("      --minimum-size BYTES  Minimum payload size to consider de-duplicating (default " + r0.minimumSize + ")");
        java.lang.System.out.println("  -v, --verbose             Verbose output");
     */
    /* JADX WARN: Code restructure failed: missing block: B:37:0x0154, code lost:
    
        return;
     */
    /* JADX WARN: Code restructure failed: missing block: B:40:0x015d, code lost:
    
        java.lang.System.err.println("Unrecognized option: " + r5[r8]);
        java.lang.System.err.println("Try `jwarc dedupe --help` for usage information");
        java.lang.System.exit(1);
     */
    /* JADX WARN: Code restructure failed: missing block: B:41:0x0184, code lost:
    
        return;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Command-line entry point.
     *
     * <p><strong>The decompiler could not reconstruct this method.</strong> The body below is a
     * placeholder that always throws. The fragments preserved in the comments above show that the
     * original handled options that set the CDX server, set the minimum size, enable verbose
     * output and print usage, and that it rejected unrecognized options with exit status 1.
     * The rest of the original logic is not visible here.
     * TODO: restore this method from the upstream jwarc sources.
     *
     * @param r5 the command-line arguments
     * @throws IOException declared by the original signature
     * @throws UnsupportedOperationException always, until the method body is restored
     */
    public static void main(java.lang.String[] r5) throws java.io.IOException {
        /*
            Method dump skipped, instructions count: 457
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.netpreserve.jwarc.tools.DedupeTool.main(java.lang.String[]):void");
    }

    /**
     * Sets the payload size threshold below which responses are never deduplicated.
     *
     * @param minimumSize minimum payload body size, compared against the body size of each response
     */
    public void setMinimumSize(long minimumSize) {
        this.minimumSize = minimumSize;
    }
}
