pdfocr-tesseract4

pkg:maven/com.itextpdf/pdfocr-tesseract4@5.0.1

Used in:

components

Overview
Overview

Versions
Versions

Dependents
Dependents

Dependencies
Dependencies

Overview

Description

pdfOCR-Tesseract4 is an iText add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving.

<!-- Maven coordinates for pdfocr-tesseract4 5.0.1; place this element inside
     the <dependencies> section of the consuming project's pom.xml. -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>pdfocr-tesseract4</artifactId>
    <version>5.0.1</version>
</dependency>

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM. This module declares only an <artifactId> of its own, so its
       groupId and version are inherited from the parent named here. -->
  <parent>
    <groupId>com.itextpdf</groupId>
    <artifactId>pdfocr-root</artifactId>
    <version>5.0.1</version>
  </parent>

  <artifactId>pdfocr-tesseract4</artifactId>

  <name>pdfOCR-Tesseract4</name>
  <description>pdfOCR-Tesseract4 is an iText add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving</description>

  <properties>
    <!-- Single source file listed under sonar.exclusions.
         NOTE(review): presumably read by the Sonar scanner to skip this file
         during analysis; the scanner is configured outside this POM. -->
    <sonar.exclusions>src/main/java/com/itextpdf/pdfocr/tesseract4/LeptonicaWrapper.java</sonar.exclusions>

    <!-- sharpen.* settings: build phase, project name, target folder and the
         destinations for main and test sources.
         NOTE(review): nothing in this file consumes these properties; they
         look like inputs to a Java-to-C# conversion step defined elsewhere
         (likely the parent POM). Confirm before changing any path. -->
    <sharpen.phase>install</sharpen.phase>
    <sharpen.projectName>pdfocr-tesseract4</sharpen.projectName>
    <sharpen.cSharpTargetFolder>./../../../sharp/pdfocr</sharpen.cSharpTargetFolder>
    <sharpen.cSharpSourceCodeDestination>itext/itext.pdfocr.tesseract4</sharpen.cSharpSourceCodeDestination>
    <sharpen.cSharpTestCodeDestination>itext.tests/itext.pdfocr.tesseract4.tests</sharpen.cSharpTestCodeDestination>
  </properties>

  <dependencies>
    <!-- Resolved with ${project.version}, so pdfocr-api always matches the
         version of this module. -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>pdfocr-api</artifactId>
      <version>${project.version}</version>
    </dependency>
    <!-- ${itext.version} is not defined in this file; presumably it is
         supplied by the parent POM (verify there before overriding). -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>styled-xml-parser</artifactId>
      <version>${itext.version}</version>
    </dependency>
    <!-- Tess4J 4.6.1. The exclusions below keep the listed transitive
         artifacts (logging libraries, ghost4j and the PDFBox family) out of
         this module's dependency tree. Each exclusion lists groupId before
         artifactId, matching the coordinate order used elsewhere in this POM. -->
    <dependency>
      <groupId>net.sourceforge.tess4j</groupId>
      <artifactId>tess4j</artifactId>
      <version>4.6.1</version>
      <exclusions>
        <exclusion>
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.ghost4j</groupId>
          <artifactId>ghost4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>log4j-over-slf4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox-tools</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>jbig2-imageio</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <!-- Direct dependency on commons-io. The library originally arrives
         transitively through tess4j, but lower versions are affected by
         CVE-2024-47554, so the version is declared explicitly here. -->
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.21.0</version>
    </dependency>
    <!-- Test-scoped: on the classpath only when compiling and running this
         module's tests, and not passed on to projects that depend on it. -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>pdftest</artifactId>
      <version>${itext.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

</project>