pdfocr-tesseract4
Used in:
components
- Overview
- Versions
- Dependents
- Dependencies
<!-- Maven coordinates of the pdfocr-tesseract4 artifact, version 4.0.2. -->
<dependency>
  <groupId>com.itextpdf</groupId>
  <artifactId>pdfocr-tesseract4</artifactId>
  <version>4.0.2</version>
</dependency>
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- This module declares no groupId or version of its own; only the parent carries them. -->
  <parent>
    <groupId>com.itextpdf</groupId>
    <artifactId>pdfocr-root</artifactId>
    <version>4.0.2</version>
  </parent>

  <artifactId>pdfocr-tesseract4</artifactId>
  <name>pdfOCR-Tesseract4</name>
  <!-- Kept on a single line: whitespace inside the element would become part of the text. -->
  <description>pdfOCR-Tesseract4 is an iText add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving</description>

  <properties>
    <!-- Source file left out of Sonar analysis. -->
    <sonar.exclusions>src/main/java/com/itextpdf/pdfocr/tesseract4/LeptonicaWrapper.java</sonar.exclusions>
  </properties>

  <dependencies>
    <!-- Sibling module, pinned to the same version as this project. -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>pdfocr-api</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!-- NOTE(review): itext.version is not defined in this file; presumably it comes from the parent POM. -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>styled-xml-parser</artifactId>
      <version>${itext.version}</version>
    </dependency>

    <!-- tess4j, with the transitive artifacts listed below excluded. -->
    <dependency>
      <groupId>net.sourceforge.tess4j</groupId>
      <artifactId>tess4j</artifactId>
      <version>4.6.1</version>
      <exclusions>
        <exclusion>
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.ghost4j</groupId>
          <artifactId>ghost4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>log4j-over-slf4j</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox-tools</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>jbig2-imageio</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- commons-io is declared directly because lower versions are affected by
         CVE-2024-47554. The dependency originally comes from tess4j. -->
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.14.0</version>
    </dependency>

    <!-- Test-scoped only. -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>pdftest</artifactId>
      <version>${itext.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <profiles>
    <!-- Opt-in profile that binds the sharpen goal to the install phase. -->
    <profile>
      <id>with-sharpen</id>
      <build>
        <plugins>
          <plugin>
            <groupId>sharpen</groupId>
            <artifactId>sharpen-maven-plugin</artifactId>
            <version>1.0-SNAPSHOT</version>
            <executions>
              <execution>
                <phase>install</phase>
                <goals>
                  <goal>sharpen</goal>
                </goals>
              </execution>
            </executions>
            <dependencies>
              <dependency>
                <groupId>sharpen</groupId>
                <artifactId>standard-framework-mapping</artifactId>
                <version>1.0-SNAPSHOT</version>
              </dependency>
            </dependencies>
            <configuration>
              <projectName>pdfocr-tesseract4</projectName>
              <!-- C# output locations for the main and test code. -->
              <cSharpTargetFolder>./../../../sharp/pdfocr</cSharpTargetFolder>
              <cSharpSourceCodeDestination>itext/itext.pdfocr.tesseract4</cSharpSourceCodeDestination>
              <cSharpTestCodeDestination>itext.tests/itext.pdfocr.tesseract4.tests</cSharpTestCodeDestination>
              <!-- NOTE(review): sharpen.builddotnet and sharpen.showdiff are not defined in this file; confirm where they are set. -->
              <buildDotnet>${sharpen.builddotnet}</buildDotnet>
              <showDiff>${sharpen.showdiff}</showDiff>
              <sourceCodeFiles>
                <file>**/src/main/java/**/*.java</file>
              </sourceCodeFiles>
              <testCodeFiles>
                <file>**/src/test/java/**/*.java</file>
              </testCodeFiles>
            </configuration>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
</project>