tika-ocr
Used in:
components
- OverviewOverview
- VersionsVersions
- DependentsDependents
- DependenciesDependencies
<dependency> <groupId>com.databricks.labs</groupId> <artifactId>tika-ocr</artifactId> <version>0.1.6</version> </dependency>
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.databricks.labs</groupId> <artifactId>tika-ocr</artifactId> <version>0.1.6</version> <name>tika-ocr</name> <url>https://github.com/databrickslabs/tika-ocr</url> <description>Using Apache tika and tesseract to extact text from any document</description> <inceptionYear>2022</inceptionYear> <developers> <developer> <id>aamend</id> <name>Antoine Amend</name> <email>antoine.amend@databricks.com</email> <organization>Databricks</organization> <timezone>MST</timezone> <roles> <role>Technical Director - Financial Services</role> </roles> </developer> </developers> <organization> <name>Databricks</name> <url>https://databricks.com/learn/labs</url> </organization> <licenses> <license> <name>Databricks License, Proprietary - Copyright (2022)</name> </license> </licenses> <scm> <url>https://github.com/databrickslabs/tika-ocr</url> <connection>scm:git:git@github.com:databrickslabs/tika-ocr.git</connection> <tag>tika-ocr-0.1.6</tag> </scm> <distributionManagement> <snapshotRepository> <id>ossrh</id> <url>https://oss.sonatype.org/content/repositories/snapshots</url> </snapshotRepository> <repository> <id>ossrh</id> <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url> </repository> </distributionManagement> <properties> <minimum.coverage>80</minimum.coverage> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <scala.version>2.12.15</scala.version> <scala.binary.version>2.12</scala.binary.version> <spark.version>3.5.0</spark.version> <java.version>11</java.version> </properties> <dependencies> <!--SCALA DEPENDENCIES (provided)--> <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-library</artifactId> <version>${scala.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-reflect</artifactId> <version>${scala.version}</version> <scope>provided</scope> </dependency> <!--SPARK DEPENDENCIES (provided)--> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-sql_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-graphx_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-mllib_${scala.binary.version}</artifactId> <version>${spark.version}</version> <scope>provided</scope> </dependency> <!--TIKA DEPENDENCIES--> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-core</artifactId> <version>2.9.1</version> <exclusions> <exclusion> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-parsers-standard-package</artifactId> <version>2.9.1</version> <exclusions> <exclusion> <groupId>org.slf4j</groupId> <artifactId>jcl-over-slf4j</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>5.2.5</version> </dependency> <dependency> <groupId>net.sourceforge.tess4j</groupId> <artifactId>tess4j</artifactId> <version>5.8.0</version> <exclusions> <exclusion> <groupId>org.slf4j</groupId> <artifactId>jul-to-slf4j</artifactId> </exclusion> <exclusion> <groupId>org.slf4j</groupId> <artifactId>jcl-over-slf4j</artifactId> </exclusion> <exclusion> <groupId>org.slf4j</groupId> <artifactId>log4j-over-slf4j</artifactId> </exclusion> <exclusion> <groupId>log4j</groupId> <artifactId>log4j</artifactId> </exclusion> <exclusion> <groupId>ch.qos.logback</groupId> <artifactId>logback-classic</artifactId> </exclusion> <exclusion> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>com.github.jai-imageio</groupId> <artifactId>jai-imageio-core</artifactId> <version>1.4.0</version> </dependency> <dependency> <groupId>net.java.dev.jna</groupId> <artifactId>jna</artifactId> <version>5.13.0</version> </dependency> <!--TEST DEPENDENCIES (test)--> <dependency> <groupId>org.scalatest</groupId> <artifactId>scalatest_${scala.binary.version}</artifactId> <version>3.3.0-SNAP4</version> <scope>test</scope> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.13.2</version> <scope>test</scope> </dependency> </dependencies> <build> <plugins> <!-- ************************ --> <!-- COMPILER LOGIC --> <!-- ************************ --> <!-- Java compiler --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <version>3.10.1</version> <configuration> <source>${java.version}</source> <target>${java.version}</target> </configuration> </plugin> <!-- Scala compiler --> <plugin> <groupId>net.alchim31.maven</groupId> <artifactId>scala-maven-plugin</artifactId> <version>4.8.1</version> <executions> <execution> <goals> <goal>compile</goal> <goal>testCompile</goal> </goals> </execution> <execution> <id>attach-javadocs</id> <goals> <goal>doc-jar</goal> <goal>add-source</goal> </goals> </execution> </executions> </plugin> <!-- ************************ --> <!-- UNIT TESTS LOGIC --> <!-- ************************ --> <!-- Scala test plugin --> <plugin> <groupId>org.scalatest</groupId> <artifactId>scalatest-maven-plugin</artifactId> <version>2.2.0</version> <configuration> <reportsDirectory>${project.build.directory}/test-reports</reportsDirectory> </configuration> <executions> <execution> <id>test</id> <goals> <goal>test</goal> </goals> </execution> </executions> </plugin> <!-- Code coverage --> <plugin> <groupId>org.scoverage</groupId> <artifactId>scoverage-maven-plugin</artifactId> <version>1.4.11</version> <executions> <execution> <id>scoverage-report</id> <phase>package</phase> <goals> <goal>report</goal> <goal>check</goal> </goals> </execution> </executions> <configuration> <minimumCoverage>${minimum.coverage}</minimumCoverage> <failOnMinimumCoverage>true</failOnMinimumCoverage> <scalaVersion>${scala.version}</scalaVersion> </configuration> </plugin> <!-- ************************ --> <!-- Release logic --> <!-- ************************ --> <!-- Where dependency will be release --> <plugin> <groupId>org.sonatype.plugins</groupId> <artifactId>nexus-staging-maven-plugin</artifactId> <version>1.6.13</version> <extensions>true</extensions> <configuration> <serverId>ossrh</serverId> <nexusUrl>https://oss.sonatype.org/</nexusUrl> <autoReleaseAfterClose>true</autoReleaseAfterClose> </configuration> </plugin> <!-- Handling release / tagging / publishing as maven goal --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-release-plugin</artifactId> <version>2.5.3</version> <configuration> <autoVersionSubmodules>true</autoVersionSubmodules> <useReleaseProfile>false</useReleaseProfile> <releaseProfiles>release</releaseProfiles> <goals>deploy</goals> </configuration> <dependencies> <dependency> <groupId>org.apache.maven.scm</groupId> <artifactId>maven-scm-provider-gitexe</artifactId> <version>1.13.0</version> </dependency> <dependency> <groupId>org.apache.maven.scm</groupId> <artifactId>maven-scm-api</artifactId> <version>1.13.0</version> </dependency> </dependencies> </plugin> <!-- Add Javadoc --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-source-plugin</artifactId> <version>3.2.1</version> <executions> <execution> <id>attach-sources</id> <phase>verify</phase> <goals> <goal>jar-no-fork</goal> </goals> </execution> </executions> </plugin> <!-- Sign artifacts --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-gpg-plugin</artifactId> <version>3.0.1</version> <executions> <execution> <id>sign-artifacts</id> <phase>verify</phase> <goals> <goal>sign</goal> </goals> </execution> </executions> </plugin> </plugins> </build> <profiles> <profile> <!-- CREATING UBER JAR --> <id>shaded</id> <activation> <activeByDefault>false</activeByDefault> </activation> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <version>3.5.1</version> <executions> <execution> <phase>package</phase> <goals> <goal>shade</goal> </goals> <configuration> <filters> <filter> <artifact>*:*</artifact> <excludes> <!-- SecurityException: Invalid signature file digest for Manifest main attributes--> <exclude>META-INF/*.SF</exclude> <exclude>META-INF/*.DSA</exclude> <exclude>META-INF/*.RSA</exclude> </excludes> </filter> </filters> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer" /> </transformers> </configuration> </execution> </executions> </plugin> </plugins> </build> </profile> </profiles> </project>