tika-normaliser
Used in
components
- Overview
- Versions
- Dependents
- Dependencies
<!-- Coordinates for declaring tika-normaliser 1.8.2 as a dependency in a consuming POM. -->
<dependency>
  <groupId>org.ow2.weblab.webservices</groupId>
  <artifactId>tika-normaliser</artifactId>
  <version>1.8.2</version>
</dependency>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- groupId is not redeclared below, so it is inherited from this parent. -->
  <parent>
    <groupId>org.ow2.weblab.webservices</groupId>
    <artifactId>parent</artifactId>
    <version>1.2.2</version>
    <relativePath>../parent/pom.xml</relativePath>
  </parent>

  <artifactId>tika-normaliser</artifactId>
  <version>1.8.2</version>
  <packaging>war</packaging>

  <name>Normaliser using Tika</name>
  <!-- Kept on a single line so the description text content stays unchanged. -->
  <description>This service is an integration of Apache Tika project. It enables to extract metadata and text content of many kinds of files format. The WebLab document in input is enriched with RDF properties for the metadata and Text unit(s) for the content. The service can be configured through the Spring bean of CXF to handle various kind of features (identifying language or not, provide a normalised XHTML output of the document...).</description>

  <dependencies>
    <!--
      WebLab artifacts. Those without a version element presumably get their
      version from the parent's dependency management (TODO confirm against
      ../parent/pom.xml).
    -->
    <dependency>
      <groupId>org.ow2.weblab.core</groupId>
      <artifactId>model</artifactId>
    </dependency>
    <dependency>
      <groupId>org.ow2.weblab.core</groupId>
      <artifactId>extended</artifactId>
    </dependency>
    <dependency>
      <groupId>org.ow2.weblab.components</groupId>
      <artifactId>content-manager</artifactId>
      <version>1.9</version>
    </dependency>
    <dependency>
      <groupId>org.ow2.weblab.core.helpers</groupId>
      <artifactId>rdf-helper-jena</artifactId>
    </dependency>

    <!-- Apache Tika 1.1: core and parsers are pinned to the same version. -->
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-core</artifactId>
      <version>1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-parsers</artifactId>
      <version>1.1</version>
      <!-- NOTE(review): reason for excluding xercesImpl is not stated here; confirm before removing. -->
      <exclusions>
        <exclusion>
          <groupId>xerces</groupId>
          <artifactId>xercesImpl</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- Optional library used by PDFBox inside Tika; it enables normalisation of Arabic text. -->
    <dependency>
      <groupId>com.ibm.icu</groupId>
      <artifactId>icu4j</artifactId>
      <version>3.8</version>
      <scope>runtime</scope>
    </dependency>

    <!-- Optional library required to parse e-mail messages successfully. -->
    <dependency>
      <groupId>javax.mail</groupId>
      <artifactId>mail</artifactId>
      <scope>runtime</scope>
    </dependency>
  </dependencies>

  <build>
    <!-- Names the built archive after the artifactId alone, without the version suffix. -->
    <finalName>${project.artifactId}</finalName>
  </build>

  <reporting>
    <plugins>
      <plugin>
        <artifactId>maven-javadoc-plugin</artifactId>
        <!-- Licence: Apache 2 -->
        <configuration>
          <!-- External Javadoc sites the generated API documentation links to. -->
          <links>
            <link>http://tika.apache.org/1.1/api/</link>
            <link>http://weblab.ow2.org/WebLab1.2.2/javadoc/</link>
          </links>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>