crawler4j
Used in:
components
- Overview
- Versions
- Dependents
- Dependencies
<!-- Maven coordinates for declaring a dependency on crawler4j 4.4.0. -->
<dependency>
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j</artifactId>
  <version>4.4.0</version>
</dependency>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!--
    Module POM for the crawler4j artifact (edu.uci.ics:crawler4j:4.4.0).
    The groupId and version are inherited from the crawler4j-parent POM
    located one directory up.
  -->
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <artifactId>crawler4j-parent</artifactId>
    <groupId>edu.uci.ics</groupId>
    <version>4.4.0</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>crawler4j</artifactId>
  <name>${project.groupId}:${project.artifactId}</name>
  <description>Open Source Web Crawler for Java</description>
  <url>https://github.com/yasserg/crawler4j</url>

  <!-- Dependency versions, referenced from the <dependencies> section below. -->
  <properties>
    <slf4j.version>1.7.22</slf4j.version>
    <logback.version>1.1.7</logback.version>
    <guava.version>24.0-jre</guava.version>
    <apache.http.components.version>4.5.3</apache.http.components.version>
    <je.version>5.0.84</je.version>
    <apache.tika.version>1.16</apache.tika.version>

    <!-- Test dependency versions -->
    <junit.version>4.12</junit.version>
    <wiremock.version>2.14.0</wiremock.version>
    <spock.version>1.0-groovy-2.4</spock.version>
    <groovy.version>2.4.12</groovy.version>
  </properties>

  <profiles>
    <!--
      Opt-in profile (-Pfatjar): during the package phase, additionally builds
      a jar-with-dependencies assembly named crawler4j-${project.version}.
    -->
    <profile>
      <id>fatjar</id>
      <build>
        <plugins>
          <plugin>
            <!-- groupId spelled out for consistency with the other plugins;
                 this is the value Maven assumes when it is omitted. -->
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.5.3</version>
            <configuration>
              <descriptorRefs>
                <descriptorRef>jar-with-dependencies</descriptorRef>
              </descriptorRefs>
            </configuration>
            <executions>
              <execution>
                <id>make-fat-jar</id>
                <phase>package</phase>
                <goals>
                  <goal>single</goal>
                </goals>
                <configuration>
                  <finalName>crawler4j-${project.version}</finalName>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>

  <build>
    <plugins>
      <!-- No version or configuration here; presumably managed by
           crawler4j-parent (TODO confirm against ../pom.xml). -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
      </plugin>

      <!-- Keeps every *.properties file out of the packaged jar. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.5</version>
        <configuration>
          <excludes>
            <exclude>**/*.properties</exclude>
          </excludes>
        </configuration>
      </plugin>

      <!-- No version or configuration here; presumably managed by
           crawler4j-parent (TODO confirm against ../pom.xml). -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-checkstyle-plugin</artifactId>
      </plugin>

      <!-- Code coverage: attach the JaCoCo agent, then generate the report in
           the test phase, leaving **/exceptions/** out of the report. -->
      <plugin>
        <groupId>org.jacoco</groupId>
        <artifactId>jacoco-maven-plugin</artifactId>
        <version>0.7.9</version>
        <executions>
          <execution>
            <id>pre-unit-test</id>
            <goals>
              <goal>prepare-agent</goal>
            </goals>
          </execution>
          <execution>
            <id>post-unit-test</id>
            <phase>test</phase>
            <goals>
              <goal>report</goal>
            </goals>
            <configuration>
              <excludes>
                <exclude>**/exceptions/**</exclude>
              </excludes>
            </configuration>
          </execution>
        </executions>
      </plugin>

      <!-- Groovy compiler stuff -->
      <plugin>
        <groupId>org.codehaus.gmavenplus</groupId>
        <artifactId>gmavenplus-plugin</artifactId>
        <version>1.5</version>
        <executions>
          <execution>
            <goals>
              <goal>addSources</goal>
              <goal>addTestSources</goal>
              <goal>generateStubs</goal>
              <goal>compile</goal>
              <goal>testGenerateStubs</goal>
              <goal>testCompile</goal>
              <goal>removeStubs</goal>
              <goal>removeTestStubs</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- Registers src/main/groovy and src/test/groovy as additional
           source and test-source roots. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <version>1.9.1</version>
        <executions>
          <execution>
            <id>add-source</id>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src/main/groovy</source>
              </sources>
            </configuration>
          </execution>
          <execution>
            <id>add-test-source</id>
            <phase>generate-test-sources</phase>
            <goals>
              <goal>add-test-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src/test/groovy</source>
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <!-- Compile time Dependencies -->
    <dependency>
      <!-- Logging framework -->
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <!-- Implementation of slf4j -->
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>${logback.version}</version>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <!-- Google's core Java libraries -->
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>${guava.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>${apache.http.components.version}</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <!-- Presumably resolved from the oracleReleases repository declared at
           the bottom of this file (TODO confirm). -->
      <groupId>com.sleepycat</groupId>
      <artifactId>je</artifactId>
      <version>${je.version}</version>
    </dependency>
    <dependency>
      <!-- tika-parsers with a long list of its transitive dependencies
           excluded; each groupId:artifactId pair is listed exactly once. -->
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-parsers</artifactId>
      <version>${apache.tika.version}</version>
      <exclusions>
        <exclusion>
          <groupId>org.apache.poi</groupId>
          <artifactId>poi</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.poi</groupId>
          <artifactId>poi-ooxml</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.poi</groupId>
          <artifactId>poi-scratchpad</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.poi</groupId>
          <artifactId>poi-ooxml-schemas</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>pdfbox-tools</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.pdfbox</groupId>
          <artifactId>jempbox</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.tallison</groupId>
          <artifactId>jmatio</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.healthmarketscience.jackcess</groupId>
          <artifactId>jackcess</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.healthmarketscience.jackcess</groupId>
          <artifactId>jackcess-encrypt</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.tukaani</groupId>
          <artifactId>xz</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.github.junrar</groupId>
          <artifactId>junrar</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.opennlp</groupId>
          <artifactId>opennlp-tools</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.sis.core</groupId>
          <artifactId>sis-utility</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.sis.storage</groupId>
          <artifactId>sis-netcdf</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.sis.core</groupId>
          <artifactId>sis-metadata</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.opengis</groupId>
          <artifactId>geoapi</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.pff</groupId>
          <artifactId>java-libpst</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.rometools</groupId>
          <artifactId>rome</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.json</groupId>
          <artifactId>json</artifactId>
        </exclusion>
        <exclusion>
          <groupId>edu.ucar</groupId>
          <artifactId>netcdf4</artifactId>
        </exclusion>
        <exclusion>
          <groupId>edu.ucar</groupId>
          <artifactId>grib</artifactId>
        </exclusion>
        <exclusion>
          <groupId>edu.ucar</groupId>
          <artifactId>cdm</artifactId>
        </exclusion>
        <exclusion>
          <groupId>edu.ucar</groupId>
          <artifactId>httpservices</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.gagravarr</groupId>
          <artifactId>vorbis-java-tika</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.gagravarr</groupId>
          <artifactId>vorbis-java-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.ow2.asm</groupId>
          <artifactId>asm</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.googlecode.mp4parser</groupId>
          <artifactId>isoparser</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.drewnoakes</groupId>
          <artifactId>metadata-extractor</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-csv</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-exec</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-compress</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-codec</groupId>
          <artifactId>commons-codec</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.googlecode.json-simple</groupId>
          <artifactId>json-simple</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.google.code.gson</groupId>
          <artifactId>gson</artifactId>
        </exclusion>
        <exclusion>
          <groupId>de.l3s.boilerpipe</groupId>
          <artifactId>boilerpipe</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.googlecode.juniversalchardet</groupId>
          <artifactId>juniversalchardet</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.codelibs</groupId>
          <artifactId>jhighlight</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.bouncycastle</groupId>
          <artifactId>bcmail-jdk15on</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.bouncycastle</groupId>
          <artifactId>bcprov-jdk15on</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.cxf</groupId>
          <artifactId>cxf-rt-rs-client</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.fasterxml.jackson.core</groupId>
          <artifactId>jackson-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.tdunning</groupId>
          <artifactId>json</artifactId>
        </exclusion>
        <exclusion>
          <groupId>edu.usc.ir</groupId>
          <artifactId>sentiment-analysis-parser</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- Test Dependencies -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.github.tomakehurst</groupId>
      <artifactId>wiremock</artifactId>
      <version>${wiremock.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.codehaus.groovy</groupId>
      <artifactId>groovy-all</artifactId>
      <version>${groovy.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.spockframework</groupId>
      <artifactId>spock-core</artifactId>
      <version>${spock.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <repositories>
    <!-- Served over HTTPS: Maven 3.8.1 and later block external plain-HTTP
         repositories by default, and HTTPS protects downloaded artifacts
         from tampering in transit. -->
    <repository>
      <id>oracleReleases</id>
      <name>Oracle Released Java Packages</name>
      <url>https://download.oracle.com/maven</url>
      <layout>default</layout>
    </repository>
  </repositories>
</project>