aut
Used in:
components
- Overview
- Versions
- Dependents
- Dependencies
<dependency>
  <groupId>io.archivesunleashed</groupId>
  <artifactId>aut</artifactId>
  <version>1.2.0</version>
</dependency>
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Artifact coordinates and packaging. -->
<modelVersion>4.0.0</modelVersion>
<groupId>io.archivesunleashed</groupId>
<artifactId>aut</artifactId>
<packaging>jar</packaging>
<version>1.2.0</version>

<!-- Human-readable project description. -->
<name>Archives Unleashed Toolkit</name>
<description>An open-source toolkit for analyzing web archives.</description>
<url>https://github.com/archivesunleashed/aut</url>
<inceptionYear>2017</inceptionYear>
<organization>
  <name>The Archives Unleashed Project</name>
  <url>https://archivesunleashed.org/</url>
</organization>
<!-- Shared values substituted into the rest of this POM via ${...} expressions. -->
<properties>
  <!-- Character encoding for sources and generated reports. -->
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  <project.name>${project.artifactId}</project.name>

  <!-- Scala and core platform library versions. -->
  <scala.version>2.12.10</scala.version>
  <scala.binary.version>2.12</scala.binary.version>
  <hadoop.version>2.7.4</hadoop.version>
  <spark.version>3.0.1</spark.version>
  <guava.version>29.0-jre</guava.version>

  <github.global.server>github</github.global.server>

  <!-- Versions of the build, release, site and reporting plugins. -->
  <license.plugin.version>3.0</license.plugin.version>
  <release.plugin.version>2.5.2</release.plugin.version>
  <scm-provider-gitexe.plugin.version>1.9.5</scm-provider-gitexe.plugin.version>
  <changelog.plugin.version>2.3</changelog.plugin.version>
  <github-site.plugin.version>0.12</github-site.plugin.version>
  <gpg.plugin.version>1.6</gpg.plugin.version>
  <build-helper.plugin.version>3.0.0</build-helper.plugin.version>
  <deploy.plugin.version>2.8.2</deploy.plugin.version>
  <site.plugin.version>3.3</site.plugin.version>
  <project-info-reports.plugin.version>2.7</project-info-reports.plugin.version>
  <doxia-markdown.plugin.version>1.7</doxia-markdown.plugin.version>
  <failsafe.plugin.version>2.22.0</failsafe.plugin.version>
  <jxr.plugin.version>2.5</jxr.plugin.version>
  <surefire.plugin.version>2.22.0</surefire.plugin.version>
  <jacoco.plugin.version>0.8.4</jacoco.plugin.version>
  <versions.plugin.version>2.1</versions.plugin.version>

  <!-- Remaining library and plugin versions. -->
  <tika.version>1.23</tika.version>
  <jackson.version>2.10.0</jackson.version>
  <scala.maven.plugin.version>4.5.4</scala.maven.plugin.version>
</properties>
<!-- The project is distributed under the Apache License 2.0. -->
<licenses>
  <license>
    <name>The Apache Software License, Version 2.0</name>
    <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
    <distribution>repo</distribution>
  </license>
</licenses>
<!-- Source-control coordinates for this release. -->
<scm>
  <!-- Git connection strings (read access and developer access). -->
  <connection>scm:git:git@github.com:archivesunleashed/aut.git</connection>
  <developerConnection>scm:git:git@github.com:archivesunleashed/aut.git</developerConnection>
  <!-- Browsable repository page; the git remotes belong in the two
       connection elements above, not here. -->
  <url>https://github.com/archivesunleashed/aut</url>
  <tag>aut-1.2.0</tag>
</scm>
<!-- Repositories searched for dependencies: Maven Central and JitPack. -->
<repositories>
  <repository>
    <id>maven</id>
    <url>https://repo.maven.apache.org/maven2/</url>
  </repository>
  <repository>
    <id>jitpack.io</id>
    <url>https://jitpack.io</url>
  </repository>
</repositories>
<build>
  <plugins>
    <!-- Java compilation at release level 11, with debug info and lint warnings. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.8.0</version>
      <configuration>
        <debug>true</debug>
        <release>11</release>
        <source>11</source>
        <target>11</target>
        <compilerArgument>-Xlint:unchecked,deprecation</compilerArgument>
      </configuration>
    </plugin>

    <!-- Shaded jar, produced in the package phase. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
      <version>3.2.1</version>
      <executions>
        <execution>
          <phase>package</phase>
          <goals>
            <goal>shade</goal>
          </goals>
          <configuration>
            <!-- Concatenate the Lucene codec service files from all jars. -->
            <transformers>
              <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                <resource>META-INF/services/org.apache.lucene.codecs.Codec</resource>
              </transformer>
            </transformers>
            <!-- Move the Guava packages to a shaded package name. -->
            <relocations>
              <relocation>
                <pattern>com.google.common.</pattern>
                <shadedPattern>com.google.common.shaded.</shadedPattern>
              </relocation>
            </relocations>
            <!-- Dropping signature files avoids the error
                 "Invalid signature file digest for Manifest main attributes";
                 cf. http://zhentao-li.blogspot.com/2012/06/maven-shade-plugin-invalid-signature.html -->
            <filters>
              <filter>
                <artifact>*:*</artifact>
                <excludes>
                  <exclude>META-INF/*.SF</exclude>
                  <exclude>META-INF/*.DSA</exclude>
                  <exclude>META-INF/*.RSA</exclude>
                </excludes>
              </filter>
            </filters>
            <!-- Attach the shaded jar under the "fatjar" classifier so the
                 normal thin jar is still produced alongside it. -->
            <shadedArtifactAttached>true</shadedArtifactAttached>
            <shadedClassifierName>fatjar</shadedClassifierName>
            <!-- Hadoop and Spark artifacts are left out of the fatjar. -->
            <artifactSet>
              <excludes>
                <exclude>org.apache.hadoop:hadoop-core</exclude>
                <exclude>org.apache.hadoop:hadoop-common</exclude>
                <exclude>org.apache.hadoop:hadoop-mapreduce-client-core</exclude>
                <exclude>org.apache.spark:*</exclude>
              </excludes>
            </artifactSet>
          </configuration>
        </execution>
      </executions>
    </plugin>

    <!-- Scala compilation, test compilation and scaladoc jar. -->
    <plugin>
      <groupId>net.alchim31.maven</groupId>
      <artifactId>scala-maven-plugin</artifactId>
      <version>${scala.maven.plugin.version}</version>
      <executions>
        <!-- Main sources. -->
        <execution>
          <phase>process-resources</phase>
          <goals>
            <goal>add-source</goal>
            <goal>compile</goal>
          </goals>
        </execution>
        <!-- Test sources. -->
        <execution>
          <id>scala-test-compile</id>
          <phase>process-test-resources</phase>
          <goals>
            <goal>testCompile</goal>
          </goals>
        </execution>
        <!-- Scaladoc jar, built in the verify phase. -->
        <execution>
          <id>attach-scaladocs</id>
          <phase>verify</phase>
          <goals>
            <goal>doc-jar</goal>
          </goals>
          <configuration>
            <args>
              <arg>-no-java-comments</arg>
              <arg>-no-link-warnings</arg>
            </args>
          </configuration>
        </execution>
      </executions>
      <!-- Plugin-wide settings shared by the executions above. -->
      <configuration>
        <scalaVersion>${scala.version}</scalaVersion>
        <checkMultipleScalaVersions>true</checkMultipleScalaVersions>
        <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
        <sendJavaToScalac>true</sendJavaToScalac>
        <args>
          <arg>-unchecked</arg>
          <arg>-deprecation</arg>
          <arg>-feature</arg>
          <arg>-explaintypes</arg>
          <arg>-target:jvm-1.8</arg>
          <arg>-Ywarn-unused-import</arg>
        </args>
        <compilerPlugins>
          <compilerPlugin>
            <groupId>org.scalameta</groupId>
            <artifactId>semanticdb-scalac_${scala.version}</artifactId>
            <version>4.6.0</version>
          </compilerPlugin>
        </compilerPlugins>
      </configuration>
    </plugin>

    <!-- License header check for the Scala main and test sources. -->
    <plugin>
      <groupId>com.mycila</groupId>
      <artifactId>license-maven-plugin</artifactId>
      <version>${license.plugin.version}</version>
      <configuration>
        <header>config/LICENSE_HEADER.txt</header>
        <mapping>
          <scala>SLASHSTAR_STYLE</scala>
        </mapping>
        <includes>
          <include>src/main/scala/**</include>
          <include>src/test/scala/**</include>
        </includes>
        <excludes>
          <exclude>target/**</exclude>
          <exclude>src/test/resources/**</exclude>
          <exclude>src/main/resources/**</exclude>
          <exclude>**/*.properties</exclude>
        </excludes>
        <properties>
          <owner>${project.organization.name}</owner>
        </properties>
        <encoding>UTF-8</encoding>
        <strictCheck>true</strictCheck>
      </configuration>
      <executions>
        <execution>
          <goals>
            <goal>check</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

    <!-- Release support for aut, using the git executable SCM provider. -->
    <plugin>
      <artifactId>maven-release-plugin</artifactId>
      <version>${release.plugin.version}</version>
      <configuration>
        <!-- See http://jira.codehaus.org/browse/MRELEASE-424 -->
        <mavenExecutorId>forked-path</mavenExecutorId>
      </configuration>
      <dependencies>
        <dependency>
          <groupId>org.apache.maven.scm</groupId>
          <artifactId>maven-scm-provider-gitexe</artifactId>
          <version>${scm-provider-gitexe.plugin.version}</version>
        </dependency>
      </dependencies>
    </plugin>

    <!-- Deployment and site generation (GitHub Pages). -->
    <plugin>
      <artifactId>maven-deploy-plugin</artifactId>
      <version>${deploy.plugin.version}</version>
    </plugin>
    <plugin>
      <artifactId>maven-site-plugin</artifactId>
      <version>${site.plugin.version}</version>
      <configuration>
        <skipDeploy>true</skipDeploy>
      </configuration>
      <dependencies>
        <dependency>
          <!-- Enables Markdown for site generation; put the files under
               src/site/markdown/[filename].md -->
          <groupId>org.apache.maven.doxia</groupId>
          <artifactId>doxia-module-markdown</artifactId>
          <version>${doxia-markdown.plugin.version}</version>
        </dependency>
      </dependencies>
    </plugin>
    <plugin>
      <groupId>com.github.github</groupId>
      <artifactId>site-maven-plugin</artifactId>
      <version>${github-site.plugin.version}</version>
      <configuration>
        <message>Creating site for ${project.artifactId}, ${project.version}</message>
        <path>${project.distributionManagement.site.url}</path>
        <merge>true</merge>
        <excludes>
          <exclude>xref-test/**</exclude>
          <exclude>testapidocs/**</exclude>
        </excludes>
      </configuration>
      <executions>
        <execution>
          <id>github</id>
          <goals>
            <goal>site</goal>
          </goals>
          <phase>site-deploy</phase>
        </execution>
      </executions>
    </plugin>

    <plugin>
      <artifactId>maven-changelog-plugin</artifactId>
      <version>${changelog.plugin.version}</version>
    </plugin>
    <plugin>
      <groupId>org.codehaus.mojo</groupId>
      <artifactId>build-helper-maven-plugin</artifactId>
      <version>${build-helper.plugin.version}</version>
    </plugin>

    <!-- Zip of the PySpark modules, assembled in the package phase. -->
    <plugin>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.6</version>
      <configuration>
        <descriptors>
          <descriptor>src/main/assembly/python.xml</descriptor>
        </descriptors>
        <finalName>aut</finalName>
        <appendAssemblyId>false</appendAssemblyId>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

    <!-- JaCoCo: attach the agent, then generate the report in the test phase. -->
    <plugin>
      <groupId>org.jacoco</groupId>
      <artifactId>jacoco-maven-plugin</artifactId>
      <version>${jacoco.plugin.version}</version>
      <executions>
        <execution>
          <goals>
            <goal>prepare-agent</goal>
          </goals>
        </execution>
        <execution>
          <id>report</id>
          <phase>test</phase>
          <goals>
            <goal>report</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

    <!-- Scalafix plugin, selected by Scala binary version. -->
    <plugin>
      <groupId>io.github.evis</groupId>
      <artifactId>scalafix-maven-plugin_${scala.binary.version}</artifactId>
      <version>0.1.7_0.10.4</version>
    </plugin>
  </plugins>
</build>
<!-- Plugins declared for the reporting section of the build. -->
<reporting>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-failsafe-plugin</artifactId>
      <version>${failsafe.plugin.version}</version>
      <configuration>
        <argLine>--illegal-access=permit</argLine>
      </configuration>
    </plugin>
    <plugin>
      <artifactId>maven-jxr-plugin</artifactId>
      <version>${jxr.plugin.version}</version>
    </plugin>
    <!-- Test report that reads both the surefire and failsafe result directories. -->
    <plugin>
      <artifactId>maven-surefire-report-plugin</artifactId>
      <version>${surefire.plugin.version}</version>
      <configuration>
        <!-- argLine is declared exactly once. A second argLine element
             holding -XX:-UseSplitVerifier used to sit next to this one; a
             single-valued setting cannot carry two values, and that HotSpot
             flag has been obsolete since Java 8 while this build compiles
             for release 11, so only the entry below is kept. -->
        <argLine>--illegal-access=permit</argLine>
        <outputName>surefire-report</outputName>
        <aggregate>true</aggregate>
        <reportsDirectories>
          <reportsDirectory>${project.build.directory}/surefire-reports/</reportsDirectory>
          <reportsDirectory>${project.build.directory}/failsafe-reports/</reportsDirectory>
        </reportsDirectories>
      </configuration>
    </plugin>
    <!-- Project info reports with dependency location/detail lookups turned off. -->
    <plugin>
      <artifactId>maven-project-info-reports-plugin</artifactId>
      <version>${project-info-reports.plugin.version}</version>
      <configuration>
        <dependencyLocationsEnabled>false</dependencyLocationsEnabled>
        <dependencyDetailsEnabled>false</dependencyDetailsEnabled>
      </configuration>
    </plugin>
    <!-- Reports on available dependency, plugin and property updates. -->
    <plugin>
      <groupId>org.codehaus.mojo</groupId>
      <artifactId>versions-maven-plugin</artifactId>
      <version>${versions.plugin.version}</version>
      <reportSets>
        <reportSet>
          <reports>
            <report>dependency-updates-report</report>
            <report>plugin-updates-report</report>
            <report>property-updates-report</report>
          </reports>
        </reportSet>
      </reportSets>
    </plugin>
    <!-- Scaladoc, with the same arguments as the attach-scaladocs build execution. -->
    <plugin>
      <groupId>net.alchim31.maven</groupId>
      <artifactId>scala-maven-plugin</artifactId>
      <version>${scala.maven.plugin.version}</version>
      <configuration>
        <args>
          <arg>-no-java-comments</arg>
          <arg>-no-link-warnings</arg>
        </args>
      </configuration>
    </plugin>
  </plugins>
</reporting>
<!-- Libraries the toolkit is built and tested against. -->
<dependencies>
  <!-- Test frameworks. -->
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.13.1</version>
    <!-- Test scope keeps JUnit out of the shaded fatjar and out of the
         transitive dependencies seen by consumers, matching scalatest below.
         NOTE(review): assumes nothing under src/main imports JUnit; confirm. -->
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.scalatest</groupId>
    <artifactId>scalatest_${scala.binary.version}</artifactId>
    <version>3.0.8</version>
    <scope>test</scope>
  </dependency>

  <!-- Scala runtime and modules. -->
  <dependency>
    <groupId>org.scala-lang.modules</groupId>
    <artifactId>scala-parser-combinators_${scala.binary.version}</artifactId>
    <version>1.1.2</version>
  </dependency>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>${scala.version}</version>
  </dependency>

  <!-- General-purpose utility libraries. -->
  <dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.12</version>
  </dependency>
  <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-compress</artifactId>
    <version>1.21</version>
  </dependency>
  <dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>${guava.version}</version>
  </dependency>

  <!-- Hadoop, with the transitive servlet API excluded. -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>${hadoop.version}</version>
    <exclusions>
      <exclusion>
        <groupId>javax.servlet</groupId>
        <artifactId>servlet-api</artifactId>
      </exclusion>
    </exclusions>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
    <exclusions>
      <exclusion>
        <groupId>javax.servlet</groupId>
        <artifactId>servlet-api</artifactId>
      </exclusion>
    </exclusions>
  </dependency>

  <!-- Spark. -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
  </dependency>

  <dependency>
    <groupId>org.xerial.snappy</groupId>
    <artifactId>snappy-java</artifactId>
    <version>1.1.7.3</version>
  </dependency>
  <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.15.3</version>
  </dependency>

  <!-- webarchive-commons, minus several of its transitive dependencies. -->
  <dependency>
    <groupId>org.netpreserve.commons</groupId>
    <artifactId>webarchive-commons</artifactId>
    <version>1.1.9</version>
    <exclusions>
      <exclusion>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpcore</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
      </exclusion>
      <exclusion>
        <groupId>joda-time</groupId>
        <artifactId>joda-time</artifactId>
      </exclusion>
    </exclusions>
  </dependency>

  <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>${jackson.version}</version>
  </dependency>

  <!-- Apache Tika modules, all on the same version. -->
  <dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-core</artifactId>
    <version>${tika.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers</artifactId>
    <version>${tika.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-langdetect</artifactId>
    <version>${tika.version}</version>
    <exclusions>
      <exclusion>
        <groupId>com.optimaize.languagedetector</groupId>
        <artifactId>language-detector</artifactId>
      </exclusion>
      <exclusion>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
      </exclusion>
      <exclusion>
        <groupId>com.sun.activation</groupId>
        <artifactId>jakarta.activation</artifactId>
      </exclusion>
    </exclusions>
  </dependency>

  <dependency>
    <groupId>org.rogach</groupId>
    <artifactId>scallop_${scala.binary.version}</artifactId>
    <version>3.3.1</version>
  </dependency>
  <dependency> <!-- Needed for running boilerpipe, but will compile without. -->
    <groupId>com.syncthemall</groupId>
    <artifactId>boilerpipe</artifactId>
    <version>1.2.2</version>
  </dependency>
  <dependency> <!-- Needed for running boilerpipe. -->
    <groupId>xerces</groupId>
    <artifactId>xercesImpl</artifactId>
    <version>2.12.2</version>
  </dependency>
  <dependency>
    <groupId>tl.lin</groupId>
    <artifactId>lintools-datatypes</artifactId>
    <version>1.1.1</version>
  </dependency>
  <!--START pull #321-->
  <dependency>
    <groupId>com.github.netarchivesuite</groupId>
    <artifactId>language-detector</artifactId>
    <version>language-detector-0.6a</version>
  </dependency>
  <!--END pull #321-->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-aws</artifactId>
    <version>${hadoop.version}</version>
  </dependency>
  <dependency>
    <groupId>com.github.internetarchive</groupId>
    <artifactId>Sparkling</artifactId>
    <version>main-f002a0509e-1</version>
  </dependency>
</dependencies>
<!-- Project developers and their contact details. -->
<developers>
  <developer>
    <name>Jimmy Lin</name>
    <id>lintool</id>
    <email>jimmylin at uwaterloo dot ca</email>
    <organization>University of Waterloo</organization>
    <url>https://cs.uwaterloo.ca/~jimmylin/</url>
    <timezone>-5</timezone>
  </developer>
  <developer>
    <name>Ian Milligan</name>
    <id>ianmilligan1</id>
    <email>i2millig at uwaterloo dot ca</email>
    <organization>University of Waterloo</organization>
    <url>http://ianmilligan.ca/</url>
    <timezone>-5</timezone>
  </developer>
  <developer>
    <name>Nick Ruest</name>
    <id>ruebot</id>
    <email>ruestn at yorku dot ca</email>
    <organization>York University</organization>
    <url>https://ruebot.net</url>
    <timezone>-5</timezone>
  </developer>
</developers>
<!-- Issues are tracked on GitHub. -->
<issueManagement>
  <system>GitHub</system>
  <url>https://github.com/archivesunleashed/aut/issues</url>
</issueManagement>
<profiles>
  <!-- Active when the performRelease property equals true: binds the GPG
       sign goal to the verify phase. -->
  <profile>
    <id>release-sign-artifacts</id>
    <activation>
      <property>
        <name>performRelease</name>
        <value>true</value>
      </property>
    </activation>
    <build>
      <!-- Managed definition of the signing execution. -->
      <pluginManagement>
        <plugins>
          <plugin>
            <artifactId>maven-gpg-plugin</artifactId>
            <version>${gpg.plugin.version}</version>
            <executions>
              <execution>
                <id>sign-artifacts</id>
                <phase>verify</phase>
                <goals>
                  <goal>sign</goal>
                </goals>
              </execution>
            </executions>
            <configuration>
              <useAgent>true</useAgent>
            </configuration>
          </plugin>
        </plugins>
      </pluginManagement>
      <!-- Pulls the managed GPG plugin into the build. -->
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-gpg-plugin</artifactId>
          <version>${gpg.plugin.version}</version>
        </plugin>
      </plugins>
    </build>
  </profile>
</profiles>
<!-- Sonatype repositories consulted when resolving build plugins. -->
<pluginRepositories>
  <!-- Snapshot builds only. -->
  <pluginRepository>
    <id>sonatype-nexus-snapshots</id>
    <name>Sonatype Nexus Snapshots</name>
    <url>https://oss.sonatype.org/content/repositories/snapshots</url>
    <releases>
      <enabled>false</enabled>
    </releases>
    <snapshots>
      <enabled>true</enabled>
    </snapshots>
  </pluginRepository>
  <!-- Released versions only. -->
  <pluginRepository>
    <id>sonatype-nexus-staging</id>
    <name>Nexus Release Repository</name>
    <url>https://oss.sonatype.org/content/repositories/releases</url>
    <releases>
      <enabled>true</enabled>
    </releases>
    <snapshots>
      <enabled>false</enabled>
    </snapshots>
  </pluginRepository>
</pluginRepositories>
<!-- Publication targets for the generated site and the built artifacts. -->
<distributionManagement>
  <!-- The site URL is the project version; the GitHub site plugin
       configuration reads it as its path setting. -->
  <site>
    <id>gh-pages</id>
    <name>Deployment through GitHub's site deployment plugin</name>
    <url>${project.version}</url>
  </site>
  <!-- Target for snapshot versions. -->
  <snapshotRepository>
    <id>sonatype-nexus-snapshots</id>
    <name>Sonatype Nexus Snapshots</name>
    <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
  </snapshotRepository>
  <!-- Target for release versions. -->
  <repository>
    <id>sonatype-nexus-staging</id>
    <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
  </repository>
</distributionManagement>
</project>