nixpkgs/pkgs/by-name/ti/tika/package.nix

100 lines
3.1 KiB
Nix

{
  lib,
  stdenv,
  fetchFromGitHub,
  makeWrapper,
  maven,
  jdk8,
  tesseract,
  nixosTests,

  # Value passed to buildMavenPackage as mvnHash. When left as null, the
  # per-system default selected via stdenv.system is used instead.
  mvnDepsHash ? null,
  # Whether tesseract is added to the PATH prefix built in installPhase.
  enableOcr ? true,
}:
let
  # Default mvnHash values, keyed by the system string (stdenv.system).
  mvnDepsHashes = {
    x86_64-linux = "sha256-M8O1EJtlTm+mVy/qxapRcBWxD14eYL/LLUxP2uOBoM4=";
    aarch64-linux = "sha256-+ewdV9g0MfgiBiRAimkIZp9lrOTKnKnBB1LqhIlOSaQ=";
    x86_64-darwin = "sha256-nUAy2+O8REuq6pOWb8d+/c/YxPxj+XwtCtkaxfihDzc=";
    aarch64-darwin = "sha256-D6adBXtBH1IokUwwA2Z6m+6rJP2xg6BK4rcPyDSgo6o=";
  };

  # Default hash for the current system. Systems missing from the table
  # evaluate to lib.fakeHash and emit a warning asking for a manual value.
  knownMvnDepsHash =
    if builtins.hasAttr stdenv.system mvnDepsHashes then
      mvnDepsHashes.${stdenv.system}
    else
      lib.warn "This platform doesn't have a default mvnDepsHash value, you'll need to specify it manually" lib.fakeHash;
in
maven.buildMavenPackage rec {
pname = "tika";
version = "2.9.2";

# Source is fetched at the git ref named exactly like `version`.
src = fetchFromGitHub {
  owner = "apache";
  repo = "tika";
  rev = version;
  hash = "sha256-4pSQcLDKgIcU+YypJ/ywdthi6tI1852fGVOCREzUFH0=";
};

nativeBuildInputs = [ makeWrapper ];

mvnJdk = jdk8;

# An explicitly passed mvnDepsHash wins; otherwise use the per-system default.
mvnHash = if mvnDepsHash == null then knownMvnDepsHash else mvnDepsHash;

mvnParameters = toString [
  "-DskipTests=true" # skip tests (out of memory exceptions)
  "-Dossindex.skip" # skip dependency with vulnerability (recommended by upstream)
];

buildOffline = true;
# NOTE(review): presumably artifacts that the regular dependency fetch does
# not pick up and that are needed for the offline build — confirm.
manualMvnArtifacts = [
  "org.objenesis:objenesis:2.1"
  "org.apache.apache.resources:apache-jar-resource-bundle:1.5"
  "org.apache.maven.surefire:surefire-junit-platform:3.1.2"
  "org.junit.platform:junit-platform-launcher:1.10.0"
];
# Installs the tika-app and tika-server jars under $out/share/tika and
# creates a java launcher in $out/bin for each of them.
installPhase =
  let
    # PATH entries prepended by both launchers: the JRE, plus tesseract when
    # enableOcr is set.
    # NOTE(review): presumably Tika looks up `tesseract` (and `java` for
    # forked child processes) on PATH — confirm against upstream docs.
    binPath = lib.makeBinPath ([ jdk8.jre ] ++ lib.optionals enableOcr [ tesseract ]);
  in
  ''
    runHook preInstall

    # Note: using * instead of version would match multiple files
    install -Dm644 tika-app/target/tika-app-${version}.jar "$out/share/tika/tika-app.jar"
    install -Dm644 tika-server/tika-server-standard/target/tika-server-standard-${version}.jar "$out/share/tika/tika-server.jar"

    # Both launchers get the same PATH prefix so that enableOcr applies to
    # tika-app as well as tika-server.
    makeWrapper ${jdk8.jre}/bin/java "$out/bin/tika-app" \
      --prefix PATH : ${binPath} \
      --add-flags "-jar $out/share/tika/tika-app.jar"
    makeWrapper ${jdk8.jre}/bin/java "$out/bin/tika-server" \
      --prefix PATH : ${binPath} \
      --add-flags "-jar $out/share/tika/tika-server.jar"

    runHook postInstall
  '';
# Expose the `tika` test from nixosTests under this package's passthru.tests.
passthru.tests.tika = nixosTests.tika;
# Package metadata.
meta = {
  # Points at CHANGES.txt as of the fetched source revision.
  changelog = "https://github.com/apache/tika/blob/${src.rev}/CHANGES.txt";
  # One-line summary; per nixpkgs convention it does not start with an article.
  description = "Toolkit for extracting metadata and text from over a thousand different file types";
  longDescription = ''
    The Apache Tika toolkit detects and extracts metadata and text
    from over a thousand different file types (such as PPT, XLS, and PDF).
    All of these file types can be parsed through a single interface,
    making Tika useful for search engine indexing, content analysis,
    translation, and much more.
  '';
  homepage = "https://tika.apache.org";
  license = lib.licenses.asl20;
  # Program started when the package is run without naming a binary.
  mainProgram = "tika-server";
  maintainers = with lib.maintainers; [ tomasajt ];
  sourceProvenance = with lib.sourceTypes; [
    fromSource
    binaryBytecode # maven dependencies
  ];
};
}