Files
NowChessSystems/modules/analytics/build.gradle.kts
T
Janis Eccarius 95215b6a42 feat(analytics): add Dockerfile, CI workflow, and stable jar name for K8s deployment
- Pin jar output to analytics.jar (no version suffix) so Dockerfile COPY is stable
- Add Dockerfile based on apache/spark:3.5.4-scala2.13-java17-ubuntu
- Add versions.env (0.1.0) matching GitOps overlay image tag
- Add analytics-image.yml CI workflow following native-image.yml conventions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-15 22:30:31 +02:00

97 lines
3.1 KiB
Kotlin

// Standalone Spark batch-analytics module.
//
// Spark 3.5.x ships only Scala 2.12/2.13 artifacts; Scala 3 code can consume
// them via JVM binary compatibility so long as we avoid macro-expanded APIs
// (spark.implicits._, typed Dataset[T]). We use the untyped DataFrame API
// exclusively, which is safe to call from Scala 3.
//
// Spark is declared compileOnly — the cluster provides it at runtime via
// spark-submit. Only the PostgreSQL driver and the Scala 3 runtime are
// bundled into the fat jar produced by the "jar" task.
//
// Build the submission jar:
// ./gradlew :modules:analytics:jar
//
// Run a job:
// spark-submit \
// --class de.nowchess.analytics.OpeningBookJob \
// modules/analytics/build/libs/analytics.jar \
// [outputDir] [maxPlies]
//
// Environment variables consumed:
// NOWCHESS_JDBC_URL (default: jdbc:postgresql://localhost:5432/nowchess)
// NOWCHESS_DB_USER (default: nowchess)
// NOWCHESS_DB_PASS (default: nowchess)
// Core Gradle plugins: Scala compilation plus the application plugin
// (which supplies the `application { mainClass }` extension used below).
plugins {
    scala
    application
}
// Project group and version.
group = "de.nowchess"
version = "1.0-SNAPSHOT"

// Version table read from the root project's "VERSIONS" extra property.
// Extra properties are untyped, hence the unchecked cast.
@Suppress("UNCHECKED_CAST")
val versions: Map<String, String> =
    rootProject.extra.get("VERSIONS") as Map<String, String>
// All dependencies of this module are resolved from Maven Central.
repositories {
    mavenCentral()
}
// Scala toolchain version comes from the shared version table ("SCALA3" key).
scala {
    scalaVersion.set(versions["SCALA3"]!!)
}
// Spark release the module compiles against (Scala 2.13 artifacts).
val sparkVersion = "3.5.4"

dependencies {
    val scala3Version = versions["SCALA3"]!!

    // Scala compiler and standard libraries, pinned strictly to the versions
    // declared in the shared version table.
    compileOnly("org.scala-lang:scala3-compiler_3") {
        version { strictly(scala3Version) }
    }
    implementation("org.scala-lang:scala3-library_3") {
        version { strictly(scala3Version) }
    }
    implementation("org.scala-lang:scala-library") {
        version { strictly(versions["SCALA_LIBRARY"]!!) }
    }

    // Spark is provided by the cluster — compile-only, not bundled.
    // Every Spark module gets the same slf4j-log4j12 exclusion.
    listOf("sql", "core", "mllib", "graphx").forEach { sparkModule ->
        compileOnly("org.apache.spark:spark-${sparkModule}_2.13:$sparkVersion") {
            exclude(group = "org.slf4j", module = "slf4j-log4j12")
        }
    }

    // PostgreSQL JDBC driver bundled so it is available on executor classpath.
    implementation("org.postgresql:postgresql:42.7.4")
}
// Entry point registered with the application plugin.
application.mainClass.set("de.nowchess.analytics.OpeningBookJob")
// Fat jar: includes runtimeClasspath (our code + pg driver + scala3-library)
// but NOT compileOnly Spark jars.
// archiveVersion is cleared so the output is always "analytics.jar" — stable
// name required by the Dockerfile COPY instruction.
tasks.jar {
    archiveBaseName.set("analytics")
    archiveVersion.set("")
    manifest {
        attributes["Main-Class"] = "de.nowchess.analytics.OpeningBookJob"
    }

    // Make sure everything on the runtime classpath is built before packaging.
    dependsOn(configurations.runtimeClasspath)

    // The lambda defers dependency resolution until the jar task actually
    // runs; calling runtimeClasspath.get() directly here would resolve the
    // configuration whenever this task is merely configured.
    // Directories are copied as-is, jar files are unpacked into the archive.
    from({
        configurations.runtimeClasspath.get().map { entry ->
            if (entry.isDirectory) entry else zipTree(entry)
        }
    })

    // Several jars ship the same paths (e.g. META-INF entries); keep the
    // first occurrence instead of failing the build.
    duplicatesStrategy = DuplicatesStrategy.EXCLUDE
}
// Compile all Scala sources as UTF-8. configureEach keeps this lazy: a
// ScalaCompile task is only configured when it is actually needed, instead
// of every matching task being realized when the script is evaluated.
tasks.withType<ScalaCompile>().configureEach {
    scalaCompileOptions.additionalParameters = listOf("-encoding", "UTF-8")
}