diff --git a/modules/coordinator/build.gradle.kts b/modules/coordinator/build.gradle.kts index 312ec3b..7192371 100644 --- a/modules/coordinator/build.gradle.kts +++ b/modules/coordinator/build.gradle.kts @@ -78,6 +78,7 @@ dependencies { implementation("com.fasterxml.jackson.module:jackson-module-scala_3:${versions["JACKSON_SCALA"]!!}") implementation("io.quarkus:quarkus-redis-client") implementation("io.quarkus:quarkus-kubernetes-client") + implementation("io.quarkus:quarkus-scheduler") testImplementation(platform("org.junit:junit-bom:${versions["JUNIT_BOM"]!!}")) testImplementation("org.junit.jupiter:junit-jupiter") diff --git a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala index 1b31f17..e76599e 100644 --- a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala +++ b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala @@ -5,6 +5,7 @@ import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.event.Observes import jakarta.enterprise.inject.Instance import jakarta.inject.Inject +import io.quarkus.scheduler.Scheduled import de.nowchess.coordinator.config.CoordinatorConfig import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.api.model.Pod @@ -73,7 +74,12 @@ class HealthMonitor: Thread.ofVirtual().start(() => validateStartupInstances(timeoutMs)) startPodWatch() - def checkInstanceHealth: Unit = + @Scheduled(every = "10s") + def periodicHealthCheck(): Unit = + try checkInstanceHealth() + catch case ex: Exception => log.warnf(ex, "Health check failed") + + def checkInstanceHealth(): Unit = meterRegistry.counter("nowchess.coordinator.health.checks").increment() val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout) if evicted.nonEmpty then