feat: add periodic health check to evict dead instances
Build & Test (NowChessSystems) TeamCity build failed

Add quarkus-scheduler dependency and schedule health check every 10 seconds.
Dead instances (marked with state="DEAD") now automatically evicted instead of
accumulating indefinitely.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 18:09:51 +02:00
parent 43184d296d
commit 380a2cceeb
2 changed files with 8 additions and 1 deletions
+1
View File
@@ -78,6 +78,7 @@ dependencies {
implementation("com.fasterxml.jackson.module:jackson-module-scala_3:${versions["JACKSON_SCALA"]!!}")
implementation("io.quarkus:quarkus-redis-client")
implementation("io.quarkus:quarkus-kubernetes-client")
implementation("io.quarkus:quarkus-scheduler")
testImplementation(platform("org.junit:junit-bom:${versions["JUNIT_BOM"]!!}"))
testImplementation("org.junit.jupiter:junit-jupiter")
@@ -5,6 +5,7 @@ import jakarta.enterprise.context.ApplicationScoped
import jakarta.enterprise.event.Observes
import jakarta.enterprise.inject.Instance
import jakarta.inject.Inject
import io.quarkus.scheduler.Scheduled
import de.nowchess.coordinator.config.CoordinatorConfig
import io.fabric8.kubernetes.client.KubernetesClient
import io.fabric8.kubernetes.api.model.Pod
@@ -73,7 +74,12 @@ class HealthMonitor:
Thread.ofVirtual().start(() => validateStartupInstances(timeoutMs))
startPodWatch()
def checkInstanceHealth: Unit =
@Scheduled(every = "10s")
def periodicHealthCheck(): Unit =
try checkInstanceHealth()
catch case ex: Exception => log.warnf(ex, "Health check failed")
def checkInstanceHealth(): Unit =
meterRegistry.counter("nowchess.coordinator.health.checks").increment()
val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout)
if evicted.nonEmpty then