feat: add periodic health check to evict dead instances
Build & Test (NowChessSystems) TeamCity build failed
Build & Test (NowChessSystems) TeamCity build failed
Add the quarkus-scheduler dependency and schedule a health check every 10 seconds. Dead instances (marked with state="DEAD") are now automatically evicted instead of accumulating indefinitely. Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -78,6 +78,7 @@ dependencies {
|
|||||||
implementation("com.fasterxml.jackson.module:jackson-module-scala_3:${versions["JACKSON_SCALA"]!!}")
|
implementation("com.fasterxml.jackson.module:jackson-module-scala_3:${versions["JACKSON_SCALA"]!!}")
|
||||||
implementation("io.quarkus:quarkus-redis-client")
|
implementation("io.quarkus:quarkus-redis-client")
|
||||||
implementation("io.quarkus:quarkus-kubernetes-client")
|
implementation("io.quarkus:quarkus-kubernetes-client")
|
||||||
|
implementation("io.quarkus:quarkus-scheduler")
|
||||||
|
|
||||||
testImplementation(platform("org.junit:junit-bom:${versions["JUNIT_BOM"]!!}"))
|
testImplementation(platform("org.junit:junit-bom:${versions["JUNIT_BOM"]!!}"))
|
||||||
testImplementation("org.junit.jupiter:junit-jupiter")
|
testImplementation("org.junit.jupiter:junit-jupiter")
|
||||||
|
|||||||
+7
-1
@@ -5,6 +5,7 @@ import jakarta.enterprise.context.ApplicationScoped
|
|||||||
import jakarta.enterprise.event.Observes
|
import jakarta.enterprise.event.Observes
|
||||||
import jakarta.enterprise.inject.Instance
|
import jakarta.enterprise.inject.Instance
|
||||||
import jakarta.inject.Inject
|
import jakarta.inject.Inject
|
||||||
|
import io.quarkus.scheduler.Scheduled
|
||||||
import de.nowchess.coordinator.config.CoordinatorConfig
|
import de.nowchess.coordinator.config.CoordinatorConfig
|
||||||
import io.fabric8.kubernetes.client.KubernetesClient
|
import io.fabric8.kubernetes.client.KubernetesClient
|
||||||
import io.fabric8.kubernetes.api.model.Pod
|
import io.fabric8.kubernetes.api.model.Pod
|
||||||
@@ -73,7 +74,12 @@ class HealthMonitor:
|
|||||||
Thread.ofVirtual().start(() => validateStartupInstances(timeoutMs))
|
Thread.ofVirtual().start(() => validateStartupInstances(timeoutMs))
|
||||||
startPodWatch()
|
startPodWatch()
|
||||||
|
|
||||||
def checkInstanceHealth: Unit =
|
@Scheduled(every = "10s")
|
||||||
|
def periodicHealthCheck(): Unit =
|
||||||
|
try checkInstanceHealth()
|
||||||
|
catch case ex: Exception => log.warnf(ex, "Health check failed")
|
||||||
|
|
||||||
|
def checkInstanceHealth(): Unit =
|
||||||
meterRegistry.counter("nowchess.coordinator.health.checks").increment()
|
meterRegistry.counter("nowchess.coordinator.health.checks").increment()
|
||||||
val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout)
|
val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout)
|
||||||
if evicted.nonEmpty then
|
if evicted.nonEmpty then
|
||||||
|
|||||||
Reference in New Issue
Block a user