fix: scale up immediately when instance is lost
Build & Test (NowChessSystems) TeamCity build failed
Build & Test (NowChessSystems) TeamCity build failed
When an instance is evicted or fails its health check, immediately trigger a scale-up to replace the lost capacity. Don't wait for the next scheduled scale check.

HealthMonitor now calls autoScaler.scaleUp() when:
1. Stale instances are evicted.
2. An instance fails its health check and is marked dead.

This ensures quick recovery from instance loss.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
+8
-1
@@ -45,6 +45,9 @@ class HealthMonitor:
|
|||||||
@Inject
|
@Inject
|
||||||
private var failoverService: FailoverService = uninitialized
|
private var failoverService: FailoverService = uninitialized
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
private var autoScaler: AutoScaler = uninitialized
|
||||||
|
|
||||||
private val log = Logger.getLogger(classOf[HealthMonitor])
|
private val log = Logger.getLogger(classOf[HealthMonitor])
|
||||||
private var redisPrefix = "nowchess"
|
private var redisPrefix = "nowchess"
|
||||||
// scalafix:on DisableSyntax.var
|
// scalafix:on DisableSyntax.var
|
||||||
@@ -85,14 +88,18 @@ class HealthMonitor:
|
|||||||
if evicted.nonEmpty then
|
if evicted.nonEmpty then
|
||||||
log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
|
log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
|
||||||
evicted.foreach(deleteK8sPod)
|
evicted.foreach(deleteK8sPod)
|
||||||
val instances = instanceRegistry.getAllInstances
|
autoScaler.scaleUp()
|
||||||
|
val instances = instanceRegistry.getAllInstances
|
||||||
|
var instanceFailed = false
|
||||||
instances.foreach { inst =>
|
instances.foreach { inst =>
|
||||||
val isHealthy = checkHealth(inst.instanceId)
|
val isHealthy = checkHealth(inst.instanceId)
|
||||||
if !isHealthy && inst.state == "HEALTHY" then
|
if !isHealthy && inst.state == "HEALTHY" then
|
||||||
log.warnf("Instance %s marked unhealthy", inst.instanceId)
|
log.warnf("Instance %s marked unhealthy", inst.instanceId)
|
||||||
instanceRegistry.markInstanceDead(inst.instanceId)
|
instanceRegistry.markInstanceDead(inst.instanceId)
|
||||||
deleteK8sPod(inst.instanceId)
|
deleteK8sPod(inst.instanceId)
|
||||||
|
instanceFailed = true
|
||||||
}
|
}
|
||||||
|
if instanceFailed then autoScaler.scaleUp()
|
||||||
|
|
||||||
private def checkHealth(instanceId: String): Boolean =
|
private def checkHealth(instanceId: String): Boolean =
|
||||||
val redisHealthy = checkRedisHeartbeat(instanceId)
|
val redisHealthy = checkRedisHeartbeat(instanceId)
|
||||||
|
|||||||
Reference in New Issue
Block a user