fix: scale up immediately when instance is lost

When an instance is evicted or fails its health check, immediately trigger a scale-up to replace the lost capacity instead of waiting for the next scheduled scale check. HealthMonitor now calls autoScaler.scaleUp() when: (1) stale instances are evicted; (2) an instance fails its health check and is marked dead. This ensures quick recovery from instance loss. Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-05-13 23:50:33 +02:00
parent 6bf1013710
commit 43525d41a3
1 changed file with 8 additions and 1 deletion
@@ -45,6 +45,9 @@ class HealthMonitor:
  @Inject
  private var failoverService: FailoverService = uninitialized

+  @Inject
+  private var autoScaler: AutoScaler = uninitialized
+
  private val log         = Logger.getLogger(classOf[HealthMonitor])
  private var redisPrefix = "nowchess"
  // scalafix:on DisableSyntax.var
@@ -85,14 +88,18 @@ class HealthMonitor:
    if evicted.nonEmpty then
      log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
      evicted.foreach(deleteK8sPod)
-    val instances = instanceRegistry.getAllInstances
+      autoScaler.scaleUp()
+    val instances      = instanceRegistry.getAllInstances
+    var instanceFailed = false
    instances.foreach { inst =>
      val isHealthy = checkHealth(inst.instanceId)
      if !isHealthy && inst.state == "HEALTHY" then
        log.warnf("Instance %s marked unhealthy", inst.instanceId)
        instanceRegistry.markInstanceDead(inst.instanceId)
        deleteK8sPod(inst.instanceId)
+        instanceFailed = true
    }
+    if instanceFailed then autoScaler.scaleUp()

  private def checkHealth(instanceId: String): Boolean =
    val redisHealthy = checkRedisHeartbeat(instanceId)