fix: add instance-dead-timeout configuration and update HealthMonitor to use it for stale instance eviction
Build & Test (NowChessSystems) TeamCity build finished

This commit is contained in:
2026-05-08 15:32:36 +02:00
parent dcebdf237e
commit be0b710543
4 changed files with 6 additions and 3 deletions
@@ -33,6 +33,7 @@ nowchess:
rebalance-interval: 30s rebalance-interval: 30s
rebalance-min-interval: 60s rebalance-min-interval: 60s
heartbeat-ttl: 5s heartbeat-ttl: 5s
instance-dead-timeout: 60s
stream-heartbeat-interval: PT0.2S stream-heartbeat-interval: PT0.2S
cache-eviction-interval: 10m cache-eviction-interval: 10m
game-idle-threshold: 45m game-idle-threshold: 45m
@@ -21,6 +21,9 @@ trait CoordinatorConfig:
@WithName("heartbeat-ttl") @WithName("heartbeat-ttl")
def heartbeatTtl: Duration def heartbeatTtl: Duration
@WithName("instance-dead-timeout")
def instanceDeadTimeout: Duration
@WithName("stream-heartbeat-interval") @WithName("stream-heartbeat-interval")
def streamHeartbeatInterval: Duration def streamHeartbeatInterval: Duration
@@ -50,8 +50,7 @@ class AutoScaler:
val avgLoad = instances.map(_.subscriptionCount).sum.toDouble / instances.size val avgLoad = instances.map(_.subscriptionCount).sum.toDouble / instances.size
if avgLoad > config.scaleUpThreshold * config.maxGamesPerCore then scaleUp() if avgLoad > config.scaleUpThreshold * config.maxGamesPerCore then scaleUp()
else if avgLoad < config.scaleDownThreshold * config.maxGamesPerCore && instances.size > config.scaleMinReplicas else if avgLoad < config.scaleDownThreshold * config.maxGamesPerCore then scaleDown()
then scaleDown()
def scaleUp(): Unit = def scaleUp(): Unit =
log.info("Scaling up Argo Rollout") log.info("Scaling up Argo Rollout")
@@ -39,7 +39,7 @@ class HealthMonitor:
redisPrefix = prefix redisPrefix = prefix
def checkInstanceHealth: Unit = def checkInstanceHealth: Unit =
val evicted = instanceRegistry.evictStaleInstances(config.heartbeatTtl) val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout)
if evicted.nonEmpty then log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", ")) if evicted.nonEmpty then log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
val instances = instanceRegistry.getAllInstances val instances = instanceRegistry.getAllInstances
instances.foreach { inst => instances.foreach { inst =>