fix: add instance-dead-timeout configuration and update HealthMonitor to use it for stale instance eviction
Build & Test (NowChessSystems) TeamCity build finished
Build & Test (NowChessSystems) TeamCity build finished
This commit is contained in:
@@ -33,6 +33,7 @@ nowchess:
|
||||
rebalance-interval: 30s
|
||||
rebalance-min-interval: 60s
|
||||
heartbeat-ttl: 5s
|
||||
instance-dead-timeout: 60s
|
||||
stream-heartbeat-interval: PT0.2S
|
||||
cache-eviction-interval: 10m
|
||||
game-idle-threshold: 45m
|
||||
|
||||
+3
@@ -21,6 +21,9 @@ trait CoordinatorConfig:
|
||||
@WithName("heartbeat-ttl")
|
||||
def heartbeatTtl: Duration
|
||||
|
||||
@WithName("instance-dead-timeout")
|
||||
def instanceDeadTimeout: Duration
|
||||
|
||||
@WithName("stream-heartbeat-interval")
|
||||
def streamHeartbeatInterval: Duration
|
||||
|
||||
|
||||
@@ -50,8 +50,7 @@ class AutoScaler:
|
||||
val avgLoad = instances.map(_.subscriptionCount).sum.toDouble / instances.size
|
||||
|
||||
if avgLoad > config.scaleUpThreshold * config.maxGamesPerCore then scaleUp()
|
||||
else if avgLoad < config.scaleDownThreshold * config.maxGamesPerCore && instances.size > config.scaleMinReplicas
|
||||
then scaleDown()
|
||||
else if avgLoad < config.scaleDownThreshold * config.maxGamesPerCore then scaleDown()
|
||||
|
||||
def scaleUp(): Unit =
|
||||
log.info("Scaling up Argo Rollout")
|
||||
|
||||
+1
-1
@@ -39,7 +39,7 @@ class HealthMonitor:
|
||||
redisPrefix = prefix
|
||||
|
||||
def checkInstanceHealth: Unit =
|
||||
val evicted = instanceRegistry.evictStaleInstances(config.heartbeatTtl)
|
||||
val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout)
|
||||
if evicted.nonEmpty then log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
|
||||
val instances = instanceRegistry.getAllInstances
|
||||
instances.foreach { inst =>
|
||||
|
||||
Reference in New Issue
Block a user