refactor: resource-based scaling only, remove health-check-triggered scaling

Scale up: only if resource constrained (CPU/memory)
Scale down: only if NOT resource constrained AND game load low
Remove: triggering scale-up on unexpected instance failures
Keep: health monitoring (mark dead, delete pod, fail over games), but without triggering any scaling

Prevents cascading scale-ups caused by transient health-check failures.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-05-18 20:18:56 +02:00
parent b4c75e2a0f
commit 32a12737e3
2 changed files with 3 additions and 9 deletions
@@ -182,16 +182,14 @@ class AutoScaler:
val hasHighCpuOrMemory = constrainedInstance.isDefined
log.infof(
"Scale check: instances=%d avgLoad=%.1f scaleUpAt=%.1f scaleDownAt=%.1f resourceConstrained=%s",
"Scale check: instances=%d avgLoad=%.1f resourceConstrained=%s",
instances.size,
avgLoad,
scaleUpLoad,
scaleDownLoad,
constrainedInstance.map(_.instanceId).getOrElse("none"),
)
if avgLoad > scaleUpLoad || hasHighCpuOrMemory then scaleUp()
else if avgLoad < scaleDownLoad && instances.size > config.scaleMinReplicas
if hasHighCpuOrMemory then scaleUp()
if !hasHighCpuOrMemory && avgLoad < scaleDownLoad && instances.size > config.scaleMinReplicas
then scaleDown()
private def patchRolloutReplicas(
@@ -88,9 +88,7 @@ class HealthMonitor:
if evicted.nonEmpty then
log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
evicted.foreach(deleteK8sPod)
val unexpectedEvictions = evicted.filterNot(autoScaler.isDrainingForScaleDown)
evicted.foreach(autoScaler.clearDraining)
if unexpectedEvictions.nonEmpty then autoScaler.scaleUp()
val instances = instanceRegistry.getAllInstances
val failed = instances.collect { inst =>
val isHealthy = checkHealth(inst.instanceId)
@@ -101,8 +99,6 @@ class HealthMonitor:
Some(inst.instanceId)
else None
}.flatten
val unexpectedFailures = failed.filterNot(autoScaler.isDrainingForScaleDown)
if unexpectedFailures.nonEmpty then autoScaler.scaleUp()
private def checkHealth(instanceId: String): Boolean =
val redisHealthy = checkRedisHeartbeat(instanceId)