refactor: resource-based scaling only, remove health-check triggered scaling
Scale up: only if resource constrained (CPU/memory). Scale down: only if NOT resource constrained AND game load is low. Remove: triggering scale-up on unexpected instance failures. Keep: health monitoring (mark dead, delete pod, fail over games), but without scaling. This prevents cascade scaling from transient health-check failures. Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -182,16 +182,14 @@ class AutoScaler:
|
|||||||
val hasHighCpuOrMemory = constrainedInstance.isDefined
|
val hasHighCpuOrMemory = constrainedInstance.isDefined
|
||||||
|
|
||||||
log.infof(
|
log.infof(
|
||||||
"Scale check: instances=%d avgLoad=%.1f scaleUpAt=%.1f scaleDownAt=%.1f resourceConstrained=%s",
|
"Scale check: instances=%d avgLoad=%.1f resourceConstrained=%s",
|
||||||
instances.size,
|
instances.size,
|
||||||
avgLoad,
|
avgLoad,
|
||||||
scaleUpLoad,
|
|
||||||
scaleDownLoad,
|
|
||||||
constrainedInstance.map(_.instanceId).getOrElse("none"),
|
constrainedInstance.map(_.instanceId).getOrElse("none"),
|
||||||
)
|
)
|
||||||
|
|
||||||
if avgLoad > scaleUpLoad || hasHighCpuOrMemory then scaleUp()
|
if hasHighCpuOrMemory then scaleUp()
|
||||||
else if avgLoad < scaleDownLoad && instances.size > config.scaleMinReplicas
|
if !hasHighCpuOrMemory && avgLoad < scaleDownLoad && instances.size > config.scaleMinReplicas
|
||||||
then scaleDown()
|
then scaleDown()
|
||||||
|
|
||||||
private def patchRolloutReplicas(
|
private def patchRolloutReplicas(
|
||||||
|
|||||||
@@ -88,9 +88,7 @@ class HealthMonitor:
|
|||||||
if evicted.nonEmpty then
|
if evicted.nonEmpty then
|
||||||
log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
|
log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", "))
|
||||||
evicted.foreach(deleteK8sPod)
|
evicted.foreach(deleteK8sPod)
|
||||||
val unexpectedEvictions = evicted.filterNot(autoScaler.isDrainingForScaleDown)
|
|
||||||
evicted.foreach(autoScaler.clearDraining)
|
evicted.foreach(autoScaler.clearDraining)
|
||||||
if unexpectedEvictions.nonEmpty then autoScaler.scaleUp()
|
|
||||||
val instances = instanceRegistry.getAllInstances
|
val instances = instanceRegistry.getAllInstances
|
||||||
val failed = instances.collect { inst =>
|
val failed = instances.collect { inst =>
|
||||||
val isHealthy = checkHealth(inst.instanceId)
|
val isHealthy = checkHealth(inst.instanceId)
|
||||||
@@ -101,8 +99,6 @@ class HealthMonitor:
|
|||||||
Some(inst.instanceId)
|
Some(inst.instanceId)
|
||||||
else None
|
else None
|
||||||
}.flatten
|
}.flatten
|
||||||
val unexpectedFailures = failed.filterNot(autoScaler.isDrainingForScaleDown)
|
|
||||||
if unexpectedFailures.nonEmpty then autoScaler.scaleUp()
|
|
||||||
|
|
||||||
private def checkHealth(instanceId: String): Boolean =
|
private def checkHealth(instanceId: String): Boolean =
|
||||||
val redisHealthy = checkRedisHeartbeat(instanceId)
|
val redisHealthy = checkRedisHeartbeat(instanceId)
|
||||||
|
|||||||
Reference in New Issue
Block a user