@@ -167,7 +167,7 @@ class AutoScaler:
|
||||
catch
|
||||
case ex: Exception =>
|
||||
meterRegistry.counter("nowchess.coordinator.scale.failures", "direction", "up").increment()
|
||||
log.warnf(ex, "Failed to scale up %s", config.k8sRolloutName)
|
||||
log.errorf(ex, "Failed to scale up %s", config.k8sRolloutName)
|
||||
|
||||
def scaleDown(): Unit =
|
||||
log.info("Scaling down Argo Rollout")
|
||||
@@ -225,4 +225,4 @@ class AutoScaler:
|
||||
catch
|
||||
case ex: Exception =>
|
||||
meterRegistry.counter("nowchess.coordinator.scale.failures", "direction", "down").increment()
|
||||
log.warnf(ex, "Failed to scale down %s", config.k8sRolloutName)
|
||||
log.errorf(ex, "Failed to scale down %s", config.k8sRolloutName)
|
||||
|
||||
+2
-2
@@ -80,14 +80,14 @@ class CacheEvictionManager:
|
||||
count + 1
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Failed to evict game %s", gameId)
|
||||
log.errorf(ex, "Failed to evict game %s", gameId)
|
||||
count
|
||||
}
|
||||
else count
|
||||
}
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Error processing game key %s", key)
|
||||
log.errorf(ex, "Error processing game key %s", key)
|
||||
count
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -112,7 +112,7 @@ class FailoverService:
|
||||
else false
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Failed to migrate batch to %s, trying next", target.instanceId)
|
||||
log.errorf(ex, "Failed to migrate batch to %s, trying next", target.instanceId)
|
||||
false
|
||||
if success then true else tryMigrateBatch(batch, batchIdx, instances, deadId, attempt + 1)
|
||||
|
||||
|
||||
+1
-1
@@ -192,7 +192,7 @@ class HealthMonitor:
|
||||
log.debugf("No pod found for instance %s, skipping deletion", instanceId)
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(
|
||||
log.errorf(
|
||||
ex,
|
||||
"Failed to delete pod for instance %s — removing from registry to prevent blocking scale-down",
|
||||
instanceId,
|
||||
|
||||
+1
-1
@@ -99,7 +99,7 @@ class InstanceRegistry:
|
||||
}
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Failed to parse instance metadata for %s — removing from registry", instanceId)
|
||||
log.errorf(ex, "Failed to parse instance metadata for %s — removing from registry", instanceId)
|
||||
instances.remove(instanceId)
|
||||
meterRegistry.counter("nowchess.coordinator.instances.removed").increment()
|
||||
Uni.createFrom().item(())
|
||||
|
||||
+3
-3
@@ -96,14 +96,14 @@ class LoadBalancer:
|
||||
log.infof("Moved %d games from %s to %s", subscribed, over.instanceId, target.instanceId)
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Failed to move games from %s to %s", over.instanceId, target.instanceId)
|
||||
log.errorf(ex, "Failed to move games from %s to %s", over.instanceId, target.instanceId)
|
||||
}
|
||||
|
||||
val elapsed = System.currentTimeMillis() - startTime
|
||||
log.infof("Rebalance completed in %dms", elapsed)
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Rebalance failed")
|
||||
log.errorf(ex, "Rebalance failed")
|
||||
|
||||
private def getGamesToMove(instanceId: String, count: Int): List[String] =
|
||||
try
|
||||
@@ -125,7 +125,7 @@ class LoadBalancer:
|
||||
}
|
||||
catch
|
||||
case ex: Exception =>
|
||||
log.warnf(ex, "Failed to update Redis game sets")
|
||||
log.errorf(ex, "Failed to update Redis game sets")
|
||||
|
||||
@Scheduled(every = "30s")
|
||||
def periodicRebalanceCheck(): Unit =
|
||||
|
||||
Reference in New Issue
Block a user