fix: refresh Redis TTL on instance heartbeat to prevent false DEAD marking
Instances were being incorrectly marked DEAD because their Redis key TTL was not refreshed on heartbeat. HealthMonitor.checkRedisHeartbeat() checks pttl > 0, which fails once the TTL has expired, even if the instance is alive and sending regular heartbeats. Now pexpire(key, heartbeatTtl) is called on each heartbeat to keep the key alive. This prevents scaling messages from undercounting healthy instances.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
+11
-1
@@ -9,6 +9,7 @@ import scala.jdk.CollectionConverters.*
|
||||
import scala.compiletime.uninitialized
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import de.nowchess.coordinator.dto.InstanceMetadata
|
||||
import de.nowchess.coordinator.config.CoordinatorConfig
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.time.{Duration, Instant}
|
||||
import io.micrometer.core.instrument.{Gauge, MeterRegistry}
|
||||
@@ -27,6 +28,9 @@ class InstanceRegistry:
|
||||
|
||||
@Inject
|
||||
private var meterRegistry: MeterRegistry = uninitialized
|
||||
|
||||
@Inject
|
||||
private var config: CoordinatorConfig = uninitialized
|
||||
// scalafix:on DisableSyntax.var
|
||||
|
||||
private val log = Logger.getLogger(classOf[InstanceRegistry])
|
||||
@@ -95,7 +99,13 @@ class InstanceRegistry:
|
||||
metadata.subscriptionCount,
|
||||
metadata.state,
|
||||
)
|
||||
Uni.createFrom().item(())
|
||||
val ttlMs = config.heartbeatTtl.toMillis
|
||||
redis
|
||||
.key(classOf[String])
|
||||
.pexpire(key, ttlMs)
|
||||
.map(_ => ())
|
||||
.onFailure()
|
||||
.recoverWithItem(())
|
||||
}
|
||||
catch
|
||||
case ex: Exception =>
|
||||
|
||||
Reference in New Issue
Block a user