From d438e97f32bdde0bfc63c1b4a8cc810cdd093166 Mon Sep 17 00:00:00 2001 From: Janis Date: Mon, 11 May 2026 22:37:22 +0200 Subject: [PATCH] feat: add initialization metrics for various services --- .../account/service/AccountService.scala | 8 ++++ .../account/service/ChallengeService.scala | 7 ++++ .../coordinator/service/AutoScaler.scala | 4 ++ .../service/CacheEvictionManager.scala | 7 ++++ .../coordinator/service/HealthMonitor.scala | 38 ++++++++++++++++++- .../service/InstanceRegistry.scala | 3 ++ .../chess/registry/RedisGameRegistry.scala | 15 ++++++++ .../service/InstanceHeartbeatService.scala | 1 + .../bot/service/OfficialBotService.scala | 9 +++++ .../store/service/GameWritebackService.scala | 10 ++++- .../ws/resource/GameWebSocketResource.scala | 8 ++++ 11 files changed, 108 insertions(+), 2 deletions(-) diff --git a/modules/account/src/main/scala/de/nowchess/account/service/AccountService.scala b/modules/account/src/main/scala/de/nowchess/account/service/AccountService.scala index 9afbc7c..04c1f95 100644 --- a/modules/account/src/main/scala/de/nowchess/account/service/AccountService.scala +++ b/modules/account/src/main/scala/de/nowchess/account/service/AccountService.scala @@ -7,6 +7,7 @@ import de.nowchess.account.repository.{BotAccountRepository, OfficialBotAccountR import io.micrometer.core.instrument.MeterRegistry import io.quarkus.elytron.security.common.BcryptUtil import io.smallrye.jwt.build.Jwt +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.inject.Inject import jakarta.transaction.Transactional @@ -35,6 +36,13 @@ class AccountService: var meterRegistry: MeterRegistry = uninitialized // scalafix:on + @PostConstruct + def initializeMetrics(): Unit = + meterRegistry.counter("nowchess.users.registrations", "result", "success").increment(0) + meterRegistry.counter("nowchess.users.registrations", "result", "failure").increment(0) + meterRegistry.counter("nowchess.auth.logins", "result", "success").increment(0) + meterRegistry.counter("nowchess.auth.logins", "result", "failure").increment(0) + @Transactional def register(req: RegisterRequest): Either[AccountError, UserAccount] = log.infof("Registering user %s", req.username) diff --git a/modules/account/src/main/scala/de/nowchess/account/service/ChallengeService.scala b/modules/account/src/main/scala/de/nowchess/account/service/ChallengeService.scala index 69019fb..08e20d2 100644 --- a/modules/account/src/main/scala/de/nowchess/account/service/ChallengeService.scala +++ b/modules/account/src/main/scala/de/nowchess/account/service/ChallengeService.scala @@ -19,6 +19,7 @@ import de.nowchess.account.dto.{ import de.nowchess.account.error.ChallengeError import de.nowchess.account.repository.{ChallengeRepository, UserAccountRepository} import io.micrometer.core.instrument.MeterRegistry +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.inject.Inject import jakarta.transaction.Transactional @@ -54,6 +55,12 @@ class ChallengeService: var meterRegistry: MeterRegistry = uninitialized // scalafix:on + @PostConstruct + def initializeMetrics(): Unit = + meterRegistry.counter("nowchess.challenges.created").increment(0) + meterRegistry.counter("nowchess.challenges.accepted").increment(0) + meterRegistry.counter("nowchess.challenges.declined").increment(0) + @Transactional def create(challengerId: UUID, destUsername: String, req: ChallengeRequest): Either[ChallengeError, Challenge] = val result = createChallenge(challengerId, destUsername, req) diff --git a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/AutoScaler.scala b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/AutoScaler.scala index f04ae97..9883c97 100644 --- a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/AutoScaler.scala +++ b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/AutoScaler.scala @@ -45,6 +45,10 @@ class AutoScaler: Gauge .builder("nowchess.coordinator.load.average", avgLoadRef, _.get()) .register(meterRegistry) + meterRegistry.counter("nowchess.coordinator.scale.events", "direction", "up").increment(0) + meterRegistry.counter("nowchess.coordinator.scale.events", "direction", "down").increment(0) + meterRegistry.counter("nowchess.coordinator.scale.failures", "direction", "up").increment(0) + meterRegistry.counter("nowchess.coordinator.scale.failures", "direction", "down").increment(0) () // scalafix:off DisableSyntax.asInstanceOf diff --git a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/CacheEvictionManager.scala b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/CacheEvictionManager.scala index ab48e1e..0008888 100644 --- a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/CacheEvictionManager.scala +++ b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/CacheEvictionManager.scala @@ -1,5 +1,6 @@ package de.nowchess.coordinator.service +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.inject.Inject import io.quarkus.redis.datasource.RedisDataSource @@ -11,6 +12,7 @@ import org.jboss.logging.Logger import scala.compiletime.uninitialized import scala.util.Try import java.time.Instant +import java.util.concurrent.TimeUnit import de.nowchess.coordinator.grpc.CoreGrpcClient @ApplicationScoped @@ -41,6 +43,11 @@ class CacheEvictionManager: def setRedisPrefix(prefix: String): Unit = redisPrefix = prefix + @PostConstruct + def initializeMetrics(): Unit = + meterRegistry.timer("nowchess.coordinator.cache.eviction.duration").record(0L, TimeUnit.MILLISECONDS) + meterRegistry.counter("nowchess.coordinator.cache.evictions").increment(0) + def evictStaleGames: Unit = meterRegistry.timer("nowchess.coordinator.cache.eviction.duration").record((() => runEviction()): Runnable) diff --git a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala index 15714d9..ecc4b76 100644 --- a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala +++ b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/HealthMonitor.scala @@ -1,5 +1,6 @@ package de.nowchess.coordinator.service +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.inject.Instance import jakarta.inject.Inject @@ -42,16 +43,24 @@ class HealthMonitor: def setRedisPrefix(prefix: String): Unit = redisPrefix = prefix + @PostConstruct + def initializeMetrics(): Unit = + meterRegistry.counter("nowchess.coordinator.health.checks").increment(0) + meterRegistry.counter("nowchess.coordinator.pods.unhealthy").increment(0) + def checkInstanceHealth: Unit = meterRegistry.counter("nowchess.coordinator.health.checks").increment() val evicted = instanceRegistry.evictStaleInstances(config.instanceDeadTimeout) - if evicted.nonEmpty then log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", ")) + if evicted.nonEmpty then + log.warnf("Evicted %d stale instances: %s", evicted.size, evicted.mkString(", ")) + evicted.foreach(deleteK8sPod) val instances = instanceRegistry.getAllInstances instances.foreach { inst => val isHealthy = checkHealth(inst.instanceId) if !isHealthy && inst.state == "HEALTHY" then log.warnf("Instance %s marked unhealthy", inst.instanceId) instanceRegistry.markInstanceDead(inst.instanceId) + deleteK8sPod(inst.instanceId) } private def checkHealth(instanceId: String): Boolean = @@ -116,6 +125,7 @@ class HealthMonitor: meterRegistry.counter("nowchess.coordinator.pods.unhealthy").increment() log.warnf("Pod %s not ready, marking instance %s dead", pod.getMetadata.getName, inst.instanceId) instanceRegistry.markInstanceDead(inst.instanceId) + deleteK8sPod(inst.instanceId) case None => log.warnf("No pod found for instance %s, evicting from registry", inst.instanceId) instanceRegistry.removeInstance(inst.instanceId) @@ -128,3 +138,29 @@ class HealthMonitor: Option(pod.getStatus) .flatMap(s => Option(s.getConditions)) .exists(_.asScala.exists(cond => cond.getType == "Ready" && cond.getStatus == "True")) + + private def deleteK8sPod(instanceId: String): Unit = + kubeClientOpt match + case None => + log.debugf("Kubernetes client not available, skipping pod deletion for %s", instanceId) + case Some(kube) => + try + val pods = kube + .pods() + .inNamespace(config.k8sNamespace) + .withLabel(config.k8sRolloutLabelSelector) + .list() + .getItems + .asScala + + pods.find(pod => pod.getMetadata.getName.contains(instanceId)) match + case Some(pod) => + val podName = pod.getMetadata.getName + kube.pods().inNamespace(config.k8sNamespace).withName(podName).delete() + meterRegistry.counter("nowchess.coordinator.pods.deleted").increment() + log.infof("Deleted pod %s for dead instance %s", podName, instanceId) + case None => + log.debugf("No pod found for instance %s, skipping deletion", instanceId) + catch + case ex: Exception => + log.warnf(ex, "Failed to delete pod for instance %s", instanceId) diff --git a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/InstanceRegistry.scala b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/InstanceRegistry.scala index c09e96a..859c0cd 100644 --- a/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/InstanceRegistry.scala +++ b/modules/coordinator/src/main/scala/de/nowchess/coordinator/service/InstanceRegistry.scala @@ -34,6 +34,9 @@ class InstanceRegistry: Gauge .builder("nowchess.coordinator.instances.active", instances, m => m.size().toDouble) .register(meterRegistry) + meterRegistry.counter("nowchess.coordinator.instances.joined").increment(0) + meterRegistry.counter("nowchess.coordinator.instances.removed").increment(0) + meterRegistry.counter("nowchess.coordinator.instances.evicted").increment(0) () def setRedisPrefix(prefix: String): Unit = diff --git a/modules/core/src/main/scala/de/nowchess/chess/registry/RedisGameRegistry.scala b/modules/core/src/main/scala/de/nowchess/chess/registry/RedisGameRegistry.scala index 256639c..9a93c2a 100644 --- a/modules/core/src/main/scala/de/nowchess/chess/registry/RedisGameRegistry.scala +++ b/modules/core/src/main/scala/de/nowchess/chess/registry/RedisGameRegistry.scala @@ -48,6 +48,21 @@ class RedisGameRegistry extends GameRegistry: Gauge .builder("nowchess.games.active", GameEngine.activeGamesCount, _.get().toDouble) .register(meterRegistry) + meterRegistry.counter("nowchess.games.cache.hits", "source", "local").increment(0) + meterRegistry.counter("nowchess.games.cache.hits", "source", "redis").increment(0) + meterRegistry.counter("nowchess.games.cache.hits", "source", "db").increment(0) + meterRegistry.counter("nowchess.games.cache.misses").increment(0) + meterRegistry.counter("nowchess.games.started").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "checkmate").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "draw.agreement").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "draw.fifty_move").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "draw.threefold").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "draw.stalemate").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "draw.insufficient").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "resignation").increment(0) + meterRegistry.counter("nowchess.games.completed", "result", "timeout").increment(0) + meterRegistry.counter("nowchess.moves.processed").increment(0) + meterRegistry.timer("nowchess.moves.duration").record(0L, java.util.concurrent.TimeUnit.MILLISECONDS) () private def cacheKey(gameId: String) = s"${redisConfig.prefix}:game:entry:$gameId" diff --git a/modules/core/src/main/scala/de/nowchess/chess/service/InstanceHeartbeatService.scala b/modules/core/src/main/scala/de/nowchess/chess/service/InstanceHeartbeatService.scala index 83214a4..097aa16 100644 --- a/modules/core/src/main/scala/de/nowchess/chess/service/InstanceHeartbeatService.scala +++ b/modules/core/src/main/scala/de/nowchess/chess/service/InstanceHeartbeatService.scala @@ -65,6 +65,7 @@ class InstanceHeartbeatService: def onStart(@Observes event: StartupEvent): Unit = Gauge.builder("nowchess.instance.subscriptions", subscriptionCount, _.get().toDouble).register(meterRegistry) + meterRegistry.counter("nowchess.heartbeat.failures").increment(0) if coordinatorEnabled then try shuttingDown = false diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/service/OfficialBotService.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/service/OfficialBotService.scala index 25c2f2d..bc1a8a1 100644 --- a/modules/official-bots/src/main/scala/de/nowchess/bot/service/OfficialBotService.scala +++ b/modules/official-bots/src/main/scala/de/nowchess/bot/service/OfficialBotService.scala @@ -9,11 +9,13 @@ import de.nowchess.io.fen.FenParser import io.micrometer.core.instrument.MeterRegistry import io.quarkus.redis.datasource.RedisDataSource import io.quarkus.runtime.StartupEvent +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.event.Observes import jakarta.inject.Inject import scala.compiletime.uninitialized import java.util.function.Consumer +import java.util.concurrent.TimeUnit @ApplicationScoped class OfficialBotService: @@ -29,6 +31,13 @@ class OfficialBotService: private val terminalStatuses = Set("checkmate", "resign", "timeout", "stalemate", "insufficientMaterial", "draw") + @PostConstruct + def initializeMetrics(): Unit = + BotController.listBots.foreach { bot => + meterRegistry.timer("nowchess.bot.move.duration", "bot", bot).record(0L, TimeUnit.MILLISECONDS) + meterRegistry.counter("nowchess.bot.moves.computed", "bot", bot).increment(0) + } + def onStart(@Observes event: StartupEvent): Unit = BotController.listBots.foreach(subscribeToEventChannel) diff --git a/modules/store/src/main/scala/de/nowchess/store/service/GameWritebackService.scala b/modules/store/src/main/scala/de/nowchess/store/service/GameWritebackService.scala index 0444d7b..9784084 100644 --- a/modules/store/src/main/scala/de/nowchess/store/service/GameWritebackService.scala +++ b/modules/store/src/main/scala/de/nowchess/store/service/GameWritebackService.scala @@ -4,6 +4,7 @@ import de.nowchess.api.dto.GameWritebackEventDto import de.nowchess.store.domain.GameRecord import de.nowchess.store.repository.GameRecordRepository import io.micrometer.core.instrument.{Counter, MeterRegistry, Timer} +import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped import jakarta.inject.Inject import jakarta.transaction.Transactional @@ -20,7 +21,14 @@ class GameWritebackService: @Inject var meterRegistry: MeterRegistry = uninitialized - // scalafix:on DisableSyntax.var + // scalafix:on + + @PostConstruct + def initializeMetrics(): Unit = + meterRegistry.timer("nowchess.store.writeback.duration").record(0L, java.util.concurrent.TimeUnit.MILLISECONDS) + meterRegistry.counter("nowchess.store.writeback.skipped").increment(0) + meterRegistry.counter("nowchess.store.games.written", "operation", "create").increment(0) + meterRegistry.counter("nowchess.store.games.written", "operation", "update").increment(0) private lazy val writebackTimer: Timer = meterRegistry.timer("nowchess.store.writeback.duration") diff --git a/modules/ws/src/main/scala/de/nowchess/ws/resource/GameWebSocketResource.scala b/modules/ws/src/main/scala/de/nowchess/ws/resource/GameWebSocketResource.scala index f665994..5c43200 100644 --- a/modules/ws/src/main/scala/de/nowchess/ws/resource/GameWebSocketResource.scala +++ b/modules/ws/src/main/scala/de/nowchess/ws/resource/GameWebSocketResource.scala @@ -6,6 +6,7 @@ import io.quarkus.redis.datasource.RedisDataSource import io.quarkus.redis.datasource.pubsub.PubSubCommands import io.quarkus.websockets.next.* import io.smallrye.jwt.auth.principal.JWTParser +import jakarta.annotation.PostConstruct import jakarta.inject.Inject import org.jboss.logging.Logger import scala.compiletime.uninitialized @@ -34,6 +35,13 @@ class GameWebSocketResource: private val connections = new ConcurrentHashMap[String, ConnectionMeta]() + @PostConstruct + def initializeMetrics(): Unit = + _ = connectionsOpened + _ = connectionsClosed + _ = messagesReceived + _ = activeGauge + private lazy val connectionsOpened: Counter = meterRegistry.counter("nowchess.ws.connections.opened")