feat(logging): deepen coordinator and core logging (NCS-72)
Build & Test (NowChessSystems) TeamCity build failed

Coordinator: InstanceRegistry logs instance join/update/dead/remove;
CoordinatorGrpcServer logs new stream + first heartbeat per instance;
CoreGrpcClient logs channel open/evict and RPC success counts.

Core: GameResource replaces println with logger; RedisGameRegistry
logs store/Redis-load/DB-load and surfaces silent failures;
CoordinatorServiceHandler logs inbound gRPC commands; IoGrpcClientWrapper
and RuleSetGrpcAdapter wrap gRPC calls with WARN on failure.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 17:25:51 +02:00
parent b3aab9efd5
commit 4850b23e19
12 changed files with 256 additions and 103 deletions
@@ -9,7 +9,6 @@ quarkus:
server:
port: 9086
rest-client:
connection-timeout: 5000
read-timeout: 10000
smallrye-openapi:
info-title: NowChess Coordinator Service
@@ -27,13 +27,24 @@ class CoordinatorGrpcServer extends CoordinatorServiceGrpc.CoordinatorServiceImp
override def heartbeatStream(
responseObserver: StreamObserver[CoordinatorCommand],
): StreamObserver[HeartbeatFrame] =
log.info("New heartbeat stream connection established")
new StreamObserver[HeartbeatFrame]:
// scalafix:off DisableSyntax.var
private var lastInstanceId = ""
private var firstFrameSeen = false
// scalafix:on DisableSyntax.var
override def onNext(frame: HeartbeatFrame): Unit =
lastInstanceId = frame.getInstanceId
if !firstFrameSeen then
firstFrameSeen = true
log.infof(
"First heartbeat from instance %s (host=%s http=%d grpc=%d)",
frame.getInstanceId,
frame.getHostname,
frame.getHttpPort,
frame.getGrpcPort,
)
instanceRegistry
.updateInstanceFromRedis(frame.getInstanceId)
.subscribe()
@@ -16,10 +16,18 @@ class CoreGrpcClient:
private val channels = ConcurrentHashMap[String, ManagedChannel]()
private def getChannel(host: String, port: Int): ManagedChannel =
channels.computeIfAbsent(s"$host:$port", _ => ManagedChannelBuilder.forAddress(host, port).usePlaintext().build())
channels.computeIfAbsent(
s"$host:$port",
_ =>
log.infof("Opening gRPC channel to %s:%d", host, port)
ManagedChannelBuilder.forAddress(host, port).usePlaintext().build(),
)
private def evictStaleChannel(host: String, port: Int): Unit =
Option(channels.remove(s"$host:$port")).foreach(_.shutdownNow())
Option(channels.remove(s"$host:$port")).foreach { ch =>
log.infof("Evicting stale gRPC channel to %s:%d", host, port)
ch.shutdownNow()
}
@PreDestroy
def shutdown(): Unit =
@@ -33,7 +41,9 @@ class CoreGrpcClient:
try
val stub = CoordinatorServiceGrpc.newBlockingStub(getChannel(host, port))
val request = BatchResubscribeRequest.newBuilder().addAllGameIds(gameIds.asJava).build()
stub.batchResubscribeGames(request).getSubscribedCount
val count = stub.batchResubscribeGames(request).getSubscribedCount
log.debugf("batchResubscribeGames %s:%d — subscribed %d games", host, port, count)
count
catch
case ex: Exception =>
log.warnf(ex, "batchResubscribeGames RPC failed for %s:%d", host, port)
@@ -44,7 +54,9 @@ class CoreGrpcClient:
try
val stub = CoordinatorServiceGrpc.newBlockingStub(getChannel(host, port))
val request = UnsubscribeGamesRequest.newBuilder().addAllGameIds(gameIds.asJava).build()
stub.unsubscribeGames(request).getUnsubscribedCount
val count = stub.unsubscribeGames(request).getUnsubscribedCount
log.debugf("unsubscribeGames %s:%d — unsubscribed %d games", host, port, count)
count
catch
case ex: Exception =>
log.warnf(ex, "unsubscribeGames RPC failed for %s:%d", host, port)
@@ -55,7 +67,9 @@ class CoreGrpcClient:
try
val stub = CoordinatorServiceGrpc.newBlockingStub(getChannel(host, port))
val request = EvictGamesRequest.newBuilder().addAllGameIds(gameIds.asJava).build()
stub.evictGames(request).getEvictedCount
val count = stub.evictGames(request).getEvictedCount
log.debugf("evictGames %s:%d — evicted %d games", host, port, count)
count
catch
case ex: Exception =>
log.warnf(ex, "evictGames RPC failed for %s:%d", host, port)
@@ -41,9 +41,20 @@ class InstanceRegistry:
.transformToUni { value =>
try
val metadata = mapper.readValue(value, classOf[InstanceMetadata])
val isNew = !instances.containsKey(instanceId)
instances.put(instanceId, metadata)
if isNew then
log.infof("Instance %s joined registry (subscriptions=%d)", instanceId, metadata.subscriptionCount)
else
log.debugf(
"Instance %s updated (subscriptions=%d state=%s)",
instanceId,
metadata.subscriptionCount,
metadata.state,
)
Uni.createFrom().item(())
catch case ex: Exception =>
catch
case ex: Exception =>
log.warnf(ex, "Failed to parse instance metadata for %s", instanceId)
Uni.createFrom().item(())
}
@@ -52,8 +63,8 @@ class InstanceRegistry:
def markInstanceDead(instanceId: String): Unit =
instances.computeIfPresent(instanceId, (_, inst) => inst.copy(state = "DEAD"))
()
log.infof("Instance %s marked dead", instanceId)
def removeInstance(instanceId: String): Unit =
instances.remove(instanceId)
()
log.infof("Instance %s removed from registry", instanceId)
@@ -7,11 +7,15 @@ import scala.compiletime.uninitialized
import de.nowchess.coordinator.proto.{CoordinatorServiceGrpc, *}
import de.nowchess.chess.redis.GameRedisSubscriberManager
import io.grpc.stub.StreamObserver
import org.jboss.logging.Logger
import scala.jdk.CollectionConverters.*
@GrpcService
@Singleton
class CoordinatorServiceHandler extends CoordinatorServiceGrpc.CoordinatorServiceImplBase:
private val log = Logger.getLogger(classOf[CoordinatorServiceHandler])
// scalafix:off DisableSyntax.var
@Inject
private var gameSubscriberManager: GameRedisSubscriberManager = uninitialized
@@ -22,6 +26,7 @@ class CoordinatorServiceHandler extends CoordinatorServiceGrpc.CoordinatorServic
responseObserver: StreamObserver[BatchResubscribeResponse],
): Unit =
val count = gameSubscriberManager.batchResubscribeGames(request.getGameIdsList)
log.infof("Coordinator: batch resubscribe %d games → subscribed %d", request.getGameIdsList.size(), count)
val response = BatchResubscribeResponse
.newBuilder()
.setSubscribedCount(count)
@@ -34,6 +39,7 @@ class CoordinatorServiceHandler extends CoordinatorServiceGrpc.CoordinatorServic
responseObserver: StreamObserver[UnsubscribeGamesResponse],
): Unit =
val count = gameSubscriberManager.unsubscribeGames(request.getGameIdsList)
log.infof("Coordinator: unsubscribe %d games → unsubscribed %d", request.getGameIdsList.size(), count)
val response = UnsubscribeGamesResponse
.newBuilder()
.setUnsubscribedCount(count)
@@ -46,6 +52,7 @@ class CoordinatorServiceHandler extends CoordinatorServiceGrpc.CoordinatorServic
responseObserver: StreamObserver[EvictGamesResponse],
): Unit =
val count = gameSubscriberManager.evictGames(request.getGameIdsList)
log.infof("Coordinator: evict %d games → evicted %d", request.getGameIdsList.size(), count)
val response = EvictGamesResponse
.newBuilder()
.setEvictedCount(count)
@@ -58,6 +65,7 @@ class CoordinatorServiceHandler extends CoordinatorServiceGrpc.CoordinatorServic
responseObserver: StreamObserver[DrainInstanceResponse],
): Unit =
val migrated = gameSubscriberManager.drainInstance()
log.infof("Coordinator: drain instance → migrated %d games", migrated)
val response = DrainInstanceResponse
.newBuilder()
.setGamesMigrated(migrated)
@@ -5,29 +5,63 @@ import de.nowchess.chess.client.CombinedExportResponse
import de.nowchess.core.proto.*
import io.quarkus.grpc.GrpcClient
import jakarta.enterprise.context.ApplicationScoped
import org.jboss.logging.Logger
import scala.compiletime.uninitialized
@ApplicationScoped
class IoGrpcClientWrapper:
private val log = Logger.getLogger(classOf[IoGrpcClientWrapper])
// scalafix:off DisableSyntax.var
@GrpcClient("io-grpc")
var stub: IoServiceGrpc.IoServiceBlockingStub = uninitialized
// scalafix:on DisableSyntax.var
def exportCombined(ctx: GameContext): CombinedExportResponse =
try
val combined = stub.exportCombined(CoreProtoMapper.toProtoGameContext(ctx))
CombinedExportResponse(combined.getFen, combined.getPgn)
catch
case ex: Exception =>
log.warnf(ex, "IO gRPC exportCombined failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def importFen(fen: String): GameContext =
CoreProtoMapper.fromProtoGameContext(stub.importFen(ProtoImportFenRequest.newBuilder().setFen(fen).build()))
try CoreProtoMapper.fromProtoGameContext(stub.importFen(ProtoImportFenRequest.newBuilder().setFen(fen).build()))
catch
case ex: Exception =>
log.warnf(ex, "IO gRPC importFen failed for fen %s", fen)
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def importPgn(pgn: String): GameContext =
CoreProtoMapper.fromProtoGameContext(stub.importPgn(ProtoImportPgnRequest.newBuilder().setPgn(pgn).build()))
try CoreProtoMapper.fromProtoGameContext(stub.importPgn(ProtoImportPgnRequest.newBuilder().setPgn(pgn).build()))
catch
case ex: Exception =>
log.warnf(ex, "IO gRPC importPgn failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def exportFen(ctx: GameContext): String =
stub.exportFen(CoreProtoMapper.toProtoGameContext(ctx)).getValue
try stub.exportFen(CoreProtoMapper.toProtoGameContext(ctx)).getValue
catch
case ex: Exception =>
log.warnf(ex, "IO gRPC exportFen failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def exportPgn(ctx: GameContext): String =
stub.exportPgn(CoreProtoMapper.toProtoGameContext(ctx)).getValue
try stub.exportPgn(CoreProtoMapper.toProtoGameContext(ctx)).getValue
catch
case ex: Exception =>
log.warnf(ex, "IO gRPC exportPgn failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
@@ -7,6 +7,7 @@ import de.nowchess.api.rules.{PostMoveStatus, RuleSet}
import de.nowchess.core.proto.*
import io.quarkus.grpc.GrpcClient
import jakarta.enterprise.context.ApplicationScoped
import org.jboss.logging.Logger
import scala.compiletime.uninitialized
import scala.jdk.CollectionConverters.*
@@ -14,28 +15,59 @@ import scala.jdk.CollectionConverters.*
@ApplicationScoped
class RuleSetGrpcAdapter extends RuleSet:
private val log = Logger.getLogger(classOf[RuleSetGrpcAdapter])
// scalafix:off DisableSyntax.var
@GrpcClient("rule-grpc")
var stub: RuleServiceGrpc.RuleServiceBlockingStub = uninitialized
// scalafix:on DisableSyntax.var
def candidateMoves(ctx: GameContext)(sq: Square): List[Move] =
try
val req =
ProtoSquareRequest.newBuilder().setContext(CoreProtoMapper.toProtoGameContext(ctx)).setSquare(sq.toString).build()
ProtoSquareRequest
.newBuilder()
.setContext(CoreProtoMapper.toProtoGameContext(ctx))
.setSquare(sq.toString)
.build()
stub.candidateMoves(req).getMovesList.asScala.flatMap(CoreProtoMapper.fromProtoMove).toList
catch
case ex: Exception =>
log.warnf(ex, "Rule gRPC candidateMoves failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def legalMoves(ctx: GameContext)(sq: Square): List[Move] =
try
val req =
ProtoSquareRequest.newBuilder().setContext(CoreProtoMapper.toProtoGameContext(ctx)).setSquare(sq.toString).build()
ProtoSquareRequest
.newBuilder()
.setContext(CoreProtoMapper.toProtoGameContext(ctx))
.setSquare(sq.toString)
.build()
stub.legalMoves(req).getMovesList.asScala.flatMap(CoreProtoMapper.fromProtoMove).toList
catch
case ex: Exception =>
log.warnf(ex, "Rule gRPC legalMoves failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def allLegalMoves(ctx: GameContext): List[Move] =
try
stub
.allLegalMoves(CoreProtoMapper.toProtoGameContext(ctx))
.getMovesList
.asScala
.flatMap(CoreProtoMapper.fromProtoMove)
.toList
catch
case ex: Exception =>
log.warnf(ex, "Rule gRPC allLegalMoves failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
def isCheck(ctx: GameContext): Boolean =
stub.isCheck(CoreProtoMapper.toProtoGameContext(ctx)).getValue
@@ -56,14 +88,22 @@ class RuleSetGrpcAdapter extends RuleSet:
stub.isThreefoldRepetition(CoreProtoMapper.toProtoGameContext(ctx)).getValue
def applyMove(ctx: GameContext)(move: Move): GameContext =
try
val req = ProtoMoveRequest
.newBuilder()
.setContext(CoreProtoMapper.toProtoGameContext(ctx))
.setMove(CoreProtoMapper.toProtoMove(move))
.build()
CoreProtoMapper.fromProtoGameContext(stub.applyMove(req))
catch
case ex: Exception =>
log.warnf(ex, "Rule gRPC applyMove failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
override def postMoveStatus(ctx: GameContext): PostMoveStatus =
try
val p = stub.postMoveStatus(CoreProtoMapper.toProtoGameContext(ctx))
PostMoveStatus(
p.getIsCheckmate,
@@ -72,3 +112,9 @@ class RuleSetGrpcAdapter extends RuleSet:
p.getIsCheck,
p.getIsThreefoldRepetition,
)
catch
case ex: Exception =>
log.warnf(ex, "Rule gRPC postMoveStatus failed")
// scalafix:off DisableSyntax.throw
throw ex
// scalafix:on DisableSyntax.throw
@@ -17,6 +17,7 @@ import jakarta.enterprise.context.ApplicationScoped
import jakarta.inject.Inject
import org.eclipse.microprofile.rest.client.inject.RestClient
import scala.compiletime.uninitialized
import org.jboss.logging.Logger
import scala.util.Try
import java.nio.charset.StandardCharsets
import java.security.{MessageDigest, SecureRandom}
@@ -35,6 +36,7 @@ class RedisGameRegistry extends GameRegistry:
@Inject @RestClient var storeClient: StoreServiceClient = uninitialized
// scalafix:on
private val log = Logger.getLogger(classOf[RedisGameRegistry])
private val localEngines = ConcurrentHashMap[String, GameEntry]()
private val rng = new SecureRandom()
@@ -48,6 +50,12 @@ class RedisGameRegistry extends GameRegistry:
localEngines.put(entry.gameId, entry)
val combined = ioClient.exportCombined(entry.engine.context)
redis.value(classOf[String]).setex(cacheKey(entry.gameId), 1800L, toJson(entry, combined.fen, combined.pgn))
log.infof(
"Stored game %s in registry (white=%s black=%s)",
entry.gameId,
entry.white.displayName,
entry.black.displayName,
)
def get(gameId: String): Option[GameEntry] =
Option(localEngines.get(gameId)) match
@@ -71,9 +79,15 @@ class RedisGameRegistry extends GameRegistry:
private def fromRedis(gameId: String): Option[GameEntry] =
readRedisDto(gameId)
.flatMap(dto => Try(reconstruct(dto)).toOption)
.flatMap { dto =>
Try(reconstruct(dto)).toOption.orElse {
log.warnf("Failed to reconstruct game %s from Redis", gameId)
None
}
}
.map { entry =>
localEngines.put(gameId, entry)
log.infof("Loaded game %s from Redis cache", gameId)
entry
}
@@ -102,12 +116,15 @@ class RedisGameRegistry extends GameRegistry:
pendingDrawOffer = Option(record.pendingDrawOffer),
)
(dto, reconstruct(dto))
}.toOption
.map { case (dto, entry) =>
} match
case scala.util.Success((dto, entry)) =>
log.infof("Loaded game %s from store service", gameId)
localEngines.put(gameId, entry)
redis.value(classOf[String]).setex(cacheKey(gameId), 1800L, objectMapper.writeValueAsString(dto))
entry
}
Some(entry)
case scala.util.Failure(ex) =>
log.warnf(ex, "Failed to load game %s from store service", gameId)
None
private def reconstruct(dto: GameCacheDto): GameEntry =
val ctx = if dto.pgn.nonEmpty then ioClient.importPgn(dto.pgn) else GameContext.initial
@@ -30,6 +30,7 @@ import jakarta.inject.Inject
import jakarta.ws.rs.*
import jakarta.ws.rs.core.{MediaType, Response}
import org.eclipse.microprofile.jwt.JsonWebToken
import org.jboss.logging.Logger
import java.util.concurrent.atomic.AtomicReference
import scala.compiletime.uninitialized
@@ -38,6 +39,8 @@ import scala.compiletime.uninitialized
@ApplicationScoped
class GameResource:
private val log = Logger.getLogger(classOf[GameResource])
// scalafix:off DisableSyntax.var
@Inject
var registry: GameRegistry = uninitialized
@@ -165,7 +168,13 @@ class GameResource:
val entry = newEntry(GameContext.initial, white, black, tc, mode)
registry.store(entry)
subscriberManager.subscribeGame(entry.gameId)
println(s"Created game ${entry.gameId}")
log.infof(
"Game %s created — white=%s black=%s mode=%s",
entry.gameId,
white.displayName,
black.displayName,
mode.toString,
)
created(GameDtoMapper.toGameFullDto(entry, ioClient))
@GET
@@ -182,6 +191,7 @@ class GameResource:
val entry = registry.get(gameId).getOrElse(throw GameNotFoundException(gameId))
assertGameNotOver(entry)
val color = colorOf(entry)
log.infof("Game %s — resign by %s", gameId, color.label)
entry.engine.resign(color)
registry.update(entry.copy(resigned = true))
ok(OkResponseDto())
@@ -194,6 +204,7 @@ class GameResource:
val entry = registry.get(gameId).getOrElse(throw GameNotFoundException(gameId))
assertGameNotOver(entry)
assertIsCurrentPlayer(entry)
log.debugf("Game %s — move %s by %s", gameId, uci, colorOf(entry).label)
if Parser.parseMove(uci).isEmpty then
throw BadRequestException("INVALID_UCI", s"Invalid UCI notation: $uci", Some("uci"))
applyMoveInput(entry.engine, uci).foreach(err => throw BadRequestException("INVALID_MOVE", err, Some("uci")))
@@ -284,6 +295,7 @@ class GameResource:
val entry = newEntry(ctx, white, black, tc)
registry.store(entry)
subscriberManager.subscribeGame(entry.gameId)
log.infof("Imported FEN game %s", entry.gameId)
created(GameDtoMapper.toGameFullDto(entry, ioClient))
@POST
@@ -295,6 +307,7 @@ class GameResource:
val entry = newEntry(ctx, DefaultWhite, DefaultBlack)
registry.store(entry)
subscriberManager.subscribeGame(entry.gameId)
log.infof("Imported PGN game %s", entry.gameId)
created(GameDtoMapper.toGameFullDto(entry, ioClient))
@GET
+1 -1
View File
@@ -1,7 +1,7 @@
#! /usr/bin/env bash
set -euo pipefail
./gradlew test
./gradlew test -Dquarkus.profile=test
if [ "$#" -eq 0 ]; then
PYTHONUTF8=1 python3 jacoco-reporter/test_gaps.py