From b72e8ec0172e2b831abd6d3c79570057318a59cc Mon Sep 17 00:00:00 2001 From: Janis Date: Tue, 30 Jun 2026 12:12:26 +0200 Subject: [PATCH] refactor(bot): split NNUE into shared weights and per-thread evaluator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prerequisite for parallel search. NNUE held all state on one instance: the immutable transposed L1 weight matrix alongside the mutable accumulator stack, scratch buffers and eval cache. That made concurrent eval calls corrupt shared buffers. Extract the read-only parameters into NNUEWeights (heavy to build, safe to share). NNUE now owns only per-instance mutable buffers and references the shared weights, so many evaluators can run in parallel over one weight matrix without duplicating it. Single-instance behaviour is unchanged — EvaluationNNUE still uses one evaluator, so play is identical. Also applies scalafmt alignment to the MopUp files. Co-Authored-By: Claude Opus 4.8 --- .../de/nowchess/bot/bots/nnue/MopUp.scala | 5 +++-- .../de/nowchess/bot/bots/nnue/NNUE.scala | 22 +++++++++---------- .../nowchess/bot/bots/nnue/NNUEWeights.scala | 21 ++++++++++++++++++ .../scala/de/nowchess/bot/MopUpTest.scala | 4 ++-- 4 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala index c2695c3..d2f275c 100644 --- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala +++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala @@ -32,7 +32,7 @@ object MopUp: PROXIMITY_WEIGHT * (14 - kingDistance(winnerKing, loserKing))).getOrElse(0) private def loneKingColor(context: GameContext): Option[Color] = - val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King) + val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King) val whiteHasOther = nonKing.exists(_.color == Color.White) val blackHasOther = nonKing.exists(_.color == Color.Black) if whiteHasOther == blackHasOther then None @@ -48,7 +48,8 @@ object MopUp: case PieceType.Rook => 500 case PieceType.Bishop => 330 case PieceType.Knight => 320 - case _ => 0) + case _ => 0 + ) } private def centerDistance(sq: Square): Int = diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala index a4916d4..0295c33 100644 --- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala +++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala @@ -4,20 +4,20 @@ import de.nowchess.api.board.{Board, Color, Piece, PieceType, Square} import de.nowchess.api.game.GameContext import de.nowchess.api.move.{Move, MoveType, PromotionPiece} -class NNUE(model: NbaiModel): +object NNUE: + def apply(model: NbaiModel): NNUE = new NNUE(NNUEWeights(model)) + def apply(weights: NNUEWeights): NNUE = new NNUE(weights) + +/** Per-thread NNUE evaluator: owns the mutable accumulator stack, scratch buffers and eval cache, while sharing the + * read-only [[NNUEWeights]]. Construct one instance per search thread (cheap — only buffer allocation); they may all + * share a single weights instance. + */ +class NNUE(weights: NNUEWeights): + + import weights.{accSize, l1WeightsT, model, HALF_SIZE} - private val HALF_SIZE = 49152 // 64 king-squares × 12 piece-types × 64 piece-squares - private val featureSize = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative - private val accSize = model.layers(0).outputSize private val validateAccum = sys.env.contains("NNUE_VALIDATE") - // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx) - private val l1WeightsT: Array[Float] = - val w = model.weights(0).weights - val t = new Array[Float](featureSize * accSize) - for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j) - t - // ── Accumulator stack ──────────────────────────────────────────────────── private val MAX_PLY = 128 diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala new file mode 100644 index 0000000..5a004ad --- /dev/null +++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala @@ -0,0 +1,21 @@ +package de.nowchess.bot.bots.nnue + +/** Immutable, shareable NNUE parameters. + * + * Heavy to build (transposes the L1 weight matrix once, ~98304 × accSize floats) but read-only thereafter, so a single + * instance is safely shared across many per-thread [[NNUE]] evaluators. Holds no accumulator or scratch state — those + * live on each [[NNUE]] instance — which is what makes parallel search (independent evaluators sharing these weights) + * possible without duplicating the weight matrix. + */ +class NNUEWeights(val model: NbaiModel): + + val HALF_SIZE: Int = 49152 // 64 king-squares × 12 piece-types × 64 piece-squares + val featureSize: Int = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative + val accSize: Int = model.layers(0).outputSize + + // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx) + val l1WeightsT: Array[Float] = + val w = model.weights(0).weights + val t = new Array[Float](featureSize * accSize) + for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j) + t diff --git a/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala b/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala index 756158b..30eb6f5 100644 --- a/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala +++ b/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala @@ -11,8 +11,8 @@ class MopUpTest extends AnyFunSuite with Matchers: private def ctx(turn: Color, pieces: (Square, Piece)*): GameContext = GameContext.initial.withBoard(Board(pieces.toMap)).withTurn(turn) - private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing - private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen + private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing + private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen private val bkCorner = Square(File.H, Rank.R8) -> Piece.BlackKing private val bkCenter = Square(File.D, Rank.R4) -> Piece.BlackKing