refactor(bot): split NNUE into shared weights and per-thread evaluator

Prerequisite for parallel search. Previously, NNUE held all of its state on one instance: the immutable transposed L1 weight matrix alongside the mutable accumulator stack, scratch buffers and eval cache. As a result, concurrent eval calls on that instance could corrupt the shared buffers. This commit extracts the read-only parameters into NNUEWeights (heavy to build, safe to share). NNUE now owns only per-instance mutable buffers and references the shared weights, so many evaluators can run in parallel over one weight matrix without duplicating it. Single-instance behaviour is unchanged — EvaluationNNUE still uses one evaluator, so play is identical. Also applies scalafmt alignment to the MopUp files (MopUp and MopUpTest). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 12:12:26 +02:00
parent 7136803c7e
commit b72e8ec017
4 changed files with 37 additions and 15 deletions
@@ -32,7 +32,7 @@ object MopUp:
      PROXIMITY_WEIGHT * (14 - kingDistance(winnerKing, loserKing))).getOrElse(0)

  private def loneKingColor(context: GameContext): Option[Color] =
-    val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
+    val nonKing       = context.board.pieces.values.filter(_.pieceType != PieceType.King)
    val whiteHasOther = nonKing.exists(_.color == Color.White)
    val blackHasOther = nonKing.exists(_.color == Color.Black)
    if whiteHasOther == blackHasOther then None
@@ -48,7 +48,8 @@ object MopUp:
          case PieceType.Rook   => 500
          case PieceType.Bishop => 330
          case PieceType.Knight => 320
-          case _                => 0)
+          case _                => 0
+        )
    }

  private def centerDistance(sq: Square): Int =
@@ -4,20 +4,20 @@ import de.nowchess.api.board.{Board, Color, Piece, PieceType, Square}
 import de.nowchess.api.game.GameContext
 import de.nowchess.api.move.{Move, MoveType, PromotionPiece}

-class NNUE(model: NbaiModel):
+object NNUE:
+  def apply(model: NbaiModel): NNUE     = new NNUE(NNUEWeights(model))
+  def apply(weights: NNUEWeights): NNUE = new NNUE(weights)
+
+/** Per-thread NNUE evaluator: owns the mutable accumulator stack, scratch buffers and eval cache, while sharing the
+  * read-only [[NNUEWeights]]. Construct one instance per search thread (cheap — only buffer allocation); they may all
+  * share a single weights instance.
+  */
+class NNUE(weights: NNUEWeights):
+
+  import weights.{accSize, l1WeightsT, model, HALF_SIZE}

-  private val HALF_SIZE     = 49152                     // 64 king-squares × 12 piece-types × 64 piece-squares
-  private val featureSize   = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
-  private val accSize       = model.layers(0).outputSize
  private val validateAccum = sys.env.contains("NNUE_VALIDATE")

-  // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
-  private val l1WeightsT: Array[Float] =
-    val w = model.weights(0).weights
-    val t = new Array[Float](featureSize * accSize)
-    for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
-    t
-
  // ── Accumulator stack ────────────────────────────────────────────────────

  private val MAX_PLY                      = 128
@@ -0,0 +1,21 @@
+package de.nowchess.bot.bots.nnue
+
+/** Immutable, shareable NNUE parameters.
+  *
+  * Heavy to build (transposes the L1 weight matrix once, ~98304 × accSize floats) but read-only thereafter, so a single
+  * instance is safely shared across many per-thread [[NNUE]] evaluators. Holds no accumulator or scratch state — those
+  * live on each [[NNUE]] instance — which is what makes parallel search (independent evaluators sharing these weights)
+  * possible without duplicating the weight matrix.
+  */
+class NNUEWeights(val model: NbaiModel):
+
+  val HALF_SIZE: Int   = 49152                     // 64 king-squares × 12 piece-types × 64 piece-squares
+  val featureSize: Int = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
+  val accSize: Int     = model.layers(0).outputSize
+
+  // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
+  val l1WeightsT: Array[Float] =
+    val w = model.weights(0).weights
+    val t = new Array[Float](featureSize * accSize)
+    for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
+    t
@@ -11,8 +11,8 @@ class MopUpTest extends AnyFunSuite with Matchers:
  private def ctx(turn: Color, pieces: (Square, Piece)*): GameContext =
    GameContext.initial.withBoard(Board(pieces.toMap)).withTurn(turn)

-  private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
-  private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
+  private val wk       = Square(File.E, Rank.R1) -> Piece.WhiteKing
+  private val wq       = Square(File.D, Rank.R1) -> Piece.WhiteQueen
  private val bkCorner = Square(File.H, Rank.R8) -> Piece.BlackKing
  private val bkCenter = Square(File.D, Rank.R4) -> Piece.BlackKing