From b72e8ec0172e2b831abd6d3c79570057318a59cc Mon Sep 17 00:00:00 2001
From: Janis <janis-e@gmx.de>
Date: Tue, 30 Jun 2026 12:12:26 +0200
Subject: [PATCH] refactor(bot): split NNUE into shared weights and per-thread
 evaluator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prerequisite for parallel search. NNUE held all state on one instance:
the immutable transposed L1 weight matrix alongside the mutable
accumulator stack, scratch buffers and eval cache. Concurrent eval calls
on a single instance would therefore corrupt the shared buffers, while
giving each thread its own instance would duplicate the weight matrix.

Extract the read-only parameters into NNUEWeights (heavy to build, safe
to share). NNUE now owns only per-instance mutable buffers and references
the shared weights, so many evaluators can run in parallel over one weight
matrix without duplicating it. Single-instance behaviour is unchanged —
EvaluationNNUE still uses one evaluator, so play is identical.

Also applies scalafmt formatting (alignment, closing parenthesis) to
MopUp.scala and MopUpTest.scala.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../de/nowchess/bot/bots/nnue/MopUp.scala     |  5 +++--
 .../de/nowchess/bot/bots/nnue/NNUE.scala      | 22 +++++++++----------
 .../nowchess/bot/bots/nnue/NNUEWeights.scala  | 21 ++++++++++++++++++
 .../scala/de/nowchess/bot/MopUpTest.scala     |  4 ++--
 4 files changed, 37 insertions(+), 15 deletions(-)
 create mode 100644 modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala

diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala
index c2695c3..d2f275c 100644
--- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/MopUp.scala
@@ -32,7 +32,7 @@ object MopUp:
       PROXIMITY_WEIGHT * (14 - kingDistance(winnerKing, loserKing))).getOrElse(0)
 
   private def loneKingColor(context: GameContext): Option[Color] =
-    val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
+    val nonKing       = context.board.pieces.values.filter(_.pieceType != PieceType.King)
     val whiteHasOther = nonKing.exists(_.color == Color.White)
     val blackHasOther = nonKing.exists(_.color == Color.Black)
     if whiteHasOther == blackHasOther then None
@@ -48,7 +48,8 @@ object MopUp:
           case PieceType.Rook   => 500
           case PieceType.Bishop => 330
           case PieceType.Knight => 320
-          case _                => 0)
+          case _                => 0
+        )
     }
 
   private def centerDistance(sq: Square): Int =
diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala
index a4916d4..0295c33 100644
--- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUE.scala
@@ -4,20 +4,20 @@ import de.nowchess.api.board.{Board, Color, Piece, PieceType, Square}
 import de.nowchess.api.game.GameContext
 import de.nowchess.api.move.{Move, MoveType, PromotionPiece}
 
-class NNUE(model: NbaiModel):
+object NNUE:
+  def apply(model: NbaiModel): NNUE     = new NNUE(NNUEWeights(model))
+  def apply(weights: NNUEWeights): NNUE = new NNUE(weights)
+
+/** Per-thread NNUE evaluator: owns the mutable accumulator stack, scratch buffers and eval cache, while sharing the
+  * read-only [[NNUEWeights]]. Construct one instance per search thread via `NNUE(weights)` (cheap — only buffer
+  * allocation) so they all share a single weights instance; `NNUE(model)` rebuilds the weights on every call.
+  */
+class NNUE(weights: NNUEWeights):
+
+  import weights.{accSize, l1WeightsT, model, HALF_SIZE}
 
-  private val HALF_SIZE     = 49152                     // 64 king-squares × 12 piece-types × 64 piece-squares
-  private val featureSize   = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
-  private val accSize       = model.layers(0).outputSize
   private val validateAccum = sys.env.contains("NNUE_VALIDATE")
 
-  // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
-  private val l1WeightsT: Array[Float] =
-    val w = model.weights(0).weights
-    val t = new Array[Float](featureSize * accSize)
-    for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
-    t
-
   // ── Accumulator stack ────────────────────────────────────────────────────
 
   private val MAX_PLY                      = 128
diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala
new file mode 100644
index 0000000..5a004ad
--- /dev/null
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NNUEWeights.scala
@@ -0,0 +1,21 @@
+package de.nowchess.bot.bots.nnue
+
+/** Shareable NNUE parameters, treated as immutable after construction.
+  *
+  * Heavy to build (transposes the L1 weight matrix once, ~98304 × accSize floats) but only read thereafter, so a
+  * single instance can be shared across many per-thread [[NNUE]] evaluators. `l1WeightsT` is an `Array[Float]`, so
+  * this holds by convention only: nothing may write to it. Accumulator and scratch state live on each [[NNUE]]
+  * instance, which is what lets independent evaluators search in parallel without duplicating the weight matrix.
+  */
+class NNUEWeights(val model: NbaiModel):
+
+  val HALF_SIZE: Int   = 49152                     // 64 king-squares × 12 piece-types × 64 piece-squares
+  val featureSize: Int = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
+  val accSize: Int     = model.layers(0).outputSize
+
+  // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
+  val l1WeightsT: Array[Float] =
+    val w = model.weights(0).weights
+    val t = new Array[Float](featureSize * accSize)
+    for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
+    t
diff --git a/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala b/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala
index 756158b..30eb6f5 100644
--- a/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala
+++ b/modules/official-bots/src/test/scala/de/nowchess/bot/MopUpTest.scala
@@ -11,8 +11,8 @@ class MopUpTest extends AnyFunSuite with Matchers:
   private def ctx(turn: Color, pieces: (Square, Piece)*): GameContext =
     GameContext.initial.withBoard(Board(pieces.toMap)).withTurn(turn)
 
-  private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
-  private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
+  private val wk       = Square(File.E, Rank.R1) -> Piece.WhiteKing
+  private val wq       = Square(File.D, Rank.R1) -> Piece.WhiteQueen
   private val bkCorner = Square(File.H, Rank.R8) -> Piece.BlackKing
   private val bkCenter = Square(File.D, Rank.R4) -> Piece.BlackKing