refactor(bot): split NNUE into shared weights and per-thread evaluator

Prerequisite for parallel search. NNUE held all state on one instance:
the immutable transposed L1 weight matrix alongside the mutable
accumulator stack, scratch buffers and eval cache. That made concurrent
eval calls corrupt shared buffers.

Extract the read-only parameters into NNUEWeights (heavy to build, safe
to share). NNUE now owns only per-instance mutable buffers and references
the shared weights, so many evaluators can run in parallel over one weight
matrix without duplicating it. Single-instance behaviour is unchanged —
EvaluationNNUE still uses one evaluator, so play is identical.

Also applies scalafmt alignment to the MopUp files.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 12:12:26 +02:00
parent 7136803c7e
commit b72e8ec017
4 changed files with 37 additions and 15 deletions
@@ -32,7 +32,7 @@ object MopUp:
PROXIMITY_WEIGHT * (14 - kingDistance(winnerKing, loserKing))).getOrElse(0)
private def loneKingColor(context: GameContext): Option[Color] =
val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
val whiteHasOther = nonKing.exists(_.color == Color.White)
val blackHasOther = nonKing.exists(_.color == Color.Black)
if whiteHasOther == blackHasOther then None
@@ -48,7 +48,8 @@ object MopUp:
case PieceType.Rook => 500
case PieceType.Bishop => 330
case PieceType.Knight => 320
case _ => 0)
case _ => 0
)
}
private def centerDistance(sq: Square): Int =
@@ -4,20 +4,20 @@ import de.nowchess.api.board.{Board, Color, Piece, PieceType, Square}
import de.nowchess.api.game.GameContext
import de.nowchess.api.move.{Move, MoveType, PromotionPiece}
class NNUE(model: NbaiModel):
object NNUE:

  /** Creates an evaluator for `model`, constructing a new [[NNUEWeights]] from it first.
    *
    * Use the [[NNUEWeights]] overload instead when several evaluators should share one weights instance.
    */
  def apply(model: NbaiModel): NNUE = apply(NNUEWeights(model))

  /** Creates an evaluator that references the given, already constructed `weights`. */
  def apply(weights: NNUEWeights): NNUE = new NNUE(weights)
/** Per-thread NNUE evaluator: owns the mutable accumulator stack, scratch buffers and eval cache, while sharing the
* read-only [[NNUEWeights]]. Construct one instance per search thread (cheap — only buffer allocation); they may all
* share a single weights instance.
*/
class NNUE(weights: NNUEWeights):
import weights.{accSize, l1WeightsT, model, HALF_SIZE}
private val HALF_SIZE = 49152 // 64 king-squares × 12 piece-types × 64 piece-squares
private val featureSize = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
private val accSize = model.layers(0).outputSize
private val validateAccum = sys.env.contains("NNUE_VALIDATE")
// Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
private val l1WeightsT: Array[Float] =
val w = model.weights(0).weights
val t = new Array[Float](featureSize * accSize)
for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
t
// ── Accumulator stack ────────────────────────────────────────────────────
private val MAX_PLY = 128
@@ -0,0 +1,21 @@
package de.nowchess.bot.bots.nnue
/** Immutable, shareable NNUE parameters.
*
* Heavy to build (transposes the L1 weight matrix once, ~98304 × accSize floats) but read-only thereafter, so a single
* instance is safely shared across many per-thread [[NNUE]] evaluators. Holds no accumulator or scratch state — those
* live on each [[NNUE]] instance — which is what makes parallel search (independent evaluators sharing these weights)
* possible without duplicating the weight matrix.
*/
class NNUEWeights(val model: NbaiModel):

  val HALF_SIZE: Int   = 49152                       // 64 king-squares × 12 piece-types × 64 piece-squares
  val featureSize: Int = model.layers(0).inputSize  // 98304 (= HALF_SIZE * 2) for king-relative
  val accSize: Int     = model.layers(0).outputSize

  // Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx).
  // The source matrix is read as source(outputIdx * featureSize + featureIdx), so this is its transpose,
  // computed once at construction.
  val l1WeightsT: Array[Float] =
    val source     = model.weights(0).weights
    val transposed = new Array[Float](featureSize * accSize)
    var featureIdx = 0
    while featureIdx < featureSize do
      // All outputs of one feature are stored contiguously, starting at this offset.
      val columnStart = featureIdx * accSize
      var outputIdx   = 0
      while outputIdx < accSize do
        transposed(columnStart + outputIdx) = source(outputIdx * featureSize + featureIdx)
        outputIdx += 1
      featureIdx += 1
    transposed
@@ -11,8 +11,8 @@ class MopUpTest extends AnyFunSuite with Matchers:
/** Test fixture: starts from `GameContext.initial`, replaces the board with one holding exactly `pieces`, and sets
  * the side to move to `turn`.
  */
private def ctx(turn: Color, pieces: (Square, Piece)*): GameContext =
  val board     = Board(pieces.toMap)
  val withBoard = GameContext.initial.withBoard(board)
  withBoard.withTurn(turn)
private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
private val bkCorner = Square(File.H, Rank.R8) -> Piece.BlackKing
private val bkCenter = Square(File.D, Rank.R4) -> Piece.BlackKing