refactor(bot): split NNUE into shared weights and per-thread evaluator
Prerequisite for parallel search. NNUE held all state on one instance: the immutable transposed L1 weight matrix alongside the mutable accumulator stack, scratch buffers and eval cache. That made concurrent eval calls corrupt shared buffers. Extract the read-only parameters into NNUEWeights (heavy to build, safe to share). NNUE now owns only per-instance mutable buffers and references the shared weights, so many evaluators can run in parallel over one weight matrix without duplicating it. Single-instance behaviour is unchanged — EvaluationNNUE still uses one evaluator, so play is identical. Also applies scalafmt alignment to the MopUp files. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -32,7 +32,7 @@ object MopUp:
|
||||
PROXIMITY_WEIGHT * (14 - kingDistance(winnerKing, loserKing))).getOrElse(0)
|
||||
|
||||
private def loneKingColor(context: GameContext): Option[Color] =
|
||||
val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
|
||||
val nonKing = context.board.pieces.values.filter(_.pieceType != PieceType.King)
|
||||
val whiteHasOther = nonKing.exists(_.color == Color.White)
|
||||
val blackHasOther = nonKing.exists(_.color == Color.Black)
|
||||
if whiteHasOther == blackHasOther then None
|
||||
@@ -48,7 +48,8 @@ object MopUp:
|
||||
case PieceType.Rook => 500
|
||||
case PieceType.Bishop => 330
|
||||
case PieceType.Knight => 320
|
||||
case _ => 0)
|
||||
case _ => 0
|
||||
)
|
||||
}
|
||||
|
||||
private def centerDistance(sq: Square): Int =
|
||||
|
||||
@@ -4,20 +4,20 @@ import de.nowchess.api.board.{Board, Color, Piece, PieceType, Square}
|
||||
import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.api.move.{Move, MoveType, PromotionPiece}
|
||||
|
||||
class NNUE(model: NbaiModel):
|
||||
object NNUE:

  /** Wraps an already-built [[NNUEWeights]] in a new evaluator instance. */
  def apply(weights: NNUEWeights): NNUE = new NNUE(weights)

  /** Derives a fresh [[NNUEWeights]] from `model` and wraps it in a new evaluator instance. */
  def apply(model: NbaiModel): NNUE = apply(NNUEWeights(model))
|
||||
|
||||
/** Per-thread NNUE evaluator: owns the mutable accumulator stack, scratch buffers and eval cache, while sharing the
|
||||
* read-only [[NNUEWeights]]. Construct one instance per search thread (cheap — only buffer allocation); they may all
|
||||
* share a single weights instance.
|
||||
*/
|
||||
class NNUE(weights: NNUEWeights):
|
||||
|
||||
import weights.{accSize, l1WeightsT, model, HALF_SIZE}
|
||||
|
||||
private val HALF_SIZE = 49152 // 64 king-squares × 12 piece-types × 64 piece-squares
|
||||
private val featureSize = model.layers(0).inputSize // 98304 (= HALF_SIZE * 2) for king-relative
|
||||
private val accSize = model.layers(0).outputSize
|
||||
private val validateAccum = sys.env.contains("NNUE_VALIDATE")
|
||||
|
||||
// Column-major L1 weights: l1WeightsT(featureIdx * accSize + outputIdx)
|
||||
private val l1WeightsT: Array[Float] =
|
||||
val w = model.weights(0).weights
|
||||
val t = new Array[Float](featureSize * accSize)
|
||||
for j <- 0 until featureSize; i <- 0 until accSize do t(j * accSize + i) = w(i * featureSize + j)
|
||||
t
|
||||
|
||||
// ── Accumulator stack ────────────────────────────────────────────────────
|
||||
|
||||
private val MAX_PLY = 128
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
package de.nowchess.bot.bots.nnue
|
||||
|
||||
/** Immutable, shareable NNUE parameters.
|
||||
*
|
||||
* Heavy to build (transposes the L1 weight matrix once, ~98304 × accSize floats) but read-only thereafter, so a single
|
||||
* instance is safely shared across many per-thread [[NNUE]] evaluators. Holds no accumulator or scratch state — those
|
||||
* live on each [[NNUE]] instance — which is what makes parallel search (independent evaluators sharing these weights)
|
||||
* possible without duplicating the weight matrix.
|
||||
*/
|
||||
class NNUEWeights(val model: NbaiModel):

  /** 64 king-squares × 12 piece-types × 64 piece-squares. */
  val HALF_SIZE: Int = 49152

  /** Input width of the first layer: 98304 (= HALF_SIZE * 2) for king-relative features. */
  val featureSize: Int = model.layers(0).inputSize

  /** Output width of the first layer. */
  val accSize: Int = model.layers(0).outputSize

  /** First-layer weights in column-major order: `l1WeightsT(featureIdx * accSize + outputIdx)`.
    *
    * Built once at construction by transposing `model.weights(0).weights`, which is read as
    * `source(outputIdx * featureSize + featureIdx)`.
    */
  val l1WeightsT: Array[Float] =
    val source     = model.weights(0).weights
    val transposed = new Array[Float](featureSize * accSize)
    var feature    = 0
    while feature < featureSize do
      // All outputs of one feature land in a contiguous run starting at `base`.
      val base   = feature * accSize
      var output = 0
      while output < accSize do
        transposed(base + output) = source(output * featureSize + feature)
        output += 1
      feature += 1
    transposed
|
||||
@@ -11,8 +11,8 @@ class MopUpTest extends AnyFunSuite with Matchers:
|
||||
private def ctx(turn: Color, pieces: (Square, Piece)*): GameContext =
|
||||
GameContext.initial.withBoard(Board(pieces.toMap)).withTurn(turn)
|
||||
|
||||
private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
|
||||
private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
|
||||
private val wk = Square(File.E, Rank.R1) -> Piece.WhiteKing
|
||||
private val wq = Square(File.D, Rank.R1) -> Piece.WhiteQueen
|
||||
private val bkCorner = Square(File.H, Rank.R8) -> Piece.BlackKing
|
||||
private val bkCenter = Square(File.D, Rank.R4) -> Piece.BlackKing
|
||||
|
||||
|
||||
Reference in New Issue
Block a user