feat(bot): add Lazy SMP parallel search for the NNUE bot
Adds optional multithreaded search behind a thread count that defaults to 1, so the live bot's play is unchanged until explicitly configured.

- ParallelSearch runs N AlphaBetaSearch workers over one shared, already-lock-protected TranspositionTable. Each worker has its own NNUE evaluator (independent accumulator) and ordering state; helpers only deepen the shared TT, and the main worker's move is returned.
- AlphaBetaSearch gains bestMoveWithTimeSharedTt: the coordinator clears the shared TT once before launching workers, so helpers must not clear it.
- EvaluationNNUE.freshEvaluator builds independent evaluators sharing the immutable weights (one per thread); the singleton still backs the default single-instance path.
- NNUEBot uses ParallelSearch with NNUE_SEARCH_THREADS (default 1). numThreads <= 1 takes the single-worker clearing path, identical to the previous sequential search.

Strength can be validated by self-play (threads N vs 1) before promoting the default.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -5,19 +5,23 @@ import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.api.move.Move
|
||||
import de.nowchess.api.rules.RuleSet
|
||||
import de.nowchess.bot.bots.nnue.EvaluationNNUE
|
||||
import de.nowchess.bot.logic.AlphaBetaSearch
|
||||
import de.nowchess.bot.logic.{ParallelSearch, TranspositionTable}
|
||||
import de.nowchess.bot.util.{PolyglotBook, ZobristHash}
|
||||
import de.nowchess.bot.{BotDifficulty, BotMoveRepetition}
|
||||
import de.nowchess.rules.sets.DefaultRules
|
||||
|
||||
object NNUEBot:
|
||||
/** Search-thread count taken from the `NNUE_SEARCH_THREADS` environment variable.
  *
  * Falls back to 1 when the variable is unset, is not parseable as an `Int`, or is below 1.
  */
private def defaultThreads: Int =
  val configured =
    for
      raw    <- sys.env.get("NNUE_SEARCH_THREADS")
      parsed <- raw.toIntOption
      if parsed >= 1
    yield parsed
  configured.getOrElse(1)
|
||||
|
||||
def apply(
|
||||
difficulty: BotDifficulty,
|
||||
rules: RuleSet = DefaultRules,
|
||||
book: Option[PolyglotBook] = None,
|
||||
fixedMoveTimeMs: Option[Long] = None,
|
||||
searchThreads: Int = defaultThreads,
|
||||
): Bot =
|
||||
val search = AlphaBetaSearch(rules, weights = EvaluationNNUE)
|
||||
val search = ParallelSearch(rules, TranspositionTable(), () => EvaluationNNUE.freshEvaluator(), searchThreads)
|
||||
context =>
|
||||
val blockedMoves = BotMoveRepetition.blockedMoves(context)
|
||||
book
|
||||
|
||||
+30
-3
@@ -4,9 +4,11 @@ import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.api.move.Move
|
||||
import de.nowchess.bot.ai.Evaluation
|
||||
|
||||
object EvaluationNNUE extends Evaluation:
|
||||
|
||||
private val nnue = NNUE(NbaiLoader.loadDefault())
|
||||
/** One independent NNUE evaluator: wraps its own [[NNUE]] (own accumulator stack, scratch buffers and eval cache) plus
|
||||
* the endgame mop-up correction. Independent instances may run concurrently as long as they share only the read-only
|
||||
* [[NNUEWeights]].
|
||||
*/
|
||||
final class NNUEEvaluator(nnue: NNUE) extends Evaluation:
|
||||
|
||||
val CHECKMATE_SCORE: Int = 10_000_000
|
||||
val DRAW_SCORE: Int = 0
|
||||
@@ -29,3 +31,28 @@ object EvaluationNNUE extends Evaluation:
|
||||
|
||||
override def evaluateAccumulator(ply: Int, context: GameContext, hash: Long): Int =
|
||||
nnue.evaluateAtPlyWithValidation(ply, context.turn, hash, context.board) + MopUp.score(context)
|
||||
|
||||
/** Singleton NNUE evaluation plus a factory for additional evaluators.
  *
  * The default network is loaded once when this object is initialised. Every [[Evaluation]] member below forwards to
  * one evaluator built from those weights; [[freshEvaluator]] builds further evaluators over the same weights.
  */
object EvaluationNNUE extends Evaluation:

  // Loaded once; handed to every NNUE instance created by this object.
  private val sharedWeights = NNUEWeights(NbaiLoader.loadDefault())

  // The evaluator that backs all forwarding members of this object.
  private val singleton = NNUEEvaluator(NNUE(sharedWeights))

  /** Creates a new evaluator that wraps a new [[NNUE]] built over the weights loaded by this object. Intended use,
    * per the original documentation, is one evaluator per search thread.
    */
  def freshEvaluator(): Evaluation =
    val network = NNUE(sharedWeights)
    NNUEEvaluator(network)

  // Score constants are taken from the singleton evaluator so both always agree.
  val CHECKMATE_SCORE: Int = singleton.CHECKMATE_SCORE
  val DRAW_SCORE: Int      = singleton.DRAW_SCORE

  /** Forwards to the singleton evaluator. */
  def evaluate(context: GameContext): Int =
    singleton.evaluate(context)

  /** Forwards to the singleton evaluator. */
  override def initAccumulator(context: GameContext): Unit =
    singleton.initAccumulator(context)

  /** Forwards to the singleton evaluator. */
  override def copyAccumulator(parentPly: Int, childPly: Int): Unit =
    singleton.copyAccumulator(parentPly, childPly)

  /** Forwards to the singleton evaluator. */
  override def pushAccumulator(childPly: Int, move: Move, parent: GameContext, child: GameContext): Unit =
    singleton.pushAccumulator(childPly, move, parent, child)

  /** Forwards to the singleton evaluator. */
  override def evaluateAccumulator(ply: Int, context: GameContext, hash: Long): Int =
    singleton.evaluateAccumulator(ply, context, hash)
|
||||
|
||||
@@ -95,7 +95,7 @@ final class AlphaBetaSearch(
|
||||
bestMoveWithTime(context, timeBudgetMs, Set.empty)
|
||||
|
||||
def bestMoveWithTime(context: GameContext, timeBudgetMs: Long, excludedRootMoves: Set[Move]): Option[Move] =
|
||||
doTimedSearch(context, timeBudgetMs, excludedRootMoves, Map.empty)
|
||||
doTimedSearch(context, timeBudgetMs, excludedRootMoves, Map.empty, clearTt = true)
|
||||
|
||||
def bestMoveWithTime(
|
||||
context: GameContext,
|
||||
@@ -103,15 +103,27 @@ final class AlphaBetaSearch(
|
||||
excludedRootMoves: Set[Move],
|
||||
hints: Map[Move, Int],
|
||||
): Option[Move] =
|
||||
doTimedSearch(context, timeBudgetMs, excludedRootMoves, hints)
|
||||
doTimedSearch(context, timeBudgetMs, excludedRootMoves, hints, clearTt = true)
|
||||
|
||||
/** Timed search for a worker whose transposition table is shared with other workers (Lazy SMP).
  *
  * Identical to the four-argument `bestMoveWithTime` except that the table is not cleared here: the caller must
  * clear it once before starting all workers.
  *
  * @param context           position to search
  * @param timeBudgetMs      time budget in milliseconds
  * @param excludedRootMoves root moves passed through to the timed search as excluded
  * @param hints             per-move hints passed through to the timed search
  * @return the move chosen by the timed search, if any
  */
def bestMoveWithTimeSharedTt(
    context: GameContext,
    timeBudgetMs: Long,
    excludedRootMoves: Set[Move],
    hints: Map[Move, Int],
): Option[Move] =
  doTimedSearch(
    context = context,
    timeBudgetMs = timeBudgetMs,
    excludedRootMoves = excludedRootMoves,
    hints = hints,
    clearTt = false,
  )
|
||||
|
||||
private def doTimedSearch(
|
||||
context: GameContext,
|
||||
timeBudgetMs: Long,
|
||||
excludedRootMoves: Set[Move],
|
||||
hints: Map[Move, Int],
|
||||
clearTt: Boolean,
|
||||
): Option[Move] =
|
||||
tt.clear()
|
||||
if clearTt then tt.clear()
|
||||
ordering.clear()
|
||||
weights.initAccumulator(context)
|
||||
timeStartMs.set(System.currentTimeMillis)
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package de.nowchess.bot.logic
|
||||
|
||||
import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.api.move.Move
|
||||
import de.nowchess.api.rules.RuleSet
|
||||
import de.nowchess.bot.ai.Evaluation
|
||||
import de.nowchess.rules.sets.DefaultRules
|
||||
|
||||
import java.util.concurrent.{Callable, Executors}
|
||||
import scala.jdk.CollectionConverters.*
|
||||
|
||||
/** Lazy SMP search coordinator.
  *
  * Builds `max(1, numThreads)` [[AlphaBetaSearch]] workers up front. All workers receive the same `rules` and the same
  * `tt` instance; each worker gets its own evaluator, obtained by calling `evalFactory` once per worker.
  *
  * With a single worker, a search is forwarded to the worker's ordinary `bestMoveWithTime`, which clears the table
  * itself. With several workers, the coordinator clears the table once, runs every worker for the same time budget
  * through `bestMoveWithTimeSharedTt`, waits for all of them to finish, and returns the first (main) worker's move.
  * The other workers' results are discarded; they exist only to fill the shared table.
  *
  * NOTE(review): sharing `tt` between threads assumes [[TranspositionTable]] is safe for concurrent use — confirm.
  *
  * @param rules       rule set handed to every worker
  * @param tt          transposition table shared by all workers
  * @param evalFactory called once per worker to create that worker's evaluator
  * @param numThreads  requested worker count; values below 1 are treated as 1
  */
final class ParallelSearch(
    rules: RuleSet = DefaultRules,
    tt: TranspositionTable = TranspositionTable(),
    evalFactory: () => Evaluation,
    numThreads: Int = 1,
):

  private val threadCount = math.max(1, numThreads)
  private val workers     = Vector.fill(threadCount)(AlphaBetaSearch(rules, tt, evalFactory()))

  /** Searches `context` for at most `timeBudgetMs` milliseconds and returns the main worker's move.
    *
    * A failure inside the main worker propagates as the original exception, whether one or several workers are used.
    *
    * @param context           position to search
    * @param timeBudgetMs      time budget in milliseconds, applied to every worker
    * @param excludedRootMoves root moves passed through to each worker as excluded
    * @param hints             per-move hints passed through to each worker
    * @return the main worker's chosen move, if any
    */
  def bestMoveWithTime(
      context: GameContext,
      timeBudgetMs: Long,
      excludedRootMoves: Set[Move] = Set.empty,
      hints: Map[Move, Int] = Map.empty,
  ): Option[Move] =
    if threadCount == 1 then workers.head.bestMoveWithTime(context, timeBudgetMs, excludedRootMoves, hints)
    else runParallel(context, timeBudgetMs, excludedRootMoves, hints)

  /** Runs all workers concurrently on a temporary thread pool and returns the first worker's result. */
  private def runParallel(
      context: GameContext,
      timeBudgetMs: Long,
      excludedRootMoves: Set[Move],
      hints: Map[Move, Int],
  ): Option[Move] =
    // Workers use the non-clearing entry point, so the shared table is cleared exactly once here.
    tt.clear()
    val pool = Executors.newFixedThreadPool(threadCount)
    try
      val tasks = workers.map { worker =>
        new Callable[Option[Move]]:
          def call(): Option[Move] =
            worker.bestMoveWithTimeSharedTt(context, timeBudgetMs, excludedRootMoves, hints)
      }
      // invokeAll blocks until every task has completed and returns futures in task order; index 0 is the main worker.
      pool.invokeAll(tasks.asJava).get(0).get()
    catch
      // Surface the main worker's own failure instead of the executor wrapper, so callers see the same exception
      // type as on the single-worker path.
      case wrapped: java.util.concurrent.ExecutionException if wrapped.getCause != null =>
        throw wrapped.getCause
    finally pool.shutdownNow()
|
||||
@@ -0,0 +1,28 @@
|
||||
package de.nowchess.bot
|
||||
|
||||
import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.bot.bots.classic.EvaluationClassic
|
||||
import de.nowchess.bot.logic.{ParallelSearch, TranspositionTable}
|
||||
import de.nowchess.rules.sets.DefaultRules
|
||||
import org.scalatest.funsuite.AnyFunSuite
|
||||
import org.scalatest.matchers.should.Matchers
|
||||
|
||||
/** Smoke tests for [[ParallelSearch]]: each case searches the initial position with a short time budget and checks
  * that a move comes back.
  *
  * NOTE(review): every worker receives the same `EvaluationClassic` object from the factory — confirm it is safe to
  * share between threads.
  */
class ParallelSearchTest extends AnyFunSuite with Matchers:

  /** Coordinator over a fresh transposition table with the requested worker count. */
  private def search(threads: Int): ParallelSearch =
    ParallelSearch(DefaultRules, TranspositionTable(), () => EvaluationClassic, threads)

  /** Runs one search on the initial position and asserts that the result is a legal opening move. */
  private def assertLegalInitialMove(engine: ParallelSearch): Unit =
    val chosen = engine.bestMoveWithTime(GameContext.initial, 200L)
    chosen should not be None
    DefaultRules.allLegalMoves(GameContext.initial) should contain(chosen.get)

  test("single-threaded coordinator returns a legal move on the initial position"):
    assertLegalInitialMove(search(1))

  test("multi-threaded Lazy SMP returns a legal move and does not crash under concurrency"):
    // The same coordinator is reused across searches.
    val parallel = search(4)
    (1 to 5).foreach(_ => assertLegalInitialMove(parallel))

  test("numThreads below one is clamped to a single worker"):
    val result = search(0).bestMoveWithTime(GameContext.initial, 100L)
    result should not be None
|
||||
Reference in New Issue
Block a user