feat(official-bots): standalone self-play + one-shot dataset builder for NNUE training
Build & Test (NowChessSystems) TeamCity build finished
Build & Test (NowChessSystems) TeamCity build finished
Add an easy local data pipeline feeding GPU training on Colab.

- SelfPlayMain: standalone NNUEBot self-play (no microservices) writing FENs for labeling; randomised openings for game diversity, sequential due to the shared EvaluationNNUE accumulator. Exposed via the `selfPlay` Gradle task and selfplay.sh.
- NNUEBot: optional fixedMoveTimeMs so self-play runs fast (default unchanged).
- NbaiLoader: honor `-Dnnue.weights=<path>` to load weights from a file before falling back to the bundled resource.
- build_dataset.py / dataset.sh: one command builds the entire dataset (Lichess eval-DB backbone + self-play + tactical + random filler), dedups, balances the eval histogram, writes append-only zstd shards + manifest, and rclone-pushes to Drive.
- train.py: NNUEDataset reads a directory of .jsonl.zst shards (streaming) in addition to a single file.
- NNUETraining.ipynb: clone to ephemeral /content, sync shards from Drive (cache-aware), train on the shards dir; removed Colab generation/upload steps.
- Concept + implementation plan docs.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@ object NNUEBot:
|
||||
difficulty: BotDifficulty,
|
||||
rules: RuleSet = DefaultRules,
|
||||
book: Option[PolyglotBook] = None,
|
||||
fixedMoveTimeMs: Option[Long] = None,
|
||||
): Bot =
|
||||
val search = AlphaBetaSearch(rules, weights = EvaluationNNUE)
|
||||
context =>
|
||||
@@ -28,7 +29,8 @@ object NNUEBot:
|
||||
else
|
||||
val scored = batchEvaluateRoot(rules, context, moves)
|
||||
val bestMove = scored.maxBy(_._2)._1
|
||||
search.bestMoveWithTime(context, allocateTime(scored), blockedMoves, scored.toMap).orElse(Some(bestMove))
|
||||
val budget = fixedMoveTimeMs.getOrElse(allocateTime(scored))
|
||||
search.bestMoveWithTime(context, budget, blockedMoves, scored.toMap).orElse(Some(bestMove))
|
||||
}
|
||||
|
||||
private def batchEvaluateRoot(rules: RuleSet, context: GameContext, moves: List[Move]): List[(Move, Int)] =
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package de.nowchess.bot.bots.nnue
|
||||
|
||||
import java.io.InputStream
|
||||
import java.nio.file.{Files, Path}
|
||||
import java.nio.{ByteBuffer, ByteOrder}
|
||||
import java.nio.charset.StandardCharsets
|
||||
|
||||
@@ -17,13 +18,28 @@ object NbaiLoader:
|
||||
val weights = descs.map(_ => readLayerWeights(buf))
|
||||
NbaiModel(metadata, descs, weights)
|
||||
|
||||
/** Loads the default NNUE model.
  *
  * Resolution order:
  *   1. the file named by the `nnue.weights` system property, if it points at a regular file;
  *   1. `/nnue_weights.nbai` on the classpath;
  *   1. migration of the legacy `/nnue_weights.bin` via `NbaiMigrator.migrateFromBin()`.
  *
  * An override that is set but does not name a regular file is ignored silently and resolution continues with the
  * classpath resource.
  *
  * @return
  *   the loaded (or migrated) model
  */
def loadDefault(): NbaiModel =
  overrideModel().getOrElse {
    // getResourceAsStream returns null when the resource is absent; Option(...) maps that to None.
    Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
      case Some(s) =>
        try load(s)
        finally s.close()
      case None => NbaiMigrator.migrateFromBin()
  }
|
||||
|
||||
/** Loads the model named by the `nnue.weights` system property, if any.
  *
  * @return
  *   `Some(model)` when the property is set and names a regular file; `None` when the property is unset or does not
  *   name a regular file. Failures while opening or parsing an existing file are not caught here and propagate to the
  *   caller.
  */
private def overrideModel(): Option[NbaiModel] =
  sys.props
    .get("nnue.weights")
    .map(Path.of(_))
    .filter(Files.isRegularFile(_))
    // Using.resource closes the stream on every path and keeps the primary exception if close() also fails.
    .map(path => scala.util.Using.resource(Files.newInputStream(path))(load(_)))
|
||||
|
||||
private def checkHeader(buf: ByteBuffer): Unit =
|
||||
val magic = buf.getInt()
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package de.nowchess.bot.selfplay
|
||||
|
||||
import de.nowchess.api.game.GameContext
|
||||
import de.nowchess.api.move.Move
|
||||
import de.nowchess.api.rules.RuleSet
|
||||
import de.nowchess.bot.BotDifficulty
|
||||
import de.nowchess.bot.bots.NNUEBot
|
||||
import de.nowchess.io.fen.FenExporter
|
||||
import de.nowchess.rules.sets.DefaultRules
|
||||
|
||||
import java.io.{BufferedWriter, FileWriter}
|
||||
import java.nio.file.{Files, Path}
|
||||
import scala.collection.mutable
|
||||
import scala.util.Random
|
||||
|
||||
/** Standalone self-play harness. Runs NNUEBot against itself from randomised openings and writes the visited positions
  * as one FEN per line — the input format expected by the Python labeler. No microservices.
  *
  * Games run sequentially because EvaluationNNUE holds a shared accumulator; the small per-move time budget keeps
  * throughput high. Stockfish relabels every position later, so shallow self-play search is sufficient.
  *
  * Recognised arguments (each takes one value): `--games`, `--out`, `--weights`, `--move-ms`, `--random-plies`,
  * `--max-plies`, `--seed`. Unknown arguments are reported and skipped.
  */
object SelfPlayMain:

  /** Run configuration, filled from the command line by [[parse]].
    *
    * @param games
    *   number of self-play games to run
    * @param out
    *   output file path; truncated on start, one FEN per line
    * @param weights
    *   optional weights file, forwarded through the `nnue.weights` system property
    * @param moveTimeMs
    *   fixed per-move time budget handed to NNUEBot
    * @param randomPlies
    *   number of uniformly random legal plies played before the bot takes over
    * @param maxPlies
    *   ply cap per game; the random opening plies count towards it
    * @param seed
    *   seed for the opening randomiser; time-based unless given explicitly
    */
  private case class Config(
      games: Int = 500,
      out: String = "modules/official-bots/python/data/selfplay.txt",
      weights: Option[String] = None,
      moveTimeMs: Long = 50L,
      randomPlies: Int = 8,
      maxPlies: Int = 200,
      seed: Long = System.nanoTime(),
  )

  /** Entry point: parses `args`, runs the configured number of games and writes every new position to the output
    * file. Progress is printed (and the file flushed) every 25 games.
    */
  def main(args: Array[String]): Unit =
    val config = parse(args.toList, Config())
    config.weights.foreach { weightsPath =>
      // The property is still set as before; this only makes a mistyped path visible instead of silent.
      if !Files.isRegularFile(Path.of(weightsPath)) then
        System.err.println(s"Warning: --weights $weightsPath is not a regular file; default weights may be used instead")
      // Must be set before the bot is created so weight loading can see it.
      System.setProperty("nnue.weights", weightsPath)
    }
    // The default seed is time-based, so report it to make the run reproducible via --seed.
    println(s"seed=${config.seed}")

    val rules = DefaultRules
    val bot   = NNUEBot(BotDifficulty.Hard, rules, fixedMoveTimeMs = Some(config.moveTimeMs))
    val rng   = new Random(config.seed)
    val seen  = mutable.HashSet.empty[String]

    val outPath = Path.of(config.out)
    // getParent is null for a root path; skip directory creation in that case instead of throwing.
    Option(outPath.toAbsolutePath.getParent).foreach(Files.createDirectories(_))
    // Files.newBufferedWriter writes UTF-8 and truncates an existing file.
    val writer = Files.newBufferedWriter(outPath)
    try
      (1 to config.games).foreach { game =>
        playGame(rules, bot, rng, config, seen, writer)
        if game % 25 == 0 then
          writer.flush()
          println(s"games=$game/${config.games} positions=${seen.size}")
      }
    finally writer.close()
    println(s"Done. ${seen.size} unique positions -> ${config.out}")

  /** Plays one game: a random opening of `config.randomPlies` plies, then bot moves until the position is terminal, the
    * bot returns no move, or `config.maxPlies` is reached. Every position reached by a bot move is passed to
    * [[record]]; the opening positions themselves are not recorded.
    */
  private def playGame(
      rules: RuleSet,
      bot: GameContext => Option[Move],
      rng: Random,
      config: Config,
      seen: mutable.HashSet[String],
      writer: BufferedWriter,
  ): Unit =
    randomOpening(rules, rng, config.randomPlies, GameContext.initial) match
      case None => ()
      case Some(start) =>
        var ctx   = start
        var plies = config.randomPlies
        var live  = true
        while live && plies < config.maxPlies do
          if isTerminal(rules, ctx) then live = false
          else
            bot(ctx) match
              case None => live = false
              case Some(move) =>
                ctx = rules.applyMove(ctx)(move)
                plies += 1
                record(rules, ctx, seen, writer)

  /** Applies `plies` uniformly random legal moves starting from `start`.
    *
    * @return
    *   the resulting position, or `None` if a position with no legal moves is reached first
    */
  @scala.annotation.tailrec
  private def randomOpening(rules: RuleSet, rng: Random, plies: Int, start: GameContext): Option[GameContext] =
    if plies <= 0 then Some(start)
    else
      val legal = rules.allLegalMoves(start)
      if legal.isEmpty then None
      else randomOpening(rules, rng, plies - 1, rules.applyMove(start)(legal(rng.nextInt(legal.size))))

  /** Writes the FEN of `ctx` as one line unless `rules.isCheck` holds, the position is terminal, or the same FEN has
    * already been written during this run (tracked in `seen`).
    */
  private def record(rules: RuleSet, ctx: GameContext, seen: mutable.HashSet[String], writer: BufferedWriter): Unit =
    if !rules.isCheck(ctx) && !isTerminal(rules, ctx) then
      val fen = FenExporter.gameContextToFen(ctx)
      // HashSet.add returns true only for a FEN not seen before.
      if seen.add(fen) then
        writer.write(fen)
        writer.newLine()

  /** True when no legal move exists or the position is drawn by insufficient material, the fifty-move rule, or
    * threefold repetition.
    */
  private def isTerminal(rules: RuleSet, ctx: GameContext): Boolean =
    rules.allLegalMoves(ctx).isEmpty ||
      rules.isInsufficientMaterial(ctx) ||
      rules.isFiftyMoveRule(ctx) ||
      rules.isThreefoldRepetition(ctx)

  /** Parses an Int flag value; the failure message names the offending flag. */
  private def intArg(flag: String, raw: String): Int =
    raw.toIntOption.getOrElse(throw new NumberFormatException(s"$flag expects an integer, got: $raw"))

  /** Parses a Long flag value; the failure message names the offending flag. */
  private def longArg(flag: String, raw: String): Long =
    raw.toLongOption.getOrElse(throw new NumberFormatException(s"$flag expects an integer, got: $raw"))

  /** Folds the argument list into `acc`, one `--flag value` pair at a time.
    *
    * @throws java.lang.NumberFormatException
    *   if a numeric flag is given a non-numeric value
    */
  @scala.annotation.tailrec
  private def parse(args: List[String], acc: Config): Config = args match
    case "--games" :: v :: rest        => parse(rest, acc.copy(games = intArg("--games", v)))
    case "--out" :: v :: rest          => parse(rest, acc.copy(out = v))
    case "--weights" :: v :: rest      => parse(rest, acc.copy(weights = Some(v)))
    case "--move-ms" :: v :: rest      => parse(rest, acc.copy(moveTimeMs = longArg("--move-ms", v)))
    case "--random-plies" :: v :: rest => parse(rest, acc.copy(randomPlies = intArg("--random-plies", v)))
    case "--max-plies" :: v :: rest    => parse(rest, acc.copy(maxPlies = intArg("--max-plies", v)))
    case "--seed" :: v :: rest         => parse(rest, acc.copy(seed = longArg("--seed", v)))
    case Nil                           => acc
    case unknown :: rest =>
      println(s"Ignoring unknown arg: $unknown")
      parse(rest, acc)
|
||||
Reference in New Issue
Block a user