feat(official-bots): standalone self-play + one-shot dataset builder for NNUE training
Build & Test (NowChessSystems) TeamCity build finished

Add an easy local data pipeline feeding GPU training on Colab.

- SelfPlayMain: standalone NNUEBot self-play (no microservices) writing FENs
  for labeling; randomised openings for game diversity, sequential due to the
  shared EvaluationNNUE accumulator. Exposed via the `selfPlay` Gradle task and
  selfplay.sh.
- NNUEBot: optional fixedMoveTimeMs so self-play runs fast (default unchanged).
- NbaiLoader: honor `-Dnnue.weights=<path>` to load weights from a file before
  falling back to the bundled resource.
- build_dataset.py / dataset.sh: one command builds the entire dataset
  (Lichess eval-DB backbone + self-play + tactical + random filler), dedups,
  balances the eval histogram, writes append-only zstd shards + manifest, and
  rclone-pushes to Drive.
- train.py: NNUEDataset reads a directory of .jsonl.zst shards (streaming) in
  addition to a single file.
- NNUETraining.ipynb: clone to ephemeral /content, sync shards from Drive
  (cache-aware), train on the shards dir; removed Colab generation/upload steps.
- Concept + implementation plan docs.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Janis Eccarius
2026-06-24 22:04:22 +02:00
parent c8cbcdca3b
commit 1c80abdb8a
11 changed files with 909 additions and 198 deletions
@@ -15,6 +15,7 @@ object NNUEBot:
difficulty: BotDifficulty,
rules: RuleSet = DefaultRules,
book: Option[PolyglotBook] = None,
fixedMoveTimeMs: Option[Long] = None,
): Bot =
val search = AlphaBetaSearch(rules, weights = EvaluationNNUE)
context =>
@@ -28,7 +29,8 @@ object NNUEBot:
else
val scored = batchEvaluateRoot(rules, context, moves)
val bestMove = scored.maxBy(_._2)._1
search.bestMoveWithTime(context, allocateTime(scored), blockedMoves, scored.toMap).orElse(Some(bestMove))
val budget = fixedMoveTimeMs.getOrElse(allocateTime(scored))
search.bestMoveWithTime(context, budget, blockedMoves, scored.toMap).orElse(Some(bestMove))
}
private def batchEvaluateRoot(rules: RuleSet, context: GameContext, moves: List[Move]): List[(Move, Int)] =
@@ -1,6 +1,7 @@
package de.nowchess.bot.bots.nnue
import java.io.InputStream
import java.nio.file.{Files, Path}
import java.nio.{ByteBuffer, ByteOrder}
import java.nio.charset.StandardCharsets
@@ -17,13 +18,28 @@ object NbaiLoader:
val weights = descs.map(_ => readLayerWeights(buf))
NbaiModel(metadata, descs, weights)
/** Tries /nnue_weights.nbai on the classpath; falls back to migrating /nnue_weights.bin. */
/** Loads weights from the `nnue.weights` system property if it points at a readable file; otherwise tries
* /nnue_weights.nbai on the classpath, falling back to migrating /nnue_weights.bin.
*/
def loadDefault(): NbaiModel =
Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
case Some(s) =>
overrideModel().getOrElse {
Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
case Some(s) =>
try load(s)
finally s.close()
case None => NbaiMigrator.migrateFromBin()
}
private def overrideModel(): Option[NbaiModel] =
sys.props
.get("nnue.weights")
.map(Path.of(_))
.filter(Files.isRegularFile(_))
.map { path =>
val s = Files.newInputStream(path)
try load(s)
finally s.close()
case None => NbaiMigrator.migrateFromBin()
}
private def checkHeader(buf: ByteBuffer): Unit =
val magic = buf.getInt()
@@ -0,0 +1,112 @@
package de.nowchess.bot.selfplay
import de.nowchess.api.game.GameContext
import de.nowchess.api.move.Move
import de.nowchess.api.rules.RuleSet
import de.nowchess.bot.BotDifficulty
import de.nowchess.bot.bots.NNUEBot
import de.nowchess.io.fen.FenExporter
import de.nowchess.rules.sets.DefaultRules
import java.io.{BufferedWriter, FileWriter}
import java.nio.file.{Files, Path}
import scala.collection.mutable
import scala.util.Random
/** Standalone self-play harness. Runs NNUEBot against itself from randomised openings and writes the visited positions
  * as one FEN per line — the input format expected by the Python labeler. No microservices.
  *
  * Games run sequentially because EvaluationNNUE holds a shared accumulator; the small per-move time budget keeps
  * throughput high. Stockfish relabels every position later, so shallow self-play search is sufficient.
  *
  * Command-line flags (all optional, each followed by one value): `--games`, `--out`, `--weights`, `--move-ms`,
  * `--random-plies`, `--max-plies`, `--seed`. Unrecognised arguments and flags without a value are reported on stdout
  * and skipped; a non-numeric value for a numeric flag aborts with a `NumberFormatException` naming the flag.
  */
object SelfPlayMain:

  /** Flags that consume the following argument as their value. */
  private val valueFlags: Set[String] =
    Set("--games", "--out", "--weights", "--move-ms", "--random-plies", "--max-plies", "--seed")

  /** Run settings; every default applies unless the matching flag is given.
    *
    * @param games
    *   number of self-play games to run
    * @param out
    *   path of the FEN output file (overwritten on each run)
    * @param weights
    *   optional weights file, published through the `nnue.weights` system property
    * @param moveTimeMs
    *   fixed per-move time budget handed to the bot, in milliseconds
    * @param randomPlies
    *   number of uniformly random plies played before the bot takes over
    * @param maxPlies
    *   ply cap per game; the random opening plies count towards it
    * @param seed
    *   seed of the random generator used for the openings
    */
  private final case class Config(
      games: Int = 500,
      out: String = "modules/official-bots/python/data/selfplay.txt",
      weights: Option[String] = None,
      moveTimeMs: Long = 50L,
      randomPlies: Int = 8,
      maxPlies: Int = 200,
      seed: Long = System.nanoTime(),
  )

  /** Entry point: parses the flags, plays `games` games back to back and streams every new position to the output
    * file. Progress is printed (and the file flushed) every 25 games.
    */
  def main(args: Array[String]): Unit =
    val config = parse(args.toList, Config())

    // Publish the override before the bot is constructed so that weight loading, which reads this property,
    // can see it. The loader ignores a path that is not a regular file, so warn instead of failing silently.
    config.weights.foreach { weightsPath =>
      if !Files.isRegularFile(Path.of(weightsPath)) then
        System.err.println(
          s"Warning: --weights '$weightsPath' is not a regular file; NbaiLoader will fall back to the bundled weights",
        )
      System.setProperty("nnue.weights", weightsPath)
    }

    val rules = DefaultRules
    val bot   = NNUEBot(BotDifficulty.Hard, rules, fixedMoveTimeMs = Some(config.moveTimeMs))
    val rng   = new Random(config.seed)
    // Positions already written, shared across all games so the output holds no duplicates.
    val seen = mutable.HashSet.empty[String]

    // getParent is null when the path has no parent (e.g. a filesystem root); nothing to create in that case.
    Option(Path.of(config.out).toAbsolutePath.getParent).foreach(dir => Files.createDirectories(dir))
    val writer = new BufferedWriter(new FileWriter(config.out))
    try
      (1 to config.games).foreach { game =>
        playGame(rules, bot, rng, config, seen, writer)
        if game % 25 == 0 then
          writer.flush()
          println(s"games=$game/${config.games} positions=${seen.size}")
      }
    finally writer.close()
    println(s"Done. ${seen.size} unique positions -> ${config.out}")

  /** Plays a single game: a random opening followed by bot moves for both sides, recording each position reached by a
    * bot move. The game ends when the position is terminal, the bot returns no move, or `config.maxPlies` is reached.
    * Nothing is played or recorded when the random opening could not be completed.
    */
  private def playGame(
      rules: RuleSet,
      bot: GameContext => Option[Move],
      rng: Random,
      config: Config,
      seen: mutable.HashSet[String],
      writer: BufferedWriter,
  ): Unit =
    @scala.annotation.tailrec
    def advance(ctx: GameContext, plies: Int): Unit =
      if plies >= config.maxPlies || isTerminal(rules, ctx) then ()
      else
        bot(ctx) match
          case None => ()
          case Some(move) =>
            val next = rules.applyMove(ctx)(move)
            record(rules, next, seen, writer)
            advance(next, plies + 1)

    // The opening plies count towards the cap, so the ply counter starts at randomPlies.
    randomOpening(rules, rng, config.randomPlies, GameContext.initial)
      .foreach(opening => advance(opening, config.randomPlies))

  /** Applies `plies` uniformly random legal moves starting from `start`.
    *
    * @return
    *   the resulting position, or `None` if a position without legal moves is hit before all plies are played
    */
  private def randomOpening(rules: RuleSet, rng: Random, plies: Int, start: GameContext): Option[GameContext] =
    @scala.annotation.tailrec
    def step(ctx: GameContext, remaining: Int): Option[GameContext] =
      if remaining <= 0 then Some(ctx)
      else
        val legal = rules.allLegalMoves(ctx)
        if legal.isEmpty then None
        else step(rules.applyMove(ctx)(legal(rng.nextInt(legal.size))), remaining - 1)

    step(start, plies)

  /** Writes the FEN of `ctx` as one line unless the side to move is in check, the position is terminal, or the same
    * FEN has already been written during this run.
    */
  private def record(rules: RuleSet, ctx: GameContext, seen: mutable.HashSet[String], writer: BufferedWriter): Unit =
    if !rules.isCheck(ctx) && !isTerminal(rules, ctx) then
      val fen = FenExporter.gameContextToFen(ctx)
      // HashSet.add returns true only for a FEN not seen before.
      if seen.add(fen) then
        writer.write(fen)
        writer.newLine()

  /** True when the game cannot continue: no legal moves, insufficient material, the fifty-move rule, or threefold
    * repetition.
    */
  private def isTerminal(rules: RuleSet, ctx: GameContext): Boolean =
    rules.allLegalMoves(ctx).isEmpty ||
      rules.isInsufficientMaterial(ctx) ||
      rules.isFiftyMoveRule(ctx) ||
      rules.isThreefoldRepetition(ctx)

  /** Parses `raw` as an Int, failing with a message that names the offending flag. */
  private def intArg(flag: String, raw: String): Int =
    raw.toIntOption.getOrElse(throw new NumberFormatException(s"Invalid integer for $flag: '$raw'"))

  /** Parses `raw` as a Long, failing with a message that names the offending flag. */
  private def longArg(flag: String, raw: String): Long =
    raw.toLongOption.getOrElse(throw new NumberFormatException(s"Invalid integer for $flag: '$raw'"))

  /** Folds the argument list into `acc`, one `flag value` pair at a time.
    *
    * Unknown arguments are reported and skipped. A known flag that appears last without a value is reported as such
    * and leaves its default in place.
    *
    * @throws NumberFormatException
    *   if the value of a numeric flag is not a valid number
    */
  @scala.annotation.tailrec
  private def parse(args: List[String], acc: Config): Config = args match
    case Nil                           => acc
    case "--games" :: v :: rest        => parse(rest, acc.copy(games = intArg("--games", v)))
    case "--out" :: v :: rest          => parse(rest, acc.copy(out = v))
    case "--weights" :: v :: rest      => parse(rest, acc.copy(weights = Some(v)))
    case "--move-ms" :: v :: rest      => parse(rest, acc.copy(moveTimeMs = longArg("--move-ms", v)))
    case "--random-plies" :: v :: rest => parse(rest, acc.copy(randomPlies = intArg("--random-plies", v)))
    case "--max-plies" :: v :: rest    => parse(rest, acc.copy(maxPlies = intArg("--max-plies", v)))
    case "--seed" :: v :: rest         => parse(rest, acc.copy(seed = longArg("--seed", v)))
    case flag :: Nil if valueFlags.contains(flag) =>
      // A known flag with nothing after it: say so instead of calling it unknown.
      println(s"Ignoring $flag: missing value")
      acc
    case unknown :: rest =>
      println(s"Ignoring unknown arg: $unknown")
      parse(rest, acc)