feat(official-bots): standalone self-play + one-shot dataset builder for NNUE training

Add an easy local data pipeline feeding GPU training on Colab. - SelfPlayMain: standalone NNUEBot self-play (no microservices) writing FENs for labeling; randomised openings for game diversity, sequential due to the shared EvaluationNNUE accumulator. Exposed via the `selfPlay` Gradle task and selfplay.sh. - NNUEBot: optional fixedMoveTimeMs so self-play runs fast (default unchanged). - NbaiLoader: honor `-Dnnue.weights=<path>` to load weights from a file before falling back to the bundled resource. - build_dataset.py / dataset.sh: one command builds the entire dataset (Lichess eval-DB backbone + self-play + tactical + random filler), dedups, balances the eval histogram, writes append-only zstd shards + manifest, and rclone-pushes to Drive. - train.py: NNUEDataset reads a directory of .jsonl.zst shards (streaming) in addition to a single file. - NNUETraining.ipynb: clone to ephemeral /content, sync shards from Drive (cache-aware), train on the shards dir; removed Colab generation/upload steps. - Concept + implementation plan docs. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-24 22:04:22 +02:00
parent c8cbcdca3b
commit 1c80abdb8a
11 changed files with 909 additions and 198 deletions
@@ -15,6 +15,7 @@ object NNUEBot:
      difficulty: BotDifficulty,
      rules: RuleSet = DefaultRules,
      book: Option[PolyglotBook] = None,
+      fixedMoveTimeMs: Option[Long] = None,
  ): Bot =
    val search = AlphaBetaSearch(rules, weights = EvaluationNNUE)
    context =>
@@ -28,7 +29,8 @@ object NNUEBot:
          else
            val scored   = batchEvaluateRoot(rules, context, moves)
            val bestMove = scored.maxBy(_._2)._1
-            search.bestMoveWithTime(context, allocateTime(scored), blockedMoves, scored.toMap).orElse(Some(bestMove))
+            val budget   = fixedMoveTimeMs.getOrElse(allocateTime(scored))
+            search.bestMoveWithTime(context, budget, blockedMoves, scored.toMap).orElse(Some(bestMove))
        }

  private def batchEvaluateRoot(rules: RuleSet, context: GameContext, moves: List[Move]): List[(Move, Int)] =
@@ -1,6 +1,7 @@
 package de.nowchess.bot.bots.nnue

 import java.io.InputStream
+import java.nio.file.{Files, Path}
 import java.nio.{ByteBuffer, ByteOrder}
 import java.nio.charset.StandardCharsets

@@ -17,13 +18,28 @@ object NbaiLoader:
    val weights  = descs.map(_ => readLayerWeights(buf))
    NbaiModel(metadata, descs, weights)

-  /** Tries /nnue_weights.nbai on the classpath; falls back to migrating /nnue_weights.bin. */
+  /** Loads weights from the `nnue.weights` system property if it names an existing regular file; otherwise tries
+    * /nnue_weights.nbai on the classpath, falling back to migrating /nnue_weights.bin.
+    */
  def loadDefault(): NbaiModel =
-    Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
-      case Some(s) =>
+    overrideModel().getOrElse {
+      Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
+        case Some(s) =>
+          try load(s)
+          finally s.close()
+        case None => NbaiMigrator.migrateFromBin()
+    }
+
+  private def overrideModel(): Option[NbaiModel] =
+    sys.props
+      .get("nnue.weights")
+      .map(Path.of(_))
+      .filter(Files.isRegularFile(_))
+      .map { path =>
+        val s = Files.newInputStream(path)
        try load(s)
        finally s.close()
-      case None => NbaiMigrator.migrateFromBin()
+      }

  private def checkHeader(buf: ByteBuffer): Unit =
    val magic = buf.getInt()
@@ -0,0 +1,112 @@
+package de.nowchess.bot.selfplay
+
+import de.nowchess.api.game.GameContext
+import de.nowchess.api.move.Move
+import de.nowchess.api.rules.RuleSet
+import de.nowchess.bot.BotDifficulty
+import de.nowchess.bot.bots.NNUEBot
+import de.nowchess.io.fen.FenExporter
+import de.nowchess.rules.sets.DefaultRules
+
+import java.io.{BufferedWriter, FileWriter}
+import java.nio.file.{Files, Path}
+import scala.collection.mutable
+import scala.util.Random
+
+/** Standalone self-play harness. Runs NNUEBot against itself from randomised openings and writes the visited positions
+  * as one FEN per line — the input format expected by the Python labeler. No microservices.
+  *
+  * Games run sequentially because EvaluationNNUE holds a shared accumulator; the small per-move time budget keeps
+  * throughput high. Stockfish relabels every position later, so shallow self-play search is sufficient.
+  */
+object SelfPlayMain:
+
+  /** Command-line options; each field is set by the matching `--flag` handled in `parse`. */
+  private final case class Config(
+      games: Int = 500,
+      out: String = "modules/official-bots/python/data/selfplay.txt",
+      weights: Option[String] = None,
+      moveTimeMs: Long = 50L,
+      randomPlies: Int = 8,
+      maxPlies: Int = 200,
+      seed: Long = System.nanoTime(),
+  )
+
+  /** Plays `config.games` games one after another, writing each newly seen position to `config.out` as it goes.
+    * Prints the RNG seed first so a run that used the default time-based seed can be repeated via `--seed`.
+    */
+  def main(args: Array[String]): Unit =
+    val config = parse(args.toList, Config())
+    // Publish the weights override before the bot is built; NbaiLoader reads this system property.
+    config.weights.foreach(System.setProperty("nnue.weights", _))
+
+    val rules = DefaultRules
+    val bot   = NNUEBot(BotDifficulty.Hard, rules, fixedMoveTimeMs = Some(config.moveTimeMs))
+    val rng   = new Random(config.seed)
+    val seen  = mutable.HashSet.empty[String]
+
+    // getParent is null for a filesystem root; skip directory creation rather than pass null on.
+    Option(Path.of(config.out).toAbsolutePath.getParent).foreach(Files.createDirectories(_))
+    println(s"seed=${config.seed}")
+    val writer = new BufferedWriter(new FileWriter(config.out))
+    try
+      for game <- 1 to config.games do
+        playGame(rules, bot, rng, config, seen, writer)
+        if game % 25 == 0 then
+          writer.flush()
+          println(s"games=$game/${config.games} positions=${seen.size}")
+    finally writer.close()
+    println(s"Done. ${seen.size} unique positions -> ${config.out}")
+
+  /** Plays one game: a random opening, then bot moves until the position is terminal, the bot returns no move, or
+    * `config.maxPlies` is reached (opening plies count towards that cap). Only positions after bot moves are recorded.
+    */
+  private def playGame(
+      rules: RuleSet,
+      bot: GameContext => Option[Move],
+      rng: Random,
+      config: Config,
+      seen: mutable.HashSet[String],
+      writer: BufferedWriter,
+  ): Unit =
+    randomOpening(rules, rng, config.randomPlies, GameContext.initial) match
+      case None => ()
+      case Some(start) =>
+        var ctx   = start
+        var plies = config.randomPlies
+        // Terminal status is computed once per position and reused for both recording and loop control.
+        var live  = !isTerminal(rules, ctx)
+        while live && plies < config.maxPlies do
+          bot(ctx) match
+            case None => live = false
+            case Some(move) =>
+              ctx = rules.applyMove(ctx)(move)
+              plies += 1
+              live = !isTerminal(rules, ctx)
+              if live then record(rules, ctx, seen, writer)
+
+  /** Applies `plies` randomly chosen legal moves to `start`; None if a position without legal moves is hit first. */
+  private def randomOpening(rules: RuleSet, rng: Random, plies: Int, start: GameContext): Option[GameContext] =
+    @scala.annotation.tailrec
+    def loop(ctx: GameContext, remaining: Int): Option[GameContext] =
+      if remaining <= 0 then Some(ctx)
+      else
+        val legal = rules.allLegalMoves(ctx)
+        if legal.isEmpty then None
+        else loop(rules.applyMove(ctx)(legal(rng.nextInt(legal.size))), remaining - 1)
+    loop(start, plies)
+
+  /** Writes `ctx` as one FEN line unless `rules.isCheck` holds for it or the same FEN was already written.
+    * Callers pass non-terminal positions only; the terminal test lives in `playGame`.
+    */
+  private def record(rules: RuleSet, ctx: GameContext, seen: mutable.HashSet[String], writer: BufferedWriter): Unit =
+    if !rules.isCheck(ctx) then
+      val fen = FenExporter.gameContextToFen(ctx)
+      if seen.add(fen) then
+        writer.write(fen)
+        writer.newLine()
+
+  /** True when there is no legal move or any of the draw conditions queried below applies. */
+  private def isTerminal(rules: RuleSet, ctx: GameContext): Boolean =
+    rules.allLegalMoves(ctx).isEmpty ||
+      rules.isInsufficientMaterial(ctx) ||
+      rules.isFiftyMoveRule(ctx) ||
+      rules.isThreefoldRepetition(ctx)
+
+  /** Folds `args` into `acc`. Unrecognised tokens (including a trailing flag with no value) are reported and skipped. */
+  private def parse(args: List[String], acc: Config): Config = args match
+    case "--games" :: v :: rest        => parse(rest, acc.copy(games = v.toInt))
+    case "--out" :: v :: rest          => parse(rest, acc.copy(out = v))
+    case "--weights" :: v :: rest      => parse(rest, acc.copy(weights = Some(v)))
+    case "--move-ms" :: v :: rest      => parse(rest, acc.copy(moveTimeMs = v.toLong))
+    case "--random-plies" :: v :: rest => parse(rest, acc.copy(randomPlies = v.toInt))
+    case "--max-plies" :: v :: rest    => parse(rest, acc.copy(maxPlies = v.toInt))
+    case "--seed" :: v :: rest         => parse(rest, acc.copy(seed = v.toLong))
+    case Nil                           => acc
+    case unknown :: rest               => println(s"Ignoring unknown arg: $unknown"); parse(rest, acc)