From 1c80abdb8a45814d642d43c633cde81ce7374c4f Mon Sep 17 00:00:00 2001 From: Janis Eccarius Date: Wed, 24 Jun 2026 22:04:22 +0200 Subject: [PATCH] feat(official-bots): standalone self-play + one-shot dataset builder for NNUE training Add an easy local data pipeline feeding GPU training on Colab. - SelfPlayMain: standalone NNUEBot self-play (no microservices) writing FENs for labeling; randomised openings for game diversity, sequential due to the shared EvaluationNNUE accumulator. Exposed via the `selfPlay` Gradle task and selfplay.sh. - NNUEBot: optional fixedMoveTimeMs so self-play runs fast (default unchanged). - NbaiLoader: honor `-Dnnue.weights=` to load weights from a file before falling back to the bundled resource. - build_dataset.py / dataset.sh: one command builds the entire dataset (Lichess eval-DB backbone + self-play + tactical + random filler), dedups, balances the eval histogram, writes append-only zstd shards + manifest, and rclone-pushes to Drive. - train.py: NNUEDataset reads a directory of .jsonl.zst shards (streaming) in addition to a single file. - NNUETraining.ipynb: clone to ephemeral /content, sync shards from Drive (cache-aware), train on the shards dir; removed Colab generation/upload steps. - Concept + implementation plan docs. 
Co-Authored-By: Claude Opus 4.8 --- modules/official-bots/build.gradle.kts | 8 + .../python/COLAB_TRAINING_CONCEPT.md | 212 +++++++++++++ .../python/IMPLEMENTATION_PLAN.md | 180 +++++++++++ .../official-bots/python/NNUETraining.ipynb | 182 +----------- modules/official-bots/python/build_dataset.py | 281 ++++++++++++++++++ modules/official-bots/python/dataset.sh | 17 ++ modules/official-bots/python/selfplay.sh | 23 ++ modules/official-bots/python/src/train.py | 64 ++-- .../scala/de/nowchess/bot/bots/NNUEBot.scala | 4 +- .../nowchess/bot/bots/nnue/NbaiLoader.scala | 24 +- .../nowchess/bot/selfplay/SelfPlayMain.scala | 112 +++++++ 11 files changed, 909 insertions(+), 198 deletions(-) create mode 100644 modules/official-bots/python/COLAB_TRAINING_CONCEPT.md create mode 100644 modules/official-bots/python/IMPLEMENTATION_PLAN.md create mode 100644 modules/official-bots/python/build_dataset.py create mode 100644 modules/official-bots/python/dataset.sh create mode 100644 modules/official-bots/python/selfplay.sh create mode 100644 modules/official-bots/src/main/scala/de/nowchess/bot/selfplay/SelfPlayMain.scala diff --git a/modules/official-bots/build.gradle.kts b/modules/official-bots/build.gradle.kts index fe19643..b1d6c43 100644 --- a/modules/official-bots/build.gradle.kts +++ b/modules/official-bots/build.gradle.kts @@ -47,6 +47,14 @@ tasks.withType { options.compilerArgs.add("-parameters") } +tasks.register<JavaExec>("selfPlay") { + group = "nnue" + description = "Run standalone NNUEBot self-play and write FENs for labeling." 
+ mainClass.set("de.nowchess.bot.selfplay.SelfPlayMain") + classpath = sourceSets["main"].runtimeClasspath + args((project.findProperty("spArgs")?.toString() ?: "").split(" ").filter { it.isNotBlank() }) +} + dependencies { compileOnly("org.scala-lang:scala3-compiler_3") { diff --git a/modules/official-bots/python/COLAB_TRAINING_CONCEPT.md b/modules/official-bots/python/COLAB_TRAINING_CONCEPT.md new file mode 100644 index 0000000..d185f79 --- /dev/null +++ b/modules/official-bots/python/COLAB_TRAINING_CONCEPT.md @@ -0,0 +1,212 @@ +# Concept: NNUE Training Data — Quality, Scale, and Transfer to Colab + +Local generation + labeling is **not** a constraint (Ryzen 9800X3D / RTX 5070 / 32 GB). +So the design splits cleanly: + +- **Data plane = local box.** Generate, label, shard, publish. Cheap, fast, no limits. +- **Train plane = Colab.** Pull a dataset version, GPU-train, export `.nbai`. + +Colab never runs Stockfish and never sees a browser upload. Three problems below: +**(1) good data, (2) growing it over time, (3) getting it there easily** — (3) is the priority. + +--- + +## 1. Generating *good* training sets + +### The current weak spot + +`generate.py` plays **fully random games** (`random.choice(legal_moves)`). Random play +produces positions that never occur in real games — material chaos, nonsense pawn +structures. An NNUE trained on that learns to evaluate a distribution it will never +face. Fine as filler, wrong as the backbone. + +### What a good NNUE dataset needs + +1. **Realistic position distribution.** Positions should resemble what the bot actually + reaches in search — from real games and engine play, not coin-flip moves. +2. **Phase coverage.** Openings, middlegames, endgames all represented. Endgames are + under-sampled by random play and matter most for precise eval. +3. **Eval balance.** Real game data is dominated by near-equal positions. If 80% of + labels sit in `[-0.5, +0.5]`, the net learns "everything is roughly equal." 
Resample + to flatten the eval histogram (cap per-bucket counts). +4. **Accurate labels.** Deeper Stockfish = better target. Locally you can afford + depth 16–20. Or skip labeling entirely with the Lichess eval DB (below). +5. **Clean positions.** Dedup by FEN; drop terminal/checkmate/stalemate; the side to + move should not already be in check unless intended; tag the game phase. + +### Recommended source mix (per dataset version) + +| Source | Role | How | Weight | +|---|---|---|---| +| **Lichess eval DB** | Backbone | `lichess_importer.py` — millions of FENs **pre-labeled** by deep Stockfish, real human positions, correct sign convention | 50–70% | +| **Engine self-play** | Bot's own distribution | NNUEBot (or vs Stockfish) plays games; sample positions; label with local Stockfish | 20–40% | +| **Tactical puzzles** | Sharp/critical positions | `tactical_positions_extractor.py` (Lichess puzzle DB) | 5–15% | +| **Random play** | Cheap diversity filler | existing `generate.py`, capped low | ≤10% | + +The backbone is real, pre-labeled data — so labeling cost is near zero and quality is +high. Self-play is the part that adapts data to *your* bot. Random play stays only as +a thin diversity sprinkle. + +### Self-play flywheel (the quality engine over time) + +The strongest lever: **net N generates the games that train net N+1.** + +``` +net_vN ──play self-play games──► sample positions ──label (Stockfish)──► + ▲ │ + └──────────────── train on (backbone + new self-play) ◄─────────────────┘ + net_v(N+1) +``` + +Each generation, the bot reaches positions closer to its real playing distribution, +labels them with a stronger-than-bot oracle (Stockfish), and learns the gap. Standard +modern NNUE practice. Keep the Lichess backbone mixed in every round so the net does +not overfit to its own blind spots. + +--- + +## 2. Scaling datasets over time — append-only shards + +Do **not** maintain one growing `labeled.jsonl` and re-copy it. 
Make a dataset an +**immutable set of shards plus a manifest**: + +``` +datasets/ + shards/ + lichess_000001.jsonl.zst # ~50–100k positions each, ~5–10 MB compressed + lichess_000002.jsonl.zst + selfplay_v7_000001.jsonl.zst + tactical_000001.jsonl.zst + ... + manifest.json +``` + +`manifest.json`: + +```json +{ + "dataset_version": 7, + "created": "2026-06-24T...", + "total_positions": 4200000, + "scale": 300.0, + "shards": [ + {"file": "lichess_000001.jsonl.zst", "positions": 100000, + "sha256": "...", "source": "lichess_eval", "stockfish_depth": 0}, + {"file": "selfplay_v7_000001.jsonl.zst", "positions": 80000, + "sha256": "...", "source": "selfplay", "net": "v7", "stockfish_depth": 18} + ] +} +``` + +Properties this buys: + +- **Growth = add shards.** Generate a new batch, label it, write one new shard, append + one manifest entry. Never touch existing shards. O(new data), not O(total). +- **Provenance.** Each shard records source + net + depth. You can later down-weight or + drop a bad batch by editing the manifest, no relabeling. +- **Dedup across shards** by FEN hash at build time; record dropped counts in metadata. +- **Reproducible mixes.** A "dataset version" is just a manifest selecting shards + + per-source sampling weights. Cheap to define many mixes over the same shard pool. +- **Resumable, cache-friendly transfer** (next section) — the whole reason for shards. + +`dataset.py`'s existing `ds_vN` + `metadata.json` scheme generalizes to this directly: +the dataset dir holds `shards/` + `manifest.json` instead of one `labeled.jsonl`. + +--- + +## 3. 
Getting data to Colab easily ← top priority + +Shards make this trivial: **incremental sync, never a full re-upload.** + +### Recommended: rclone → Google Drive, read from mounted Drive + +Colab mounts Drive natively, so the cheapest path is to make Drive the shard store and +sync into it with `rclone` (only uploads new/changed shards): + +```bash +# Local, after building shards: +rclone copy datasets/ gdrive:NowChess/datasets --progress +# ^ uploads only shards Drive doesn't have yet. Adding 80k positions = one small file. +``` + +Colab side, one cell: + +```python +SRC = '/content/drive/MyDrive/NowChess/datasets' # mounted, no download +import json, shutil, pathlib +manifest = json.load(open(f'{SRC}/manifest.json')) +local = pathlib.Path('/content/datasets'); local.mkdir(exist_ok=True) +for sh in manifest['shards']: # copy Drive→local SSD (fast seq read) + dst = local / sh['file'] + if not dst.exists(): # cache: only copy missing shards + shutil.copy(f"{SRC}/shards/{sh['file']}", dst) +``` + +Why this wins on "easy": +- **No browser upload, ever.** One `rclone copy` from your PC. +- **Incremental both directions.** Add a shard locally → next `rclone copy` ships only + that shard. Colab copies only shards it doesn't already have on `/content`. +- **Zero new infra.** Drive is already mounted in the notebook. + +### Alternative: Gitea release per dataset version (if Drive quota hurts) + +You self-host `git.janis-eccarius.de`. Tag `ds_v7`, attach shards + `manifest.json` as +release assets. Colab reads the manifest, then parallel-`wget` only the shards it lacks +(checksum-verified). Versioned, immutable, no Drive quota, token-gated. Slightly more +wiring than rclone→Drive. + +Pick rclone→Drive for minimum friction; Gitea releases if you want hard versioning and +to keep Drive small. + +### Notebook changes either way + +- Clone repo to **ephemeral `/content`** (fast), not Drive. Persist only datasets + + checkpoints. 
+- Drop Option A (no Colab generation) and Option B (no browser upload). One "sync + dataset version" cell instead. +- Train reads shards via a streaming `.jsonl.zst` loader (apply per-source sampling + weights + eval-bucket balancing here). Keep burst-train + Drive checkpoints + `.nbai` + export. + +--- + +## Resulting workflow + +``` +LOCAL (9800X3D / RTX5070) COLAB (GPU) +───────────────────────── ─────────── +import Lichess eval DB ─┐ +self-play with net_vN ─┼─► label ─► dedup ─► write new shard(s) ─► manifest++ +tactical / random ─┘ │ + rclone copy ────────┘ + datasets/ → Drive + │ (only new shards move) + ▼ + sync version → copy missing shards → train (GPU) + │ + export .nbai + ▼ + place in src/main/resources/, rebuild native image +``` + +## Build order + +1. **Shard format + manifest** in `dataset.py`: write/read `shards/*.jsonl.zst` + + `manifest.json`; dedup-across-shards on build; provenance per shard. +2. **Streaming `.zst` dataloader** in `train.py`: read shards, apply per-source weights + and eval-bucket balancing. +3. **Self-play generator** in `src/`: NNUEBot/Stockfish self-play → positions → local + Stockfish label → new shard. This is the scaling engine. +4. **`dataset_sync.py`**: `push` (rclone→Drive or Gitea upload) / `pull` (cache-aware). +5. **Notebook rewrite**: ephemeral clone, single sync cell, weighted streaming loader. +6. Wire `lichess_importer.py` as the backbone shard source. + +## Open decisions + +- **Transfer backend** — rclone→Drive (easiest, recommended) vs Gitea releases (hard + versioning). +- **Self-play opponent** — NNUEBot vs itself (own distribution) vs vs-Stockfish + (stronger, more decisive games). Likely a mix. +- **Backbone/self-play ratio** — start ~60/30/10 (lichess/selfplay/tactical), tune by + measured strength. +- **Shard size** — 50k vs 100k positions/shard (transfer granularity vs file count). 
diff --git a/modules/official-bots/python/IMPLEMENTATION_PLAN.md b/modules/official-bots/python/IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..641d0e2 --- /dev/null +++ b/modules/official-bots/python/IMPLEMENTATION_PLAN.md @@ -0,0 +1,180 @@ +# Implementation Plan: Two One-Liner Tools (self-play + dataset) + +Goal: **two tools, two start scripts, minimal params.** + +``` +./selfplay.sh # bot plays games against itself, writes selfplay FENs (Scala, standalone) +./dataset.sh # builds the ENTIRE training dataset + rclone push to Drive (Python, one script) +``` + +Both default-everything. Optional first positional arg only when you want to override +the one number that matters. + +--- + +## Tool 1 — `selfplay.sh` (standalone bot, no microservices) + +### Why it can be standalone + +`Bot` is just `GameContext => Option[Move]` (`Bot.scala`). `NNUEBot.apply` needs only +`DefaultRules` (rule module) + `EvaluationNNUE` (loads the bundled `.nbai`). No Quarkus, +no coordinator/account/ws. The bot module already depends on `api, rule, io`, and `io` +has `FenExporter` + `GameContext.initial` exists. So a plain JVM `main` can run games +with zero service wiring. + +### New file: `SelfPlayMain.scala` + +`modules/official-bots/src/main/scala/de/nowchess/bot/selfplay/SelfPlayMain.scala` + +Loop per game: + +1. Start from `GameContext.initial`. +2. **Opening diversity** — play `R` random legal plies (default 8). Without this, + NNUEBot vs itself is deterministic → the *same game every time*. Random openings are + what make the games diverse. (Optional later: seed from polyglot book instead.) +3. Then both sides = `NNUEBot(difficulty)`. Apply moves via `DefaultRules.applyMove`. +4. Stop on `isCheckmate / isStalemate / isInsufficientMaterial / isFiftyMoveRule / + isThreefoldRepetition`, or ply cap (default 200). +5. 
Emit one **FEN per ply** (via `FenExporter`), skipping positions where side-to-move + is in check and terminal positions — same filter philosophy the labeler wants. +6. Append FENs to the output file (one per line) — exactly the format `label.py` reads. + +Config = a small `case class` with defaults; read from env/args. Defaults: +`games=2000`, `randomOpeningPlies=8`, `maxPlies=200`, `out=python/data/selfplay.txt`, +`threads = availableProcessors`. Parallelize games across threads (each game is +independent; bot is pure). + +Output is **FENs only** — labeling happens in Tool 2 with Stockfish. Keeps the bot tool +single-responsibility and fast. + +### Gradle: a plain run task (not Quarkus) + +Add to `modules/official-bots/build.gradle.kts`: + +```kotlin +tasks.register<JavaExec>("selfPlay") { + group = "nnue" + mainClass.set("de.nowchess.bot.selfplay.SelfPlayMain") + classpath = sourceSets["main"].runtimeClasspath + args(project.findProperty("spArgs")?.toString()?.split(" ") ?: emptyList()) +} +``` + +### `selfplay.sh` (repo `python/` dir) + +```bash +#!/usr/bin/env bash +set -euo pipefail +GAMES="${1:-2000}" +cd "$(dirname "$0")/../../.." # repo root +./gradlew -q :official-bots:selfPlay -PspArgs="--games $GAMES --out modules/official-bots/python/data/selfplay.txt" +echo "Self-play FENs -> modules/official-bots/python/data/selfplay.txt" +``` + +Usage: + +```bash +./selfplay.sh # 2000 games, bundled net +./selfplay.sh 8000 # more games +``` + +--- + +## Tool 2 — `dataset.sh` → `build_dataset.py` (builds EVERYTHING) + +One Python script that produces a complete, sharded, pushed dataset. No TUI, no +multi-step menus. 
It runs the whole data plane end to end: + +``` +lichess eval DB ─┐ +selfplay.txt ─┼─► label (local Stockfish, skip already-labeled) ─► dedup ─► +tactical ─┤ eval-bucket +random filler ─┘ balance ─► + write shards/*.jsonl.zst + manifest.json ─► rclone push +``` + +### New file: `build_dataset.py` (top-level `python/`) + +Reuses existing modules — orchestrates, doesn't reinvent: + +- **Backbone:** `lichess_importer.py` — download + sample N pre-labeled positions from + the Lichess eval DB (no Stockfish cost). +- **Self-play:** read `data/selfplay.txt` FENs → `label.py` with local Stockfish + (depth 18, all cores — your box eats this). +- **Tactical:** `tactical_positions_extractor.py` → `label.py`. +- **Random filler:** `generate.py` (small cap) → `label.py`. +- **Merge:** dedup by FEN across all sources; **eval-bucket balancing** (cap positions + per eval bin so near-equal positions don't dominate). +- **Shard + manifest:** split into `shards/*.jsonl.zst` (~100k positions each) + write + `manifest.json` (positions, sha256, source, net, depth per shard). Append-only: + existing shards untouched, new run adds shards + entries (the scaling story from the + concept). +- **Push:** `rclone copy datasets/ gdrive:NowChess/datasets` — ships only new shards. + +### One config block, sane defaults + +Top of the script — the *only* thing you ever touch: + +```python +LICHESS_POSITIONS = 2_000_000 # backbone +USE_SELFPLAY = True # reads data/selfplay.txt if present +TACTICAL_PUZZLES = 200_000 +RANDOM_FILLER = 100_000 +STOCKFISH_DEPTH = 18 +RCLONE_REMOTE = "gdrive:NowChess/datasets" +``` + +Everything else (paths, workers=all cores, shard size, balancing bins) is internal. 
+ +### `dataset.sh` + +```bash +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +python build_dataset.py "$@" +``` + +Usage: + +```bash +./dataset.sh # full dataset (lichess + selfplay + tactical + filler) -> Drive +``` + +That single command: downloads backbone, labels self-play/tactical/filler, dedups, +balances, shards, and rclone-pushes to Drive. Colab then syncs (concept doc §3). + +--- + +## End-to-end loop (the flywheel) + +``` +./selfplay.sh # bot generates games with the current net +./dataset.sh # fold them into a new dataset version, push to Drive +# (Colab) sync + train -> export nnue_weights.nbai +# drop .nbai into modules/official-bots/src/main/resources/, rebuild +./selfplay.sh # next net plays stronger, better games... repeat +``` + +--- + +## Build order + +1. `SelfPlayMain.scala` — standalone game loop, random openings, parallel games, FEN out. +2. `selfPlay` Gradle `JavaExec` task + `selfplay.sh`. +3. `build_dataset.py` — orchestrate existing importer/label/tactical/generate into + shards + manifest; rclone push. +4. `dataset.sh`. +5. Shard/manifest read support in `dataset.py` + zstd streaming loader in `train.py` + (consumed on Colab). +6. Notebook: single "sync dataset version" cell, ephemeral `/content` clone. + +## Decisions to confirm + +- **Self-play opponent:** NNUEBot vs itself + random openings (planned). Add vs-Stockfish + later if more decisive games wanted. +- **Self-play net source:** use the `.nbai` bundled in `resources` (simplest), or accept + a `--weights path`? Plan = bundled by default. +- **rclone remote name:** confirm `gdrive` is your configured rclone remote, and the + target folder `NowChess/datasets`. +- **Stockfish path on your box:** `$STOCKFISH_PATH` or `/usr/games/stockfish`? 
diff --git a/modules/official-bots/python/NNUETraining.ipynb b/modules/official-bots/python/NNUETraining.ipynb index 9101915..3fc92e1 100644 --- a/modules/official-bots/python/NNUETraining.ipynb +++ b/modules/official-bots/python/NNUETraining.ipynb @@ -21,15 +21,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "# NNUE Training Pipeline\n", - "\n", - "End-to-end notebook: data generation → Stockfish labeling → training → `.nbai` export.\n", - "\n", - "**Runtime:** GPU (T4 or better). Runtime → Change runtime type → T4 GPU.\n", - "\n", - "**Persistence:** Checkpoints and datasets are saved to Google Drive so training can resume after session timeout." - ], + "source": "# NNUE Training Pipeline\n\nGPU training on Colab. Data is built **locally** (`./dataset.sh` → sharded, pushed to\nDrive via rclone); this notebook only **syncs shards → trains → exports `.nbai`**.\nNo generation, no Stockfish labeling, no browser uploads here.\n\n**Runtime:** GPU (T4 or better). Runtime → Change runtime type → T4 GPU.\n\n**Persistence:** Datasets and checkpoints live on Google Drive, so training resumes\nafter a session timeout. 
The repo is cloned to ephemeral `/content` for speed.", "id": "intro-md" }, { @@ -58,25 +50,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "\n", - "# ── Configure these paths once ───────────────────────────────────────────────\n", - "REPO_URL = 'https://git.janis-eccarius.de/NowChess/NowChessSystems.git'\n", - "DRIVE_ROOT = '/content/drive/MyDrive/NowChess'\n", - "REPO_DIR = f'{DRIVE_ROOT}/NowChessSystems'\n", - "PYTHON_DIR = f'{REPO_DIR}/modules/official-bots/python'\n", - "# ─────────────────────────────────────────────────────────────────────────────\n", - "\n", - "os.makedirs(DRIVE_ROOT, exist_ok=True)\n", - "\n", - "if not os.path.isdir(REPO_DIR):\n", - " !git clone --depth=1 \"{REPO_URL}\" \"{REPO_DIR}\"\n", - " print('Repo cloned to Drive.')\n", - "else:\n", - " !git -C \"{REPO_DIR}\" pull --ff-only\n", - " print('Repo updated.')" - ], + "source": "import os\n\n# ── Configure these paths once ───────────────────────────────────────────────\nREPO_URL = 'https://git.janis-eccarius.de/NowChess/NowChessSystems.git'\nDRIVE_ROOT = '/content/drive/MyDrive/NowChess' # datasets + weights persist here\nREPO_DIR = '/content/NowChessSystems' # ephemeral, fast local clone\nPYTHON_DIR = f'{REPO_DIR}/modules/official-bots/python'\n# ─────────────────────────────────────────────────────────────────────────────\n\nos.makedirs(DRIVE_ROOT, exist_ok=True)\n\n# Clone to ephemeral /content (NOT Drive) — fast checkout, no Drive bloat.\nif not os.path.isdir(REPO_DIR):\n !git clone --depth=1 \"{REPO_URL}\" \"{REPO_DIR}\"\n print('Repo cloned to /content.')\nelse:\n !git -C \"{REPO_DIR}\" pull --ff-only\n print('Repo updated.')", "id": "clone-repo" }, { @@ -84,35 +58,13 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Install Python dependencies\n", - "!pip install -q chess tqdm rich zstandard\n", - "\n", - "# Stockfish for position labeling\n", - "!apt-get install -q -y stockfish\n", - "import shutil\n", - 
"STOCKFISH_PATH = shutil.which('stockfish') or '/usr/games/stockfish'\n", - "print(f'Stockfish: {STOCKFISH_PATH}')\n", - "\n", - "# Add pipeline source to path\n", - "import sys\n", - "sys.path.insert(0, f'{PYTHON_DIR}/src')\n", - "sys.path.insert(0, PYTHON_DIR)\n", - "print('Python path configured.')" - ], + "source": "# Install Python dependencies. No Stockfish — labeling happens on the local box,\n# this notebook only trains on already-labeled shards.\n!pip install -q chess tqdm rich zstandard\n\nimport sys\nsys.path.insert(0, f'{PYTHON_DIR}/src')\nsys.path.insert(0, PYTHON_DIR)\nprint('Python path configured.')", "id": "install-deps" }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "---\n", - "## 🗄️ 2 — Data\n", - "\n", - "Choose **one** of the two options below:\n", - "- **Option A** — generate FEN positions with random play, then label them with Stockfish.\n", - "- **Option B** — upload an existing `labeled.jsonl` from your machine or Drive." - ], + "source": "---\n## 🗄️ 2 — Data\n\nDatasets are built **locally** (`./dataset.sh`) and pushed to Drive with rclone as\ncompressed shards under `MyDrive/NowChess/datasets/`. 
Here we just sync those shards\nto the fast local disk — no generation, no labeling, no browser uploads.\n\nThe cell reads `manifest.json` and copies only shards not already cached on `/content`.", "id": "data-md" }, { @@ -120,91 +72,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "# Paths (all on Drive so they survive session restarts)\n", - "DATA_DIR = Path(DRIVE_ROOT) / 'training_data'\n", - "DATA_DIR.mkdir(parents=True, exist_ok=True)\n", - "POSITIONS_FILE = DATA_DIR / 'positions.txt' # raw FENs\n", - "LABELED_FILE = DATA_DIR / 'labeled.jsonl' # FEN + eval pairs\n", - "\n", - "print(f'Data directory: {DATA_DIR}')" - ], + "source": "import json, shutil\nfrom pathlib import Path\n\n# Source: shards synced from the local box via `rclone copy datasets/ gdrive:NowChess/datasets`\nDRIVE_DATASETS = Path(DRIVE_ROOT) / 'datasets'\nLOCAL_DATASETS = Path('/content/datasets')\n(LOCAL_DATASETS / 'shards').mkdir(parents=True, exist_ok=True)\n\nmanifest = json.load(open(DRIVE_DATASETS / 'manifest.json'))\nprint(f\"Dataset v{manifest['dataset_version']}: \"\n f\"{manifest['total_positions']:,} positions across {len(manifest['shards'])} shards\")\n\ncopied = 0\nfor sh in manifest['shards']:\n dst = LOCAL_DATASETS / 'shards' / sh['file']\n if not dst.exists(): # cache: only copy shards we don't already have\n shutil.copy(DRIVE_DATASETS / 'shards' / sh['file'], dst)\n copied += 1\nshutil.copy(DRIVE_DATASETS / 'manifest.json', LOCAL_DATASETS / 'manifest.json')\n\nDATA_PATH = str(LOCAL_DATASETS) # train_nnue / burst_train read this dir of shards directly\nprint(f\"Synced {copied} new shard(s). Dataset ready at {DATA_PATH}\")", "id": "data-paths" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ── Option A: Generate + label ────────────────────────────────────────────────\n", - "# Adjust NUM_POSITIONS to taste. 
50 000 trains in ~10 min on T4;\n", - "# 200 000+ gives better generalisation.\n", - "NUM_POSITIONS = 50_000\n", - "STOCKFISH_DEPTH = 12\n", - "LABEL_WORKERS = 4 # parallel Stockfish processes\n", - "MIN_MOVE = 5 # skip opening book moves\n", - "MAX_MOVE = 60\n", - "\n", - "from generate import play_random_game_and_collect_positions\n", - "from label import label_positions_with_stockfish\n", - "\n", - "print(f'Generating {NUM_POSITIONS:,} positions...')\n", - "count = play_random_game_and_collect_positions(\n", - " str(POSITIONS_FILE),\n", - " total_positions=NUM_POSITIONS,\n", - " samples_per_game=1,\n", - " min_move=MIN_MOVE,\n", - " max_move=MAX_MOVE,\n", - " num_workers=4,\n", - ")\n", - "print(f'{count:,} positions written to {POSITIONS_FILE}')\n", - "\n", - "print('Labeling with Stockfish (this is the slow step)...')\n", - "ok = label_positions_with_stockfish(\n", - " str(POSITIONS_FILE),\n", - " str(LABELED_FILE),\n", - " STOCKFISH_PATH,\n", - " depth=STOCKFISH_DEPTH,\n", - " num_workers=LABEL_WORKERS,\n", - ")\n", - "if ok:\n", - " print(f'Labeled dataset saved: {LABELED_FILE}')\n", - "else:\n", - " print('ERROR: labeling failed')" - ], - "id": "option-a-generate" - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ── Option B: Upload existing labeled.jsonl ───────────────────────────────────\n", - "# Run this cell instead of Option A if you already have a labeled dataset.\n", - "#\n", - "# To upload from local machine:\n", - "# from google.colab import files\n", - "# uploaded = files.upload() # pick your labeled.jsonl\n", - "# import shutil, os\n", - "# shutil.move(next(iter(uploaded)), str(LABELED_FILE))\n", - "#\n", - "# Or copy from Drive:\n", - "# import shutil\n", - "# shutil.copy('/content/drive/MyDrive/path/to/labeled.jsonl', str(LABELED_FILE))\n", - "\n", - "import os\n", - "if LABELED_FILE.exists():\n", - " lines = sum(1 for _ in open(LABELED_FILE))\n", - " print(f'Ready: {lines:,} 
labeled positions at {LABELED_FILE}')\n", - "else:\n", - " print('No labeled.jsonl found — run Option A first or upload one.')" - ], - "id": "option-b-upload" - }, { "cell_type": "markdown", "metadata": {}, @@ -251,22 +121,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# ── Standard training ─────────────────────────────────────────────────────────\n", - "# Use this when you have a reliable long-running session.\n", - "\n", - "train_nnue(\n", - " data_file=str(LABELED_FILE),\n", - " output_file=OUTPUT_FILE,\n", - " epochs=EPOCHS,\n", - " batch_size=BATCH_SIZE,\n", - " checkpoint=CHECKPOINT,\n", - " use_versioning=True,\n", - " early_stopping_patience=EARLY_STOPPING,\n", - " subsample_ratio=SUBSAMPLE_RATIO,\n", - " hidden_sizes=HIDDEN_SIZES,\n", - ")" - ], + "source": "# ── Standard training ─────────────────────────────────────────────────────────\n# Use this when you have a reliable long-running session.\n\ntrain_nnue(\n data_file=DATA_PATH,\n output_file=OUTPUT_FILE,\n epochs=EPOCHS,\n batch_size=BATCH_SIZE,\n checkpoint=CHECKPOINT,\n use_versioning=True,\n early_stopping_patience=EARLY_STOPPING,\n subsample_ratio=SUBSAMPLE_RATIO,\n hidden_sizes=HIDDEN_SIZES,\n)", "id": "standard-train" }, { @@ -274,28 +129,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# ── Burst training (recommended for Colab free tier) ─────────────────────────\n", - "# Restarts from the global best each time early stopping fires.\n", - "# Set BURST_MINUTES to slightly less than the Colab session limit (~70 min).\n", - "\n", - "BURST_MINUTES = 70\n", - "EPOCHS_PER_SEASON = 30\n", - "BURST_PATIENCE = 8\n", - "\n", - "burst_train(\n", - " data_file=str(LABELED_FILE),\n", - " output_file=OUTPUT_FILE,\n", - " duration_minutes=BURST_MINUTES,\n", - " epochs_per_season=EPOCHS_PER_SEASON,\n", - " early_stopping_patience=BURST_PATIENCE,\n", - " batch_size=BATCH_SIZE,\n", - " initial_checkpoint=CHECKPOINT,\n", - " use_versioning=True,\n", - 
" subsample_ratio=SUBSAMPLE_RATIO,\n", - " hidden_sizes=HIDDEN_SIZES,\n", - ")" - ], + "source": "# ── Burst training (recommended for Colab free tier) ─────────────────────────\n# Restarts from the global best each time early stopping fires.\n# Set BURST_MINUTES to slightly less than the Colab session limit (~70 min).\n\nBURST_MINUTES = 70\nEPOCHS_PER_SEASON = 30\nBURST_PATIENCE = 8\n\nburst_train(\n data_file=DATA_PATH,\n output_file=OUTPUT_FILE,\n duration_minutes=BURST_MINUTES,\n epochs_per_season=EPOCHS_PER_SEASON,\n early_stopping_patience=BURST_PATIENCE,\n batch_size=BATCH_SIZE,\n initial_checkpoint=CHECKPOINT,\n use_versioning=True,\n subsample_ratio=SUBSAMPLE_RATIO,\n hidden_sizes=HIDDEN_SIZES,\n)", "id": "burst-train" }, { @@ -374,4 +208,4 @@ "id": "download-cell" } ] -} +} \ No newline at end of file
diff --git a/modules/official-bots/python/build_dataset.py b/modules/official-bots/python/build_dataset.py
new file mode 100644
index 0000000..e88d818
--- /dev/null
+++ b/modules/official-bots/python/build_dataset.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+"""Build the ENTIRE NNUE training dataset with one command.
+
+Orchestrates the existing source modules (Lichess eval DB, self-play, tactical puzzles,
+random filler), labels what needs labeling with local Stockfish, deduplicates, balances
+the eval distribution, writes append-only compressed shards + a manifest, and pushes to
+Google Drive with rclone.
+
+    ./dataset.sh                 # build everything + push
+    ./dataset.sh --no-push       # build only
+    ./dataset.sh --no-lichess    # skip the (large) Lichess backbone
+
+Tune the CONFIG block below — that is the only thing you normally touch.
+"""
+
+import argparse
+import hashlib
+import io
+import json
+import os
+import random
+import shutil
+import subprocess
+import sys
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+import zstandard as zstd
+
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE / "src"))
+
+from generate import play_random_game_and_collect_positions
+from label import label_positions_with_stockfish
+from lichess_importer import import_lichess_evals
+from tactical_positions_extractor import download_and_extract_puzzle_db, extract_tactical_only
+
+# ── CONFIG — the only knobs you normally touch ───────────────────────────────
+LICHESS_POSITIONS = 2_000_000   # backbone positions from the Lichess eval DB
+USE_SELFPLAY = True             # label data/selfplay.txt if present
+TACTICAL_PUZZLES = 200_000      # tactical positions from the Lichess puzzle DB
+RANDOM_FILLER = 100_000         # cheap random-play positions
+STOCKFISH_DEPTH = 14            # local labeling depth (selfplay/tactical/random)
+RCLONE_REMOTE = "gdrive:NowChess/datasets"
+# ─────────────────────────────────────────────────────────────────────────────
+
+LABEL_BATCH = 64        # positions per Stockfish task (small = smooth progress + load balance)
+SHARD_SIZE = 100_000    # positions per shard
+BALANCE_BINS = 64       # eval histogram bins over [-1, 1]
+BALANCE_FACTOR = 2.0    # cap each bin at FACTOR x the uniform bin size
+LICHESS_EVAL_URL = "https://database.lichess.org/lichess_db_eval.jsonl.zst"
+
+STOCKFISH_PATH = os.environ.get("STOCKFISH_PATH", "/usr/games/stockfish")
+WORKERS = os.cpu_count() or 4
+
+DATA_DIR = HERE / "data"
+WORK_DIR = HERE / "data" / "_build"
+DATASETS_DIR = HERE / "datasets"
+SHARDS_DIR = DATASETS_DIR / "shards"
+MANIFEST = DATASETS_DIR / "manifest.json"
+LICHESS_DB = HERE / "trainingdata" / "lichess_db_eval.jsonl.zst"
+
+
+def label(fens_file: Path, out: Path) -> int:
+    """Label a FEN file with local Stockfish. Returns positions written.
+
+    Returns 0 without invoking the labeler when ``fens_file`` does not exist, which is
+    how optional inputs such as the self-play file are skipped.
+    """
+    if not fens_file.exists():
+        return 0
+    label_positions_with_stockfish(
+        str(fens_file), str(out), STOCKFISH_PATH,
+        batch_size=LABEL_BATCH, depth=STOCKFISH_DEPTH, num_workers=WORKERS,
+    )
+    return count_lines(out)
+
+
+def count_lines(path: Path) -> int:
+    """Return the number of lines in ``path``, or 0 when the file does not exist."""
+    if not path.exists():
+        return 0
+    with open(path, encoding="utf-8") as f:
+        return sum(1 for _ in f)
+
+
+def source_lichess(out: Path) -> int:
+    """Import up to LICHESS_POSITIONS positions from the Lichess eval DB into ``out``.
+
+    The DB is downloaded on first use. The transfer goes to a ``.part`` file that is
+    renamed only after it completes, so an interrupted download is never mistaken for
+    a usable DB on the next run. Returns the importer's result (positions written).
+    """
+    if not LICHESS_DB.exists():
+        print(f"Downloading Lichess eval DB → {LICHESS_DB} (large, one-time)...")
+        LICHESS_DB.parent.mkdir(parents=True, exist_ok=True)
+        partial = LICHESS_DB.with_name(LICHESS_DB.name + ".part")
+        urllib.request.urlretrieve(LICHESS_EVAL_URL, partial)
+        partial.replace(LICHESS_DB)
+    return import_lichess_evals(str(LICHESS_DB), str(out), max_positions=LICHESS_POSITIONS)
+
+
+def source_selfplay(out: Path) -> int:
+    """Label the self-play FENs in data/selfplay.txt (0 if that file is absent)."""
+    return label(DATA_DIR / "selfplay.txt", out)
+
+
+def source_tactical(out: Path) -> int:
+    """Extract up to TACTICAL_PUZZLES puzzle positions and label them (0 if no puzzle DB)."""
+    puzzle_csv = download_and_extract_puzzle_db(output_dir=str(HERE / "tactical_data"))
+    if puzzle_csv is None:
+        return 0
+    fens = WORK_DIR / "tactical_fens.txt"
+    extract_tactical_only(str(puzzle_csv), str(fens), max_puzzles=TACTICAL_PUZZLES)
+    return label(fens, out)
+
+
+def source_random(out: Path) -> int:
+    """Generate RANDOM_FILLER random-play positions and label them with Stockfish."""
+    fens = WORK_DIR / "random_fens.txt"
+    play_random_game_and_collect_positions(
+        str(fens), total_positions=RANDOM_FILLER, num_workers=WORKERS,
+    )
+    return label(fens, out)
+
+
+def build_sources(args) -> dict[str, Path]:
+    """Run each enabled source into its own labeled jsonl. Returns {name: path}.
+
+    Sources that write no positions are left out of the result.
+    """
+    WORK_DIR.mkdir(parents=True, exist_ok=True)
+    plan = [
+        ("lichess", args.lichess, source_lichess),
+        # Honour the USE_SELFPLAY config knob as well as the --no-selfplay flag.
+        ("selfplay", args.selfplay and USE_SELFPLAY, source_selfplay),
+        ("tactical", args.tactical, source_tactical),
+        ("random", args.random, source_random),
+    ]
+    outputs: dict[str, Path] = {}
+    for name, enabled, fn in plan:
+        if not enabled:
+            continue
+        out = WORK_DIR / f"{name}_labeled.jsonl"
+        out.unlink(missing_ok=True)
+        print(f"\n=== Source: {name} ===")
+        written = fn(out)
+        print(f"{name}: {written:,} labeled positions")
+        if written:
+            outputs[name] = out
+    return outputs
+
+
+def existing_fens() -> set[str]:
+    """FENs already present in the dataset, so growth stays deduplicated."""
+    seen: set[str] = set()
+    if not MANIFEST.exists():
+        return seen
+    manifest = json.loads(MANIFEST.read_text())
+    for shard in manifest.get("shards", []):
+        for rec in read_shard(SHARDS_DIR / shard["file"]):
+            seen.add(rec["fen"])
+    return seen
+
+
+def read_shard(path: Path):
+    """Yield the JSON records of one zstd-compressed jsonl shard, streaming."""
+    dctx = zstd.ZstdDecompressor()
+    with open(path, "rb") as fh, dctx.stream_reader(fh) as reader:
+        for line in iter_text(reader):
+            yield json.loads(line)
+
+
+def iter_text(reader):
+    """Yield UTF-8 decoded text lines from a binary stream."""
+    yield from io.TextIOWrapper(reader, encoding="utf-8")
+
+
+def merge_dedup(outputs: dict[str, Path], skip: set[str]):
+    """Merge all source jsonl, drop dupes (within batch + vs existing dataset).
+
+    Returns ``(records, per_source)``: the kept records, each tagged with its
+    ``source`` name, and the number of records kept per source. Blank lines are
+    ignored.
+    """
+    seen = set(skip)
+    records, per_source = [], {}
+    for name, path in outputs.items():
+        kept = 0
+        with open(path, encoding="utf-8") as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                rec = json.loads(line)
+                fen = rec["fen"]
+                if fen in seen:
+                    continue
+                seen.add(fen)
+                rec["source"] = name
+                records.append(rec)
+                kept += 1
+        per_source[name] = kept
+    return records, per_source
+
+
+def balance(records: list) -> list:
+    """Flatten the eval histogram: cap each bin at FACTOR x the uniform bin size.
+
+    ``records`` is shuffled in place so the survivors of an over-full bin are a random
+    sample. Evals outside [-1, 1] are clamped into the first/last bin.
+    """
+    if not records:
+        return records
+    cap = max(1, int(BALANCE_FACTOR * len(records) / BALANCE_BINS))
+    bins: dict[int, int] = {}
+    kept = []
+    random.shuffle(records)
+    for rec in records:
+        b = int((rec["eval"] + 1.0) / 2.0 * BALANCE_BINS)
+        # Clamp both ends; without the lower clamp an eval below -1 opened extra
+        # negative bins, each with its own cap.
+        b = max(0, min(BALANCE_BINS - 1, b))
+        if bins.get(b, 0) < cap:
+            bins[b] = bins.get(b, 0) + 1
+            kept.append(rec)
+    return kept
+
+
+def sha256(path: Path) -> str:
+    """Return the hex SHA-256 digest of a file, read in 1 MiB chunks."""
+    h = hashlib.sha256()
+    with open(path, "rb") as f:
+        for chunk in iter(lambda: f.read(1 << 20), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def write_shards(records: list, build_id: str) -> list[dict]:
+    """Write ``records`` as zstd jsonl shards of at most SHARD_SIZE positions each.
+
+    Returns one manifest entry (file, positions, sha256, build_id) per shard.
+    """
+    SHARDS_DIR.mkdir(parents=True, exist_ok=True)
+    cctx = zstd.ZstdCompressor(level=10)
+    entries = []
+    for i in range(0, len(records), SHARD_SIZE):
+        chunk = records[i : i + SHARD_SIZE]
+        name = f"{build_id}_{i // SHARD_SIZE:05d}.jsonl.zst"
+        path = SHARDS_DIR / name
+        with open(path, "wb") as fh, cctx.stream_writer(fh) as w:
+            for rec in chunk:
+                w.write((json.dumps(rec) + "\n").encode("utf-8"))
+        entries.append({"file": name, "positions": len(chunk),
+                        "sha256": sha256(path), "build_id": build_id})
+        print(f" wrote {name} ({len(chunk):,} positions)")
+    return entries
+
+
+def update_manifest(new_shards: list[dict], build: dict) -> None:
+    """Append a build and its shards to the manifest and bump the dataset version.
+
+    The manifest is written to a temporary file and then renamed over the old one,
+    so an interrupted run cannot leave a truncated manifest behind.
+    """
+    manifest = json.loads(MANIFEST.read_text()) if MANIFEST.exists() else {
+        "dataset_version": 0, "scale": 300.0, "builds": [], "shards": [],
+    }
+    manifest["dataset_version"] += 1
+    manifest["created"] = build["created"]
+    manifest["builds"].append(build)
+    manifest["shards"].extend(new_shards)
+    manifest["total_positions"] = sum(s["positions"] for s in manifest["shards"])
+    tmp = MANIFEST.with_name(MANIFEST.name + ".tmp")
+    tmp.write_text(json.dumps(manifest, indent=2))
+    tmp.replace(MANIFEST)
+    print(f"\nDataset version {manifest['dataset_version']}: "
+          f"{manifest['total_positions']:,} total positions across {len(manifest['shards'])} shards")
+
+
+def push() -> None:
+    """Copy the datasets directory to RCLONE_REMOTE; skip if rclone is not installed."""
+    # shutil.which needs no external `which` binary on the machine.
+    if shutil.which("rclone") is None:
+        print("rclone not found — skipping push.")
+        return
+    print(f"\nPushing {DATASETS_DIR} → {RCLONE_REMOTE} ...")
+    subprocess.run(["rclone", "copy", str(DATASETS_DIR), RCLONE_REMOTE, "--progress"], check=True)
+
+
+def parse_args():
+    """Parse the CLI: one --no-<name> switch per source, plus --no-push and --push-only."""
+    p = argparse.ArgumentParser(description="Build the entire NNUE dataset.")
+    for name in ("lichess", "selfplay", "tactical", "random", "push"):
+        p.add_argument(f"--no-{name}", dest=name, action="store_false")
+    p.add_argument("--push-only", action="store_true", help="Push the existing dataset, build nothing.")
+    return p.parse_args()
+
+
+def main() -> None:
+    """Build the enabled sources, merge/dedup/balance, write shards, update manifest, push."""
+    args = parse_args()
+    if args.push_only:
+        push()
+        return
+    build_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+    outputs = build_sources(args)
+    if not outputs:
+        print("No sources produced data — nothing to build.")
+        return
+
+    print("\n=== Merge / dedup / balance ===")
+    records, per_source = merge_dedup(outputs, existing_fens())
+    print(f"merged unique (new): {len(records):,}")
+    records = balance(records)
+    print(f"after balancing: {len(records):,}")
+    if not records:
+        # Nothing new survived dedup: do not bump the dataset version or push.
+        print("No new positions — dataset unchanged.")
+        return
+
+    new_shards = write_shards(records, build_id)
+    update_manifest(new_shards, {
+        "build_id": build_id,
+        "created": datetime.now(timezone.utc).isoformat(),
+        "stockfish_depth": STOCKFISH_DEPTH,
+        "sources": per_source,
+        "kept_after_balance": len(records),
+    })
+
+    if args.push:
+        push()
+    print("\nDone.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/official-bots/python/dataset.sh b/modules/official-bots/python/dataset.sh
new file mode 100644
index 0000000..bb6c6ee
--- /dev/null
+++ b/modules/official-bots/python/dataset.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Build the ENTIRE NNUE training dataset + push to Drive. One command.
+#
+#   ./dataset.sh                 # build everything + rclone push
+#   ./dataset.sh --no-push       # build only
+#   ./dataset.sh --no-lichess    # skip the large Lichess backbone
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+cd "$SCRIPT_DIR"
+
+PY="python3"
+if [[ -x "$SCRIPT_DIR/.venv/bin/python" ]]; then
+  PY="$SCRIPT_DIR/.venv/bin/python"
+fi
+
+exec "$PY" build_dataset.py "$@"
diff --git a/modules/official-bots/python/selfplay.sh b/modules/official-bots/python/selfplay.sh
new file mode 100644
index 0000000..c09d5d9
--- /dev/null
+++ b/modules/official-bots/python/selfplay.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Standalone bot self-play -> FENs for labeling. No microservices.
+#
+#   ./selfplay.sh                  # 500 games with the bundled net
+#   ./selfplay.sh 2000             # more games
+#   ./selfplay.sh 2000 path.nbai   # play with a specific net
+set -euo pipefail
+
+GAMES="${1:-500}"
+WEIGHTS="${2:-}"
+
+# Resolve the weights path before the cd below: a relative path is relative to the
+# caller's directory, and a path the JVM cannot find is silently ignored in favour of
+# the bundled net.
+if [[ -n "$WEIGHTS" ]]; then
+  if [[ ! -f "$WEIGHTS" ]]; then
+    echo "Weights file not found: $WEIGHTS" >&2
+    exit 1
+  fi
+  WEIGHTS="$(cd "$(dirname "$WEIGHTS")" && pwd)/$(basename "$WEIGHTS")"
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+OUT="$SCRIPT_DIR/data/selfplay.txt"
+cd "$REPO_ROOT"
+
+SP_ARGS="--games $GAMES --out $OUT"
+if [[ -n "$WEIGHTS" ]]; then
+  SP_ARGS="$SP_ARGS --weights $WEIGHTS"
+fi
+
+./gradlew -q :modules:official-bots:selfPlay -PspArgs="$SP_ARGS"
+echo "Self-play FENs -> $OUT"
diff --git a/modules/official-bots/python/src/train.py b/modules/official-bots/python/src/train.py
index 5ed5b21..fdf633a 100644
--- a/modules/official-bots/python/src/train.py
+++ b/modules/official-bots/python/src/train.py
@@ -14,6 +14,33 @@ from datetime import datetime, timedelta
 import re
 import numpy as np
 
+
+def _shard_files(data_file):
+    """Resolve a data path to a list of shard files: a single .jsonl/.jsonl.zst file, or a
+    directory searched recursively (.jsonl.zst shards win; .jsonl is used only if none exist)."""
+    p = Path(data_file)
+    if p.is_dir():
+        shards = sorted(p.rglob("*.jsonl.zst")) or sorted(p.rglob("*.jsonl"))
+        if not shards:
+            raise FileNotFoundError(f"No .jsonl/.jsonl.zst shards found under {p}")
+        print(f"Loading {len(shards)} shard(s) from {p}")
+        return shards
+    return [p]
+
+
+def _iter_dataset_lines(data_file):
+    """Yield UTF-8 text lines from every shard, transparently decompressing .zst shards."""
+    import io
+    for shard in _shard_files(data_file):
+        if str(shard).endswith(".zst"):
+            import zstandard as zstd
+            with open(shard, "rb") as fh, zstd.ZstdDecompressor().stream_reader(fh) as reader:
+                yield from io.TextIOWrapper(reader, encoding="utf-8")
+        else:
+            with open(shard, "r", encoding="utf-8") as fh:
+                yield from fh
+
+
 class NNUEDataset(Dataset):
     """Dataset of chess positions with evaluations."""
 
@@ -23,27 +50,26 @@ class NNUEDataset(Dataset):
         self.evals_raw = []
         self.is_normalized = None
 
-        with open(data_file, 'r') as f:
-            for line in f:
-                try:
-                    data = json.loads(line)
-                    fen = data['fen']
-                    eval_val = data['eval']
-                    self.positions.append(fen)
-                    self.evals.append(eval_val)
+        for line in _iter_dataset_lines(data_file):
+            try:
+                data = json.loads(line)
+                fen = data['fen']
+                eval_val = data['eval']
+                self.positions.append(fen)
+                self.evals.append(eval_val)
 
-                    # Check if normalized or raw
-                    if self.is_normalized is None:
-                        # If eval is in range [-1, 1], assume normalized
-                        self.is_normalized = abs(eval_val) <= 1.0
+                # Check if normalized or raw
+                if self.is_normalized is None:
+                    # If eval is in range [-1, 1], assume normalized
+                    self.is_normalized = abs(eval_val) <= 1.0
 
-                    # Store raw if available
-                    if 'eval_raw' in data:
-                        self.evals_raw.append(data['eval_raw'])
-                    else:
-                        self.evals_raw.append(eval_val)
-                except (json.JSONDecodeError, KeyError):
-                    pass
+                # Store raw if available
+                if 'eval_raw' in data:
+                    self.evals_raw.append(data['eval_raw'])
+                else:
+                    self.evals_raw.append(eval_val)
+            except (json.JSONDecodeError, KeyError):
+                pass
 
     def 
__len__(self):
         return len(self.positions)
diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/NNUEBot.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/NNUEBot.scala
index a37e89b..1faa1bf 100644
--- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/NNUEBot.scala
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/NNUEBot.scala
@@ -15,6 +15,7 @@ object NNUEBot:
       difficulty: BotDifficulty,
       rules: RuleSet = DefaultRules,
       book: Option[PolyglotBook] = None,
+      fixedMoveTimeMs: Option[Long] = None,
   ): Bot =
     val search = AlphaBetaSearch(rules, weights = EvaluationNNUE)
     context =>
@@ -28,7 +29,8 @@ object NNUEBot:
         else
           val scored = batchEvaluateRoot(rules, context, moves)
           val bestMove = scored.maxBy(_._2)._1
-          search.bestMoveWithTime(context, allocateTime(scored), blockedMoves, scored.toMap).orElse(Some(bestMove))
+          val budget = fixedMoveTimeMs.getOrElse(allocateTime(scored))
+          search.bestMoveWithTime(context, budget, blockedMoves, scored.toMap).orElse(Some(bestMove))
       }
 
   private def batchEvaluateRoot(rules: RuleSet, context: GameContext, moves: List[Move]): List[(Move, Int)] =
diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NbaiLoader.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NbaiLoader.scala
index 268e98f..5465b4d 100644
--- a/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NbaiLoader.scala
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/bots/nnue/NbaiLoader.scala
@@ -1,6 +1,7 @@
 package de.nowchess.bot.bots.nnue
 
 import java.io.InputStream
+import java.nio.file.{Files, Path}
 import java.nio.{ByteBuffer, ByteOrder}
 import java.nio.charset.StandardCharsets
 
@@ -17,13 +18,28 @@ object NbaiLoader:
     val weights = descs.map(_ => readLayerWeights(buf))
     NbaiModel(metadata, descs, weights)
 
-  /** Tries /nnue_weights.nbai on the classpath; falls back to migrating /nnue_weights.bin. */
+  /** Loads weights from the `nnue.weights` system property if it points at a readable file; otherwise tries
+    * /nnue_weights.nbai on the classpath, falling back to migrating /nnue_weights.bin.
+    */
   def loadDefault(): NbaiModel =
-    Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
-      case Some(s) =>
+    overrideModel().getOrElse {
+      Option(getClass.getResourceAsStream("/nnue_weights.nbai")) match
+        case Some(s) =>
+          try load(s)
+          finally s.close()
+        case None => NbaiMigrator.migrateFromBin()
+    }
+
+  private def overrideModel(): Option[NbaiModel] =
+    sys.props
+      .get("nnue.weights")
+      .map(Path.of(_))
+      .filter(Files.isRegularFile(_))
+      .map { path =>
+        val s = Files.newInputStream(path)
         try load(s)
         finally s.close()
-      case None => NbaiMigrator.migrateFromBin()
+      }
 
   private def checkHeader(buf: ByteBuffer): Unit =
     val magic = buf.getInt()
diff --git a/modules/official-bots/src/main/scala/de/nowchess/bot/selfplay/SelfPlayMain.scala b/modules/official-bots/src/main/scala/de/nowchess/bot/selfplay/SelfPlayMain.scala
new file mode 100644
index 0000000..e112df0
--- /dev/null
+++ b/modules/official-bots/src/main/scala/de/nowchess/bot/selfplay/SelfPlayMain.scala
@@ -0,0 +1,128 @@
+package de.nowchess.bot.selfplay
+
+import de.nowchess.api.game.GameContext
+import de.nowchess.api.move.Move
+import de.nowchess.api.rules.RuleSet
+import de.nowchess.bot.BotDifficulty
+import de.nowchess.bot.bots.NNUEBot
+import de.nowchess.io.fen.FenExporter
+import de.nowchess.rules.sets.DefaultRules
+
+import java.io.{BufferedWriter, FileWriter}
+import java.nio.file.{Files, Path}
+import scala.collection.mutable
+import scala.util.Random
+
+/** Standalone self-play harness. Runs NNUEBot against itself from randomised openings and writes the visited positions
+  * as one FEN per line — the input format expected by the Python labeler. No microservices.
+  *
+  * Games run sequentially because EvaluationNNUE holds a shared accumulator; the small per-move time budget keeps
+  * throughput high. Stockfish relabels every position later, so shallow self-play search is sufficient.
+  */
+object SelfPlayMain:
+
+  /** Command-line options; every field has a default, so all flags are optional. */
+  private case class Config(
+      games: Int = 500,
+      out: String = "modules/official-bots/python/data/selfplay.txt",
+      weights: Option[String] = None,
+      moveTimeMs: Long = 50L,
+      randomPlies: Int = 8,
+      maxPlies: Int = 200,
+      seed: Long = System.nanoTime(),
+  )
+
+  /** Parses the flags, plays `games` self-play games and writes each newly seen FEN to `out`. */
+  def main(args: Array[String]): Unit =
+    val config = parse(args.toList, Config())
+    config.weights.foreach { w =>
+      // NbaiLoader ignores an `nnue.weights` value that is not a regular file and loads the bundled net instead,
+      // so a mistyped path would silently produce data from the wrong network. Fail fast.
+      if !Files.isRegularFile(Path.of(w)) then
+        System.err.println(s"Weights file not found: $w")
+        sys.exit(1)
+      System.setProperty("nnue.weights", w)
+    }
+
+    val rules = DefaultRules
+    val bot = NNUEBot(BotDifficulty.Hard, rules, fixedMoveTimeMs = Some(config.moveTimeMs))
+    val rng = new Random(config.seed)
+    val seen = mutable.HashSet.empty[String]
+
+    Files.createDirectories(Path.of(config.out).toAbsolutePath.getParent)
+    val writer = new BufferedWriter(new FileWriter(config.out))
+    try
+      var game = 0
+      while game < config.games do
+        playGame(rules, bot, rng, config, seen, writer)
+        game += 1
+        if game % 25 == 0 then
+          writer.flush()
+          println(s"games=$game/${config.games} positions=${seen.size}")
+    finally writer.close()
+    println(s"Done. ${seen.size} unique positions -> ${config.out}")
+
+  /** Plays one game: a random opening, then bot moves until a terminal position, a bot without a move, or `maxPlies`.
+    * Every position reached by a bot move is passed to `record`.
+    */
+  private def playGame(
+      rules: RuleSet,
+      bot: GameContext => Option[Move],
+      rng: Random,
+      config: Config,
+      seen: mutable.HashSet[String],
+      writer: BufferedWriter,
+  ): Unit =
+    randomOpening(rules, rng, config.randomPlies, GameContext.initial) match
+      case None => ()
+      case Some(start) =>
+        var ctx = start
+        var plies = config.randomPlies
+        var live = true
+        while live && plies < config.maxPlies do
+          if isTerminal(rules, ctx) then live = false
+          else
+            bot(ctx) match
+              case None => live = false
+              case Some(move) =>
+                ctx = rules.applyMove(ctx)(move)
+                plies += 1
+                record(rules, ctx, seen, writer)
+
+  /** Applies `plies` random legal moves from `start`; None if a position with no legal moves is reached first. */
+  private def randomOpening(rules: RuleSet, rng: Random, plies: Int, start: GameContext): Option[GameContext] =
+    var ctx = start
+    var i = 0
+    while i < plies do
+      val legal = rules.allLegalMoves(ctx)
+      if legal.isEmpty then return None
+      ctx = rules.applyMove(ctx)(legal(rng.nextInt(legal.size)))
+      i += 1
+    Some(ctx)
+
+  /** Writes the position's FEN once (deduplicated via `seen`), skipping positions in check and terminal positions. */
+  private def record(rules: RuleSet, ctx: GameContext, seen: mutable.HashSet[String], writer: BufferedWriter): Unit =
+    if !rules.isCheck(ctx) && !isTerminal(rules, ctx) then
+      val fen = FenExporter.gameContextToFen(ctx)
+      if seen.add(fen) then
+        writer.write(fen)
+        writer.newLine()
+
+  /** True when there are no legal moves or a draw rule (insufficient material, fifty-move, threefold) applies. */
+  private def isTerminal(rules: RuleSet, ctx: GameContext): Boolean =
+    rules.allLegalMoves(ctx).isEmpty ||
+      rules.isInsufficientMaterial(ctx) ||
+      rules.isFiftyMoveRule(ctx) ||
+      rules.isThreefoldRepetition(ctx)
+
+  /** Folds the argument list into a Config; unrecognised arguments are reported and skipped. */
+  private def parse(args: List[String], acc: Config): Config = args match
+    case "--games" :: v :: rest => parse(rest, acc.copy(games = v.toInt))
+    case "--out" :: v :: rest => parse(rest, acc.copy(out = v))
+    case "--weights" :: v :: rest => parse(rest, acc.copy(weights = Some(v)))
+    case "--move-ms" :: v :: rest => parse(rest, acc.copy(moveTimeMs = v.toLong))
+    case "--random-plies" :: v :: rest => parse(rest, acc.copy(randomPlies = v.toInt))
+    case "--max-plies" :: v :: rest => parse(rest, acc.copy(maxPlies = v.toInt))
+    case "--seed" :: v :: rest => parse(rest, acc.copy(seed = v.toLong))
+    case Nil => acc
+    case unknown :: rest => println(s"Ignoring unknown arg: $unknown"); parse(rest, acc)