feat: Implement dataset versioning and management for NNUE training data
This commit is contained in:
@@ -17,7 +17,7 @@ from generate import play_random_game_and_collect_positions
|
||||
|
||||
def download_and_extract_puzzle_db(
|
||||
url: str = 'https://database.lichess.org/lichess_db_puzzle.csv.zst',
|
||||
output_dir: str = 'trainingdata'
|
||||
output_dir: str = 'tactical_data'
|
||||
):
|
||||
"""Download and extract the Lichess puzzle database."""
|
||||
output_path = Path(output_dir)
|
||||
@@ -141,6 +141,31 @@ def merge_positions(
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
|
||||
def extract_tactical_only(
|
||||
puzzle_csv: str,
|
||||
output_file: str,
|
||||
max_puzzles: int = 300_000
|
||||
) -> int:
|
||||
"""Extract tactical positions and save to file (no merge prompts).
|
||||
|
||||
Args:
|
||||
puzzle_csv: Path to Lichess puzzle CSV
|
||||
output_file: Where to save the FEN positions
|
||||
max_puzzles: Maximum puzzles to extract
|
||||
|
||||
Returns:
|
||||
Number of positions extracted
|
||||
"""
|
||||
print("Extracting tactical positions from puzzle database...")
|
||||
tactical_positions = extract_puzzle_positions(puzzle_csv, max_puzzles)
|
||||
|
||||
with open(output_file, 'w') as f:
|
||||
for fen in tactical_positions:
|
||||
f.write(fen + '\n')
|
||||
|
||||
return len(tactical_positions)
|
||||
|
||||
|
||||
def interactive_merge_positions(
|
||||
puzzle_csv: str,
|
||||
output_file: str = 'position.txt',
|
||||
|
||||
Reference in New Issue
Block a user