feat(analytics): always write results to PostgreSQL regardless of input source
Build & Test (NowChessSystems) TeamCity build failed
Build & Test (NowChessSystems) TeamCity build failed
Remove isPgnMode JDBC guard from all 4 original jobs so staging (Lichess PGN mode) and production (game_records JDBC mode) both persist analytics results to the DB. Add JDBC write-back to all 7 new jobs: - GameLengthJob → analytics_game_length_distribution + analytics_game_length_by_result - ColorAdvantageJob → analytics_color_advantage - EloDistributionJob → analytics_elo_distribution - TimeControlJob → analytics_time_control_stats - DailyActivityJob → analytics_hourly_activity + analytics_weekly_activity - RatingMismatchJob → analytics_rating_mismatch - TerminationStatsJob → analytics_termination_stats Add analytics_component_sizes JDBC write to PlayerGraphJob. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -60,3 +60,13 @@ object ColorAdvantageJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/color_advantage")
|
||||
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_color_advantage")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -49,6 +49,16 @@ object DailyActivityJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/hourly_activity")
|
||||
|
||||
hourly.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_hourly_activity")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
val dayName = F
|
||||
.when(F.col("dow") === 1, "Sunday")
|
||||
.when(F.col("dow") === 2, "Monday")
|
||||
@@ -77,3 +87,13 @@ object DailyActivityJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/weekly_activity")
|
||||
|
||||
weekly.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_weekly_activity")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -46,3 +46,13 @@ object EloDistributionJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/elo_distribution")
|
||||
|
||||
distribution.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_elo_distribution")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -76,6 +76,16 @@ object GameLengthJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/game_length_distribution")
|
||||
|
||||
distribution.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_game_length_distribution")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
val byResult = games
|
||||
.groupBy("result")
|
||||
.agg(
|
||||
@@ -89,3 +99,13 @@ object GameLengthJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/game_length_by_result")
|
||||
|
||||
byResult.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_game_length_by_result")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -72,16 +72,15 @@ object OpeningBookJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/opening_book_top1000")
|
||||
|
||||
if !GameSource.isPgnMode then
|
||||
top1000.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_opening_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
top1000.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_opening_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
/** Extracts the first `maxPlies` moves from a PGN column as a space-separated string.
|
||||
*
|
||||
|
||||
@@ -119,26 +119,25 @@ object PlayerClusteringJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/cluster_archetypes")
|
||||
|
||||
if !GameSource.isPgnMode then
|
||||
clustersDf.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_clusters")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
clustersDf.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_clusters")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
archetypes.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_cluster_archetypes")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
archetypes.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_cluster_archetypes")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
private def buildPlayerStats(games: org.apache.spark.sql.DataFrame): org.apache.spark.sql.DataFrame =
|
||||
val asWhite = games.select(
|
||||
|
||||
@@ -109,16 +109,15 @@ object PlayerGraphJob:
|
||||
.mode("overwrite")
|
||||
.parquet(s"$outputDir/player_graph")
|
||||
|
||||
if !GameSource.isPgnMode then
|
||||
result.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_graph")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
result.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_graph")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
// How many players belong to each connected component?
|
||||
// A large dominant component + many singletons is the expected shape.
|
||||
@@ -135,6 +134,16 @@ object PlayerGraphJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/component_sizes")
|
||||
|
||||
componentSizes.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_component_sizes")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
// Build a two-column DataFrame (vertex_id: Long, valueCol: valueType) from an RDD.
|
||||
// Used to bridge GraphX RDD results into the DataFrame API without implicits.
|
||||
private def rddToFrame[T](
|
||||
|
||||
@@ -82,13 +82,12 @@ object PlayerStatsJob:
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/player_stats_csv")
|
||||
|
||||
if !GameSource.isPgnMode then
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_player_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -63,3 +63,13 @@ object RatingMismatchJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/rating_mismatch")
|
||||
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_rating_mismatch")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -42,3 +42,13 @@ object TerminationStatsJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/termination_stats")
|
||||
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_termination_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
@@ -56,3 +56,13 @@ object TimeControlJob:
|
||||
.mode("overwrite")
|
||||
.option("header", "true")
|
||||
.csv(s"$outputDir/time_control_stats")
|
||||
|
||||
stats.write
|
||||
.mode("overwrite")
|
||||
.format("jdbc")
|
||||
.option("url", jdbcUrl)
|
||||
.option("dbtable", "analytics_time_control_stats")
|
||||
.option("user", dbUser)
|
||||
.option("password", dbPass)
|
||||
.option("driver", "org.postgresql.Driver")
|
||||
.save()
|
||||
|
||||
Reference in New Issue
Block a user