From deb74da5525fb27f1aa2e9933dbf0d8b59b4f795 Mon Sep 17 00:00:00 2001
From: Edward Shen
Date: Sun, 5 Apr 2020 23:39:19 -0400
Subject: [PATCH] much refactor

---
 .gitignore              |   2 +
 src/actors/genetic.rs   | 116 +++++++++++++++++++++++++++++++---------
 src/actors/mod.rs       |  28 +++++-----
 src/actors/qlearning.rs |  73 +++++++++++++++----------
 src/cli.rs              |   1 +
 src/game.rs             |  23 +++++++-
 src/main.rs             |  21 ++++----
 7 files changed, 183 insertions(+), 81 deletions(-)

diff --git a/.gitignore b/.gitignore
index 53eaa21..32fdbc8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 /target
 **/*.rs.bk
+flamegraph.svg
+perf.*
diff --git a/src/actors/genetic.rs b/src/actors/genetic.rs
index 43b3b78..fce6799 100644
--- a/src/actors/genetic.rs
+++ b/src/actors/genetic.rs
@@ -2,12 +2,14 @@ use super::{Actor, State};
 use crate::{
-    game::Action,
+    cli::Train,
+    game::{Action, Controllable, Game},
     playfield::{PLAYFIELD_HEIGHT, PLAYFIELD_WIDTH},
 };
 use rand::rngs::SmallRng;
-use rand::Rng;
+use rand::{seq::SliceRandom, Rng, SeedableRng};
 
+#[derive(Copy, Clone, Debug)]
 pub struct Parameters {
     total_height: f64,
     bumpiness: f64,
@@ -57,6 +59,7 @@ impl Parameters {
     }
 }
 
+#[derive(Clone, Copy)]
 pub struct GeneticHeuristicAgent {
     params: Parameters,
 }
@@ -115,40 +118,101 @@ impl GeneticHeuristicAgent {
         }
     }
 
-    fn get_heuristic(&self, state: &State, action: &Action) -> f64 {
+    fn get_heuristic(&self, game: &Game, action: &Action) -> f64 {
         todo!();
     }
+
+    pub fn breed(&self, self_fitness: u32, other: &Self, other_fitness: u32) -> Self {
+        let weight = (self_fitness + other_fitness) as f64;
+        let self_weight = self_fitness as f64 / weight;
+        let other_weight = other_fitness as f64 / weight;
+        Self {
+            params: Parameters {
+                total_height: self.params.total_height * self_weight
+                    + other.params.total_height * other_weight,
+                bumpiness: self.params.bumpiness * self_weight
+                    + other.params.bumpiness * other_weight,
+                holes: self.params.holes * self_weight
+                    + other.params.holes * other_weight,
+                complete_lines: self.params.complete_lines * self_weight
+                    + other.params.complete_lines * other_weight,
+            },
+        }
+    }
+
+    fn mutate(&mut self, rng: &mut SmallRng) {
+        self.params.mutate(rng);
+    }
 }
 
 impl Actor for GeneticHeuristicAgent {
-    fn get_action(&self, rng: &mut SmallRng, state: &State, legal_actions: &[Action]) -> Action {
+    fn get_action(&self, _: &mut SmallRng, game: &Game, legal_actions: &[Action]) -> Action {
         *legal_actions
             .iter()
-            .map(|action| (action, self.get_heuristic(state, action)))
-            .max_by_key(|(action, heuristic)| (heuristic * 1_000_00.0) as usize)
+            .map(|action| (action, self.get_heuristic(game, action)))
+            .max_by_key(|(_, heuristic)| (heuristic * 1_000_00.0) as usize)
             .unwrap()
             .0
     }
 
-    fn update(
-        &mut self,
-        state: State,
-        action: Action,
-        next_state: State,
-        next_legal_actions: &[Action],
-        reward: f64,
-    ) {
-        unimplemented!()
-    }
-    fn set_learning_rate(&mut self, learning_rate: f64) {
-        unimplemented!()
-    }
-    fn set_exploration_prob(&mut self, exploration_prob: f64) {
-        unimplemented!()
-    }
-    fn set_discount_rate(&mut self, discount_rate: f64) {
-        unimplemented!()
-    }
+
     fn dbg(&self) {
-        unimplemented!()
+        // unimplemented!()
     }
 }
+
+pub fn train_actor(opts: &Train) -> Box<dyn Actor> {
+    let mut rng = SmallRng::from_entropy();
+    let mut population = vec![(GeneticHeuristicAgent::default(), 0); 1000];
+
+    for _ in 0..opts.episodes {
+        let mut new_population: Vec<(GeneticHeuristicAgent, u32)> = population
+            .iter()
+            .map(|(agent, _)| {
+                let mut fitness = 0;
+                for _ in 0..100 {
+                    let mut game = Game::default();
+                    game.set_piece_limit(500);
+                    while (&game).is_game_over().is_none() {
+                        super::apply_action_to_game(
+                            agent.get_action(
+                                &mut rng,
+                                &game.clone().into(),
+                                &game.get_legal_actions(),
+                            ),
+                            &mut game,
+                        )
+                    }
+                    fitness += game.line_clears;
+                }
+                (*agent, fitness)
+            })
+            .collect::<Vec<_>>();
+
+        let mut breeded_population: Vec<(GeneticHeuristicAgent, u32)> = Vec::with_capacity(300);
+        for _ in 0..3 {
+            let mut random_selection = new_population
+                .choose_multiple(&mut rng, 100)
+                .collect::<Vec<_>>();
+            random_selection.sort_unstable_by(|e1, e2| e1.1.cmp(&e2.1));
+            let best_two = random_selection.iter().rev().take(2).collect::<Vec<_>>();
+            let parent1 = best_two[0];
+            let parent2 = best_two[1];
+            for _ in 0..100 {
+                let breeded = parent1.0.breed(parent1.1, &parent2.0, parent2.1);
+                let mut cloned = breeded.clone();
+                if rng.gen::<f64>() < 0.05 {
+                    cloned.mutate(&mut rng);
+                }
+                breeded_population.push((cloned, 0));
+            }
+        }
+
+        new_population.sort_unstable_by_key(|e| e.1);
+
+        new_population.splice(..breeded_population.len(), breeded_population);
+
+        population = new_population;
+    }
+    population.sort_unstable_by_key(|e| e.1);
+    Box::new(population.iter().rev().next().unwrap().0)
+}
diff --git a/src/actors/mod.rs b/src/actors/mod.rs
index 867abda..90ee554 100644
--- a/src/actors/mod.rs
+++ b/src/actors/mod.rs
@@ -40,20 +40,7 @@ impl From<&Game> for State {
     }
 }
 
 pub trait Actor {
-    fn get_action(&self, rng: &mut SmallRng, state: &State, legal_actions: &[Action]) -> Action;
-
-    fn update(
-        &mut self,
-        state: State,
-        action: Action,
-        next_state: State,
-        next_legal_actions: &[Action],
-        reward: f64,
-    );
-
-    fn set_learning_rate(&mut self, learning_rate: f64);
-    fn set_exploration_prob(&mut self, exploration_prob: f64);
-    fn set_discount_rate(&mut self, discount_rate: f64);
+    fn get_action(&self, rng: &mut SmallRng, game: &Game, legal_actions: &[Action]) -> Action;
 
     fn dbg(&self);
 }
@@ -80,3 +67,16 @@ impl Predictable for Game {
         game
     }
 }
+
+pub fn apply_action_to_game(action: Action, game: &mut Game) {
+    match action {
+        Action::Nothing => (),
+        Action::MoveLeft => game.move_left(),
+        Action::MoveRight => game.move_right(),
+        Action::SoftDrop => game.move_down(),
+        Action::HardDrop => game.hard_drop(),
+        Action::Hold => game.hold(),
+        Action::RotateLeft => game.rotate_left(),
+        Action::RotateRight => game.rotate_right(),
+    }
+}
diff --git a/src/actors/qlearning.rs b/src/actors/qlearning.rs
index d9d9b0f..b5b2087 100644
--- a/src/actors/qlearning.rs
+++ b/src/actors/qlearning.rs
@@ -12,6 +12,21 @@ use rand::Rng;
 use rand::SeedableRng;
 use std::collections::HashMap;
 
+pub trait QLearningActor: Actor {
+    fn update(
+        &mut self,
+        state: State,
+        action: Action,
+        next_state: State,
+        next_legal_actions: &[Action],
+        reward: f64,
+    );
+
+    fn set_learning_rate(&mut self, learning_rate: f64);
+    fn set_exploration_prob(&mut self, exploration_prob: f64);
+    fn set_discount_rate(&mut self, discount_rate: f64);
+}
+
 pub struct QLearningAgent {
     pub learning_rate: f64,
     pub exploration_prob: f64,
@@ -64,14 +79,20 @@ impl QLearningAgent {
 impl Actor for QLearningAgent {
     // Because doing (Nothing) is in the set of legal actions, this will never
     // be empty
-    fn get_action(&self, rng: &mut SmallRng, state: &State, legal_actions: &[Action]) -> Action {
+    fn get_action(&self, rng: &mut SmallRng, game: &Game, legal_actions: &[Action]) -> Action {
         if rng.gen::<f64>() < self.exploration_prob {
             *legal_actions.choose(rng).unwrap()
         } else {
-            self.get_action_from_q_values(state, legal_actions)
+            self.get_action_from_q_values(&game.into(), legal_actions)
         }
     }
 
+    fn dbg(&self) {
+        debug!("Total states: {}", self.q_values.len());
+    }
+}
+
+impl QLearningActor for QLearningAgent {
     fn update(
         &mut self,
         state: State,
         action: Action,
         next_state: State,
         next_legal_actions: &[Action],
         reward: f64,
     ) {
@@ -94,10 +115,6 @@ impl Actor for QLearningAgent {
             .insert(action, new_q_val);
     }
 
-    fn dbg(&self) {
-        debug!("Total states: {}", self.q_values.len());
-    }
-
     fn set_learning_rate(&mut self, learning_rate: f64) {
         self.learning_rate = learning_rate;
     }
@@ -208,14 +225,20 @@ impl ApproximateQLearning {
     }
 }
 
 impl Actor for ApproximateQLearning {
-    fn get_action(&self, rng: &mut SmallRng, state: &State, legal_actions: &[Action]) -> Action {
+    fn get_action(&self, rng: &mut SmallRng, game: &Game, legal_actions: &[Action]) -> Action {
         if rng.gen::<f64>() < self.exploration_prob {
             *legal_actions.choose(rng).unwrap()
         } else {
-            self.get_action_from_q_values(state, legal_actions)
+            self.get_action_from_q_values(&game.into(), legal_actions)
         }
     }
 
+    fn dbg(&self) {
+        dbg!(&self.weights);
+    }
+}
+
+impl QLearningActor for ApproximateQLearning {
     fn update(
         &mut self,
         state: State,
@@ -248,15 +271,15 @@ impl Actor for ApproximateQLearning {
     fn set_discount_rate(&mut self, discount_rate: f64) {
         self.discount_rate = discount_rate;
     }
-
-    fn dbg(&self) {
-        dbg!(&self.weights);
-    }
 }
 
-pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>, opts: &Train) -> Box<dyn Actor> {
+pub fn train_actor<T: QLearningActor + 'static>(
+    mut actor: T,
+    opts: &Train,
+) -> Box<dyn Actor> {
     let mut rng = SmallRng::from_entropy();
     let mut avg = 0.0;
+    let episodes = opts.episodes;
 
     actor.set_learning_rate(opts.learning_rate);
     actor.set_discount_rate(opts.discount_rate);
@@ -274,20 +297,11 @@ pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>, opts: &Train) -> Box<dyn Actor> {
         }
         let mut game = Game::default();
         while (&game).is_game_over().is_none() {
-            let cur_state = (&game).into();
+            let cur_state = game.clone();
             let cur_score = game.score();
             let action = actor.get_action(&mut rng, &cur_state, &((&game).get_legal_actions()));
 
-            match action {
-                Action::Nothing => (),
-                Action::MoveLeft => game.move_left(),
-                Action::MoveRight => game.move_right(),
-                Action::SoftDrop => game.move_down(),
-                Action::HardDrop => game.hard_drop(),
-                Action::Hold => game.hold(),
-                Action::RotateLeft => game.rotate_left(),
-                Action::RotateRight => game.rotate_right(),
-            }
+            super::apply_action_to_game(action, &mut game);
 
             let new_state = (&game).into();
             let mut reward = game.score() as f64 - cur_score as f64;
@@ -301,7 +315,13 @@ pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>, opts: &Train) -> Box<dyn Actor> {
 
             let new_legal_actions = game.get_legal_actions();
 
-            actor.update(cur_state, action, new_state, &new_legal_actions, reward);
+            actor.update(
+                cur_state.into(),
+                action,
+                new_state,
+                &new_legal_actions,
+                reward,
+            );
 
             game.tick();
         }
@@ -316,6 +336,5 @@ pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>, opts: &Train) -> Box<dyn Actor> {
     if opts.no_learn_during_evaluation {
         actor.set_learning_rate(0.0);
     }
-
-    actor
+    Box::new(actor)
 }
diff --git a/src/cli.rs b/src/cli.rs
index cdfc8ef..3070d49 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -74,6 +74,7 @@ arg_enum! {
     pub enum Agent {
         QLearning,
         ApproximateQLearning,
+        HeuristicGenetic
     }
 }
diff --git a/src/game.rs b/src/game.rs
index 35ae74b..2232e54 100644
--- a/src/game.rs
+++ b/src/game.rs
@@ -16,6 +16,7 @@ const LINE_CLEAR_DELAY: u64 = TICKS_PER_SECOND as u64 * 41 / 60;
 pub enum LossReason {
     TopOut,
     LockOut,
+    PieceLimitReached,
     BlockOut(Position),
 }
 
@@ -35,6 +36,9 @@ pub struct Game {
     /// bonus is needed.
     last_clear_action: ClearAction,
     pub line_clears: u32,
+    // used if we set a limit on how long a game can last.
+    pieces_placed: usize,
+    piece_limit: usize,
 }
 
 impl fmt::Debug for Game {
@@ -59,6 +63,8 @@ impl Default for Game {
             is_game_over: None,
             last_clear_action: ClearAction::Single, // Doesn't matter what it's initialized to
             line_clears: 0,
+            pieces_placed: 0,
+            piece_limit: 0,
         }
     }
 }
@@ -168,10 +174,18 @@ impl Game {
         // It's possible that the player moved the piece in the meantime.
         if !self.playfield.can_active_piece_move_down() {
             let positions = self.playfield.lock_active_piece();
+            if self.pieces_placed < self.piece_limit {
+                self.pieces_placed += 1;
+                if self.pieces_placed >= self.piece_limit {
+                    trace!("Loss due to piece limit!");
+                    self.is_game_over = Some(LossReason::PieceLimitReached);
+                }
+            }
+
             self.is_game_over = self.is_game_over.or_else(|| {
                 if positions.iter().map(|p| p.y).all(|y| y < 20) {
-                    trace!("Loss due to topout! {:?}", positions);
-                    Some(LossReason::TopOut)
+                    trace!("Loss due to lockout! {:?}", positions);
+                    Some(LossReason::LockOut)
                 } else {
                     None
                 }
@@ -183,6 +197,7 @@ impl Game {
             self.line_clears += cleared_lines as u32;
             self.score += (cleared_lines * 100 * self.level as usize) as u32;
             self.level = (self.line_clears / 10) as u8;
+            self.playfield.active_piece = None;
 
             self.next_spawn_tick = self.tick + LINE_CLEAR_DELAY;
         } else {
@@ -195,6 +210,10 @@ impl Game {
         }
     }
 
+    pub fn set_piece_limit(&mut self, size: usize) {
+        self.piece_limit = size;
+    }
+
     pub fn playfield(&self) -> &PlayField {
         &self.playfield
     }
diff --git a/src/main.rs b/src/main.rs
index 113542a..8f4e82b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,18 +30,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     init_verbosity(&opts)?;
 
     let agent = match opts.subcmd {
-        SubCommand::Play(sub_opts) => None,
+        SubCommand::Play(_) => None,
         SubCommand::Train(sub_opts) => Some(match sub_opts.agent {
-            Agent::QLearning => qlearning::train_actor(
-                sub_opts.episodes,
-                Box::new(qlearning::QLearningAgent::default()),
-                &sub_opts,
-            ),
-            Agent::ApproximateQLearning => qlearning::train_actor(
-                sub_opts.episodes,
-                Box::new(qlearning::ApproximateQLearning::default()),
-                &sub_opts,
-            ),
+            Agent::QLearning => {
+                qlearning::train_actor(qlearning::QLearningAgent::default(), &sub_opts)
+            }
+            Agent::ApproximateQLearning => {
+                qlearning::train_actor(qlearning::ApproximateQLearning::default(), &sub_opts)
+            }
+            Agent::HeuristicGenetic => genetic::train_actor(&sub_opts),
         }),
     };
 
@@ -70,7 +67,7 @@ async fn play_game(mut actor: Option<Box<dyn Actor>>) -> Result<(), Box<dyn std::error::Error>> {
 (),
     }
 
-    let cur_state = (&game).into();
+    let cur_state = game.clone();
 
     // If there's an actor, the player action will get overridden. If not,
     // then then the player action falls through, if there is one. This is
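
The genetic trainer added in this patch boils down to tournament selection plus fitness-weighted crossover. The standalone Rust sketch below is appended for illustration only and is not part of the patch: Params, crossover, and tournament are made-up names, and the guard against two zero-fitness parents is an added assumption. It mirrors what GeneticHeuristicAgent::breed and the selection loop in genetic::train_actor do: sample a random tournament from the population, keep its two fittest members, and average their heuristic weights in proportion to fitness. It uses the rand crate, which the project already depends on.

// Illustrative sketch only -- not part of the patch.
use rand::seq::SliceRandom;

#[derive(Clone, Copy, Debug)]
struct Params {
    total_height: f64,
    bumpiness: f64,
    holes: f64,
    complete_lines: f64,
}

// Fitness-weighted average of two parents, mirroring GeneticHeuristicAgent::breed.
fn crossover(a: &(Params, u32), b: &(Params, u32)) -> Params {
    let total = (a.1 + b.1) as f64;
    // If both parents have zero fitness, fall back to an even split (added assumption).
    let (wa, wb) = if total == 0.0 {
        (0.5, 0.5)
    } else {
        (a.1 as f64 / total, b.1 as f64 / total)
    };
    Params {
        total_height: a.0.total_height * wa + b.0.total_height * wb,
        bumpiness: a.0.bumpiness * wa + b.0.bumpiness * wb,
        holes: a.0.holes * wa + b.0.holes * wb,
        complete_lines: a.0.complete_lines * wa + b.0.complete_lines * wb,
    }
}

// One tournament: sample `k` individuals at random, keep the two fittest, breed them.
// train_actor above runs rounds of this to refill the weakest part of the population
// each generation, with a small chance of mutating each child.
fn tournament(population: &[(Params, u32)], k: usize) -> Params {
    let mut rng = rand::thread_rng();
    let mut sample: Vec<&(Params, u32)> = population.choose_multiple(&mut rng, k).collect();
    sample.sort_unstable_by_key(|e| e.1);
    crossover(sample[sample.len() - 1], sample[sample.len() - 2])
}

fn main() {
    let population = vec![
        (Params { total_height: -0.5, bumpiness: -0.2, holes: -0.7, complete_lines: 0.8 }, 10),
        (Params { total_height: -0.3, bumpiness: -0.4, holes: -0.6, complete_lines: 0.9 }, 30),
        (Params { total_height: -0.6, bumpiness: -0.1, holes: -0.8, complete_lines: 0.7 }, 20),
    ];
    println!("{:?}", tournament(&population, 3));
}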
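
For the new QLearningActor trait, the update the patch factors out is roughly the standard tabular Q-learning backup, Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max over a' of Q(s', a')), with the approximate variant doing the analogous update on per-feature weights. The sketch below is a simplified, self-contained illustration and not the crate's code: State and Action are stand-ins for the crate's types, and treating "no legal actions" as a zero-valued terminal state is an assumption. The learning_rate and discount_rate names match the fields shown in the diff.

// Illustrative sketch only -- not the crate's implementation.
use std::collections::HashMap;

type State = u64;
type Action = u8;

struct Agent {
    learning_rate: f64, // alpha
    discount_rate: f64, // gamma
    q_values: HashMap<State, HashMap<Action, f64>>,
}

impl Agent {
    // Current estimate of Q(s, a); unseen pairs default to 0.0.
    fn q(&self, state: State, action: Action) -> f64 {
        self.q_values
            .get(&state)
            .and_then(|m| m.get(&action))
            .copied()
            .unwrap_or(0.0)
    }

    // max over a' of Q(s', a'), or 0.0 if there are no legal actions.
    fn value(&self, state: State, legal_actions: &[Action]) -> f64 {
        let mut best: Option<f64> = None;
        for a in legal_actions {
            let q = self.q(state, *a);
            best = Some(best.map_or(q, |b| b.max(q)));
        }
        best.unwrap_or(0.0)
    }

    // Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    fn update(
        &mut self,
        state: State,
        action: Action,
        next_state: State,
        next_legal_actions: &[Action],
        reward: f64,
    ) {
        let sample = reward + self.discount_rate * self.value(next_state, next_legal_actions);
        let new_q =
            (1.0 - self.learning_rate) * self.q(state, action) + self.learning_rate * sample;
        self.q_values
            .entry(state)
            .or_insert_with(HashMap::new)
            .insert(action, new_q);
    }
}

fn main() {
    let mut agent = Agent {
        learning_rate: 0.1,
        discount_rate: 0.9,
        q_values: HashMap::new(),
    };
    agent.update(0, 1, 1, &[0, 1], 5.0);
    println!("Q(0, 1) = {}", agent.q(0, 1)); // 0.5 after one update
}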