From 710a7dbebbb13a30027d33a054d23846d37c450a Mon Sep 17 00:00:00 2001
From: Edward Shen
Date: Sun, 5 Apr 2020 20:18:48 -0400
Subject: [PATCH] make qlearning train_agent specific

---
 src/actors/genetic.rs   | 109 +++++++++++++++++++++++++++++++++++-----
 src/actors/qlearning.rs |  19 ++++++-
 src/cli.rs              |   7 ---
 src/main.rs             |  46 ++++++-----------
 4 files changed, 130 insertions(+), 51 deletions(-)

diff --git a/src/actors/genetic.rs b/src/actors/genetic.rs
index 2a5965a..43b3b78 100644
--- a/src/actors/genetic.rs
+++ b/src/actors/genetic.rs
@@ -1,6 +1,10 @@
 // https://codemyroad.wordpress.com/2013/04/14/tetris-ai-the-near-perfect-player/
 
-use super::Actor;
+use super::{Actor, State};
+use crate::{
+    game::Action,
+    playfield::{PLAYFIELD_HEIGHT, PLAYFIELD_WIDTH},
+};
 
 use rand::rngs::SmallRng;
 use rand::Rng;
@@ -11,6 +15,17 @@ pub struct Parameters {
     complete_lines: f64,
 }
 
+impl Default for Parameters {
+    fn default() -> Self {
+        Self {
+            total_height: 1.0,
+            bumpiness: 1.0,
+            holes: 1.0,
+            complete_lines: 1.0,
+        }
+    }
+}
+
 impl Parameters {
     fn mutate(mut self, rng: &mut SmallRng) {
         let mutation_amt = rng.gen_range(-0.2, 0.2);
@@ -33,25 +48,93 @@ impl Parameters {
         self.holes /= normalization_factor;
         self.complete_lines /= normalization_factor;
     }
+
+    fn dot_multiply(&self, other: &Self) -> f64 {
+        self.total_height * other.total_height
+            + self.bumpiness * other.bumpiness
+            + self.holes * other.holes
+            + self.complete_lines * other.complete_lines
+    }
 }
 
-pub struct GeneticHeuristicAgent {}
+pub struct GeneticHeuristicAgent {
+    params: Parameters,
+}
+
+impl Default for GeneticHeuristicAgent {
+    fn default() -> Self {
+        Self {
+            params: Parameters::default(),
+        }
+    }
+}
+
+impl GeneticHeuristicAgent {
+    fn extract_features_from_state(state: &State) -> Parameters {
+        let mut heights = [None; PLAYFIELD_WIDTH];
+        for r in 0..PLAYFIELD_HEIGHT {
+            for c in 0..PLAYFIELD_WIDTH {
+                if heights[c].is_none() && state.matrix[r][c].is_some() {
+                    heights[c] = Some(PLAYFIELD_HEIGHT - r);
+                }
+            }
+        }
+
+        let total_height = heights
+            .iter()
+            .map(|o| o.unwrap_or_else(|| 0))
+            .sum::<usize>() as f64;
+
+        let bumpiness = heights
+            .iter()
+            .map(|o| o.unwrap_or_else(|| 0) as isize)
+            .fold((0, 0), |(acc, prev), cur| (acc + (prev - cur).abs(), cur))
+            .0 as f64;
+
+        let complete_lines = state
+            .matrix
+            .iter()
+            .map(|row| row.iter().all(Option::is_some))
+            .map(|c| if c { 1.0 } else { 0.0 })
+            .sum::<f64>();
+
+        let mut holes = 0;
+        for r in 1..PLAYFIELD_HEIGHT {
+            for c in 0..PLAYFIELD_WIDTH {
+                if state.matrix[r][c].is_none() && state.matrix[r - 1][c].is_some() {
+                    holes += 1;
+                }
+            }
+        }
+
+        Parameters {
+            total_height,
+            bumpiness,
+            complete_lines,
+            holes: holes as f64,
+        }
+    }
+
+    fn get_heuristic(&self, state: &State, action: &Action) -> f64 {
+        todo!();
+    }
+}
 
 impl Actor for GeneticHeuristicAgent {
-    fn get_action(
-        &self,
-        rng: &mut SmallRng,
-        state: &super::State,
-        legal_actions: &[crate::game::Action],
-    ) -> crate::game::Action {
-        unimplemented!()
+    fn get_action(&self, rng: &mut SmallRng, state: &State, legal_actions: &[Action]) -> Action {
+        *legal_actions
+            .iter()
+            .map(|action| (action, self.get_heuristic(state, action)))
+            .max_by_key(|(action, heuristic)| (heuristic * 1_000_00.0) as usize)
+            .unwrap()
+            .0
     }
 
     fn update(
         &mut self,
-        state: super::State,
-        action: crate::game::Action,
-        next_state: super::State,
-        next_legal_actions: &[crate::game::Action],
+        state: State,
+        action: Action,
+        next_state: State,
+        next_legal_actions: &[Action],
         reward: f64,
     ) {
         unimplemented!()
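
Note (editor's sketch, not part of the patch): `get_heuristic` is left as `todo!()` above, but the pieces it needs land in this same diff. A minimal wiring, assuming the weights in `self.params` are dotted with the features of the evaluated state (simulating the board after `action` is still an open TODO, so the current state stands in for it here):

    // Hypothetical sketch only. Dots the agent's evolved weights with the
    // extracted features; `_action` is unused until state simulation exists.
    fn get_heuristic(&self, state: &State, _action: &Action) -> f64 {
        let features = Self::extract_features_from_state(state);
        self.params.dot_multiply(&features)
    }
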
diff --git a/src/actors/qlearning.rs b/src/actors/qlearning.rs
index bc3ccd5..d9d9b0f 100644
--- a/src/actors/qlearning.rs
+++ b/src/actors/qlearning.rs
@@ -1,5 +1,6 @@
 use crate::actors::{Actor, State};
 use crate::{
+    cli::Train,
     game::{Action, Controllable, Game, Tickable},
     playfield::{PLAYFIELD_HEIGHT, PLAYFIELD_WIDTH},
 };
@@ -253,10 +254,18 @@ impl Actor for ApproximateQLearning {
     }
 }
 
-pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>) -> Box<dyn Actor> {
+pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>, opts: &Train) -> Box<dyn Actor> {
     let mut rng = SmallRng::from_entropy();
     let mut avg = 0.0;
 
+    actor.set_learning_rate(opts.learning_rate);
+    actor.set_discount_rate(opts.discount_rate);
+    actor.set_exploration_prob(opts.exploration_prob);
+    info!(
+        "Training an actor with learning_rate = {}, discount_rate = {}, exploration_rate = {}",
+        opts.learning_rate, opts.discount_rate, opts.exploration_prob
+    );
+
     for i in (0..episodes).progress() {
         if i != 0 && i % (episodes / 10) == 0 {
             info!("Last {} scores avg: {}", (episodes / 10), avg);
@@ -300,5 +309,13 @@ pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>) -> Box<dyn Actor
         avg += game.score() as f64 / (episodes / 10) as f64;
     }
 
+    if opts.no_explore_during_evaluation {
+        actor.set_exploration_prob(0.0);
+    }
+
+    if opts.no_learn_during_evaluation {
+        actor.set_learning_rate(0.0);
+    }
+
     actor
 }
diff --git a/src/cli.rs b/src/cli.rs
index 913b09c..cdfc8ef 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -89,10 +89,3 @@ pub fn init_verbosity(opts: &Opts) -> Result<(), Box<dyn std::error::Error>> {
 
     Ok(())
 }
-
-pub fn get_actor(agent: Agent) -> Box<dyn Actor> {
-    match agent {
-        Agent::QLearning => Box::new(qlearning::QLearningAgent::default()),
-        Agent::ApproximateQLearning => Box::new(qlearning::ApproximateQLearning::default()),
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 2df9b44..113542a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,7 +6,6 @@ use graphics::standard_renderer;
 use graphics::COLOR_BACKGROUND;
 use indicatif::ProgressIterator;
 use log::{debug, info, trace};
-use qlearning::train_actor;
 use rand::SeedableRng;
 use sdl2::event::Event;
 use sdl2::keyboard::Keycode;
@@ -29,37 +28,24 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let opts = crate::cli::Opts::parse();
     init_verbosity(&opts)?;
 
-    let mut actor = None;
-    match opts.subcmd {
-        SubCommand::Play(sub_opts) => {}
-        SubCommand::Train(sub_opts) => {
-            let mut to_train = get_actor(sub_opts.agent);
-            to_train.set_learning_rate(sub_opts.learning_rate);
-            to_train.set_discount_rate(sub_opts.discount_rate);
-            to_train.set_exploration_prob(sub_opts.exploration_prob);
+    let agent = match opts.subcmd {
+        SubCommand::Play(sub_opts) => None,
+        SubCommand::Train(sub_opts) => Some(match sub_opts.agent {
+            Agent::QLearning => qlearning::train_actor(
+                sub_opts.episodes,
+                Box::new(qlearning::QLearningAgent::default()),
+                &sub_opts,
+            ),
+            Agent::ApproximateQLearning => qlearning::train_actor(
+                sub_opts.episodes,
+                Box::new(qlearning::ApproximateQLearning::default()),
+                &sub_opts,
+            ),
+        }),
+    };
 
-            info!(
-                "Training an actor with learning_rate = {}, discount_rate = {}, exploration_rate = {}",
-                sub_opts.learning_rate,
-                sub_opts.discount_rate,
-                sub_opts.exploration_prob
-            );
-            let mut trained_actor = train_actor(sub_opts.episodes, to_train);
-            if sub_opts.no_explore_during_evaluation {
-                trained_actor.set_exploration_prob(0.0);
-            }
-
-            if sub_opts.no_learn_during_evaluation {
-                trained_actor.set_learning_rate(0.0);
-            }
-
-            actor = Some(trained_actor);
-        }
-    }
-
-    play_game(actor).await?;
-    Ok(())
+    play_game(agent).await
 }
 
 async fn play_game(mut actor: Option<Box<dyn Actor>>) -> Result<(), Box<dyn std::error::Error>> {
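
A note on the new signature: `train_actor` now reads its hyperparameters and evaluation flags from `cli::Train` instead of having main.rs apply them one by one. The struct itself is not touched by this diff; for reference, a hypothetical sketch of the fields it must expose (names taken from the calls above, everything else assumed about src/cli.rs):

    // Hypothetical sketch only: the fields of cli::Train that the new
    // train_actor(episodes, actor, opts) path depends on. The real struct
    // also carries whatever clap attributes the rest of the CLI uses.
    pub struct Train {
        pub agent: Agent,                       // read in main.rs to pick the actor
        pub episodes: usize,                    // passed as the first argument
        pub learning_rate: f64,                 // applied via set_learning_rate
        pub discount_rate: f64,                 // applied via set_discount_rate
        pub exploration_prob: f64,              // applied via set_exploration_prob
        pub no_explore_during_evaluation: bool, // zeroes exploration after training
        pub no_learn_during_evaluation: bool,   // zeroes the learning rate after training
    }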