genetic algo base

master
Edward Shen 2020-04-05 19:36:00 -04:00
parent f3b48fbc85
commit a65f48f585
Signed by: edward
GPG Key ID: 19182661E818369F
6 changed files with 157 additions and 55 deletions

src/actors/genetic.rs Normal file
View File

@@ -0,0 +1,71 @@
// https://codemyroad.wordpress.com/2013/04/14/tetris-ai-the-near-perfect-player/
use super::Actor;
use rand::rngs::SmallRng;
use rand::Rng;
pub struct Parameters {
total_height: f64,
bumpiness: f64,
holes: f64,
complete_lines: f64,
}
impl Parameters {
fn mutate(&mut self, rng: &mut SmallRng) {
let mutation_amt = rng.gen_range(-0.2, 0.2);
match rng.gen_range(0, 4) {
0 => self.total_height += mutation_amt,
1 => self.bumpiness += mutation_amt,
2 => self.holes += mutation_amt,
3 => self.complete_lines += mutation_amt,
_ => unreachable!(),
}
let normalization_factor = (self.total_height.powi(2)
+ self.bumpiness.powi(2)
+ self.holes.powi(2)
+ self.complete_lines.powi(2))
.sqrt();
self.total_height /= normalization_factor;
self.bumpiness /= normalization_factor;
self.holes /= normalization_factor;
self.complete_lines /= normalization_factor;
}
}
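// Hypothetical sketch (not in this commit): the normalization above keeps only
// the relative magnitudes of the four weights meaningful, matching the linked
// article's unit-vector representation. The article tunes these weights with
// fitness-weighted crossover plus occasional mutation; a minimal crossover
// operator in that spirit could look like the following. The `crossover` name
// and the fitness arguments are assumptions.
impl Parameters {
    fn crossover(&self, other: &Self, self_fitness: f64, other_fitness: f64) -> Self {
        let total = self_fitness + other_fitness;
        // Fall back to an even blend if neither parent scored anything.
        let (a, b) = if total == 0.0 {
            (0.5, 0.5)
        } else {
            (self_fitness / total, other_fitness / total)
        };
        Self {
            total_height: a * self.total_height + b * other.total_height,
            bumpiness: a * self.bumpiness + b * other.bumpiness,
            holes: a * self.holes + b * other.holes,
            complete_lines: a * self.complete_lines + b * other.complete_lines,
        }
    }
}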
pub struct GeneticHeuristicAgent {}
impl Actor for GeneticHeuristicAgent {
fn get_action(
&self,
rng: &mut SmallRng,
state: &super::State,
legal_actions: &[crate::game::Action],
) -> crate::game::Action {
unimplemented!()
}
fn update(
&mut self,
state: super::State,
action: crate::game::Action,
next_state: super::State,
next_legal_actions: &[crate::game::Action],
reward: f64,
) {
unimplemented!()
}
fn set_learning_rate(&mut self, learning_rate: f64) {
unimplemented!()
}
fn set_exploration_prob(&mut self, exploration_prob: f64) {
unimplemented!()
}
fn set_discount_rate(&mut self, discount_rate: f64) {
unimplemented!()
}
fn dbg(&self) {
unimplemented!()
}
}
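// Hypothetical sketch (not in this commit): per the linked article, the agent
// scores each candidate placement as a weighted sum of four board features and
// picks the best one; get_action above is still a stub. The column heights,
// hole count, and completed-line count are assumed to come from inspecting a
// candidate playfield; those extraction helpers are not shown here.
impl Parameters {
    fn score(&self, column_heights: &[usize], holes: usize, complete_lines: usize) -> f64 {
        let total_height: usize = column_heights.iter().sum();
        // Bumpiness: sum of absolute differences between adjacent column heights.
        let bumpiness: usize = column_heights
            .windows(2)
            .map(|pair| (pair[0] as i64 - pair[1] as i64).abs() as usize)
            .sum();
        self.total_height * total_height as f64
            + self.bumpiness * bumpiness as f64
            + self.holes * holes as f64
            + self.complete_lines * complete_lines as f64
    }
}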

View File

@@ -1,8 +1,9 @@
use crate::game::{Action, Game};
use crate::game::{Action, Controllable, Game};
use crate::playfield::{Matrix, PlayField};
use crate::tetromino::{Tetromino, TetrominoType};
use rand::rngs::SmallRng;
pub mod genetic;
pub mod qlearning;
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
@@ -56,3 +57,26 @@ pub trait Actor {
fn dbg(&self);
}
pub trait Predictable {
fn get_next_state(&self, action: Action) -> Self;
}
impl Predictable for Game {
/// Expensive, performs a full clone.
fn get_next_state(&self, action: Action) -> Self {
let mut game = self.clone();
match action {
Action::Nothing => (),
Action::MoveLeft => game.move_left(),
Action::MoveRight => game.move_right(),
Action::SoftDrop => game.move_down(),
Action::HardDrop => game.hard_drop(),
Action::Hold => game.hold(),
Action::RotateLeft => game.rotate_left(),
Action::RotateRight => game.rotate_right(),
};
game
}
}
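// Hypothetical sketch (not in this commit): Predictable gives heuristic agents
// a one-step lookahead without mutating the live game. An agent could use it
// roughly like this; `evaluate` stands in for a board-scoring function such as
// the heuristic sketched above, and Action is assumed to implement Clone.
fn best_action(game: &Game, legal_actions: &[Action], evaluate: impl Fn(&Game) -> f64) -> Action {
    let mut best = Action::Nothing;
    let mut best_score = std::f64::NEG_INFINITY;
    for action in legal_actions {
        // Clone the game via get_next_state and score the resulting board.
        let score = evaluate(&game.get_next_state(action.clone()));
        if score > best_score {
            best_score = score;
            best = action.clone();
        }
    }
    best
}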

View File

@@ -1,12 +1,14 @@
use crate::actors::{Actor, State};
use crate::{
game::Action,
game::{Action, Controllable, Game, Tickable},
playfield::{PLAYFIELD_HEIGHT, PLAYFIELD_WIDTH},
};
use log::debug;
use indicatif::ProgressIterator;
use log::{debug, info};
use rand::rngs::SmallRng;
use rand::seq::SliceRandom;
use rand::Rng;
use rand::SeedableRng;
use std::collections::HashMap;
pub struct QLearningAgent {
@@ -250,3 +252,53 @@ impl Actor for ApproximateQLearning {
dbg!(&self.weights);
}
}
pub fn train_actor(episodes: usize, mut actor: Box<dyn Actor>) -> Box<dyn Actor> {
let mut rng = SmallRng::from_entropy();
let mut avg = 0.0;
for i in (0..episodes).progress() {
if i != 0 && i % (episodes / 10) == 0 {
info!("Last {} scores avg: {}", (episodes / 10), avg);
println!();
avg = 0.0;
}
let mut game = Game::default();
while (&game).is_game_over().is_none() {
let cur_state = (&game).into();
let cur_score = game.score();
let action = actor.get_action(&mut rng, &cur_state, &((&game).get_legal_actions()));
match action {
Action::Nothing => (),
Action::MoveLeft => game.move_left(),
Action::MoveRight => game.move_right(),
Action::SoftDrop => game.move_down(),
Action::HardDrop => game.hard_drop(),
Action::Hold => game.hold(),
Action::RotateLeft => game.rotate_left(),
Action::RotateRight => game.rotate_right(),
}
let new_state = (&game).into();
let mut reward = game.score() as f64 - cur_score as f64;
if action != Action::Nothing {
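// No effect today (subtracting 0.0); presumably a placeholder for a small
// per-action penalty.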
reward -= 0.0;
}
if game.is_game_over().is_some() {
reward = -1.0;
}
let new_legal_actions = game.get_legal_actions();
actor.update(cur_state, action, new_state, &new_legal_actions, reward);
game.tick();
}
avg += game.score() as f64 / (episodes / 10) as f64;
}
actor
}
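// Hypothetical usage (not in this commit): with train_actor exported from the
// qlearning module, a caller such as main.rs only needs to import and invoke
// it. The Default constructor on ApproximateQLearning is an assumption; use
// whatever constructor the agent actually exposes.
fn train_approximate_agent() -> Box<dyn Actor> {
    train_actor(10_000, Box::new(ApproximateQLearning::default()))
}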

View File

@@ -18,6 +18,8 @@ pub enum LossReason {
LockOut,
BlockOut(Position),
}
#[derive(Clone)]
// Logic is based on 60 ticks / second
pub struct Game {
playfield: PlayField,
@@ -100,6 +102,7 @@ impl Tickable for Game {
}
}
#[derive(Clone, Copy)]
enum ClearAction {
Single,
Double,
@@ -266,7 +269,7 @@ impl Controllable for Game {
active_piece.position = active_piece.position.offset(x, y);
active_piece.rotate_left();
self.playfield.active_piece = Some(active_piece);
self.update_lock_tick();
// self.update_lock_tick();
}
Err(_) => (),
}
@@ -283,7 +286,7 @@ impl Controllable for Game {
active_piece.position = active_piece.position.offset(x, y);
active_piece.rotate_right();
self.playfield.active_piece = Some(active_piece);
self.update_lock_tick();
// self.update_lock_tick();
}
Err(_) => (),
}

View File

@@ -6,6 +6,7 @@ use graphics::standard_renderer;
use graphics::COLOR_BACKGROUND;
use indicatif::ProgressIterator;
use log::{debug, info, trace};
use qlearning::train_actor;
use rand::SeedableRng;
use sdl2::event::Event;
use sdl2::keyboard::Keycode;
@@ -61,56 +62,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
fn train_actor(episodes: usize, mut actor: Box<dyn Actor>) -> Box<dyn Actor> {
let mut rng = rand::rngs::SmallRng::from_entropy();
let mut avg = 0.0;
for i in (0..episodes).progress() {
if i != 0 && i % (episodes / 10) == 0 {
info!("Last {} scores avg: {}", (episodes / 10), avg);
println!();
avg = 0.0;
}
let mut game = Game::default();
while (&game).is_game_over().is_none() {
let cur_state = (&game).into();
let cur_score = game.score();
let action = actor.get_action(&mut rng, &cur_state, &((&game).get_legal_actions()));
match action {
Action::Nothing => (),
Action::MoveLeft => game.move_left(),
Action::MoveRight => game.move_right(),
Action::SoftDrop => game.move_down(),
Action::HardDrop => game.hard_drop(),
Action::Hold => game.hold(),
Action::RotateLeft => game.rotate_left(),
Action::RotateRight => game.rotate_right(),
}
let new_state = (&game).into();
let mut reward = game.score() as f64 - cur_score as f64;
if action != Action::Nothing {
reward -= 0.0;
}
if game.is_game_over().is_some() {
reward = -1.0;
}
let new_legal_actions = game.get_legal_actions();
actor.update(cur_state, action, new_state, &new_legal_actions, reward);
game.tick();
}
avg += game.score() as f64 / (episodes / 10) as f64;
}
actor
}
async fn play_game(mut actor: Option<Box<dyn Actor>>) -> Result<(), Box<dyn std::error::Error>> {
let mut rng = rand::rngs::SmallRng::from_entropy();
let sdl_context = sdl2::init()?;

View File

@@ -10,6 +10,7 @@ pub trait RotationSystem {
fn rotate_right(&self, playfield: &PlayField) -> Result<Position, ()>;
}
#[derive(Clone)]
pub struct SRS {
jlstz_offset_data: HashMap<RotationState, Vec<Position>>,
i_offset_data: HashMap<RotationState, Vec<Position>>,