diff --git a/src/actors/genetic.rs b/src/actors/genetic.rs index 4e81c30..e917f92 100644 --- a/src/actors/genetic.rs +++ b/src/actors/genetic.rs @@ -290,8 +290,10 @@ pub async fn train_actor(opts: &Train) -> Box { .collect::>(); random_selection.sort_unstable_by(|e1, e2| e1.1.cmp(&e2.1)); let best_two = random_selection.iter().rev().take(2).collect::>(); - let parent1 = dbg!(best_two[0]); - let parent2 = dbg!(best_two[1]); + let parent1 = &best_two[0]; + println!("{:?}", &best_two[0]); + let parent2 = &best_two[1]; + println!("{:?}", &best_two[1]); for _ in 0..new_pop_size / 3 { let breeded = parent1.0.breed(parent1.1, &parent2.0, parent2.1); let mut cloned = breeded.clone(); diff --git a/src/actors/qlearning.rs b/src/actors/qlearning.rs index 540a903..c8c7d6c 100644 --- a/src/actors/qlearning.rs +++ b/src/actors/qlearning.rs @@ -5,8 +5,7 @@ use crate::{ game::{Action, Controllable, Game, Tickable}, playfield::{Matrix, PLAYFIELD_HEIGHT, PLAYFIELD_WIDTH}, }; -use indicatif::ProgressIterator; -use log::{debug, info, trace}; +use log::{debug, error, info, trace}; use rand::rngs::SmallRng; use rand::seq::SliceRandom; use rand::Rng; @@ -28,6 +27,7 @@ pub trait QLearningActor: Actor { fn set_discount_rate(&mut self, discount_rate: f64); } +#[derive(Debug)] pub struct QLearningAgent { pub learning_rate: f64, pub exploration_prob: f64, @@ -150,6 +150,7 @@ impl QLearningActor for QLearningAgent { } } +#[derive(Debug)] pub struct ApproximateQLearning { pub learning_rate: f64, pub exploration_prob: f64, @@ -183,7 +184,7 @@ enum Feature { impl ApproximateQLearning { fn get_features(&self, game: &Game, action: &Action) -> HashMap { - // let game = game.get_next_state(*action); + let game = game.get_next_state(*action); let mut features = HashMap::default(); let field = game.playfield().field(); @@ -255,6 +256,8 @@ impl ApproximateQLearning { .map(|action| (action, self.get_q_value(game, action))) .collect::>(); + // dbg!(&legal_actions); + let max_val = legal_actions .iter() .max_by_key(|(_, q1)| ((q1 * 1_000_000.0) as isize)) @@ -273,10 +276,16 @@ impl ApproximateQLearning { ); } - *actions_to_choose - .choose(&mut SmallRng::from_entropy()) - .unwrap() - .0 + let action = actions_to_choose.choose(&mut SmallRng::from_entropy()); + + match action { + Some(a) => *a.0, + None => { + dbg!(&legal_actions); + dbg!(&actions_to_choose); + panic!("wtf???"); + } + } } fn get_value(&self, game: &Game) -> f64 { @@ -325,7 +334,7 @@ impl QLearningActor for ApproximateQLearning { game_state: Game, action: Action, next_game_state: Game, - next_legal_actions: &[Action], + _: &[Action], reward: f64, ) { let difference = reward + self.discount_rate * self.get_value(&next_game_state) @@ -352,7 +361,7 @@ impl QLearningActor for ApproximateQLearning { } } -pub fn train_actor( +pub fn train_actor( mut actor: T, opts: &Train, ) -> Box { @@ -368,10 +377,11 @@ pub fn train_actor( opts.learning_rate, opts.discount_rate, opts.exploration_prob ); - for i in (0..episodes).progress() { + for i in 0..episodes { if i != 0 && i % (episodes / 10) == 0 { - info!("Last {} scores avg: {}", (episodes / 10), avg); - println!(); + println!("{}", avg); + eprintln!("iteration {}", i); + // println!("{:?}", &actor); avg = 0.0; } let mut game = Game::default(); diff --git a/src/main.rs b/src/main.rs index 3e1198e..b833a81 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,16 +54,16 @@ async fn main() -> Result<(), Box> { async fn play_game(mut actor: Option>) -> Result<(), Box> { let mut rng = rand::rngs::SmallRng::from_entropy(); let sdl_context = sdl2::init()?; - let video_subsystem = sdl_context.video()?; - let window = video_subsystem - .window("retris", 800, 800) - .position_centered() - .build()?; - let mut canvas = window.into_canvas().build()?; + // let video_subsystem = sdl_context.video()?; + // let window = video_subsystem + // .window("retris", 800, 800) + // .position_centered() + // .build()?; + // let mut canvas = window.into_canvas().build()?; let mut event_pump = sdl_context.event_pump()?; let mut interval = interval(Duration::from_millis(1000 / TICKS_PER_SECOND as u64)); - 'escape: loop { + 'escape: for _ in 0..10 { let mut game = Game::default(); loop { @@ -89,7 +89,7 @@ async fn play_game(mut actor: Option>) -> Result<(), Box { debug!("Escape registered"); - break 'escape Ok(()); + break 'escape; } Event::KeyDown { keycode: Some(Keycode::Left), @@ -175,13 +175,15 @@ async fn play_game(mut actor: Option>) -> Result<(), Box