Member | Defined in | Access
--- | --- | ---
__init__(self, env: Env, ppo_agent: PPOAgent, mcts_params=None, debug=False) | rl.training.trainer.Trainer | public
_format_parameters(self, action, params) | rl.training.trainer.Trainer | protected
_save_results_to_file(self, problem, steps, max_steps, is_terminal, action_trace, action_counts, optimized_steps, original_steps, execution_time, formatted_time) | rl.training.trainer.Trainer | protected
action_mapping | rl.training.trainer.Trainer | public
avg_rewards | rl.training.trainer.Trainer | public
best_score | rl.training.trainer.Trainer | public
debug | rl.training.trainer.Trainer | public
env | rl.training.trainer.Trainer | public
episode_rewards | rl.training.trainer.Trainer | public
epsilon_decay | rl.training.trainer.Trainer | public
epsilon_end | rl.training.trainer.Trainer | public
epsilon_start | rl.training.trainer.Trainer | public
evaluateModel(self, n_eval_episodes=10, max_steps_per_episode=200, plot=False) | rl.training.trainer.Trainer | public
evaluateProblem(self, problem, max_steps=2000, loop_detection=True, exploration_rate=0.1, save_to_file=False) | rl.training.trainer.Trainer | public
get_valid_actions(self, obs) | rl.training.trainer.Trainer | public
invalid_action_counts | rl.training.trainer.Trainer | public
learn_iters | rl.training.trainer.Trainer | public
mcts | rl.training.trainer.Trainer | public
ppo_agent | rl.training.trainer.Trainer | public
score_history | rl.training.trainer.Trainer | public
steps_per_episode | rl.training.trainer.Trainer | public
total_steps | rl.training.trainer.Trainer | public
train(self, n_episodes=2000, N=5, max_steps_per_episode=200, train_on_old_models=False, start_learn_after=500, use_permutation=False) | rl.training.trainer.Trainer | public
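The listing above covers the public surface of `rl.training.trainer.Trainer`. The snippet below is a minimal usage sketch assembled from those signatures only: the import paths and constructors for `Env` and `PPOAgent`, the meaning of `N` (assumed here to be the number of steps between PPO learning updates), and the shape of `problem` are assumptions for illustration, not part of the listing.

```python
from rl.training.trainer import Trainer

# Hypothetical import paths: the member listing does not say where Env and
# PPOAgent are defined.
from rl.env import Env                # assumption
from rl.agents.ppo import PPOAgent    # assumption

env = Env()                           # assumed no-argument constructor
ppo_agent = PPOAgent()                # assumed no-argument constructor

trainer = Trainer(env, ppo_agent, mcts_params=None, debug=False)

# Training run with the defaults from the train() signature above.
trainer.train(
    n_episodes=2000,
    N=5,                              # assumed: steps between learning updates
    max_steps_per_episode=200,
    train_on_old_models=False,
    start_learn_after=500,
    use_permutation=False,
)

# Aggregate evaluation over several episodes.
trainer.evaluateModel(n_eval_episodes=10, max_steps_per_episode=200, plot=False)

# Single-problem evaluation; save_to_file=True presumably routes results
# through the protected _save_results_to_file helper.
problem = ...  # placeholder: a problem instance accepted by the environment
trainer.evaluateProblem(
    problem,
    max_steps=2000,
    loop_detection=True,
    exploration_rate=0.1,
    save_to_file=False,
)
```

The `epsilon_start`, `epsilon_end`, and `epsilon_decay` attributes suggest an exploration schedule maintained by the trainer, but their exact use is not visible from this listing.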