FoPra Beluga Challenge - Reinforcement Learning
v1.0
Deep Reinforcement Learning solution for the Beluga Challenge shipping container optimization problem using PPO and MCTS
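The member index below covers the main classes of this pipeline (Env, PPOAgent, MCTS, Trainer). As a rough orientation only, the following sketch shows one plausible way these classes could be wired together; the constructor signatures and the train() call are assumptions for illustration, and only the module paths and member names are taken from this index.

# Hypothetical usage sketch: constructor arguments and method names are
# assumptions; only the class paths and member names come from this index.
from rl.env.environment import Env
from rl.agents.high_level.ppo_agent import PPOAgent
from rl.mcts.mcts import MCTS
from rl.training.trainer import Trainer

env = Env(path="problems/")               # assumed constructor; 'path' is a documented member
agent = PPOAgent(gamma=0.99,              # discount factor (member: gamma)
                 gae_lambda=0.95,         # GAE smoothing (member: gae_lambda)
                 policy_clip=0.2,         # PPO clipping range (member: policy_clip)
                 n_epochs=4)              # update epochs per batch (member: n_epochs)
mcts = MCTS(n_simulations=100, depth=20)  # search budget (members: n_simulations, depth)

trainer = Trainer(env=env, ppo_agent=agent, mcts=mcts,
                  steps_per_episode=200,
                  epsilon_start=1.0, epsilon_end=0.05, epsilon_decay=0.995)
trainer.train()                           # assumed entry point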
Here is a list of all variables with links to the classes they belong to:
- a -
action : rl.mcts.mcts_node.MCTSNode
action_mapping : rl.training.trainer.Trainer
actions : rl.agents.high_level.ppo_agent.PPOMemory
actor : rl.agents.high_level.ppo_agent.ActorNetwork, rl.agents.high_level.ppo_agent.PPOAgent
avg_rewards : rl.training.trainer.Trainer
- b -
base_index : rl.env.environment.Env
batch_size : rl.agents.high_level.ppo_agent.PPOMemory
belugas : rl.env.state.ProblemState
belugas_finished : rl.env.state.ProblemState
belugas_unloaded : rl.env.state.ProblemState
best_score : rl.training.trainer.Trainer
block_size : rl.env.environment.Env
- c -
check_action_map : rl.env.environment.Env
checkpoint_file : rl.agents.high_level.ppo_agent.ActorNetwork, rl.agents.high_level.ppo_agent.CriticNetwork
children : rl.mcts.mcts_node.MCTSNode
critic : rl.agents.high_level.ppo_agent.CriticNetwork, rl.agents.high_level.ppo_agent.PPOAgent
current_jigs : rl.env.state.Beluga, rl.env.state.Rack
- d -
debug : rl.mcts.mcts.MCTS, rl.training.trainer.Trainer
depth : rl.mcts.mcts.MCTS, rl.mcts.mcts_node.MCTSNode
device : rl.agents.high_level.ppo_agent.ActorNetwork, rl.agents.high_level.ppo_agent.CriticNetwork
dones : rl.agents.high_level.ppo_agent.PPOMemory
- e -
empty : rl.env.state.Jig
env : rl.training.trainer.Trainer
episode_rewards : rl.training.trainer.Trainer
epsilon_decay : rl.training.trainer.Trainer
epsilon_end : rl.training.trainer.Trainer
epsilon_start : rl.training.trainer.Trainer
- g -
gae_lambda : rl.agents.high_level.ppo_agent.PPOAgent
gamma : rl.agents.high_level.ppo_agent.PPOAgent
- h -
hangars : rl.env.state.ProblemState
- i -
invalid_action_counts : rl.training.trainer.Trainer
- j -
jig_type : rl.env.state.Jig
jigs : rl.env.state.ProblemState
- l -
learn_iters : rl.training.trainer.Trainer
- m -
mcts : rl.training.trainer.Trainer
memory : rl.agents.high_level.ppo_agent.PPOAgent
- n -
n_epochs : rl.agents.high_level.ppo_agent.PPOAgent
n_simulations : rl.mcts.mcts.MCTS
name : rl.env.state.JigType
- o -
optimizer : rl.agents.high_level.ppo_agent.ActorNetwork, rl.agents.high_level.ppo_agent.CriticNetwork
outgoing : rl.env.state.Beluga
- p -
parent : rl.mcts.mcts_node.MCTSNode
path : rl.env.environment.Env
policy_clip : rl.agents.high_level.ppo_agent.PPOAgent
ppo_agent : rl.training.trainer.Trainer
problem_count : rl.env.environment.Env
problem_name : rl.env.environment.Env
problem_solved : rl.env.state.ProblemState
problems_solved : rl.env.environment.Env
probs : rl.agents.high_level.ppo_agent.PPOMemory
production_lines : rl.env.state.ProblemState
production_lines_finished : rl.env.state.ProblemState
- r -
racks : rl.env.state.ProblemState
rewards : rl.agents.high_level.ppo_agent.PPOMemory
root : rl.mcts.mcts.MCTS
- s -
scheduled_jigs : rl.env.state.ProductionLine
score_history : rl.training.trainer.Trainer
size : rl.env.state.Rack
size_empty : rl.env.state.JigType
size_loaded : rl.env.state.JigType
sorted_problems : rl.env.environment.Env
state : rl.env.environment.Env, rl.mcts.mcts_node.MCTSNode
states : rl.agents.high_level.ppo_agent.PPOMemory
step_count : rl.env.environment.Env
steps_per_episode : rl.training.trainer.Trainer
- t -
total_belugas : rl.env.state.ProblemState
total_lines : rl.env.state.ProblemState
total_reward : rl.mcts.mcts_node.MCTSNode
total_steps : rl.training.trainer.Trainer
trailers_beluga : rl.env.state.ProblemState
trailers_factory : rl.env.state.ProblemState
- v -
values : rl.agents.high_level.ppo_agent.PPOMemory
visits : rl.mcts.mcts_node.MCTSNode
Generated by Doxygen 1.12.0