FoPra Beluga Challenge - Reinforcement Learning v1.0
Deep Reinforcement Learning solution for the Beluga Challenge shipping container optimization problem using PPO and MCTS
rl.mcts.mcts.MCTS Class Reference

Monte Carlo Tree Search algorithm implementation.

Public Member Functions

 __init__ (self, MCTSNode root, int depth=5, int n_simulations=300, bool debug=False)
 Initialize MCTS algorithm.
 
 search (self)
 Run the MCTS search algorithm.
 
 select (self, node)
 Traverse the tree until reaching a node that is not fully expanded or is terminal.
 
 rollout (self, node)
 Simulate random actions from the node until terminal state or max depth.
 
 get_best_path (self)
 Get the path of best child nodes from the root, selected greedily (no exploration).
 

Public Attributes

 root = root
 
 depth = depth
 
 n_simulations = n_simulations
 
 debug = debug
 

Detailed Description

Monte Carlo Tree Search algorithm implementation.

MCTS uses random sampling to build a search tree and find high-reward action sequences for the container optimization problem.
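A minimal usage sketch: build a root node from an initial problem state, run the search, and read off the greedy action path. The MCTSNode import path and constructor signature are assumptions; this page documents only the MCTS class itself.

from rl.mcts.mcts import MCTS
from rl.mcts.mcts_node import MCTSNode  # hypothetical module path

# initial_state is assumed to be a ProblemState for the Beluga instance
root = MCTSNode(state=initial_state)    # constructor signature is an assumption

mcts = MCTS(root, depth=10, n_simulations=500)
best_child = mcts.search()              # selection, expansion, rollout, backpropagation
plan = mcts.get_best_path()             # greedy action sequence from the root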

Constructor & Destructor Documentation

◆ __init__()

rl.mcts.mcts.MCTS.__init__ ( self,
MCTSNode root,
int depth = 5,
int n_simulations = 300,
bool debug = False )

Initialize MCTS algorithm.

Parameters
    root            Root node of the search tree
    depth           Maximum search depth
    n_simulations   Number of simulations to run
    debug           Enable debug output
def __init__(self, root: MCTSNode, depth: int = 5, n_simulations: int = 300, debug: bool = False):
    """!
    @brief Initialize MCTS algorithm
    @param root Root node of the search tree
    @param depth Maximum search depth
    @param n_simulations Number of simulations to run
    @param debug Enable debug output
    """
    self.root = root
    self.depth = depth
    self.n_simulations = n_simulations
    self.debug = debug
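MCTS interacts with its node and state objects through a small surface. The sketch below collects every member that the listings on this page actually call; it is inferred from those call sites and is not a definitive interface specification.

from typing import Any, List, Optional, Protocol, Tuple

class StateLike(Protocol):
    def copy(self) -> "StateLike": ...
    def is_terminal(self) -> bool: ...
    def get_possible_actions(self) -> List[Tuple[str, Any]]: ...  # (action_name, params)
    def apply_action(self, action_name: str, params: Any) -> None: ...
    def evaluate(self, depth: int) -> float: ...

class NodeLike(Protocol):
    depth: int
    action: Any
    state: StateLike
    children: List["NodeLike"]
    visits: int
    total_reward: float
    def is_terminal(self) -> bool: ...
    def is_fully_expanded(self) -> bool: ...
    def best_child(self, exploration_weight: float = 1.41) -> Optional["NodeLike"]: ...
    def get_untried_action(self) -> List[Tuple[str, Any]]: ...
    def expand(self, action: Tuple[str, Any]) -> "NodeLike": ...
    def backpropagate(self, reward: float) -> None: ...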

Member Function Documentation

◆ get_best_path()

rl.mcts.mcts.MCTS.get_best_path ( self)

Get the path of best child nodes from the root, selected greedily (no exploration).

Returns
List of actions representing the best path found
def get_best_path(self):
    """!
    @brief Get the path of best child nodes from the root, selected greedily (no exploration)
    @return List of actions representing the best path found
    """
    path = []
    node = self.root
    while True:
        best_child = node.best_child(exploration_weight=0)
        if best_child is None:
            break
        path.append(best_child.action)
        node = best_child
    return path
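The returned path can be replayed on a copy of the root state, for example to check whether it reaches a terminal state (a usage sketch; mcts as constructed in the overview above):

state = mcts.root.state.copy()
for action_name, params in mcts.get_best_path():
    state.apply_action(action_name, params)  # execute the greedy plan step by step
print("terminal:", state.is_terminal())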

◆ rollout()

rl.mcts.mcts.MCTS.rollout ( self,
node )

Simulate random actions from the node until terminal state or max depth.

Parameters
    node    Starting node for rollout simulation
Returns
Reward value from the rollout
def rollout(self, node):
    """!
    @brief Simulate random actions from the node until terminal state or max depth
    @param node Starting node for rollout simulation
    @return Reward value from the rollout
    """
    state: ProblemState = node.state.copy()
    depth = node.depth  # Start from the node's current depth
    rollout_actions = []

    while not state.is_terminal() and depth < self.depth:
        # Get possible actions as (action_name, params) tuples
        possible_actions = state.get_possible_actions()

        if not possible_actions:
            debuglog(f"DEBUG - No possible actions at depth {depth}")
            break

        # Choose a random action and record it
        action_name, params = random.choice(possible_actions)
        rollout_actions.append((action_name, params))

        # Apply action to state
        state.apply_action(action_name, params)
        depth += 1

    if self.debug:
        print(f"DEBUG - Rollout completed with {len(rollout_actions)} actions")
        print(f"DEBUG - Final rollout actions: {rollout_actions[:5]}{'...' if len(rollout_actions) > 5 else ''}")

    # Calculate reward based on final state
    reward = state.evaluate(depth)
    if self.debug:
        print(f"DEBUG - Rollout ended at depth {depth}, final reward: {reward}")
        # Report subgoal fulfillment
        print(f"DEBUG - Subgoals: {state.belugas_unloaded} unloaded, {state.belugas_finished} finished, {state.production_lines_finished} production lines finished")

    # After rollout:
    if state.is_terminal() and self.debug:
        print("Terminal state reached in rollout!")
    return reward
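Since rollout (and the expansion step in search) picks actions with random.choice, two searches over the same tree generally differ. Seeding Python's global random module before calling search makes runs reproducible; this is a usage note, not part of the class:

import random

random.seed(42)             # fixes all random.choice draws in rollout and expansion
best_child = mcts.search()  # deterministic for a fixed seed, tree, and state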

◆ search()

rl.mcts.mcts.MCTS.search ( self)

Run the MCTS search algorithm.

Returns
Best child node found after all simulations
def search(self):
    """!
    @brief Run the MCTS search algorithm
    @return Best child node found after all simulations
    """
    terminal_node_found = False

    for i in range(self.n_simulations):
        if self.debug:
            print(f"\nIteration {i+1}/{self.n_simulations}")

        # 1. Selection
        node = self.select(self.root)
        if self.debug:
            print(f"Selected node: depth={node.depth}, action={node.action}")

        # 2. Expansion
        if not node.is_terminal():
            untried_actions = node.get_untried_action()
            if untried_actions:
                action = random.choice(untried_actions)
                if self.debug:
                    print(f"Expanding node with action: {action}")
                node = node.expand(action)

                if node.state.is_terminal():
                    if self.debug:
                        print("Terminal state reached! Solution found.")
                    terminal_node_found = True
                    # Reward is already set by evaluate()
                    reward = node.state.evaluate(node.depth)
                    node.backpropagate(reward)
                    if self.debug:
                        print(f"Rollout reward: {reward}")
                    break  # Abort MCTS
            else:
                # Abort if no untried actions are available
                if not node.children or node.depth >= self.depth - 1:
                    if self.debug:
                        print(f"No further actions possible at depth {node.depth}. Aborting MCTS.")
                    # Final selection as at the end of the method
                    debuglog("\nFinal selection (early):")
                    best_child = self.root.best_child(exploration_weight=0)
                    return best_child
                else:
                    if self.debug:
                        print("No untried actions available, skipping expansion.")

        # 3. Simulation
        reward = self.rollout(node)
        if self.debug:
            print(f"Rollout reward: {reward}")

        # 4. Backpropagation
        node.backpropagate(reward)

    # Final selection - always the same, regardless of how we got here
    debuglog("\nFinal selection:")
    best_child = self.root.best_child(exploration_weight=0)
    if best_child is None:
        debuglog("WARNING: Root has no children!")
        return None
    else:
        debuglog(f"Best child: action={best_child.action}, visits={best_child.visits}, reward={best_child.total_reward/best_child.visits if best_child.visits > 0 else 0}")
        if terminal_node_found:
            debuglog("Note: A terminal state was found!")
        return best_child
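search() returns a single best child, i.e. one action from the root. One common usage pattern, sketched here as an assumption rather than anything this module prescribes, is a receding-horizon loop that re-roots on the returned child and searches again:

node, plan = root, []
while node is not None and not node.state.is_terminal():
    # depth is absolute: rollout starts counting at node.depth, so the
    # budget must be extended relative to the current node when re-rooting
    node = MCTS(node, depth=node.depth + 10, n_simulations=300).search()
    if node is not None:
        plan.append(node.action)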

◆ select()

rl.mcts.mcts.MCTS.select ( self,
node )

Traverse the tree until reaching a node that is not fully expanded or is terminal.

Parameters
    node    Starting node for selection
Returns
Selected node for expansion
def select(self, node):
    """!
    @brief Traverse the tree until reaching a node that is not fully expanded or is terminal
    @param node Starting node for selection
    @return Selected node for expansion
    """
    current_depth = 0
    while not node.is_terminal() and node.is_fully_expanded() and current_depth < self.depth:
        next_node = node.best_child()
        if next_node is None:
            break  # If no children are present
        node = next_node
        current_depth += 1
    return node
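best_child() itself is defined on MCTSNode and is not listed on this page. The following standalone sketch shows a typical UCT selection rule consistent with the exploration_weight parameter and the visits/total_reward attributes used above; it is an illustration, not the project's actual implementation:

import math

def best_child(node, exploration_weight: float = 1.41):
    """UCT selection over node.children; returns None if the node has no children."""
    if not node.children:
        return None

    def uct(child):
        if child.visits == 0:
            # unexplored children win during search; with weight 0 they rank last
            return float("inf") if exploration_weight > 0 else float("-inf")
        exploit = child.total_reward / child.visits                 # average reward
        explore = math.sqrt(math.log(node.visits) / child.visits)  # UCB1 bonus
        return exploit + exploration_weight * explore

    return max(node.children, key=uct)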

Member Data Documentation

◆ debug

rl.mcts.mcts.MCTS.debug = debug

◆ depth

rl.mcts.mcts.MCTS.depth = depth

◆ n_simulations

rl.mcts.mcts.MCTS.n_simulations = n_simulations

◆ root

rl.mcts.mcts.MCTS.root = root

The documentation for this class was generated from the following file: rl/mcts/mcts.py