def search(self):
    """!
    @brief Run the MCTS loop for at most self.n_simulations iterations
    @details Each iteration asks self.select() for a node starting at
             self.root, expands one randomly chosen untried action when the
             node is not terminal, then scores the resulting node with
             self.rollout() and backpropagates that reward.

             The loop ends before the iteration budget is spent in two cases:
             - a freshly expanded node has a terminal state: its
               state.evaluate(depth) reward is backpropagated and the loop
               stops;
             - the selected non-terminal node has no untried actions and
               either has no children or sits at depth >= self.depth - 1:
               the root's best child is returned immediately.
    @return self.root.best_child(exploration_weight=0); None when the root
            reports no best child
    """
    terminal_node_found = False

    for step in range(self.n_simulations):
        if self.debug:
            print(f"\nIteration {step+1}/{self.n_simulations}")

        # Selection
        current = self.select(self.root)
        if self.debug:
            print(f"Selected node: depth={current.depth}, action={current.action}")

        # Expansion (skipped entirely when the selected node is terminal)
        if not current.is_terminal():
            candidates = current.get_untried_action()
            if not candidates:
                # Nothing left to try here. If the node also has no children
                # or is at the depth limit, the search cannot progress.
                if not current.children or current.depth >= self.depth - 1:
                    if self.debug:
                        print(f"No further actions possible at depth {current.depth}. Aborting MCTS.")

                    debuglog("\nFinal selection (early):")
                    return self.root.best_child(exploration_weight=0)

                if self.debug:
                    print("No untried actions available, skipping expansion.")
            else:
                chosen = random.choice(candidates)
                if self.debug:
                    print(f"Expanding node with action: {chosen}")
                current = current.expand(chosen)

                # A terminal state is scored directly instead of rolled out,
                # and it ends the simulation loop.
                if current.state.is_terminal():
                    if self.debug:
                        print("Terminal state reached! Solution found.")
                    terminal_node_found = True

                    outcome = current.state.evaluate(current.depth)
                    current.backpropagate(outcome)
                    if self.debug:
                        print(f"Rollout reward: {outcome}")
                    break

        # Simulation
        outcome = self.rollout(current)
        if self.debug:
            print(f"Rollout reward: {outcome}")

        # Backpropagation
        current.backpropagate(outcome)

    # exploration_weight=0 is passed for the final pick, as in the early exit.
    debuglog("\nFinal selection:")
    winner = self.root.best_child(exploration_weight=0)
    if winner is None:
        debuglog("WARNING: Root has no children!")
        return None

    # Guard the average against a child that was never visited.
    mean_reward = winner.total_reward/winner.visits if winner.visits > 0 else 0
    debuglog(f"Best child: action={winner.action}, visits={winner.visits}, reward={mean_reward}")
    if terminal_node_found:
        debuglog("Note: A terminal state was found!")
    return winner