Complete state representation for the Beluga Challenge. More...

Public Member Functions
	__init__ (self, list[Jig] jigs, list[Beluga] belugas, list[int | None] trailers_beluga, list[int | None] trailers_factory, list[Rack] racks, list[ProductionLine] production_lines, list[int | None] hangars)
	Initialize the complete problem state.

	copy (self)
	Create a deep copy of the entire problem state.

	clone (self)
	Create a clone of the current state (alias for copy)

	is_terminal (self)
	Check if this state represents a terminal (goal) state.

float	evaluate (self, int depth, mu=0.05)
	Evaluate the current state for MCTS scoring.

dict[str, float]	get_subgoals (self)
	Calculate subgoal achievements for evaluation.

	apply_action (self, action_name, params)
	Apply an action to this state.

bool	check_action_valid (self, str action_name, params=None)
	Check if an action with given parameters is valid.

	enumerate_valid_params (self, action)
	Enumerate all valid parameter combinations for a given action.

	get_possible_actions (self)
	Get list of all possible actions in the current state.

bool	beluga_complete (self)
	Mark current beluga as complete and remove it.

	get_observation_high_level (self)
	Get high-level observation array for RL agents.

	__str__ (self)

	__repr__ (self)

	__hash__ (self)

	__eq__ (self, other)

Public Attributes
	jigs = jigs

	belugas = belugas

	trailers_beluga = trailers_beluga

	trailers_factory = trailers_factory

	racks = racks

	production_lines = production_lines

	hangars = hangars

int	belugas_unloaded = 0

int	belugas_finished = 0

int	production_lines_finished = 0

	total_lines = len(self.production_lines)

	total_belugas = len(self.belugas)

bool	problem_solved = False

str	jigs = "\t" + str(count) + ": " + str(jig) + "\n"

str	belugas = "\t" + str(count) + ": " + str(beluga) + "\n"

str	racks = "\t" + str(count) + ": " + str(rack) + "\n"

str	production_lines = "\t" + str(count) + ": " + str(production_line) + "\n"

Detailed Description

Complete state representation for the Beluga Challenge.

Contains all components of the problem: jigs, ships, storage, and facilities. Provides the main API for MCTS and RL algorithms including state transitions, validation, and evaluation functions.

Constructor & Destructor Documentation

◆ __init__()

rl.env.state.ProblemState.__init__	(		self,
		list[Jig]	jigs,
		list[Beluga]	belugas,
		list[int | None]	trailers_beluga,
		list[int | None]	trailers_factory,
		list[Rack]	racks,
		list[ProductionLine]	production_lines,
		list[int | None]	hangars )

Initialize the complete problem state.

Parameters

jigs	List of all jigs in the problem
belugas	List of Beluga ships
trailers_beluga	List of Beluga trailer slots (jig IDs or None)
trailers_factory	List of factory trailer slots (jig IDs or None)
racks	List of storage racks
production_lines	List of production lines
hangars	List of hangar slots (jig IDs or None)

    def __init__(
        self,
        jigs: list[Jig],
        belugas: list[Beluga],
        trailers_beluga: list[int | None],
        trailers_factory: list[int | None],
        racks: list[Rack],
        production_lines: list[ProductionLine],
        hangars: list[int | None],
    ):
        """!
        @brief Build a problem state from its individual components
        @param jigs List of all jigs in the problem
        @param belugas List of Beluga ships
        @param trailers_beluga Beluga-side trailer slots (jig IDs or None)
        @param trailers_factory Factory-side trailer slots (jig IDs or None)
        @param racks List of storage racks
        @param production_lines List of production lines
        @param hangars Hangar slots (jig IDs or None)

        The given lists are stored by reference; no copies are made here.
        """
        # Problem components
        self.jigs = jigs
        self.belugas = belugas
        self.trailers_beluga = trailers_beluga
        self.trailers_factory = trailers_factory
        self.racks = racks
        self.production_lines = production_lines
        self.hangars = hangars

        # Subgoal bookkeeping, used for the high-level reward and the
        # low-level MCTS evaluation. All progress counters start at zero.
        self.belugas_unloaded = 0
        self.belugas_finished = 0
        self.production_lines_finished = 0
        # Initial sizes are remembered so progress can be measured later
        # against how many lines/belugas the problem started with.
        self.total_lines = len(self.production_lines)
        self.total_belugas = len(self.belugas)
        self.problem_solved = False
        
 
 

Member Function Documentation

◆ __eq__()

rl.env.state.ProblemState.__eq__	(		self,
			other )

    def __eq__(self, other):
        """!
        @brief Compare this state with another object via their string forms
        @param other Object to compare against
        @return True if str(self) and str(other) are identical

        Only what __str__ renders takes part in the comparison.
        """
        own_text = str(self)
        other_text = str(other)
        return own_text == other_text

◆ __hash__()

rl.env.state.ProblemState.__hash__ ( self )

    def __hash__(self):
        """!
        @brief Hash the state through its string form
        @return hash(str(self)), so states with equal text hash equally
        """
        rendered = str(self)
        return hash(rendered)
 

◆ __repr__()

rl.env.state.ProblemState.__repr__ ( self )

    def __repr__(self):
        """!
        @brief Debug representation; identical to the string form
        @return Same text as str(self)
        """
        return str(self)
    

◆ __str__()

rl.env.state.ProblemState.__str__ ( self )

    def __str__(self):
        """!
        @brief Render the state as multi-line text
        @return Text with jigs, belugas, trailers, racks, production lines and hangars

        List-valued sections print one tab-indented "index: element" line per
        element, numbered from 0. Trailer and hangar slots are printed as lists.
        """
        def numbered(header, entries):
            # Section header followed by one numbered line per entry
            rows = [f"\t{idx}: {entry!s}\n" for idx, entry in enumerate(entries)]
            return header + "".join(rows)

        pieces = [
            numbered("jigs:\n", self.jigs),
            numbered("belugas:\n", self.belugas),
            f"trailers_beluga: {self.trailers_beluga!s}\n",
            f"trailers_factory: {self.trailers_factory!s}\n",
            numbered("racks:\n", self.racks),
            numbered("production_lines:\n", self.production_lines),
            # Last section deliberately has no trailing newline
            f"hangars: {self.hangars!s}",
        ]
        return "".join(pieces)
 

◆ apply_action()

rl.env.state.ProblemState.apply_action	(	self,
		action_name,
		params )

Apply an action to this state.

Parameters

action_name	Name of the action to execute
params	Parameters for the action (dict or list)

Returns: True if action was successfully applied, False otherwise

    def apply_action(self, action_name, params=None):
        """!
        @brief Apply an action to this state
        @param action_name Name of the action to execute
        @param params Parameters for the action: a dict (its values are passed
               positionally in iteration order), a list/tuple, or None for
               "no parameters"
        @return Result of the underlying action function (True if the action was
                successfully applied, False otherwise)
        @throws NotImplementedError if action_name is not a known action
        """
        # Normalise params to a positional list. None is accepted so that the
        # parameterless "unload_beluga" (and unknown names) do not fail with a
        # TypeError before the action is even dispatched.
        if params is None:
            params = []
        elif isinstance(params, dict):
            params = list(params.values())
        else:
            params = list(params)

        if action_name == "left_stack_rack":
            return left_stack_rack(self, *params)
        elif action_name == "right_stack_rack":
            return right_stack_rack(self, *params)
        elif action_name == "left_unstack_rack":
            return left_unstack_rack(self, *params)
        elif action_name == "right_unstack_rack":
            return right_unstack_rack(self, *params)
        elif action_name == "load_beluga":
            return load_beluga(self, *params)
        elif action_name == "unload_beluga":
            # Takes no parameters; anything supplied is ignored
            return unload_beluga(self)
        elif action_name == "get_from_hangar":
            return get_from_hangar(self, *params)
        elif action_name == "deliver_to_hangar":
            return deliver_to_hangar(self, *params)
        else:
            raise NotImplementedError(f"Action name not known: {action_name}")
      
 

◆ beluga_complete()

bool rl.env.state.ProblemState.beluga_complete ( self )

Mark current beluga as complete and remove it.

Returns: True if beluga was successfully marked complete, False otherwise

    def beluga_complete(self) -> bool:
        """!
        @brief Remove the first beluga from the list once it is done
        @return True if the first beluga was removed; False if there is no beluga
                or its `outgoing` or `current_jigs` is still non-empty
        """
        queue = self.belugas
        if not queue:
            return False

        front = queue[0]
        unfinished = front.outgoing or front.current_jigs
        if unfinished:
            return False

        # Effect: the completed beluga leaves the front of the list
        del queue[0]
        return True
    
 
 

◆ check_action_valid()

bool rl.env.state.ProblemState.check_action_valid	(		self,
		str	action_name,
			params = None )

Check if an action with given parameters is valid.

Parameters

action_name	Name of the action to validate
params	Parameters for the action (optional)

Returns: True if action is valid, False otherwise

This function validates an action without modifying the current state.

    def check_action_valid(self, action_name: str, params=None) -> bool:
        """!
        @brief Check if an action with given parameters is valid
        @param action_name Name of the action to validate
        @param params Parameters for the action: a dict (its values are passed
               positionally in iteration order), a list/tuple, or None
        @return True if action is valid, False otherwise

        The action is executed on a copy of this state, so the current state is
        not modified. Unknown action names, and any exception raised while
        running the action, are reported as "not valid".
        """
        state_copy = self.copy()

        try:
            # Normalise dict params by value, as apply_action does. Unpacking a
            # dict with * would pass its keys instead of the parameter values.
            # Done inside the try so a non-iterable params is reported as invalid.
            if params is None:
                args = []
            elif isinstance(params, dict):
                args = list(params.values())
            else:
                args = list(params)

            if action_name == "left_stack_rack":
                return left_stack_rack(state_copy, *args)
            elif action_name == "right_stack_rack":
                return right_stack_rack(state_copy, *args)
            elif action_name == "left_unstack_rack":
                return left_unstack_rack(state_copy, *args)
            elif action_name == "right_unstack_rack":
                return right_unstack_rack(state_copy, *args)
            elif action_name == "load_beluga":
                return load_beluga(state_copy, *args)
            elif action_name == "unload_beluga":
                # Takes no parameters; anything supplied is ignored
                return unload_beluga(state_copy)
            elif action_name == "get_from_hangar":
                return get_from_hangar(state_copy, *args)
            elif action_name == "deliver_to_hangar":
                return deliver_to_hangar(state_copy, *args)
            else:
                return False
        except Exception as e:
            # Best-effort validation: a failing action counts as invalid
            print(f"Error in action {action_name} with params {params}: {e}")
            return False
 

◆ clone()

rl.env.state.ProblemState.clone ( self )

Create a clone of the current state (alias for copy)

Returns: Deep copy of this ProblemState

    def clone(self):
        """!
        @brief Duplicate this state; thin alias that forwards to copy()
        @return Whatever copy() returns for this state
        """
        duplicate = self.copy()
        return duplicate
    

◆ copy()

rl.env.state.ProblemState.copy ( self )

Create a deep copy of the entire problem state.

Returns: New ProblemState instance with all components copied

    def copy(self):
        """!
        @brief Create an independent copy of the entire problem state
        @return New ProblemState instance with all components copied

        Jigs, belugas, racks and production lines are duplicated through their
        own copy() methods. The trailer and hangar slot lists are sliced, which
        is sufficient because they only hold ints or None.
        """
        jigs_copy = [jig.copy() for jig in self.jigs]
        belugas_copy = [beluga.copy() for beluga in self.belugas]
        beluga_slots = self.trailers_beluga[:]
        factory_slots = self.trailers_factory[:]
        racks_copy = [rack.copy() for rack in self.racks]
        lines_copy = [line.copy() for line in self.production_lines]
        hangar_slots = self.hangars[:]

        duplicate = ProblemState(
            jigs=jigs_copy,
            belugas=belugas_copy,
            trailers_beluga=beluga_slots,
            trailers_factory=factory_slots,
            racks=racks_copy,
            production_lines=lines_copy,
            hangars=hangar_slots,
        )

        # The constructor resets the counters and recomputes the totals from the
        # current list lengths, so carry the real values over from this state.
        carried_over = (
            "belugas_unloaded",
            "belugas_finished",
            "production_lines_finished",
            "total_lines",
            "total_belugas",
            "problem_solved",
        )
        for field in carried_over:
            setattr(duplicate, field, getattr(self, field))
        return duplicate
    

◆ enumerate_valid_params()

rl.env.state.ProblemState.enumerate_valid_params	(		self,
			action )

Enumerate all valid parameter combinations for a given action.

Parameters

action Name of the action to enumerate parameters for

Returns: List of valid parameter tuples for the action

    def enumerate_valid_params(self, action):
        """!
        @brief Enumerate all valid parameter combinations for a given action
        @param action Name of the action to enumerate parameters for
        @return List of parameter tuples accepted by check_action_valid; empty
                for action names not handled here

        Candidates are generated in row-major order (first index outer, trailer
        index inner) and filtered with check_action_valid.
        """
        if action == "load_beluga":
            # Only the beluga trailer index varies; the second parameter is None
            candidates = [
                (trailer_id, None)
                for trailer_id in range(len(self.trailers_beluga))
            ]
        else:
            # Every other action is indexed by (rack or hangar, trailer)
            if action in ("left_stack_rack", "left_unstack_rack"):
                first_axis, trailers = self.racks, self.trailers_beluga
            elif action in ("right_stack_rack", "right_unstack_rack"):
                first_axis, trailers = self.racks, self.trailers_factory
            elif action in ("deliver_to_hangar", "get_from_hangar"):
                first_axis, trailers = self.hangars, self.trailers_factory
            else:
                return []

            candidates = [
                (first_id, trailer_id)
                for first_id in range(len(first_axis))
                for trailer_id in range(len(trailers))
            ]

        return [
            candidate
            for candidate in candidates
            if self.check_action_valid(action, candidate)
        ]
 
 
 

◆ evaluate()

float rl.env.state.ProblemState.evaluate	(		self,
		int	depth,
			mu = 0.05 )

Evaluate the current state for MCTS scoring.

Parameters

depth	Current depth in the search tree
mu	Penalty factor for depth (default 0.05)

Returns: Floating point score for this state

    def evaluate(self, depth: int, mu = 0.05) -> float:
        """!
        @brief Score the current state for MCTS
        @param depth Current depth in the search tree
        @param mu Penalty factor applied per unit of depth (default 0.05)
        @return Sum of all subgoal scores minus mu * depth
        """
        # Starting from 0.0 keeps the result a float even if all subgoals are ints
        reward = 0.0 + sum(self.get_subgoals().values())
        depth_penalty = mu * depth
        return reward - depth_penalty
    
 

◆ get_observation_high_level()

rl.env.state.ProblemState.get_observation_high_level ( self )

Get high-level observation array for RL agents.

Returns: NumPy array representing the current state for high-level agents

The observation includes information about belugas, trailers, hangars, and racks. High-level agents convert this array into tensors for neural network processing.

    def get_observation_high_level(self):
        """!
        @brief Build the fixed-size observation vector for high-level RL agents
        @return NumPy array of length 40 describing the current state

        Layout (positions without a matching trailer/hangar/rack hold -1):
        - 0      first beluga: 1 if it still has current jigs, 0 if not,
                 -1 if no beluga is left
        - 1-3    beluga trailers: 0.5 free; 0 empty jig whose type the first
                 beluga lists as outgoing; 0.25 other empty jig; 1 otherwise
        - 4-6    factory trailers: 0.5 free; 0 empty jig; 1 loaded jig that is
                 first in some production line's schedule; 0.75 other loaded jig
        - 7-9    hangars: 0 free, 1 occupied
        - 10-39  three values per rack for up to 10 racks:
                 [outgoing-jig indicator, production-jig indicator, free space / size]
        """
        max_racks = 10
        obs = np.zeros(10 + 3 * max_racks)

        # First scheduled jig of every production line that still has a schedule
        next_for_lines = [
            line.scheduled_jigs[0]
            for line in self.production_lines
            if len(line.scheduled_jigs) > 0
        ]

        # Outgoing jig types are only relevant once the first beluga holds no jigs
        wanted_types = []
        if len(self.belugas) > 0:
            first_beluga = self.belugas[0]
            obs[0] = max(0, min(len(first_beluga.current_jigs), 1))
            if obs[0] == 0:
                wanted_types = first_beluga.outgoing
        else:
            obs[0] = -1

        # Positions 1-3: beluga trailers
        for idx in range(3):
            pos = 1 + idx
            if idx >= len(self.trailers_beluga):
                obs[pos] = -1
                continue
            jig_id = self.trailers_beluga[idx]
            if jig_id is None:
                obs[pos] = 0.5
                continue
            jig = self.jigs[jig_id]
            if not (jig.empty and obs[0] == 0):
                obs[pos] = 1
            elif jig.jig_type in wanted_types:
                obs[pos] = 0
            else:
                obs[pos] = 0.25

        # Positions 4-6: factory trailers
        for idx in range(3):
            pos = 4 + idx
            if idx >= len(self.trailers_factory):
                obs[pos] = -1
                continue
            jig_id = self.trailers_factory[idx]
            if jig_id is None:
                obs[pos] = 0.5
            elif self.jigs[jig_id].empty:
                obs[pos] = 0
            elif jig_id in next_for_lines:
                obs[pos] = 1
            else:
                obs[pos] = 0.75

        # Positions 7-9: hangars
        for idx in range(3):
            pos = 7 + idx
            if idx >= len(self.hangars):
                obs[pos] = -1
            else:
                obs[pos] = 0 if self.hangars[idx] is None else 1

        # Positions 10-39: three values per rack
        for idx in range(max_racks):
            base = 10 + idx * 3
            if idx >= len(self.racks):
                obs[base:base + 3] = -1
                continue

            rack = self.racks[idx]
            stacked = rack.current_jigs
            count = len(stacked)
            if count == 0:
                # An empty rack reports zeros for all three values
                obs[base:base + 3] = 0
                continue

            obs[base] = 0
            obs[base + 1] = 0
            obs[base + 2] = rack.get_free_space(self.jigs) / rack.size

            # Each match overwrites the previous one, so the LAST matching
            # position in the rack determines the reported value.
            for k, jig_id in enumerate(stacked):
                jig = self.jigs[jig_id]
                if jig.empty and jig.jig_type in wanted_types:
                    obs[base] = (count - k) / count
            for k, jig_id in enumerate(stacked):
                if jig_id in next_for_lines:
                    obs[base + 1] = (k + 1) / count

        return obs
 
 
 

◆ get_possible_actions()

rl.env.state.ProblemState.get_possible_actions ( self )

Get list of all possible actions in the current state.

Returns: List of (action_name, parameters) tuples for all valid actions

An action is considered possible if at least one valid parameter combination exists.

    def get_possible_actions(self):
        """!
        @brief Collect every action that is currently valid
        @return List of (action_name, parameters) tuples for all valid actions

        "unload_beluga" takes no parameters and is listed with an empty dict.
        Every other action appears once per valid parameter tuple, so an action
        without any valid parameters does not appear at all.
        """
        # The parameterless action is checked directly
        if self.check_action_valid("unload_beluga"):
            found = [("unload_beluga", {})]
        else:
            found = []

        parameterised = (
            "left_stack_rack",
            "right_stack_rack",
            "left_unstack_rack",
            "right_unstack_rack",
            "load_beluga",
            "get_from_hangar",
            "deliver_to_hangar",
        )
        for name in parameterised:
            for combo in self.enumerate_valid_params(name):
                found.append((name, combo))

        return found
 
 

◆ get_subgoals()

dict[str, float] rl.env.state.ProblemState.get_subgoals ( self )

Calculate subgoal achievements for evaluation.

Returns: Dictionary mapping subgoal names to their scores

    def get_subgoals(self) -> dict[str, float]:
        """!
        @brief Compute the weighted subgoal scores used for evaluation
        @return Dictionary mapping subgoal names to their scores

        Side effect: refreshes belugas_finished and production_lines_finished
        from the current list lengths, and sets problem_solved to True once no
        belugas and no production lines remain.
        """
        belugas_left = len(self.belugas)
        lines_left = len(self.production_lines)

        # Finished = initial total minus what is still in the lists
        self.belugas_finished = self.total_belugas - belugas_left
        self.production_lines_finished = self.total_lines - lines_left

        if belugas_left == 0 and lines_left == 0:
            self.problem_solved = True

        scores = {
            "subgoal_1": self.belugas_unloaded * 15,
            "subgoal_2": self.belugas_finished * 60,
            "subgoal_3": self.production_lines_finished * 100,
            # bool * int: 1000 once solved, otherwise 0
            "goal": self.problem_solved * 1000,
        }
        return scores
        

◆ is_terminal()

rl.env.state.ProblemState.is_terminal ( self )

Check if this state represents a terminal (goal) state.

Returns: True if all belugas and production lines are finished

    def is_terminal(self):
        """!
        @brief Tell whether this state is a terminal (goal) state
        @return True if no belugas and no production lines remain, else False
        """
        anything_left = self.belugas or self.production_lines
        return not anything_left
 

Member Data Documentation

◆ belugas [1/2]

rl.env.state.ProblemState.belugas = belugas

◆ belugas [2/2]

str rl.env.state.ProblemState.belugas = "\t" + str(count) + ": " + str(beluga) + "\n"

◆ belugas_finished

int rl.env.state.ProblemState.belugas_finished = 0

◆ belugas_unloaded

int rl.env.state.ProblemState.belugas_unloaded = 0

◆ hangars

rl.env.state.ProblemState.hangars = hangars

◆ jigs [1/2]

rl.env.state.ProblemState.jigs = jigs

◆ jigs [2/2]

str rl.env.state.ProblemState.jigs = "\t" + str(count) + ": " + str(jig) + "\n"

◆ problem_solved

bool rl.env.state.ProblemState.problem_solved = False

◆ production_lines [1/2]

rl.env.state.ProblemState.production_lines = production_lines

◆ production_lines [2/2]

str rl.env.state.ProblemState.production_lines = "\t" + str(count) + ": " + str(production_line) + "\n"

◆ production_lines_finished

int rl.env.state.ProblemState.production_lines_finished = 0

◆ racks [1/2]

rl.env.state.ProblemState.racks = racks

◆ racks [2/2]

str rl.env.state.ProblemState.racks = "\t" + str(count) + ": " + str(rack) + "\n"

◆ total_belugas

rl.env.state.ProblemState.total_belugas = len(self.belugas)

◆ total_lines

rl.env.state.ProblemState.total_lines = len(self.production_lines)

◆ trailers_beluga

rl.env.state.ProblemState.trailers_beluga = trailers_beluga

◆ trailers_factory

rl.env.state.ProblemState.trailers_factory = trailers_factory

The documentation for this class was generated from the following file:

rl/env/state.py

Public Member Functions

Public Attributes

Detailed Description

Constructor & Destructor Documentation

◆ __init__()

Member Function Documentation

◆ __eq__()

◆ __hash__()

◆ __repr__()

◆ __str__()

◆ apply_action()

◆ beluga_complete()

◆ check_action_valid()

◆ clone()

◆ copy()

◆ enumerate_valid_params()

◆ evaluate()

◆ get_observation_high_level()

◆ get_possible_actions()

◆ get_subgoals()

◆ is_terminal()

Member Data Documentation

◆ belugas [1/2]

◆ belugas [2/2]

◆ belugas_finished

◆ belugas_unloaded

◆ hangars

◆ jigs [1/2]

◆ jigs [2/2]

◆ problem_solved

◆ production_lines [1/2]

◆ production_lines [2/2]

◆ production_lines_finished

◆ racks [1/2]

◆ racks [2/2]

◆ total_belugas

◆ total_lines

◆ trailers_beluga

◆ trailers_factory

◆ __init__()

◆ __eq__()

◆ __hash__()

◆ __repr__()

◆ __str__()