RL Graph Environment: Learning from one instance
[1]:
from job_shop_lib.reinforcement_learning import (
# MakespanReward,
SingleJobShopGraphEnv,
ObservationSpaceKey,
IdleTimeReward,
ObservationDict,
)
from job_shop_lib.dispatching.feature_observers import (
FeatureObserverConfig,
FeatureObserverType,
FeatureType,
)
from job_shop_lib.graphs import build_disjunctive_graph
from job_shop_lib.benchmarking import load_benchmark_instance
[2]:
instance = load_benchmark_instance("ft06")
job_shop_graph = build_disjunctive_graph(instance)
feature_observer_configs = [
FeatureObserverConfig(
FeatureObserverType.IS_READY,
kwargs={"feature_types": [FeatureType.JOBS]},
)
]
env = SingleJobShopGraphEnv(
job_shop_graph=job_shop_graph,
feature_observer_configs=feature_observer_configs,
reward_function_type=IdleTimeReward,
render_mode="human", # Try "save_video"
render_config={
"video_config": {"fps": 4}
}
)
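The commented-out MakespanReward import above points to an alternative objective. As a sketch (same constructor arguments, only the reward class swapped; ``makespan_env`` is just an illustrative name), the environment could instead be configured with a makespan-based reward:

[ ]:
from job_shop_lib.reinforcement_learning import MakespanReward

# Sketch: same setup as above, but with a makespan-based reward instead of
# the idle-time reward ("makespan_env" is only an illustrative name).
makespan_env = SingleJobShopGraphEnv(
    job_shop_graph=job_shop_graph,
    feature_observer_configs=feature_observer_configs,
    reward_function_type=MakespanReward,
)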
[3]:
env.observation_space
[3]:
Dict('edge_list': MultiDiscrete([[38 38 38 ... 38 38 38]
 [38 38 38 ... 38 38 38]]), 'jobs': Box(-inf, inf, (6, 1), float32), 'removed_nodes': MultiBinary(38))
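The three entries reflect the configuration above: ``edge_list`` lists the graph's edges, with every index bounded by the number of nodes (38, verified in the next cell); ``jobs`` holds the IS_READY feature, one value per job; and ``removed_nodes`` is a binary mask over the 38 nodes. The sub-spaces can be listed with plain Gymnasium, for example:

[ ]:
# List the sub-spaces of the Dict observation space.
for name, space in env.observation_space.spaces.items():
    print(f"{name}: {type(space).__name__}")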
[4]:
len(job_shop_graph.nodes)
[4]:
38
[5]:
import random
random.seed(42)
def random_action(observation: ObservationDict) -> tuple[int, int]:
    """Picks a job at random among those whose IS_READY feature equals 1."""
    ready_jobs = []
    for job_id, is_ready in enumerate(
        observation[ObservationSpaceKey.JOBS.value].ravel()
    ):
        if is_ready == 1.0:
            ready_jobs.append(job_id)

    job_id = random.choice(ready_jobs)
    machine_id = -1  # We can use -1 when each operation can only be
    # scheduled on one machine.
    return (job_id, machine_id)
[6]:
env.get_observation()[ObservationSpaceKey.JOBS]
[6]:
array([[1.],
[1.],
[1.],
[1.],
[1.],
[1.]], dtype=float32)
[7]:
random_action(env.get_observation())
[7]:
(5, -1)
[8]:
from IPython.display import clear_output
done = False
obs, _ = env.reset()
while not done:
action = random_action(obs)
obs, reward, done, *_ = env.step(action)
if env.render_mode == "human":
env.render()
clear_output(wait=True)
if env.render_mode == "save_video" or env.render_mode == "save_gif":
env.render()
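After the loop ends, every operation has been dispatched. As a sketch, assuming the environment exposes its Dispatcher as ``env.dispatcher`` and that the resulting Schedule object provides a ``makespan()`` method (both are assumptions not shown in this notebook), the finished schedule could be inspected like this:

[ ]:
# Sketch: inspect the schedule built by the random policy.
# Assumes `env.dispatcher` and `Schedule.makespan()` are available.
schedule = env.dispatcher.schedule
print("Random-policy makespan:", schedule.makespan())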
[9]:
import numpy as np
rewards = np.array(env.reward_function.rewards)
print(f"{len(list(filter(lambda x: x != 0, rewards)))} zeros out of {len(rewards)}")
19 zeros out of 36
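To see which of the dispatch steps produced a non-zero reward, plain NumPy suffices:

[ ]:
# Indices of the steps whose reward was non-zero.
print(np.nonzero(rewards)[0])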
[10]:
rewards.mean()
[10]:
-9.11111111111111
[11]:
rewards.sum()
[11]:
-328