RL Graph Environment: Learning from one instance

[1]:
from job_shop_lib.reinforcement_learning import (
    # MakespanReward,
    SingleJobShopGraphEnv,
    ObservationSpaceKey,
    IdleTimeReward,
    ObservationDict,
)
from job_shop_lib.dispatching.feature_observers import (
    FeatureObserverConfig,
    FeatureObserverType,
    FeatureType,
)
from job_shop_lib.graphs import build_disjunctive_graph
from job_shop_lib.benchmarking import load_benchmark_instance
[2]:
instance = load_benchmark_instance("ft06")
job_shop_graph = build_disjunctive_graph(instance)
feature_observer_configs = [
    FeatureObserverConfig(
        FeatureObserverType.IS_READY,
        kwargs={"feature_types": [FeatureType.JOBS]},
    )
]

env = SingleJobShopGraphEnv(
    job_shop_graph=job_shop_graph,
    feature_observer_configs=feature_observer_configs,
    reward_function_type=IdleTimeReward,
    render_mode="human",  # Try "save_video"
    render_config={
        "video_config": {"fps": 4}
    }
)
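The environment above is built with ``IdleTimeReward``. The imports also mention ``MakespanReward`` (currently commented out); as a minimal sketch, assuming it plugs into ``reward_function_type`` the same way and after uncommenting its import, the environment could be constructed with it instead (``makespan_env`` is just a throwaway name for this example, and the render options are omitted):

[ ]:
# Sketch (not executed here): same constructor as above, but with
# MakespanReward, which is imported (commented out) at the top of this
# notebook. Assumes it follows the same reward-function interface.
makespan_env = SingleJobShopGraphEnv(
    job_shop_graph=job_shop_graph,
    feature_observer_configs=feature_observer_configs,
    reward_function_type=MakespanReward,
)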
[3]:
env.observation_space
[3]:
Dict('edge_list': MultiDiscrete([[38 38 38 ... 38 38 38]
 [38 38 38 ... 38 38 38]]), 'jobs': Box(-inf, inf, (6, 1), float32), 'removed_nodes': MultiBinary(38))
[4]:
len(job_shop_graph.nodes)
[4]:
38
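Every entry of ``edge_list`` indexes into these 38 nodes, and ``removed_nodes`` is a binary mask with one flag per node. A quick sanity check (a sketch, assuming the standard Gymnasium ``Dict``/``MultiBinary`` attributes):

[ ]:
# Sketch: the removed_nodes mask should have one entry per graph node.
assert env.observation_space["removed_nodes"].n == len(job_shop_graph.nodes)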
[5]:
import random

random.seed(42)


def random_action(observation: ObservationDict) -> tuple[int, int]:
    ready_operations = []
    for operation_id, is_ready in enumerate(
        observation[ObservationSpaceKey.JOBS.value].ravel()
    ):
        if is_ready == 1.0:
            ready_operations.append(operation_id)

    operation_id = random.choice(ready_operations)
    machine_id = -1  # We can use -1 if each operation can only be
    # scheduled on one machine.
    return (operation_id, machine_id)
[6]:
env.get_observation()[ObservationSpaceKey.JOBS]
[6]:
array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)
[7]:
random_action(env.get_observation())
[7]:
(5, -1)
[8]:
from IPython.display import clear_output

done = False
obs, _ = env.reset()
while not done:
    action = random_action(obs)
    obs, reward, done, *_ = env.step(action)
    if env.render_mode == "human":
        env.render()
        clear_output(wait=True)

if env.render_mode == "save_video" or env.render_mode == "save_gif":
    env.render()
[Figure: schedule rendered by env.render() during the episode (examples_09-SingleJobShopGraphEnv_8_0.png)]
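After the loop, the random policy has produced a complete schedule. As a sketch only (it assumes the environment exposes its ``dispatcher`` and that the resulting schedule object has a ``makespan()`` method; neither attribute is shown in this notebook), the makespan achieved by the random policy could be read off directly:

[ ]:
# Sketch: assumes env.dispatcher.schedule and a makespan() method exist.
print(env.dispatcher.schedule.makespan())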
[9]:
import numpy as np

rewards = np.array(env.reward_function.rewards)
print(f"{len(list(filter(lambda x: x != 0, rewards)))} zeros out of {len(rewards)}")
19 zeros out of 36
[10]:
rewards.mean()
[10]:
-9.11111111111111
[11]:
rewards.sum()
[11]:
-328
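A single episode gives only a noisy estimate of how the random policy performs. Reusing just the calls already demonstrated above, a short sketch can run a few more episodes and compare their total rewards:

[ ]:
# Sketch: total reward of several random-policy episodes, using the same
# reset/step/random_action calls shown earlier in this notebook.
returns = []
for _ in range(5):
    obs, _ = env.reset()
    done = False
    episode_return = 0.0
    while not done:
        obs, reward, done, *_ = env.step(random_action(obs))
        episode_return += reward
    returns.append(episode_return)
print(returns)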