YAML configuration

Beobench uses YAML files to define experiment configurations. The file below is the default configuration for every Beobench experiment; the comment above each setting describes what it does.

# Default Beobench configuration.
#
# Some of the descriptions of RLlib config
# values are taken from
# https://docs.ray.io/en/latest/rllib/rllib-training.html.
#
# Agent config
agent:
  # Either the path to an agent script or the name of a
  # built-in agent (e.g. `rllib`)
  origin: rllib
  # Config used by the agent. Because the agent origin is
  # set to `rllib` above, this config is passed to
  # ray.tune.run() as arguments.
  config:
    # RLlib algorithm name
    run_or_experiment: PPO
    # Stopping condition
    stop:
      timesteps_total: 400000
    # Config used only by RLlib and not by other Ray modules.
    config:
      # Learning rate
      lr: 0.005
      # Neural network model for PPO
      model:
        fcnet_activation: relu
        fcnet_hiddens: [256, 256, 256, 256]
        post_fcnet_activation: tanh
      batch_mode: complete_episodes
      # Discount factor
      gamma: 0.999
      # Number of steps after which the episode is forced to
      # terminate. Defaults to `env.spec.max_episode_steps`
      # (if present) for Gym envs.
      horizon: 1000
      # Calculate rewards but don't reset the environment when
      # the horizon is hit. This allows value estimation and
      # RNN state to span across logical episodes denoted by
      # horizon. This only has an effect if horizon != inf.
      soft_horizon: True
      # Number of episodes over which RLlib internal metrics
      # are smoothed
      metrics_num_episodes_for_smoothing: 5
      # Deep learning framework
      framework: torch
      # Number of parallel workers interacting with the
      # environment. For most gym frameworks this has to be
      # set to 1, because the underlying building simulations
      # are often not set up for parallel use
      # (and can fail silently otherwise).
      num_workers: 1
# Environment config
env:
  # Gym framework of environment
  gym: energym
  # Configuration passed to the integration's `create_env()`
  # function. Specific to whatever gym framework you use
  # (in this case Energym).
  config: null
# Wrappers (None added by default)
wrappers: []
# General Beobench config
general:
  # Directory to write experiment files to. This argument
  # is equivalent to the `local_dir` argument in
  # `tune.run()`.
  local_dir: ./beobench_results
  # Name of wandb project.
  wandb_project: null
  # Name of wandb entity.
  wandb_entity: null
  # Name of wandb run group.
  wandb_group: null
  # Wandb API key
  wandb_api_key: null
  # Name of MLflow experiment
  mlflow_name: null
  # Whether to use a GPU from the host system. Requires
  # that a GPU is available.
  use_gpu: False
  # Size of the shared memory available to the experiment
  # container.
  docker_shm_size: 4gb
  # Whether to force a re-build, even if the image
  # already exists.
  force_build: False
  # Whether to disable the Docker cache when building the
  # experiment container. This has no effect if force_build
  # is disabled and the image already exists, since no
  # build happens in that case.
  use_no_cache: False
  # File or GitHub path to the beobench package. For
  # development purposes only. This will install a custom
  # beobench version inside the experiment container.
  # By default the latest PyPI version is installed.
  dev_path: null
  # List of Docker flags added to the `docker run` command
  # of the Beobench experiment container.
  docker_flags: null
  # Extra dependencies to install with beobench. Used
  # during pip installation in the experiment image,
  # i.e. via the command:
  # `pip install beobench[<beobench_extras>]`
  beobench_extras: "extended"
  # Number of experiment samples to run. This defaults
  # to a single sample, i.e. just running the
  # experiment once.
  num_samples: 1
  # Beobench version
  # version: 0.5.4
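
A user config only needs to set the values that differ from the defaults; assuming unspecified settings fall back to the default configuration above, a minimal override config might look like the sketch below. The learning rate and stopping point shown are illustrative choices, not recommendations.

# config.yaml: minimal user config sketch. Only the values
# below are changed; everything else is assumed to fall
# back to the defaults above.
agent:
  config:
    stop:
      timesteps_total: 100000
    config:
      lr: 0.001
env:
  gym: sinergym
general:
  local_dir: ./my_results

Such a file can then be passed to Beobench when starting an experiment, e.g. via the command line with `beobench run --config config.yaml` (check `beobench run --help` for the exact flags of your installed version).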

Gym-specific configurations

Each gym framework integration has its own configuration setup. See below for the default environment configurations of the three supported frameworks.

BOPTEST

# BOPTEST default config
# Some of the descriptions are taken from
# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py

env:
  gym: boptest
  config:
    name: bestest_hydronic_heat_pump
    # Whether to normalize the observations and actions
    normalize: True
    # Whether to discretize the environment's spaces
    discretize: True
    gym_kwargs:
      actions: ["oveHeaPumY_u"]
      # Dictionary mapping observation keys to a tuple with the lower
      # and upper bound of each observation. Observation keys must
      # belong either to the set of measurements or to the set of
      # forecasting variables of the BOPTEST test case. Contrary to
      # the actions, the expected minimum and maximum values of the
      # measurement and forecasting variables are not provided by
      # the BOPTEST framework, although they are still relevant here,
      # e.g. for normalization or discretization. Therefore, these
      # bounds need to be provided by the user.
      # If `time` is included as an observation, the time in seconds
      # will be passed to the agent. This is the time elapsed since
      # the beginning of the episode, wrapped modulo the period
      # length given as the upper bound of the `time` feature.
      observations:
        reaTZon_y: [280.0, 310.0]
      # Whether to use a random start time for each episode
      random_start_time: True
      # Maximum duration of each episode in seconds
      max_episode_length: 31536000 # one year in seconds
      # Warmup period (in seconds) used to initialize each episode
      warmup_period: 10
      # Sampling time in seconds
      step_period: 900 # = 15min
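
Because the bounds of measurement and forecasting variables must be supplied by the user, extending the observation space means adding entries to the `observations` dictionary. The sketch below assumes `TDryBul` (outdoor dry-bulb temperature forecast) is available in the chosen test case; the key and all bounds are illustrative and should be checked against the BOPTEST test case documentation.

env:
  gym: boptest
  config:
    name: bestest_hydronic_heat_pump
    gym_kwargs:
      actions: ["oveHeaPumY_u"]
      observations:
        # Zone temperature measurement, bounds in Kelvin
        reaTZon_y: [280.0, 310.0]
        # Outdoor dry-bulb temperature forecast, bounds in
        # Kelvin (illustrative key, check your test case)
        TDryBul: [265.0, 303.0]
        # Episode time in seconds; with this upper bound the
        # time observation wraps weekly (604800 s = one week)
        time: [0, 604800]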

Energym

# Energym default config

env:
  gym: energym
  config:
    # Number of simulation days
    days: 365
    # Name of energym environment
    name: Apartments2Thermal-v0
    gym_kwargs:
      # Maximum number of timesteps in one episode
      max_episode_length: 35040 # corresponds to a year of 15 min steps
      # Whether state and action spaces are normalized
      normalize: true
      # Number of real-world minutes between timesteps in building simulation
      step_period: 15
      # Whether to ignore requests for a complete
      # environment reset
      ignore_reset: True
    # Weather file to use for the scenario (possible files depend on env chosen)
    weather: ESP_CT_Barcelona
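
To run a different Energym building, `name` and `weather` typically have to change together, since each environment only accepts certain weather files. In the sketch below, `MixedUseFanFCU-v0` and `GRC_A_Athens` are assumed to be a valid model/weather pair; verify both names against the Energym documentation.

env:
  gym: energym
  config:
    # Assumed Energym model name
    name: MixedUseFanFCU-v0
    # Assumed matching weather file
    weather: GRC_A_Athens
    # Shorter simulation, e.g. for quick tests
    days: 30
    gym_kwargs:
      normalize: true
      # 30 days of 15-minute steps (30 * 96)
      max_episode_length: 2880
      step_period: 15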

Sinergym

# Sinergym default config

env:
  gym: sinergym
  config:
    # Sinergym env name
    name: Eplus-5Zone-hot-continuous-v1
    # Whether to normalize the observations
    normalize: False
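
Putting the pieces together, a complete experiment configuration combines the `agent`, `env`, `wrappers` and `general` sections described above. The sketch below pairs the default RLlib agent with the default Sinergym environment; the wandb values are placeholders, and any setting left out is assumed to fall back to its default.

# Full experiment config sketch
agent:
  origin: rllib
  config:
    run_or_experiment: PPO
    stop:
      timesteps_total: 400000
    config:
      lr: 0.005
      framework: torch
      num_workers: 1
env:
  gym: sinergym
  config:
    name: Eplus-5Zone-hot-continuous-v1
    normalize: False
wrappers: []
general:
  local_dir: ./beobench_results
  # Placeholder wandb settings; replace with your own
  wandb_project: my_project
  wandb_entity: my_team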