Source code for epyt_control.controllers.lqr

  1"""
  2This module contains implementations of different Linear Quadratic Regulator (LQR) variants.
  3"""
  4from typing import Optional, Callable
  5import numpy as np
  6
  7from .utils import is_mat_spd, is_mat_spsd
  8
  9
def linear_quadratic_regulator(current_state: np.ndarray,
                               state_cost_mat: np.ndarray, action_cost_mat: np.ndarray,
                               state_transition_mat: np.ndarray,
                               action_transition_mat: np.ndarray, time_horizon: int,
                               final_state_cost_mat: Optional[np.ndarray] = None
                               ) -> list[np.ndarray]:
    """
    Computes the Linear Quadratic Regulator (LQR) control solution of a given
    finite-horizon & discrete-time LQR problem.

    The gains are obtained from a backward Riccati recursion that starts at the final
    state cost; the actions are then generated by rolling the linear system forward from
    `current_state`.

    Parameters
    ----------
    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Current system state (1-dimensional array).
    state_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Cost matrix of states -- i.e. a s.p.s.d. matrix specifying the cost of a given state.
    action_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Cost matrix of actions -- i.e. a s.p.d. matrix specifying the cost of an action.
        Its shape must match the number of columns of 'action_transition_mat'.
    state_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        State transition matrix -- i.e. mapping a given state to the next state
        (without any action).
    action_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Action transition matrix -- i.e. mapping specifying the state change/influence
        of taking an action.
    time_horizon : int
        Time horizon -- i.e. number of actions to compute.
    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
        If None, 'state_cost_mat' will be used for the final state cost.

    Returns
    -------
    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        List of `time_horizon` actions -- one action per time step, starting with the action
        to be applied in the current state.

    Raises
    ------
    TypeError
        If an argument is of the wrong type.
    ValueError
        If a matrix has an invalid shape or is not (semi-)definite as required,
        or if 'time_horizon' is not positive.
    """
    if not isinstance(current_state, np.ndarray):
        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(current_state)}'")
    if current_state.ndim != 1:
        raise ValueError("'current_state' must be a 1-dimensional array -- " +
                         f"but not of shape {current_state.shape}")
    state_dim = current_state.shape[0]

    if not isinstance(state_cost_mat, np.ndarray):
        raise TypeError("'state_cost_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(state_cost_mat)}'")
    # The shape is checked first: the definiteness test only makes sense for a square matrix.
    if state_cost_mat.shape != (state_dim, state_dim):
        raise ValueError("Invalid shape of 'state_cost_mat' -- " +
                         f"expecting {(state_dim, state_dim)}")
    if not is_mat_spsd(state_cost_mat):
        raise ValueError("'state_cost_mat' must be symmetric positive semi-definite")

    if not isinstance(state_transition_mat, np.ndarray):
        raise TypeError("'state_transition_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(state_transition_mat)}'")
    if state_transition_mat.shape != (state_dim, state_dim):
        raise ValueError("Invalid shape of 'state_transition_mat' -- " +
                         f"expecting {(state_dim, state_dim)}")

    if not isinstance(action_transition_mat, np.ndarray):
        raise TypeError("'action_transition_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(action_transition_mat)}'")
    # Dimensionality is tested before indexing into the shape.
    if action_transition_mat.ndim != 2 or action_transition_mat.shape[0] != state_dim:
        raise ValueError("Invalid shape of 'action_transition_mat' -- expecting 2-dimensional " +
                         f"matrix where the first dimension is equal to {current_state.shape[0]}")
    action_dim = action_transition_mat.shape[1]

    if not isinstance(action_cost_mat, np.ndarray):
        raise TypeError("'action_cost_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(action_cost_mat)}'")
    # Without this check a wrongly sized matrix (e.g. 1x1) would be silently broadcast
    # when added to B^T P B, yielding wrong gains instead of an error.
    if action_cost_mat.shape != (action_dim, action_dim):
        raise ValueError("Invalid shape of 'action_cost_mat' -- " +
                         f"expecting {(action_dim, action_dim)}")
    if not is_mat_spd(action_cost_mat):
        raise ValueError("'action_cost_mat' must be symmetric positive definite")

    if not isinstance(time_horizon, int):
        raise TypeError("'time_horizon' must be an instance of 'int' " +
                        f"but not of '{type(time_horizon)}'")
    if time_horizon <= 0:
        raise ValueError("'time_horizon' must be positive")

    if final_state_cost_mat is not None:
        if not isinstance(final_state_cost_mat, np.ndarray):
            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
                            f"but not of '{type(final_state_cost_mat)}'")
        if final_state_cost_mat.shape != (state_dim, state_dim):
            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
                             f"expecting {(state_dim, state_dim)}")
        if not is_mat_spsd(final_state_cost_mat):
            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
    else:
        final_state_cost_mat = state_cost_mat

    A = state_transition_mat
    B = action_transition_mat

    # Backward pass: 'cost_to_go' holds P[t+1] while the gain of step t is computed.
    cost_to_go = final_state_cost_mat
    gains = [None] * time_horizon
    for t in range(time_horizon - 1, -1, -1):
        # (R + B^T P B)^+ is shared by the gain and the Riccati update -- compute it once.
        gain_inv = np.linalg.pinv(action_cost_mat + B.T @ cost_to_go @ B)
        gains[t] = -gain_inv @ B.T @ cost_to_go @ A
        if t > 0:   # P[0] is never needed for computing an action
            cost_to_go = state_cost_mat + A.T @ cost_to_go @ A - \
                (A.T @ cost_to_go @ B) @ gain_inv @ (B.T @ cost_to_go @ A)

    # Forward pass: apply the feedback gains while simulating the system.
    actions = []
    x = np.copy(current_state)
    for K in gains:
        u = K @ x
        x = A @ x + B @ u
        actions.append(u)

    return actions
116 117
def time_varying_lqr(current_state: np.ndarray, state_cost: Callable[[int], np.ndarray],
                     action_cost: Callable[[int], np.ndarray],
                     state_transition: Callable[[int], np.ndarray],
                     action_transition: Callable[[int], np.ndarray],
                     time_horizon: int,
                     final_state_cost_mat: Optional[np.ndarray] = None) -> list[np.ndarray]:
    """
    Computes the Linear Quadratic Regulator (LQR) control solution of a given
    finite-horizon & discrete-time LQR problem with time varying parameters.

    The system evolves as ``x[t+1] = A(t) x[t] + B(t) u[t]`` for ``t = 0, ..., time_horizon-1``.
    The action at time ``t`` is therefore computed from the matrices evaluated at ``t``
    together with the cost-to-go of time ``t+1``.

    Parameters
    ----------
    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Current system state (1-dimensional array).
    state_cost : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of states -- i.e. mapping time to a s.p.s.d. matrix specifying
        the cost of a given state. Evaluated at ``t = 1, ..., time_horizon-1`` and, if
        'final_state_cost_mat' is None, also at ``t = time_horizon``.
    action_cost : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of actions -- i.e. mapping time to a s.p.d. matrix specifying
        the cost of an action. Evaluated at ``t = 0, ..., time_horizon-1``.
    state_transition : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying state transition matrix -- i.e. mapping time to a matrix for mapping a given
        state to the next state (without any action). Evaluated at ``t = 0, ..., time_horizon-1``.
    action_transition : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying action transition matrix -- i.e. mapping time to a matrix for specifying
        the state change/influence of taking an action.
        Evaluated at ``t = 0, ..., time_horizon-1``.
    time_horizon : int
        Time horizon -- i.e. number of actions to compute.
    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
        If None, 'state_cost' will be used for getting final state cost.

    Returns
    -------
    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        List of `time_horizon` actions -- one action per time step, starting with the action
        to be applied in the current state.

    Raises
    ------
    TypeError
        If an argument is of the wrong type.
    ValueError
        If 'current_state' or 'final_state_cost_mat' has an invalid shape,
        if 'final_state_cost_mat' is not symmetric positive semi-definite,
        or if 'time_horizon' is not positive.
    """
    if not isinstance(current_state, np.ndarray):
        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(current_state)}'")
    if current_state.ndim != 1:
        raise ValueError("'current_state' must be a 1-dimensional array -- " +
                         f"but not of shape {current_state.shape}")
    if not callable(state_cost):
        raise TypeError("'state_cost' must be callable -- i.e. mapping time to a " +
                        "state cost matrix")
    if not callable(action_cost):
        raise TypeError("'action_cost' must be callable -- i.e. mapping time to a " +
                        "action cost matrix")
    if not callable(state_transition):
        raise TypeError("'state_transition' msut be callable -- " +
                        "i.e. mapping time to a state transiton matrix")
    if not callable(action_transition):
        raise TypeError("'action_transition' must be callable -- i.e. mapping time to a " +
                        "action transition matrix")
    if not isinstance(time_horizon, int):
        raise TypeError("'time_horizon' must be an instance of 'int' " +
                        f"but not of '{type(time_horizon)}'")
    if time_horizon <= 0:
        raise ValueError("'time_horizon' must be positive")
    if final_state_cost_mat is not None:
        if not isinstance(final_state_cost_mat, np.ndarray):
            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
                            f"but not of '{type(final_state_cost_mat)}'")
        # The shape is checked first: the definiteness test only makes sense
        # for a square matrix.
        state_dim = current_state.shape[0]
        if final_state_cost_mat.shape != (state_dim, state_dim):
            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
                             f"expecting {(current_state.shape[0], current_state.shape[0])}")
        if not is_mat_spsd(final_state_cost_mat):
            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
    else:
        final_state_cost_mat = state_cost(time_horizon)

    # Backward pass: 'cost_to_go' holds P[t+1] while step t is processed.
    # The matrices of step t (not t+1) must be combined with P[t+1], because
    # x[t+1] = A(t) x[t] + B(t) u[t] -- this keeps the recursion consistent with
    # the forward simulation below.
    cost_to_go = final_state_cost_mat
    steps = [None] * time_horizon
    for t in range(time_horizon - 1, -1, -1):
        A = state_transition(t)
        B = action_transition(t)
        # (R + B^T P B)^+ is shared by the gain and the Riccati update -- compute it once.
        gain_inv = np.linalg.pinv(action_cost(t) + B.T @ cost_to_go @ B)
        K = -gain_inv @ B.T @ cost_to_go @ A
        steps[t] = (A, B, K)
        if t > 0:   # P[0] is never needed for computing an action
            cost_to_go = state_cost(t) + A.T @ cost_to_go @ A - \
                (A.T @ cost_to_go @ B) @ gain_inv @ (B.T @ cost_to_go @ A)

    # Forward pass: apply the feedback gains while simulating the system.
    actions = []
    x = np.copy(current_state)
    for A, B, K in steps:
        u = K @ x
        x = A @ x + B @ u
        actions.append(u)

    return actions