Source code for epyt_control.controllers.lqr

  1"""
  2This module contains implementations of different Linear Quadratic Regulator (LQR) variants.
  3"""
  4from typing import Optional, Callable
  5import numpy as np
  6
  7from .utils import is_mat_spd, is_mat_spsd
  8
  9

[docs]
 10def linear_quadratic_regulator(current_state: np.ndarray,
 11                               state_cost_mat: np.ndarray, action_cost_mat: np.ndarray,
 12                               state_transition_mat: np.ndarray,
 13                               action_transition_mat: np.ndarray, time_horizon: int,
 14                               final_state_cost_mat: Optional[np.ndarray] = None
 15                               ) -> list[np.ndarray]:
 16    """
 17    Computes the Linear Quadratic Regulator (LQR) control solution of a given
 18    inite-horizon & discrete-time LQR problem.
 19
 20    Parameters
 21    ----------
 22    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
 23        Current system state.
 24    state_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
 25        Cost matrix of states -- i.e. a s.p.s.d. matrix specifying the cost of a given state.
 26    action_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
 27        Cost matrix of actions -- i.e. a s.p.d. matrix specifying the cost of a action state.
 28    state_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
 29        State transition matrix -- i.e. mapping a given state to the next state
 30        (without any action).
 31    action_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
 32        Action transition matrix -- i.e. mapping specifying the state change/influence
 33        of taking an action.
 34    time_horizon : int
 35        Time horizon.
 36    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
 37        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
 38        If None, 'state_cost_mat' will be used for the final state cost.
 39
 40    Returns
 41    -------
 42    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
 43        List of actions for reaching the specified target space.
 44    """
 45    if not isinstance(current_state, np.ndarray):
 46        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
 47                        f"but not of '{type(current_state)}'")
 48    if current_state.ndim != 1:
 49        raise ValueError("'current_state' must be a 1-dimensional array -- " +
 50                         f"but not of shape {current_state.shape}")
 51    if not isinstance(state_cost_mat, np.ndarray):
 52        raise TypeError("'state_cost_mat' must be an instance of 'numpy.ndarray' " +
 53                        f"but not of '{type(state_cost_mat)}'")
 54    if not is_mat_spsd(state_cost_mat):
 55        raise ValueError("'state_cost_mat' must be symmetric positive semi-definite")
 56    if state_cost_mat.ndim != 2 or not state_cost_mat.shape[0] == current_state.shape[0]:
 57        raise ValueError("Invalid shape of 'state_cost_mat' -- " +
 58                         f"expecting {(current_state.shape[0], current_state.shape[0])}")
 59    if not isinstance(action_cost_mat, np.ndarray):
 60        raise TypeError("'action_cost_mat' must be an instance of 'numpy.ndarray' " +
 61                        f"but not of '{type(action_cost_mat)}'")
 62    if not is_mat_spd(action_cost_mat):
 63        raise ValueError("'action_cost_mat' must be symmetric positive definite")
 64    if not isinstance(state_transition_mat, np.ndarray):
 65        raise TypeError("'state_transition_mat' must be an instance of 'numpy.ndarray' " +
 66                        f"but not of '{type(state_transition_mat)}'")
 67    if state_transition_mat.shape != (current_state.shape[0], current_state.shape[0]):
 68        raise ValueError("Invalid shape of 'state_transition_mat' -- " +
 69                         f"expecting {(current_state.shape[0], current_state.shape[0])}")
 70    if not isinstance(action_transition_mat, np.ndarray):
 71        raise TypeError("'action_transition_mat' must be an instance of 'numpy.ndarray' " +
 72                        f"but not of '{type(action_transition_mat)}'")
 73    if action_transition_mat.shape[0] != current_state.shape[0] or \
 74            len(action_transition_mat.shape) != 2:
 75        raise ValueError("Invalid shape of 'action_transition_mat' -- expecting 2-dimensional " +
 76                         f"matrix where the first dimension is equal to {current_state.shape[0]}")
 77    if not isinstance(time_horizon, int):
 78        raise TypeError("'time_horizon' must be an instance of 'int' " +
 79                        f"but not of '{type(time_horizon)}'")
 80    if time_horizon <= 0:
 81        raise ValueError("'time_horizon' must be positive")
 82    if final_state_cost_mat is not None:
 83        if not isinstance(final_state_cost_mat, np.ndarray):
 84            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
 85                            f"but not of '{type(final_state_cost_mat)}'")
 86        if not is_mat_spsd(final_state_cost_mat):
 87            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
 88        if final_state_cost_mat.ndim != 2 or \
 89                not final_state_cost_mat.shape[0] == current_state.shape[0]:
 90            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
 91                             f"expecting {(current_state.shape[0], current_state.shape[0])}")
 92    else:
 93        final_state_cost_mat = state_cost_mat
 94
 95    P = [None] * (time_horizon + 1)
 96    P[time_horizon] = final_state_cost_mat
 97
 98    for t in range(time_horizon, 0, -1):
 99        P[t-1] = state_cost_mat + state_transition_mat.T @ P[t] @ state_transition_mat - \
100            (state_transition_mat.T @ P[t] @ action_transition_mat) @ \
101            np.linalg.pinv(action_cost_mat + action_transition_mat.T @ P[t] @
102                           action_transition_mat) @ (action_transition_mat.T @ P[t] @
103                                                     state_transition_mat)
104
105    actions = []
106    x = np.copy(current_state)
107    for t in range(0, time_horizon):
108        K = -np.linalg.pinv(action_cost_mat + action_transition_mat.T @ P[t+1] @
109                            action_transition_mat) @ \
110                                action_transition_mat.T @ P[t+1] @ state_transition_mat
111        u = K @ x
112        x = state_transition_mat @ x + action_transition_mat @ u
113        actions.append(u)
114
115    return actions

116
117

[docs]
def time_varying_lqr(current_state: np.ndarray, state_cost: Callable[[int], np.ndarray],
                     action_cost: Callable[[int], np.ndarray],
                     state_transition: Callable[[int], np.ndarray],
                     action_transition: Callable[[int], np.ndarray],
                     time_horizon: int,
                     final_state_cost_mat: Optional[np.ndarray] = None) -> list[np.ndarray]:
    """
    Computes the Linear Quadratic Regulator (LQR) control solution of a given
    finite-horizon & discrete-time LQR problem with time varying parameters.

    The feedback gain of time step t is computed from the transition and action cost
    matrices of time step t together with the cost-to-go matrix of time step t+1.
    The gains are then applied in a forward simulation of the system, starting at
    'current_state', to obtain the sequence of actions.

    'state_transition', 'action_transition', and 'action_cost' are evaluated exactly once
    for each time step 0, ..., time_horizon-1. 'state_cost' is evaluated for the time
    steps 1, ..., time_horizon-1, and additionally for 'time_horizon' if
    'final_state_cost_mat' is None.

    Parameters
    ----------
    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Current system state (1-dimensional array).
    state_cost : Callable[[int], [`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of states -- i.e. mapping time to a s.p.s.d. matrix specifying
        the cost of a given state.
    action_cost : Callable[[int], [`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of actions -- i.e. mapping time to a s.p.d. matrix specifying
        the cost of an action.
    state_transition : Callable[[int], [`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying state transition matrix -- i.e. mapping time to a matrix for mapping a given
        state to the next state (without any action).
    action_transition : Callable[[int], [`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying action transition matrix -- i.e. mapping time to a matrix for specifying
        the state change/influence of taking an action.
    time_horizon : int
        Time horizon -- i.e. number of actions to be computed. Must be positive.
    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
        If None, 'state_cost' will be used for getting final state cost.

    Returns
    -------
    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        List of 'time_horizon' actions -- the i-th entry is the action to be applied
        at time step i.

    Raises
    ------
    TypeError
        If an argument is not of the expected type.
    ValueError
        If 'current_state', 'time_horizon', or 'final_state_cost_mat' is invalid.
    """
    if not isinstance(current_state, np.ndarray):
        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(current_state)}'")
    if current_state.ndim != 1:
        raise ValueError("'current_state' must be a 1-dimensional array -- " +
                         f"but not of shape {current_state.shape}")
    if not callable(state_cost):
        raise TypeError("'state_cost' must be callable -- i.e. mapping time to a " +
                        "state cost matrix")
    if not callable(action_cost):
        raise TypeError("'action_cost' must be callable -- i.e. mapping time to a " +
                        "action cost matrix")
    if not callable(state_transition):
        raise TypeError("'state_transition' msut be callable -- " +
                        "i.e. mapping time to a state transiton matrix")
    if not callable(action_transition):
        raise TypeError("'action_transition' must be callable -- i.e. mapping time to a " +
                        "action transition matrix")
    if not isinstance(time_horizon, int):
        raise TypeError("'time_horizon' must be an instance of 'int' " +
                        f"but not of '{type(time_horizon)}'")
    if time_horizon <= 0:
        raise ValueError("'time_horizon' must be positive")
    if final_state_cost_mat is not None:
        if not isinstance(final_state_cost_mat, np.ndarray):
            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
                            f"but not of '{type(final_state_cost_mat)}'")
        if not is_mat_spsd(final_state_cost_mat):
            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
        if final_state_cost_mat.ndim != 2 or \
                not final_state_cost_mat.shape[0] == current_state.shape[0]:
            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
                             f"expecting {(current_state.shape[0], current_state.shape[0])}")
    else:
        final_state_cost_mat = state_cost(time_horizon)

    # Backward Riccati recursion. When processing time step t, 'P' holds the cost-to-go
    # matrix of time step t+1. The matrices of time step t (NOT t+1) have to be used for
    # both the gain and the cost-to-go update -- otherwise the gains applied in the forward
    # pass would not match the cost-to-go matrices they are computed from.
    transitions = [None] * time_horizon
    gains = [None] * time_horizon
    P = final_state_cost_mat
    for t in range(time_horizon - 1, -1, -1):
        A = state_transition(t)
        B = action_transition(t)
        S_inv = np.linalg.pinv(action_cost(t) + B.T @ P @ B)
        gains[t] = -S_inv @ B.T @ P @ A
        transitions[t] = (A, B)     # Reused in the forward pass -- avoids a second evaluation
        if t > 0:   # The cost-to-go of the initial state is not needed for the gains
            P = state_cost(t) + A.T @ P @ A - (A.T @ P @ B) @ S_inv @ (B.T @ P @ A)

    # Forward pass: apply the feedback gains to the simulated system
    actions = []
    x = np.copy(current_state)
    for (A, B), K in zip(transitions, gains):
        u = K @ x
        x = A @ x + B @ u
        actions.append(u)

    return actions
210    return actions