1"""
2This module contains implementations of different Linear Quadratic Regulator (LQR) variants.
3"""
4from typing import Optional, Callable
5import numpy as np
6
7from .utils import is_mat_spd, is_mat_spsd
8
9
[docs]
def linear_quadratic_regulator(current_state: np.ndarray,
                               state_cost_mat: np.ndarray, action_cost_mat: np.ndarray,
                               state_transition_mat: np.ndarray,
                               action_transition_mat: np.ndarray, time_horizon: int,
                               final_state_cost_mat: Optional[np.ndarray] = None
                               ) -> list[np.ndarray]:
    """
    Computes the Linear Quadratic Regulator (LQR) control solution of a given
    finite-horizon & discrete-time LQR problem.

    The system evolves as ``x[t+1] = A @ x[t] + B @ u[t]`` where ``A`` is
    'state_transition_mat' and ``B`` is 'action_transition_mat'. The feedback gains are
    obtained from a backward Riccati recursion that starts at the final state cost;
    the actions are then obtained by simulating the system forward from 'current_state'.

    Parameters
    ----------
    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Current system state (1-dimensional array).
    state_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Cost matrix of states -- i.e. a s.p.s.d. matrix specifying the cost of a given state.
    action_cost_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Cost matrix of actions -- i.e. a s.p.d. matrix specifying the cost of a given action.
    state_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        State transition matrix -- i.e. mapping a given state to the next state
        (without any action).
    action_transition_mat : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Action transition matrix -- i.e. mapping specifying the state change/influence
        of taking an action.
    time_horizon : int
        Time horizon -- i.e. number of actions to compute.
    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
        If None, 'state_cost_mat' will be used for the final state cost.

    Returns
    -------
    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        List of 'time_horizon' actions -- the i-th entry is the action applied at time step i.

    Raises
    ------
    TypeError
        If an argument is not of the expected type.
    ValueError
        If an argument has an invalid shape, a cost matrix fails its definiteness check,
        or 'time_horizon' is not positive.
    """
    if not isinstance(current_state, np.ndarray):
        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(current_state)}'")
    if current_state.ndim != 1:
        raise ValueError("'current_state' must be a 1-dimensional array -- " +
                         f"but not of shape {current_state.shape}")
    state_dim = current_state.shape[0]
    state_mat_shape = (state_dim, state_dim)

    if not isinstance(state_cost_mat, np.ndarray):
        raise TypeError("'state_cost_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(state_cost_mat)}'")
    # The shape is verified first so that the definiteness helper only sees 2-d input.
    if state_cost_mat.ndim != 2 or state_cost_mat.shape[0] != state_dim:
        raise ValueError("Invalid shape of 'state_cost_mat' -- " +
                         f"expecting {state_mat_shape}")
    if not is_mat_spsd(state_cost_mat):
        raise ValueError("'state_cost_mat' must be symmetric positive semi-definite")

    if not isinstance(action_cost_mat, np.ndarray):
        raise TypeError("'action_cost_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(action_cost_mat)}'")
    if not is_mat_spd(action_cost_mat):
        raise ValueError("'action_cost_mat' must be symmetric positive definite")

    if not isinstance(state_transition_mat, np.ndarray):
        raise TypeError("'state_transition_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(state_transition_mat)}'")
    if state_transition_mat.shape != state_mat_shape:
        raise ValueError("Invalid shape of 'state_transition_mat' -- " +
                         f"expecting {state_mat_shape}")

    if not isinstance(action_transition_mat, np.ndarray):
        raise TypeError("'action_transition_mat' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(action_transition_mat)}'")
    # Check the number of dimensions *before* indexing into the shape -- otherwise a
    # 0-dimensional array would raise an IndexError instead of the intended ValueError.
    if action_transition_mat.ndim != 2 or action_transition_mat.shape[0] != state_dim:
        raise ValueError("Invalid shape of 'action_transition_mat' -- expecting 2-dimensional " +
                         f"matrix where the first dimension is equal to {state_dim}")

    # The action cost must match the action dimension; a mismatching matrix would either
    # be broadcast silently or fail with an obscure error inside the recursion.
    action_dim = action_transition_mat.shape[1]
    if action_cost_mat.shape != (action_dim, action_dim):
        raise ValueError("Invalid shape of 'action_cost_mat' -- " +
                         f"expecting {(action_dim, action_dim)}")

    if not isinstance(time_horizon, int):
        raise TypeError("'time_horizon' must be an instance of 'int' " +
                        f"but not of '{type(time_horizon)}'")
    if time_horizon <= 0:
        raise ValueError("'time_horizon' must be positive")

    if final_state_cost_mat is not None:
        if not isinstance(final_state_cost_mat, np.ndarray):
            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
                            f"but not of '{type(final_state_cost_mat)}'")
        if final_state_cost_mat.ndim != 2 or final_state_cost_mat.shape[0] != state_dim:
            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
                             f"expecting {state_mat_shape}")
        if not is_mat_spsd(final_state_cost_mat):
            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
    else:
        final_state_cost_mat = state_cost_mat

    A = state_transition_mat
    B = action_transition_mat

    # Backward Riccati recursion: 'P' holds the cost-to-go matrix of time step t+1 while the
    # gain of time step t is computed. Each pseudo-inverse is computed exactly once and is
    # shared between the gain and the cost-to-go update.
    gains = [None] * time_horizon
    P = final_state_cost_mat
    for t in range(time_horizon - 1, -1, -1):
        AtP = A.T @ P
        BtP = B.T @ P
        S_pinv = np.linalg.pinv(action_cost_mat + BtP @ B)
        gains[t] = -S_pinv @ B.T @ P @ A
        if t > 0:   # The cost-to-go at time 0 is not needed for computing any gain
            P = state_cost_mat + AtP @ A - (AtP @ B) @ S_pinv @ (BtP @ A)

    # Forward simulation: apply the feedback gains to the simulated state trajectory.
    actions = []
    x = np.copy(current_state)
    for K in gains:
        u = K @ x
        x = A @ x + B @ u
        actions.append(u)

    return actions
116
117
[docs]
def time_varying_lqr(current_state: np.ndarray, state_cost: Callable[[int], np.ndarray],
                     action_cost: Callable[[int], np.ndarray],
                     state_transition: Callable[[int], np.ndarray],
                     action_transition: Callable[[int], np.ndarray],
                     time_horizon: int,
                     final_state_cost_mat: Optional[np.ndarray] = None) -> list[np.ndarray]:
    """
    Computes the Linear Quadratic Regulator (LQR) control solution of a given
    finite-horizon & discrete-time LQR problem with time varying parameters.

    The system evolves as ``x[t+1] = A(t) @ x[t] + B(t) @ u[t]`` for
    ``t = 0, ..., time_horizon - 1`` where ``A`` is 'state_transition' and ``B`` is
    'action_transition'. The action ``u[t]`` is penalized by ``action_cost(t)`` and the
    state ``x[t]`` by ``state_cost(t)``. The cost of the given initial state does not depend
    on any action, therefore ``state_cost(0)`` is never evaluated.

    Parameters
    ----------
    current_state : `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_
        Current system state (1-dimensional array).
    state_cost : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of states -- i.e. mapping time to a s.p.s.d. matrix specifying
        the cost of a given state.
    action_cost : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying cost matrix of actions -- i.e. mapping time to a s.p.d. matrix specifying
        the cost of a given action.
    state_transition : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying state transition matrix -- i.e. mapping time to a matrix for mapping a given
        state to the next state (without any action).
    action_transition : Callable[[int], `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        Time varying action transition matrix -- i.e. mapping time to a matrix for specifying
        the state change/influence of taking an action.
    time_horizon : int
        Time horizon -- i.e. number of actions to compute.
    final_state_cost_mat: `numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, optional
        Cost matrix of the final state -- i.e. a s.p.s.d. matrix specifying the cost of the final state.
        If None, ``state_cost(time_horizon)`` will be used as the final state cost.

    Returns
    -------
    list[`numpy.ndarray <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_]
        List of 'time_horizon' actions -- the i-th entry is the action applied at time step i.

    Raises
    ------
    TypeError
        If an argument is not of the expected type.
    ValueError
        If 'current_state' or 'final_state_cost_mat' is invalid, or 'time_horizon'
        is not positive.
    """
    if not isinstance(current_state, np.ndarray):
        raise TypeError("'current_state' must be an instance of 'numpy.ndarray' " +
                        f"but not of '{type(current_state)}'")
    if current_state.ndim != 1:
        raise ValueError("'current_state' must be a 1-dimensional array -- " +
                         f"but not of shape {current_state.shape}")
    if not callable(state_cost):
        raise TypeError("'state_cost' must be callable -- i.e. mapping time to a " +
                        "state cost matrix")
    if not callable(action_cost):
        raise TypeError("'action_cost' must be callable -- i.e. mapping time to a " +
                        "action cost matrix")
    if not callable(state_transition):
        raise TypeError("'state_transition' msut be callable -- " +
                        "i.e. mapping time to a state transiton matrix")
    if not callable(action_transition):
        raise TypeError("'action_transition' must be callable -- i.e. mapping time to a " +
                        "action transition matrix")
    if not isinstance(time_horizon, int):
        raise TypeError("'time_horizon' must be an instance of 'int' " +
                        f"but not of '{type(time_horizon)}'")
    if time_horizon <= 0:
        raise ValueError("'time_horizon' must be positive")
    if final_state_cost_mat is not None:
        if not isinstance(final_state_cost_mat, np.ndarray):
            raise TypeError("'final_state_cost_mat' must be an instance of 'numpy.ndarray' " +
                            f"but not of '{type(final_state_cost_mat)}'")
        if not is_mat_spsd(final_state_cost_mat):
            raise ValueError("'final_state_cost_mat' must be symmetric positive semi-definite")
        if final_state_cost_mat.ndim != 2 or \
                not final_state_cost_mat.shape[0] == current_state.shape[0]:
            raise ValueError("Invalid shape of 'final_state_cost_mat' -- " +
                             f"expecting {(current_state.shape[0], current_state.shape[0])}")
    else:
        final_state_cost_mat = state_cost(time_horizon)

    # Backward Riccati recursion. 'P' holds the cost-to-go matrix of time step t+1 while
    # the gain of time step t is computed. The step x[t] -> x[t+1] is governed by the
    # parameters of time t, so the matrices evaluated at t (not t+1) must be combined with
    # the cost-to-go of t+1 -- this keeps the recursion consistent with the forward pass.
    transitions = [None] * time_horizon
    gains = [None] * time_horizon
    P = final_state_cost_mat
    for t in range(time_horizon - 1, -1, -1):
        A = state_transition(t)
        B = action_transition(t)
        BtP = B.T @ P
        S_pinv = np.linalg.pinv(action_cost(t) + BtP @ B)

        transitions[t] = (A, B)
        gains[t] = -S_pinv @ B.T @ P @ A
        if t > 0:   # The cost-to-go at time 0 is not needed for computing any gain
            AtP = A.T @ P
            P = state_cost(t) + AtP @ A - (AtP @ B) @ S_pinv @ (BtP @ A)

    # Forward simulation: apply the feedback gains to the simulated state trajectory.
    actions = []
    x = np.copy(current_state)
    for (A, B), K in zip(transitions, gains):
        u = K @ x
        x = A @ x + B @ u
        actions.append(u)

    return actions
209
210 return actions