import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
from gmmvi.models.gmm_wrapper import GmmWrapper
tfd = tfp.distributions
from collections import OrderedDict
from tf_robot_learning import kinematic as tk
from tf_robot_learning import distributions as rlds
from gmmvi.experiments.target_distributions.lnpdf import LNPDF
from pathlib import Path
import os
import numpy as np
[docs]class Talos(LNPDF):
"""
The target distribution for the humanoid inverse kinematik problem. The unnormalized target distribution is given
by a mixture of experts, that penalize the task space error of the left endeffector, joint-limit violations,
unstable configurations, etc.
This class requires a fork of tf_robot_learning that can be found
`here <https://github.com/OlegArenz/tf_robot_learning>`_.
Parameters:
context: list[float]
A list of three floats, specifying the desired x, y, z coordinates of the left endeffector.
"""
def __init__(self, context):
super().__init__(use_log_density_and_grad=True, safe_for_tf_graph=False)
self.context = context
root_path = Path(__file__).parent.resolve()
self.urdf_path = os.path.join(root_path, "datasets/talos_reduced.urdf")
self.chain_names = ['r_gripper', 'l_gripper', 'r_foot', 'l_foot']
self.left_foot_target = [-0.02, 0.09, -0., 1., 0., 0., 0., 1., 0., 0., 0., 1.]
self.right_foot_target = [-0.02, -0.09, -0., 1., 0., 0., 0., 1., 0., 0., 0., 1.]
self.chain = self.create_chain()
# tf.print(self.chain.mean_pose)
# tf.print(self.chain.actuated_joint_names)
self.mean_joints = tf.constant(self.chain.mean_pose + [0., 0., 1.08, 0., 0., 0.], dtype=tf.float32)
self._num_dimensions = 34
_ = self.log_density_and_grad(tf.random.normal((10, self._num_dimensions)))
self.log_density_and_grad = tf.function(self.log_density_and_grad)
[docs] def log_density_and_grad(self, samples):
with tf.GradientTape(persistent=False) as tape:
tape.watch(samples)
target = self.log_density(samples)
gradient = tape.gradient(target, samples)
return target, gradient
[docs] def create_chain(self): # cannot be created just once, needs to be recreated in each tf graph
urdf = tk.urdf_from_file(self.urdf_path)
tips = OrderedDict({
'r_gripper': 'gripper_right_base_link',
'l_gripper': 'gripper_left_base_link',
'r_foot': 'right_sole_link',
'l_foot': 'left_sole_link',
})
# get chains of arms and leg from base
chain = tk.ChainDict({
name: tk.kdl_chain_from_urdf_model(urdf, 'base_link', tip=tip)
for name, tip in tips.items()
})
_ = chain.actuated_joint_names
return chain
[docs] def get_left_gripper(self, x, chain): # l_gripper
_q, _p, _m = param_to_joint_pos(x, chain)
return chain.xs(_q, floating_base=(_p, _m), name='l_gripper')[:, -1]
[docs] def get_joint_angles(self, x, chain): # joint angles
return x[..., :chain.nb_joint]
[docs] def get_com_feet_diff(self, x, chain): # relative position between center of feet and projection of com
# basic implementation, needs to be replaced with more complete differentiable formula
_q, _p, _m = param_to_joint_pos(x, chain)
# get center of mass projection on the ground
com_xy = chain.xs(_q, floating_base=(_p, _m), get_links=True)[-1][:, :2]
center_feet = chain.xs(_q, floating_base=(_p, _m), name='l_foot')[:, -1, :2]
return com_xy - center_feet
[docs] def get_joint_prior(self, x, chain):
joint_limits = tf.constant(chain.joint_limits, dtype=tf.float32)
joint_limits_std = 0.05
joint_limits_temp = 1.
joint_limits_exp = rlds.SoftUniformNormalCdf(
low=joint_limits[:, 0],
high=joint_limits[:, 1],
std=joint_limits_std,
temp=joint_limits_temp,
reduce_axis=-1
)
return joint_limits_exp.log_prob(self.get_joint_angles(x, chain))
[docs] def get_left_gripper_reward(self, x, context, chain):
pos = context
stds = tf.ones_like(pos) * 0.02
target_dist = tfp.distributions.MultivariateNormalDiag(pos, stds)
return target_dist.log_prob(self.get_left_gripper(x, chain)[:, :3])
[docs] def get_com_reward(self, x, chain):
com_limits = 0.14
com_limits_std = 0.01
com_limits_temp = 1.
com_limits_exp = rlds.SoftUniformNormalCdf(
low=-com_limits,
high=com_limits,
std=com_limits_std,
temp=com_limits_temp,
reduce_axis=-1
)
return com_limits_exp.log_prob(self.get_com_feet_diff(x, chain))
def _reward(self, sample, context):
chain = self.chain #self.create_chain()
rew = self.get_joint_prior(sample, chain)
rew += self.get_com_reward(sample, chain)
rew += self.get_right_foot_reward(sample, context, chain)
rew += self.get_left_foot_reward(sample, context, chain)
rew += self.get_left_gripper_reward(sample, context, chain)
return rew
[docs] def get_num_dimensions(self):
return self._num_dimensions
[docs] def log_density(self, thetas):
return self._reward(thetas, self.context)
[docs] def expensive_metrics(self, model: GmmWrapper, samples: tf.Tensor) -> dict:
expensive_metrics = dict()
fig = self.plot(self.context, samples[:100])
expensive_metrics.update(
{"plot": fig})
return expensive_metrics
[docs] def plot(self, context, samples):
chain = self.chain #create_chain()
plt.ioff()
plt.figure(1)
plt.clf()
_, samples_position, samples_rotation = param_to_joint_pos(samples, chain)
links, _, coms = chain.xs(samples, floating_base=(samples_position, samples_rotation), get_links=True)
fig, ax = plt.subplots(ncols=2, sharex=True, figsize=(15, 10))
for i in range(2):
dim = [i, 2]
chain.plot(
links, feed_dict={}, ax=ax[i],
dim=dim, alpha=0.2, color='k'
)
ax[i].plot(coms[:, dim[0]], coms[:, dim[1]], 'yx')
ax[i].plot(self.left_foot_target[dim[0]], self.left_foot_target[dim[1]], ls=' ', marker='s',
label="left_foot_target", color="red")
ax[i].plot(self.right_foot_target[dim[0]], self.right_foot_target[dim[1]], ls=' ', marker='s',
label="right_foot_target", color="green")
ax[i].plot(context[dim[0]], context[dim[1]], ls=' ', marker="o", markersize=6,
label="left_gripper_target", color="orange")
ax[i].legend()
return fig
def param_to_joint_pos(x, chain):
return (x[..., :chain.nb_joint], # 28 joint angle
x[..., chain.nb_joint:chain.nb_joint + 3], # 3 position of floating base
tk.rotation.rpy(x[..., -3:])) # 3 orientation of floating base euler to rotation matrix
class TalosLeftGripperTargetPdf:
def __init__(self):
self.radius_prior = tfd.Uniform(low=0.2, high=1)
self.xy_ang_prior = tfd.Uniform(low=-np.pi/2, high=np.pi/2)
self.z_ang_prior = tfd.Uniform(low=float(np.deg2rad(10)), high=float(np.rad2deg(170)))
def sample(self, n):
xy_ang = self.xy_ang_prior.sample(n)
rad = self.radius_prior.sample(n)
z_ang = self.z_ang_prior.sample(n)
x = rad * tf.sin(z_ang) * tf.cos(xy_ang)
y = rad * tf.sin(z_ang) * tf.sin(xy_ang)
z = rad * tf.cos(z_ang)
return tf.stack([x, y, z], axis=-1)
def make_talos_target(context):
"""
planar robot experiment with one goal from vips, here with conditioning on the goal position.
Parameters:
context: list[float]
A list of three floats, specifying the desired x, y, z coordinates of the left endeffector.
"""
target = Talos(context)
goal_prior = TalosLeftGripperTargetPdf()
return target
if __name__ == "__main__":
target = make_talos_target([-0.02, 0.09, -0.0])
target.log_density(tf.random.normal((2, target._num_dimensions)))
print("done")