Source code for gmmvi.experiments.target_distributions.talos_ik

import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt

from gmmvi.models.gmm_wrapper import GmmWrapper

tfd = tfp.distributions
from collections import OrderedDict
from tf_robot_learning import kinematic as tk
from tf_robot_learning import distributions as rlds
from gmmvi.experiments.target_distributions.lnpdf import LNPDF
from pathlib import Path
import os
import numpy as np

[docs]class Talos(LNPDF):
    """
    The target distribution for the humanoid inverse kinematik problem. The unnormalized target distribution is given
    by a mixture of experts, that penalize the task space error of the left endeffector, joint-limit violations,
    unstable configurations, etc.

    This class requires a fork of tf_robot_learning that can be found
    `here <https://github.com/OlegArenz/tf_robot_learning>`_.

    Parameters:
        context: list[float]
            A list of three floats, specifying the desired x, y, z coordinates of the left endeffector.
    """

    def __init__(self, context):
        super().__init__(use_log_density_and_grad=True, safe_for_tf_graph=False)
        self.context = context
        root_path = Path(__file__).parent.resolve()
        self.urdf_path = os.path.join(root_path, "datasets/talos_reduced.urdf")
        self.chain_names = ['r_gripper', 'l_gripper', 'r_foot', 'l_foot']
        self.left_foot_target = [-0.02, 0.09, -0., 1., 0., 0., 0., 1., 0., 0., 0., 1.]
        self.right_foot_target = [-0.02, -0.09, -0., 1., 0., 0., 0., 1., 0., 0., 0., 1.]
        self.chain = self.create_chain()
        # tf.print(self.chain.mean_pose)
        # tf.print(self.chain.actuated_joint_names)
        self.mean_joints = tf.constant(self.chain.mean_pose + [0., 0., 1.08, 0., 0., 0.], dtype=tf.float32)
        self._num_dimensions = 34
        _ = self.log_density_and_grad(tf.random.normal((10, self._num_dimensions)))
        self.log_density_and_grad = tf.function(self.log_density_and_grad)

[docs]    def log_density_and_grad(self, samples):
        with tf.GradientTape(persistent=False) as tape:
            tape.watch(samples)
            target = self.log_density(samples)
        gradient = tape.gradient(target, samples)
        return target, gradient

[docs]    def create_chain(self):  # cannot be created just once, needs to be recreated in each tf graph
        urdf = tk.urdf_from_file(self.urdf_path)
        tips = OrderedDict({
            'r_gripper': 'gripper_right_base_link',
            'l_gripper': 'gripper_left_base_link',
            'r_foot': 'right_sole_link',
            'l_foot': 'left_sole_link',
        })

        # get chains of arms and leg from base
        chain = tk.ChainDict({
            name: tk.kdl_chain_from_urdf_model(urdf, 'base_link', tip=tip)
            for name, tip in tips.items()
        })
        _ = chain.actuated_joint_names
        return chain

[docs]    def get_left_gripper(self, x, chain):  # l_gripper
        _q, _p, _m = param_to_joint_pos(x, chain)
        return chain.xs(_q, floating_base=(_p, _m), name='l_gripper')[:, -1]

[docs]    def get_right_foot(self, x, chain):  # r_foot
        _q, _p, _m = param_to_joint_pos(x, chain)
        return chain.xs(_q, floating_base=(_p, _m), name='r_foot')[:, -1]

[docs]    def get_left_foot(self, x, chain):  # l_foot
        _q, _p, _m = param_to_joint_pos(x, chain)
        return chain.xs(_q, floating_base=(_p, _m), name='l_foot')[:, -1]

[docs]    def get_joint_angles(self, x, chain):  # joint angles
        return x[..., :chain.nb_joint]

[docs]    def get_com_feet_diff(self, x, chain):  # relative position between center of feet and projection of com
        # basic implementation, needs to be replaced with more complete differentiable formula
        _q, _p, _m = param_to_joint_pos(x, chain)
        # get center of mass projection on the ground
        com_xy = chain.xs(_q, floating_base=(_p, _m), get_links=True)[-1][:, :2]

        center_feet = chain.xs(_q, floating_base=(_p, _m), name='l_foot')[:, -1, :2]

        return com_xy - center_feet

[docs]    def get_joint_prior(self, x, chain):
        joint_limits = tf.constant(chain.joint_limits, dtype=tf.float32)
        joint_limits_std = 0.05
        joint_limits_temp = 1.

        joint_limits_exp = rlds.SoftUniformNormalCdf(
            low=joint_limits[:, 0],
            high=joint_limits[:, 1],
            std=joint_limits_std,
            temp=joint_limits_temp,
            reduce_axis=-1
        )
        return joint_limits_exp.log_prob(self.get_joint_angles(x, chain))

[docs]    def get_left_gripper_reward(self, x, context, chain):
        pos = context
        stds = tf.ones_like(pos) * 0.02
        target_dist = tfp.distributions.MultivariateNormalDiag(pos, stds)
        return target_dist.log_prob(self.get_left_gripper(x, chain)[:, :3])

[docs]    def get_left_foot_reward(self, x, context, chain):
        std = [0.02] * 3 + [0.1] * 9
        target_dist = tfp.distributions.MultivariateNormalDiag(self.left_foot_target, std)
        return target_dist.log_prob(self.get_left_foot(x, chain))

[docs]    def get_right_foot_reward(self, x, context, chain):
        std = [0.02] * 3 + [0.1] * 9
        target_dist = tfp.distributions.MultivariateNormalDiag(self.right_foot_target, std)
        return target_dist.log_prob(self.get_right_foot(x, chain))

[docs]    def get_com_reward(self, x, chain):
        com_limits = 0.14
        com_limits_std = 0.01
        com_limits_temp = 1.

        com_limits_exp = rlds.SoftUniformNormalCdf(
            low=-com_limits,
            high=com_limits,
            std=com_limits_std,
            temp=com_limits_temp,
            reduce_axis=-1
        )

        return com_limits_exp.log_prob(self.get_com_feet_diff(x, chain))

    def _reward(self, sample, context):
        chain = self.chain #self.create_chain()
        rew = self.get_joint_prior(sample, chain)
        rew += self.get_com_reward(sample, chain)
        rew += self.get_right_foot_reward(sample, context, chain)
        rew += self.get_left_foot_reward(sample, context, chain)
        rew += self.get_left_gripper_reward(sample, context, chain)
        return rew

[docs]    def get_num_dimensions(self):
        return self._num_dimensions

[docs]    def log_density(self, thetas):
        return self._reward(thetas, self.context)

[docs]    def expensive_metrics(self, model: GmmWrapper, samples: tf.Tensor) -> dict:
        expensive_metrics = dict()
        fig = self.plot(self.context, samples[:100])
        expensive_metrics.update(
            {"plot": fig})
        return expensive_metrics

[docs]    def plot(self, context, samples):
        chain = self.chain #create_chain()
        plt.ioff()
        plt.figure(1)
        plt.clf()
        _, samples_position, samples_rotation = param_to_joint_pos(samples, chain)
        links, _, coms = chain.xs(samples, floating_base=(samples_position, samples_rotation), get_links=True)
        fig, ax = plt.subplots(ncols=2, sharex=True, figsize=(15, 10))

        for i in range(2):
            dim = [i, 2]
            chain.plot(
                links, feed_dict={}, ax=ax[i],
                dim=dim, alpha=0.2, color='k'
            )
            ax[i].plot(coms[:, dim[0]], coms[:, dim[1]], 'yx')

            ax[i].plot(self.left_foot_target[dim[0]], self.left_foot_target[dim[1]], ls=' ', marker='s',
                       label="left_foot_target", color="red")
            ax[i].plot(self.right_foot_target[dim[0]], self.right_foot_target[dim[1]], ls=' ', marker='s',
                       label="right_foot_target", color="green")
            ax[i].plot(context[dim[0]], context[dim[1]], ls=' ', marker="o", markersize=6,
                       label="left_gripper_target", color="orange")
            ax[i].legend()
        return fig


def param_to_joint_pos(x, chain):
    return (x[..., :chain.nb_joint],  # 28 joint angle
            x[..., chain.nb_joint:chain.nb_joint + 3],  # 3 position of floating base
            tk.rotation.rpy(x[..., -3:]))  # 3 orientation of floating base euler to rotation matrix


class TalosLeftGripperTargetPdf:

    def __init__(self):
        self.radius_prior = tfd.Uniform(low=0.2, high=1)
        self.xy_ang_prior = tfd.Uniform(low=-np.pi/2, high=np.pi/2)
        self.z_ang_prior = tfd.Uniform(low=float(np.deg2rad(10)), high=float(np.rad2deg(170)))

    def sample(self, n):
        xy_ang = self.xy_ang_prior.sample(n)
        rad = self.radius_prior.sample(n)
        z_ang = self.z_ang_prior.sample(n)

        x = rad * tf.sin(z_ang) * tf.cos(xy_ang)
        y = rad * tf.sin(z_ang) * tf.sin(xy_ang)
        z = rad * tf.cos(z_ang)
        return tf.stack([x, y, z], axis=-1)


def make_talos_target(context):
    """
    planar robot experiment with one goal from vips, here with conditioning on the goal position.

    Parameters:
        context: list[float]
            A list of three floats, specifying the desired x, y, z coordinates of the left endeffector.
    """
    target = Talos(context)
    goal_prior = TalosLeftGripperTargetPdf()

    return target

if __name__ == "__main__":
    target = make_talos_target([-0.02, 0.09, -0.0])
    target.log_density(tf.random.normal((2, target._num_dimensions)))
    print("done")