How can robot machine learning be realized with RoboDK API?
How can robot machine learning be realized with RoboDK API? - SAT0001 - 06-01-2023

I would like to use RoboDK and Simumatik to do reinforcement learning for industrial robots. The following blog mentions RoboDK for robotic machine learning: //m.sinclairbody.com/blog/robodk-api-robot-machine-learning/ For example, I want to modify the following Python script and apply it to RoboDK: https://github.com/danijar/dreamerv3 Is it possible to import these Python scripts in RoboDK?

RE: How can robot machine learning be realized with RoboDK API? - Sam - 06-01-2023

You can directly interface with RoboDK using our Python API to retrieve the required inputs for your model. //m.sinclairbody.com/doc/en/PythonAPI/index.html

RE: How can robot machine learning be realized with RoboDK API? - SAT0001 - 06-04-2023

(06-01-2023, 11:50 AM)Sam Wrote: You can directly interface with RoboDK using our Python API to retrieve the required inputs for your model.

OK, I am planning to try the following script. Please let me know if anything is wrong.
Code:
import os
This script sets up a reinforcement learning environment using the RoboDK API to control a KUKA iiwa multi-axis robot. The goal is to train the robot to efficiently grasp a target object detected by a camera attached to the robot's hand, move it to a target point, and place it there. The script uses the following components:

- RoboDKEnv: A custom gym environment class that interfaces with the RoboDK simulator. It defines the action and observation spaces, as well as the step, reset, render, and capture_image functions.
- step function: The robot control logic is implemented in the step function. It executes the selected action, captures the image after the action, calculates the reward based on the distance to the target, and checks whether the robot has reached the target.
- capture_image function: Captures an image using the camera attached to the robot's hand and converts it to a PyTorch tensor.
- render function: Updates the RoboDK simulator view to visualize the robot's movements.
- PPOTrainer: The script uses the Proximal Policy Optimization (PPO) algorithm from the Ray RLlib library to train the reinforcement learning agent.
- Training configuration: The configuration for the PPOTrainer includes the number of workers, GPUs, CPUs per worker, learning rate, batch sizes, and model configuration.
- ASHAScheduler: The Asynchronous Successive Halving Algorithm (ASHA) scheduler is used to optimize the training process by stopping low-performing trials early.
- CLIReporter: A command-line reporter is used to display the training progress, including the episode reward mean and training iteration.
- Training loop: The script runs the PPOTrainer with the specified configuration, number of samples, scheduler, and progress reporter. It logs the training progress using mlflow and saves the best model.
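
For reference, here is a minimal sketch of how the RoboDKEnv class described above could be written against the RoboDK Python API and the classic gym interface that older Ray RLlib versions expect. The station item names ('Kuka iiwa', 'Camera', 'Target'), the 2-degree joint increment, the 5 mm success threshold and the 84x84 image size are illustrative assumptions rather than values from the original script, and the reward is a simple negative-distance placeholder.

Code:
import tempfile

import gym
import numpy as np
from gym import spaces
from PIL import Image
from robodk.robolink import Robolink, ITEM_TYPE_ROBOT


class RoboDKEnv(gym.Env):
    """Gym-style wrapper around a RoboDK station (sketch only)."""

    def __init__(self, env_config=None):
        self.RDK = Robolink()  # connect to the running RoboDK instance
        # Item names are placeholders; use the names from your own station
        self.robot = self.RDK.Item('Kuka iiwa', ITEM_TYPE_ROBOT)
        self.camera = self.RDK.Item('Camera')   # simulated 2D camera on the flange
        self.target = self.RDK.Item('Target')   # target frame or object
        self.home_joints = self.robot.Joints().list()

        # One normalized increment per joint; 7 axes for the iiwa
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(7,), dtype=np.float32)
        # 84x84 RGB image from the hand camera (84x84 suits RLlib's default conv net)
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)

    def step(self, action):
        # Scale the normalized action to a small joint increment (2 deg max, arbitrary)
        joints = np.array(self.robot.Joints().list()) + 2.0 * np.asarray(action, dtype=float)
        self.robot.MoveJ([float(j) for j in joints])

        obs = self.capture_image()

        # Placeholder reward: negative tool-to-target distance in mm.
        # Assumes both poses are expressed in the same reference frame.
        tool_pos = np.array(self.robot.Pose().Pos())
        target_pos = np.array(self.target.Pose().Pos())
        distance = float(np.linalg.norm(tool_pos - target_pos))
        done = distance < 5.0
        return obs, -distance, done, {"distance": distance}

    def reset(self):
        self.robot.MoveJ(self.home_joints)
        return self.capture_image()

    def render(self, mode="human"):
        self.RDK.Render(True)  # refresh the RoboDK 3D view

    def capture_image(self):
        # Snapshot from the simulated camera; depending on the API version,
        # Cam2D_Snapshot takes a camera item or a handle returned by Cam2D_Add
        path = tempfile.gettempdir() + "/robodk_cam.png"
        self.RDK.Cam2D_Snapshot(path, self.camera)
        img = Image.open(path).convert("RGB").resize((84, 84))
        return np.asarray(img, dtype=np.uint8)

The observation is returned here as a NumPy array rather than a PyTorch tensor, since RLlib converts observations to tensors itself when the framework is set to torch.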
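The training side outlined above (PPOTrainer, ASHAScheduler, CLIReporter) could then be wired up with Ray Tune roughly as follows. The hyperparameters (learning rate, batch sizes, number of samples) are placeholders, the exact Tune/RLlib API differs between Ray versions, and the mlflow logging and best-model export mentioned in the outline are omitted from the sketch.

Code:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.registry import register_env
from ray.tune.schedulers import ASHAScheduler

# Register the custom environment under a string name that RLlib can resolve
register_env("robodk_env", lambda env_config: RoboDKEnv(env_config))

config = {
    "env": "robodk_env",
    "framework": "torch",
    "num_workers": 1,            # each rollout worker opens its own RoboDK connection
    "num_gpus": 0,
    "lr": 3e-4,                  # placeholder hyperparameters
    "train_batch_size": 4000,
    "sgd_minibatch_size": 128,
    # RLlib's default vision model handles 84x84 RGB observations;
    # set "model" here to customize the network
}

# Stop low-performing trials early based on the mean episode reward
scheduler = ASHAScheduler(metric="episode_reward_mean", mode="max",
                          grace_period=5, max_t=100)

# Show reward and iteration in the console while training
reporter = CLIReporter(metric_columns=["episode_reward_mean", "training_iteration"])

ray.init()
tune.run(
    "PPO",                       # RLlib's PPO trainer, addressed by name
    config=config,
    num_samples=4,
    scheduler=scheduler,
    progress_reporter=reporter,
    checkpoint_at_end=True,      # keep a checkpoint of the trained policy
)

Ray Tune also ships an MLflow logger callback that can be passed to tune.run for the mlflow logging mentioned in the outline; its import path has moved between Ray versions, so it is left out here.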