import ray
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.trainer import Trainer
from ray.tune.resources import Resources


class FixedPPOTrainer(PPOTrainer):
    """PPOTrainer whose trial resource request includes custom resources.

    We define the Gym/MuJoCo/RealWorldRL simulator as a custom resource in
    ray, while in ray's current released version tune doesn't account for
    custom resources when building a trial's resource request, so this
    subclass overrides ``default_resource_request`` to fix that.
    """

    @classmethod
    def default_resource_request(cls, config: dict) -> Resources:
        """Return the Resources one trial needs, scaling every per-worker
        quantity (including custom resources) by the total worker count.

        The user ``config`` is layered over the trainer's default config
        and validated before any values are read from it.
        """
        merged = dict(cls._default_config, **config)
        Trainer._validate_config(merged)

        # Rollout workers plus evaluation workers all consume per-worker
        # resources, so scale by their combined count.
        worker_count = merged['num_workers'] + merged['evaluation_num_workers']
        scaled_custom = {
            name: amount * worker_count
            for name, amount in merged['custom_resources_per_worker'].items()
        }

        return Resources(
            cpu=merged['num_cpus_for_driver'],
            gpu=merged['num_gpus'],
            memory=merged['memory'],
            object_store_memory=merged['object_store_memory'],
            extra_cpu=worker_count * merged['num_cpus_per_worker'],
            extra_gpu=worker_count * merged['num_gpus_per_worker'],
            extra_memory=worker_count * merged['memory_per_worker'],
            extra_object_store_memory=worker_count * merged['object_store_memory_per_worker'],
            custom_resources=scaled_custom,
        )
if __name__ == '__main__':
    # Project-local helpers: cluster bootstrap and the env factory.
    from agit import ray_init
    from agit import EnvCreator

    ray_init()

    # Log what the connected cluster offers before launching the run.
    print('Ray Cluster Resources: {}'.format(ray.cluster_resources()))
    print('Ray Cluster Nodes: {}'.format(ray.nodes()))

    # Load the MuJoCo model definition that is forwarded to every env.
    with open('./humanoid.xml', 'r', encoding='utf-8') as file:
        xml_string = file.read()

    trainer_config = {
        'env': EnvCreator,
        'env_config': {
            'env_id': 'Humanoid-v3',
            'mujoco_model_xml': xml_string
        },
        'num_cpus_per_worker': 1,
        # Simulator resource
        'custom_resources_per_worker': {'simulator': 0.5}
    }

    tune.run(
        FixedPPOTrainer,
        # Don't modify this parameter unless you know what you do.
        queue_trials=True,
        stop={'training_iteration': 10},
        config=trainer_config
    )