
newrl->neorl

Branch: revive
Yi committed 3 years ago · commit e9bd86cdb2
9 changed files with 25 additions and 25 deletions
  1. README.md (+7 -7)
  2. examples/train_task.py (+2 -2)
  3. examples/train_tune.py (+3 -3)
  4. offlinerl/data/__init__.py (+5 -5)
  5. offlinerl/data/neorl.py (+1 -1)
  6. offlinerl/evaluation/__init__.py (+1 -1)
  7. offlinerl/evaluation/neorl.py (+0 -0)
  8. offlinerl/utils/env.py (+5 -5)
  9. setup.py (+1 -1)

README.md (+7 -7)

@@ -12,15 +12,15 @@ OfflineRL is a repository for Offline RL (batch reinforcement learning or offline RL).
 - **MOPO**: Yu, Tianhe, et al. “MOPO: Model-Based Offline Policy Optimization.” Advances in Neural Information Processing Systems, vol. 33, 2020. [paper](https://papers.nips.cc/paper/2020/hash/a322852ce0df73e204b7e67cbbef0d0a-Abstract.html) [code](https://github.com/tianheyu927/mopo)
 ## Install Datasets
-### NewRL
+### neorl
 ```shell
-git clone https://agit.ai/Polixir/newrl.git
-cd newrl
+git clone https://agit.ai/Polixir/neorl.git
+cd neorl
 pip install -e .
 ```
-For more details on use, please see [newrl](https://agit.ai/Polixir/newrl).
+For more details on use, please see [neorl](https://agit.ai/Polixir/neorl).
 ### D4RL (Optional)
 ```shell
@@ -52,9 +52,9 @@ python examples/train_d4rl.py --algo_name=cql --exp_name=d4rl-halfcheetah-medium
 - **algo_name**: Algorithm name. There are now bc, cql, plas, bcq and mopo algorithms available.
 - **exp_name**: Experiment name for easy visualization using aim.
-- **task**: Task name, see [newrl](https://agit.ai/Polixir/newrl/wiki/Tasks) for details.
-- **task_data_type**: Data level. Each task collects data using low, medium, and high level strategies in [newrl](https://agit.ai/Polixir/newrl).
-- **task_train_num**: Number of training data trajectories. For each task, newrl provides training data for up to 9999 trajectories.
+- **task**: Task name, see [neorl](https://agit.ai/Polixir/neorl/wiki/Tasks) for details.
+- **task_data_type**: Data level. Each task collects data using low, medium, and high level strategies in [neorl](https://agit.ai/Polixir/neorl).
+- **task_train_num**: Number of training data trajectories. For each task, neorl provides training data for up to 9999 trajectories.
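The flags above map directly onto the keyword arguments consumed by the training entry points. As a rough sketch of the call chain after the rename (all parameter values below are illustrative assumptions, not part of this commit):

```python
# Hypothetical invocation mirroring examples/train_task.py after the rename.
# Values are illustrative; consult the neorl task list for valid task names.
from offlinerl.algo import algo_select
from offlinerl.data import load_data_from_neorl

kwargs = {
    "algo_name": "cql",                   # bc, cql, plas, bcq or mopo
    "exp_name": "neorl-halfcheetah-low",  # free-form name used by aim
    "task": "HalfCheetah-v3",             # assumed task name
    "task_data_type": "low",              # low / medium / high behaviour policy
    "task_train_num": 99,                 # number of training trajectories (up to 9999)
}

algo_init_fn, algo_trainer_obj, algo_config = algo_select(kwargs)
train_buffer, val_buffer = load_data_from_neorl(
    algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"]
)
```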


examples/train_task.py (+2 -2)

@@ -1,13 +1,13 @@
 import fire
 from offlinerl.algo import algo_select
-from offlinerl.data import load_data_from_newrl
+from offlinerl.data import load_data_from_neorl
 from offlinerl.evaluation import get_defalut_callback, OnlineCallBackFunction
 def run_algo(**kwargs):
     algo_init_fn, algo_trainer_obj, algo_config = algo_select(kwargs)
-    train_buffer, val_buffer = load_data_from_newrl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
+    train_buffer, val_buffer = load_data_from_neorl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
     algo_init = algo_init_fn(algo_config)
     algo_trainer = algo_trainer_obj(algo_init, algo_config)
     callback = OnlineCallBackFunction()


examples/train_tune.py (+3 -3)

@@ -3,12 +3,12 @@ import random
 from ray import tune
 from offlinerl.algo import algo_select
-from offlinerl.data import load_data_from_newrl
+from offlinerl.data import load_data_from_neorl
 from offlinerl.evaluation import get_defalut_callback, OnlineCallBackFunction
 def training_function(config):
     algo_init_fn, algo_trainer_obj, algo_config = algo_select(config["kwargs"])
-    train_buffer, val_buffer = load_data_from_newrl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
+    train_buffer, val_buffer = load_data_from_neorl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
     algo_config.update(config)
     algo_config["device"] = "cuda"
     algo_init = algo_init_fn(algo_config)
@@ -27,7 +27,7 @@ def run_algo(**kwargs):
     config["kwargs"]['seed'] = random.randint(0, 1000000)
     _, _, algo_config = algo_select(kwargs)
     # Prepare Dataset
-    load_data_from_newrl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
+    load_data_from_neorl(algo_config["task"], algo_config["task_data_type"], algo_config["task_train_num"])
     grid_tune = algo_config["grid_tune"]
     for k,v in grid_tune.items():
         config[k] = tune.grid_search(v)
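For context, the loop at the end of this hunk expands each `grid_tune` entry into a Ray Tune grid axis. A minimal sketch, assuming a hypothetical `grid_tune` dictionary and the standard `tune.run` launcher (neither is part of this commit):

```python
# Illustrative only: how the grid_tune entries become a Ray Tune search space.
from ray import tune

grid_tune = {"actor_lr": [1e-3, 1e-4], "lam": [0.5, 1.0]}  # hypothetical algo_config["grid_tune"]
config = {"kwargs": {"seed": 42}}
for k, v in grid_tune.items():
    config[k] = tune.grid_search(v)  # each list becomes one axis of the grid

# The sweep would then typically be launched with:
# tune.run(training_function, config=config)
```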


offlinerl/data/__init__.py (+5 -5)

@@ -6,17 +6,17 @@ from loguru import logger
 from offlinerl.utils.logger import log_path
 from offlinerl.utils.io import create_dir, download_helper, read_json
-from offlinerl.data.newrl import load_newrl_buffer
+from offlinerl.data.neorl import load_neorl_buffer
 dataset_dir = os.path.join(log_path(),"./offlinerl_datasets")
 create_dir(dataset_dir)
-def load_data_from_newrl(task, task_data_type = "low", task_train_num = 99):
-    import newrl
-    env = newrl.make(task)
+def load_data_from_neorl(task, task_data_type = "low", task_train_num = 99):
+    import neorl
+    env = neorl.make(task)
     train_data, val_data = env.get_dataset(data_type = task_data_type, train_num = task_train_num)
-    train_buffer, val_buffer = load_newrl_buffer(train_data), load_newrl_buffer(val_data)
+    train_buffer, val_buffer = load_neorl_buffer(train_data), load_neorl_buffer(val_data)
     return train_buffer, val_buffer
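The loader now wraps the renamed neorl package end to end. A minimal standalone sketch of the underlying calls, assuming an example task name and the default data level (values are illustrative):

```python
# Sketch of the neorl calls wrapped by load_data_from_neorl; values are illustrative.
import neorl

env = neorl.make("HalfCheetah-v3")  # assumed task name
train_data, val_data = env.get_dataset(data_type="low", train_num=99)

# OfflineRL then converts these raw datasets into replay buffers via
# load_neorl_buffer, exactly as shown in the function body above.
```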

offlinerl/data/newrl.py → offlinerl/data/neorl.py (+1 -1)


offlinerl/evaluation/__init__.py (+1 -1)

@@ -9,7 +9,7 @@ from tianshou.data import to_numpy, to_torch
 from offlinerl.utils.env import get_env
 from offlinerl.utils.net.common import MLP
-from offlinerl.evaluation.newrl import test_on_real_env
+from offlinerl.evaluation.neorl import test_on_real_env
 from offlinerl.evaluation.fqe import FQE, fqe_eval_fn
 class CallBackFunction:


offlinerl/evaluation/newrl.py → offlinerl/evaluation/neorl.py (+0 -0)


offlinerl/utils/env.py (+5 -5)

@@ -1,21 +1,21 @@
 import gym
-import newrl
+import neorl
 def get_env(task):
     try:
         if task.startswith("HalfCheetah-v3"):
-            env = newrl.make("HalfCheetah-v3")
+            env = neorl.make("HalfCheetah-v3")
         elif task.startswith("Hopper-v3"):
-            env = newrl.make("Hopper-v3")
+            env = neorl.make("Hopper-v3")
         elif task.startswith("Walker2d-v3"):
-            env = newrl.make("Walker2d-v3")
+            env = neorl.make("Walker2d-v3")
         elif task.startswith('d4rl'):
             import d4rl
             env = gym.make(task[5:])
         else:
             task_name = task.strip().split("-")[0]
-            env = newrl.make(task_name)
+            env = neorl.make(task_name)
     except:
         raise NotImplementedError
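After the rename, get_env dispatches to neorl.make for the MuJoCo tasks and to gym/d4rl for "d4rl"-prefixed task strings. A short usage sketch (the task strings below are illustrative assumptions, not from this commit):

```python
# Illustrative calls; exact task strings depend on the neorl/d4rl task lists.
from offlinerl.utils.env import get_env

env = get_env("HalfCheetah-v3-low-99")   # prefix match -> neorl.make("HalfCheetah-v3")
env = get_env("d4rl-hopper-medium-v0")   # "d4rl" prefix -> gym.make("hopper-medium-v0")
```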


setup.py (+1 -1)

@@ -11,7 +11,7 @@ def get_version() -> str:
 setup(
     name='offlinerl',
     description="A Library for Offline RL(Batch RL)",
-    url="https://agit.ai/Yi/offlinerl.git",
+    url="https://agit.ai/Polixir/OfflineRL",
     version=get_version(),
     author="SongyiGao",
     author_email="songyigao@gmail.com",

