Python source code examples: rllab.core.serializable.Serializable.quick_init()
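
Every example below follows the same pattern: inside __init__, the class calls Serializable.quick_init(self, locals()) so that the constructor arguments are captured and stored on the instance; the object can later be rebuilt simply by calling its constructor again with the saved arguments (this is what Example 2's clone does through self.__args / self.__kwargs). A minimal sketch of the pattern, with a hypothetical MyEnv class and parameters invented purely for illustration:

import numpy as np
from rllab.core.serializable import Serializable

class MyEnv(Serializable):
    def __init__(self, size=10, noise=0.1):
        # Capture the constructor arguments from locals() so the instance can
        # later be serialized or cloned by re-running MyEnv(size, noise).
        Serializable.quick_init(self, locals())
        self.size = size
        self.noise = noise
        self.state = np.zeros(size)

Because reconstruction re-runs the constructor with the saved arguments, state assigned after quick_init is rebuilt rather than copied; that is why Example 2's clone still copies fields such as num_steps across explicitly.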

Example 1
def __init__(
            self,
            env,
            scale_reward=1.,
            normalize_obs=False,
            normalize_reward=False,
            obs_alpha=0.001,
            reward_alpha=0.001,
    ):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self._scale_reward = scale_reward
        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._obs_alpha = obs_alpha
        self._obs_mean = np.zeros(env.observation_space.flat_dim)
        self._obs_var = np.ones(env.observation_space.flat_dim)
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1. 
Example 2
def clone(self, out=None):
        """Clones state of this environment, optionally into an existing one."""
        if out is None:
            # Reconstruct using the state saved by Serializable.quick_init
            out = type(self)(*self.__args, **self.__kwargs)
        
        if type(out) != type(self):
            raise Exception("out has the wrong type")
        if out._Serializable__args != self._Serializable__args or out._Serializable__kwargs != self._Serializable__kwargs:
            raise Exception("out was constructed with the wrong arguments")

        out.num_steps = self.num_steps
        out.terminated = self.terminated
        out.state['__last_action_name'] = self.state['__last_action_name']
        
        for module, out_module in zip(self.modules, out.modules):
            module.clone(self.state, out_module, out.state)
        return out 
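
The reconstruction above relies only on the two attributes that quick_init stores on the instance, checked here under their name-mangled spellings _Serializable__args and _Serializable__kwargs. A short hypothetical sketch of the same idea outside of clone, reusing the illustrative MyEnv class from the sketch at the top:

env = MyEnv(size=20, noise=0.05)
args = env._Serializable__args      # constructor arguments recorded by quick_init
kwargs = env._Serializable__kwargs  # keyword arguments recorded by quick_init
copy = type(env)(*args, **kwargs)   # rebuilds an equivalent, freshly initialized object
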
Example 3
def __init__(self, env, record_video=True, video_schedule=None,
            log_dir=None, timestep_limit=9999):
        # Ensure the version saved to disk doesn't monitor into our log_dir
        locals_no_monitor = dict(locals())
        locals_no_monitor['log_dir'] = None
        locals_no_monitor['record_video'] = False
        locals_no_monitor['video_schedule'] = None
        Serializable.quick_init(self, locals_no_monitor)

        self.env = env
        self._observation_space = to_rllab_space(env.observation_space)
        self._action_space = to_rllab_space(env.action_space)        
        self.env.spec = EnvSpec('GymEnv-v0')

        monitor.logger.setLevel(logging.WARNING)
        if not record_video:
            self.video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                self.video_schedule = CappedCubicVideoSchedule()
            else:
                self.video_schedule = video_schedule
        self.set_log_dir(log_dir)

        self._horizon = timestep_limit 
Example 4
def __init__(
            self,
            env,
            policy,
            baseline,
            optimizer=None,
            optimizer_args=None,
            **kwargs):
        Serializable.quick_init(self, locals())
        if optimizer is None:
            default_args = dict(
                batch_size=None,
                max_epochs=1,
            )
            if optimizer_args is None:
                optimizer_args = default_args
            else:
                optimizer_args = dict(default_args, **optimizer_args)
            optimizer = FirstOrderOptimizer(**optimizer_args)
        self.optimizer = optimizer
        self.opt_info = None
        super(VPG, self).__init__(env=env, policy=policy, baseline=baseline, **kwargs) 
Example 5
def __init__(self):
        Serializable.quick_init(self, locals())
        self.qpos = None
        self.qvel = None
        self.mass = 0.1
        self.dt = 0.01
        self.frame_skip = 5
        self.boundary = np.array([-10, 10])
        self.vel_bounds = [-np.inf, np.inf]
        """
        In 1 frame forward,
            qpos' = qpos + qvel*dt
            qvel' = qvel + u/m*dt
        """
        eig_vec = np.array([[0.7, -0.6], [-0.3, -0.1]])
        self.A = np.identity(2)  # eig_vec @ np.diag([1.0, 0.8]) @ np.linalg.inv(eig_vec)
        self.B = np.array([[0.2, -0.04], [.3, .9]])
        self.c = np.array([0.0, 0.0])
        self.goal = None
        self.init_mean = np.zeros(2)
        self.init_std = 0.1
        self.ctrl_cost_coeff = 0.01 
Example 6
def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=1,
            name="vf",
            **regressor_args
        ) 
Example 7
def __init__(self, game_name, agent_num, action_num=12):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.action_spaces = MADiscrete([action_num] * self.agent_num)
        self.observation_spaces = MADiscrete([1] * self.agent_num)
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.numplots = 0
        self.payoff = {}

        if self.game == 'lemonade':
            assert self.agent_num == 3
            def get_distance(a_n, i):
                assert len(a_n) == 3
                a_n_i = np.copy(a_n)
                a_n_i[0], a_n_i[i] = a_n_i[i], a_n_i[0]
                return np.abs(a_n_i[0] - a_n_i[1]) + np.abs(a_n_i[0] - a_n_i[2])
            self.payoff = lambda a_n, i: get_distance(a_n, i) 
Example 8
def __init__(self,
                 ctrl_cost_coeff=1e-2,  # gym has 1 here!
                 rew_speed=False,  # if True the dot product is taken with the speed instead of the position
                 rew_dir=None,  # (x,y,z) -> Rew=dot product of the CoM SPEED with this dir. Otherwise, DIST to 0
                 ego_obs=False,
                 no_contact=False,
                 sparse=False,
                 *args, **kwargs):
        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.reward_dir = rew_dir
        self.rew_speed = rew_speed
        self.ego_obs = ego_obs
        self.no_cntct = no_contact
        self.sparse = sparse

        super(AntEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals()) 
Example 9
def __init__(
            self,
            *args,
            **kwargs):

        Serializable.quick_init(self, locals())
        MazeEnv.__init__(self, *args, **kwargs)
        self._blank_maze = False
        self.blank_maze_obs = np.concatenate([np.zeros(self._n_bins), np.zeros(self._n_bins)])

        # The following caches the spaces so they are not re-instantiated every time
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        self._observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_robot_obs().shape
        ub = BIG * np.ones(shp)
        self._robot_observation_space = spaces.Box(ub * -1, ub)

        shp = self.get_current_maze_obs().shape
        ub = BIG * np.ones(shp)
        self._maze_observation_space = spaces.Box(ub * -1, ub) 
Example 10
def __init__(self):
        Serializable.quick_init(self, locals()) 
Example 11
def __init__(self, obj, method_name, args, kwargs):
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        self.obj = obj
        self.method_name = method_name
        self.args = args
        self.kwargs = kwargs 
Example 12
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None, record_log=True,
                 force_reset=False):
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)
        self.env = env
        self.env_id = env.spec.id

        assert not (not record_log and record_video)

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        logger.log("observation space: {}".format(self._observation_space))
        self._action_space = convert_gym_space(env.action_space)
        logger.log("action space: {}".format(self._action_space))
        self._horizon = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
        self._log_dir = log_dir
        self._force_reset = force_reset 
Example 13
def __init__(self, goal_reward=10):
        super().__init__()
        Serializable.quick_init(self, locals())

        self.dynamics = PointDynamics(dim=2, sigma=0)
        self.init_mu = np.array((0, 0), dtype=np.float32)
        self.init_sigma = 0
        self.goal_positions = np.array(
            [
                [5, 0],
                [-5, 0],
                [0, 5],
                [0, -5]
            ],
            dtype=np.float32
        )
        self.goal_threshold = 1.
        self.goal_reward = goal_reward
        self.action_cost_coeff = 30.
        self.xlim = (-7, 7)
        self.ylim = (-7, 7)
        self.vel_bound = 1.
        self.reset()
        self.observation = None

        self.fig = None
        self.ax = None
        self.fixed_plots = None
        self.dynamic_plots = [] 
Example 14
def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            output_b_init=None,
            weight_signal=1.0,
            weight_nonsignal=1.0, 
            weight_smc=1.0):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :return:
        """
        Serializable.quick_init(self, locals())
        assert isinstance(env_spec.action_space, Discrete)
        output_b_init = compute_output_b_init(env_spec.action_space.names,
            output_b_init, weight_signal, weight_nonsignal, weight_smc)

        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
            output_b_init=output_b_init
        )
        super(InitCategoricalMLPPolicy, self).__init__(env_spec, hidden_sizes,
            hidden_nonlinearity, prob_network)


# Modified from RLLab GRUNetwork 
Example 15
def __init__(
            self,
            name,
            max_opt_itr=20,
            initial_penalty=1.0,
            min_penalty=1e-2,
            max_penalty=1e6,
            increase_penalty_factor=2,
            decrease_penalty_factor=0.5,
            max_penalty_itr=10,
            adapt_penalty=True):
        Serializable.quick_init(self, locals())
        self._name = name
        self._max_opt_itr = max_opt_itr
        self._penalty = initial_penalty
        self._initial_penalty = initial_penalty
        self._min_penalty = min_penalty
        self._max_penalty = max_penalty
        self._increase_penalty_factor = increase_penalty_factor
        self._decrease_penalty_factor = decrease_penalty_factor
        self._max_penalty_itr = max_penalty_itr
        self._adapt_penalty = adapt_penalty

        self._opt_fun = None
        self._target = None
        self._max_constraint_val = None
        self._constraint_name = None 
Example 16
def __init__(self, env_name, gym_wrappers=(),
                 register_fn=None, wrapper_args=(), record_log=False, record_video=False,
                 post_create_env_seed=None, force_reset=True):
        Serializable.quick_init(self, locals())
        if register_fn is None:
            import inverse_rl.envs
            register_fn = inverse_rl.envs.register_custom_envs
        register_fn()  # Force register
        self.env_name = env_name
        super(CustomGymEnv, self).__init__(env_name, wrappers=gym_wrappers,
                                           wrapper_args=wrapper_args,
                                           record_log=record_log, record_video=record_video,
                                           post_create_env_seed=post_create_env_seed,
                                           video_schedule=FixedIntervalVideoSchedule(50), force_reset=force_reset) 
Example 17
def __init__(
            self,
            ctrl_cost_coeff=1e-2,
            *args, **kwargs):
        self.ctrl_cost_coeff = ctrl_cost_coeff
        super(SwimmerEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals()) 
Example 18
def __init__(self, xs, ys, xres, yres):
        Serializable.quick_init(self, locals())
        self.xs = xs
        self.ys = ys
        self.xres = xres
        self.yres = yres
Example 19
def __init__(
            self,
            alive_coeff=1,
            ctrl_cost_coeff=0.01,
            *args, **kwargs):
        self.alive_coeff = alive_coeff
        self.ctrl_cost_coeff = ctrl_cost_coeff
        super(HopperEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals())

    # Consist of 11 dimensions
    # 0: z-com
    # 1: forward pitch along y-axis
    # 5: x-comvel
    # 6: z-comvel 
Example 20
def __init__(
            self,
            vel_deviation_cost_coeff=1e-2,
            alive_bonus=0.2,
            ctrl_cost_coeff=1e-3,
            impact_cost_coeff=1e-5,
            *args, **kwargs):
        self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
        self.alive_bonus = alive_bonus
        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.impact_cost_coeff = impact_cost_coeff
        super(SimpleHumanoidEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals()) 
Example 21
def __init__(
            self,
            ctrl_cost_coeff=1e-2,
            ego_obs=False,
            sparse_rew=False,
            *args, **kwargs):
        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.ego_obs = ego_obs
        self.sparse_rew = sparse_rew
        super(SnakeEnv, self).__init__(*args, **kwargs)
        Serializable.quick_init(self, locals()) 
Example 22
def __init__(self, env, ma_mode):
        Serializable.quick_init(self, locals())

        self.env = env
        if hasattr(env, 'id'):
            self.env_id = env.id
        else:
            self.env_id = 'MA-Wrapper-v0'

        if ma_mode == 'centralized':
            obsfeat_space = convert_gym_space(env.agents[0].observation_space,
                                              n_agents=len(env.agents))
            action_space = convert_gym_space(env.agents[0].action_space, n_agents=len(env.agents))
        elif ma_mode in ['decentralized', 'concurrent']:
            obsfeat_space = convert_gym_space(env.agents[0].observation_space, n_agents=1)
            action_space = convert_gym_space(env.agents[0].action_space, n_agents=1)

        else:
            raise NotImplementedError

        self._observation_space = obsfeat_space
        self._action_space = action_space
        if hasattr(env, 'timestep_limit'):
            self._horizon = env.timestep_limit
        else:
            self._horizon = 250 
Example 23
def __init__(self, env_name, gym_wrappers=(),
                 register_fn=None, wrapper_args=(), record_log=False, record_video=False,
                 post_create_env_seed=None):
        Serializable.quick_init(self, locals())
        if register_fn is None:
            import inverse_rl.envs
            register_fn = inverse_rl.envs.register_custom_envs
        register_fn()  # Force register
        self.env_name = env_name
        super(CustomGymEnv, self).__init__(env_name, wrappers=gym_wrappers,
                                           wrapper_args=wrapper_args,
                                           record_log=record_log, record_video=record_video,
                                           post_create_env_seed=post_create_env_seed,
                                           video_schedule=FixedIntervalVideoSchedule(50)) 
Example 24
def __init__(self, k=np.inf):
        Serializable.quick_init(self, locals())
        self.k = k
        self.counters = None 
Example 25
def __init__(
            self,
            policy_name,
            env_spec,
            latent_sampler,
            hidden_sizes=(32,32),
            hidden_nonlinearity=tf.nn.tanh,
            prob_network=None):
        Serializable.quick_init(self, locals())
        name = policy_name
        self.latent_sampler = latent_sampler

        with tf.variable_scope(name):
            if prob_network is None:
                input_dim = env_spec.observation_space.flat_dim + self.latent_sampler.dim
                l_input = L.InputLayer(shape=(None, input_dim), name="input")
                prob_network = MLP(
                    input_layer=l_input,
                    output_dim=env_spec.action_space.n,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name="prob_network"
                )
                self._output = prob_network.output
                self._inputs = prob_network.input_var

        super(CategoricalLatentVarMLPPolicy, self).__init__(
            name=name, 
            env_spec=env_spec, 
            prob_network=prob_network
        ) 
Example 26
def __init__(
            self, 
            scheduler,
            max_n_envs=20,
            **kwargs):
        Serializable.quick_init(self, locals())
        super(CategoricalLatentSampler, self).__init__(**kwargs)
        self._scheduler = scheduler
        self._latent_values = np.zeros((max_n_envs, self.action_space.n)) 
Example 27
def __init__(
            self,
            env,
            clip_std_multiple=np.inf,
            **kwargs):
        Serializable.quick_init(self, locals())
        self.clip_std_multiple = clip_std_multiple
        super(VectorizedNormalizedEnv, self).__init__(env, **kwargs)