Java源码示例:org.deeplearning4j.rl4j.learning.HistoryProcessor

示例1
/**
 * Loads the policy saved in {@code cliffwalk_pixel.policy}, plays it for a fixed number of
 * episodes against an MDP obtained from {@code createMDP(10000)}, and logs each episode's
 * reward followed by the average reward.
 *
 * <p>The MDP is closed in a {@code finally} block so that it is released even when loading
 * the policy or playing an episode throws.
 *
 * @throws MalmoConnectionError declared by this method; presumably raised when the Malmo
 *         environment cannot be reached (confirm against {@code createMDP})
 * @throws IOException declared by this method; presumably raised when the policy file
 *         cannot be read (confirm against {@code DQNPolicy.load})
 */
public static void loadMalmoCliffWalk() throws MalmoConnectionError, IOException {
    // Single source of truth for both the loop bound and the averaging divisor.
    final int episodes = 10;

    MalmoEnv mdp = createMDP(10000);
    double rewards = 0;

    try {
        //load the previous agent
        DQNPolicy<MalmoBox> pol = DQNPolicy.load("cliffwalk_pixel.policy");

        //evaluate the agent
        for (int i = 0; i < episodes; i++) {
            double reward = pol.play(mdp, new HistoryProcessor(MALMO_HPROC));
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }
    } finally {
        // Clean up, including on failure, so the environment is not leaked.
        mdp.close();
    }

    Logger.getAnonymousLogger().info("average: " + rewards / episodes);
}
 
示例2
/**
 * Evaluates a previously trained agent: loads the policy from {@code cliffwalk_pixel.policy},
 * plays it repeatedly on the MDP returned by {@code createMDP(10000)}, logs every episode
 * reward, and finally logs the mean reward.
 *
 * <p>Closing the MDP happens in a {@code finally} block, so a failure while loading the
 * policy or playing an episode no longer leaves the environment open.
 *
 * @throws MalmoConnectionError declared by this method; presumably signals a failed
 *         connection to Malmo (confirm against {@code createMDP})
 * @throws IOException declared by this method; presumably signals that the policy file
 *         could not be read (confirm against {@code DQNPolicy.load})
 */
public static void loadMalmoCliffWalk() throws MalmoConnectionError, IOException {
    // Used for both the number of evaluation runs and the average's divisor.
    final int episodeCount = 10;

    MalmoEnv mdp = createMDP(10000);
    double totalReward = 0;

    try {
        //load the previous agent
        DQNPolicy<MalmoBox> pol = DQNPolicy.load("cliffwalk_pixel.policy");

        //evaluate the agent
        for (int episode = 0; episode < episodeCount; episode++) {
            double reward = pol.play(mdp, new HistoryProcessor(MALMO_HPROC));
            totalReward += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }
    } finally {
        // Always release the environment, even if evaluation failed part-way.
        mdp.close();
    }

    Logger.getAnonymousLogger().info("average: " + totalReward / episodeCount);
}
 
示例3
/**
 * Creates the learner from an already built actor-critic and a legacy
 * {@code A3CConfiguration}.
 *
 * <p>The legacy configuration is converted with {@code conf.toLearningConfiguration()} before
 * being handed to the superclass constructor. {@code hpconf} is stored on this instance and
 * also passed to {@code setHistoryProcessor}.
 *
 * @param mdp the MDP forwarded to the superclass constructor
 * @param IActorCritic the actor-critic forwarded to the superclass constructor
 *        (NOTE(review): this parameter name shadows the {@code IActorCritic} type name)
 * @param hpconf history-processor configuration kept in {@code this.hpconf}
 * @param conf legacy A3C configuration
 * @deprecated marked deprecated in the source; presumably superseded by the overload taking
 *             an {@code A3CLearningConfiguration} (confirm)
 */
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IActorCritic IActorCritic,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf) {

    super(mdp, IActorCritic, conf.toLearningConfiguration());
    this.hpconf = hpconf;
    // NOTE(review): this calls an instance method from a constructor; verify it is safe if overridden.
    setHistoryProcessor(hpconf);
}
 
示例4
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IDQN dqn, HistoryProcessor.Configuration hpconf,
                             QLConfiguration conf, IDataManager dataManager) {
    this(mdp, dqn, hpconf, conf);
    addListener(new DataManagerTrainingListener(dataManager));
}
 
示例5
/**
 * Legacy constructor taking a prebuilt network and a {@code QLConfiguration}.
 *
 * <p>The configuration is converted with {@code conf.toLearningConfiguration()}, and the
 * fourth superclass argument is {@code conf.getEpsilonNbStep() * hpconf.getSkipFrame()}.
 * Afterwards {@code hpconf} is passed to {@code setHistoryProcessor}.
 *
 * @param mdp the MDP forwarded to the superclass constructor
 * @param dqn the network forwarded to the superclass constructor
 * @param hpconf history-processor configuration; its skip-frame value scales the epsilon step count
 * @param conf legacy Q-learning configuration
 * @deprecated marked deprecated in the source; presumably superseded by the overload taking
 *             a {@code QLearningConfiguration} (confirm)
 */
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IDQN dqn, HistoryProcessor.Configuration hpconf,
                             QLConfiguration conf) {
    // Epsilon step count is multiplied by the skip-frame value before reaching the superclass.
    super(mdp, dqn, conf.toLearningConfiguration(), conf.getEpsilonNbStep() * hpconf.getSkipFrame());
    setHistoryProcessor(hpconf);
}
 
示例6
/**
 * Creates the learner from a prebuilt network and a {@code QLearningConfiguration}.
 *
 * <p>The fourth superclass argument is {@code conf.getEpsilonNbStep() * hpconf.getSkipFrame()}.
 * After the superclass is initialised, {@code hpconf} is passed to {@code setHistoryProcessor}.
 *
 * @param mdp the MDP forwarded to the superclass constructor
 * @param dqn the network forwarded to the superclass constructor
 * @param hpconf history-processor configuration; its skip-frame value scales the epsilon step count
 * @param conf Q-learning configuration forwarded unchanged to the superclass
 */
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IDQN dqn, HistoryProcessor.Configuration hpconf,
                             QLearningConfiguration conf) {
    // Epsilon step count is multiplied by the skip-frame value before reaching the superclass.
    super(mdp, dqn, conf, conf.getEpsilonNbStep() * hpconf.getSkipFrame());
    setHistoryProcessor(hpconf);
}
 
示例7
/**
 * Legacy constructor that builds the network from a factory and also takes a data manager.
 *
 * <p>The network is obtained from
 * {@code factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize())} and everything
 * is forwarded to the overload accepting an {@code IDQN} and an {@code IDataManager}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the network
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf legacy Q-learning configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactory factory,
                             HistoryProcessor.Configuration hpconf, QLConfiguration conf, IDataManager dataManager) {
    this(mdp, factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf, dataManager);
}
 
示例8
/**
 * Legacy constructor that builds the network from a factory.
 *
 * <p>The network is obtained from
 * {@code factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize())} and forwarded,
 * with the remaining arguments, to the overload accepting an {@code IDQN}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the network
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf legacy Q-learning configuration
 */
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactory factory,
                             HistoryProcessor.Configuration hpconf, QLConfiguration conf) {
    this(mdp, factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf);
}
 
示例9
/**
 * Creates the learner with a network built by the given factory.
 *
 * <p>The network is obtained from
 * {@code factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize())} and forwarded,
 * with the remaining arguments, to the overload accepting an {@code IDQN}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the network
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf Q-learning configuration
 */
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactory factory,
                             HistoryProcessor.Configuration hpconf, QLearningConfiguration conf) {
    this(mdp, factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf);
}
 
示例10
/**
 * Legacy constructor taking a {@code DQNFactoryStdConv.Configuration} and a data manager.
 *
 * <p>{@code netConf} is converted with {@code toNetworkConfiguration()}, wrapped in a new
 * {@code DQNFactoryStdConv}, and forwarded to the factory-based overload that also accepts
 * an {@code IDataManager}.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf legacy network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf legacy Q-learning configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactoryStdConv.Configuration netConf,
                             HistoryProcessor.Configuration hpconf, QLConfiguration conf, IDataManager dataManager) {
    this(mdp, new DQNFactoryStdConv(netConf.toNetworkConfiguration()), hpconf, conf, dataManager);
}
 
示例11
/**
 * Legacy constructor taking a {@code DQNFactoryStdConv.Configuration}.
 *
 * <p>{@code netConf} is converted with {@code toNetworkConfiguration()}, wrapped in a new
 * {@code DQNFactoryStdConv}, and forwarded to the factory-based overload.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf legacy network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf legacy Q-learning configuration
 */
@Deprecated
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactoryStdConv.Configuration netConf,
                             HistoryProcessor.Configuration hpconf, QLConfiguration conf) {
    this(mdp, new DQNFactoryStdConv(netConf.toNetworkConfiguration()), hpconf, conf);
}
 
示例12
/**
 * Creates the learner from a {@code NetworkConfiguration}.
 *
 * <p>Wraps {@code netConf} in a new {@code DQNFactoryStdConv} and forwards to the
 * factory-based overload.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf Q-learning configuration
 */
public QLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, NetworkConfiguration netConf,
                             HistoryProcessor.Configuration hpconf, QLearningConfiguration conf) {
    this(mdp, new DQNFactoryStdConv(netConf), hpconf, conf);
}
 
示例13
/**
 * Convenience overload: builds a {@code HistoryProcessor} from the given configuration and
 * passes it to the {@code setHistoryProcessor} overload that accepts a processor instance.
 *
 * @param conf configuration handed to the {@code HistoryProcessor} constructor
 */
public void setHistoryProcessor(IHistoryProcessor.Configuration conf) {
    HistoryProcessor processor = new HistoryProcessor(conf);
    setHistoryProcessor(processor);
}
 
示例14
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IActorCritic actorCritic,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf, IDataManager dataManager) {
    this(mdp, actorCritic, hpconf, conf);
    addListener(new DataManagerTrainingListener(dataManager));
}
 
示例15
/**
 * Creates the learner from an already built actor-critic and an
 * {@code A3CLearningConfiguration}.
 *
 * <p>{@code mdp}, the actor-critic and {@code conf} go to the superclass constructor.
 * {@code hpconf} is stored on this instance and also passed to {@code setHistoryProcessor}.
 *
 * @param mdp the MDP forwarded to the superclass constructor
 * @param IActorCritic the actor-critic forwarded to the superclass constructor
 *        (NOTE(review): this parameter name shadows the {@code IActorCritic} type name)
 * @param hpconf history-processor configuration kept in {@code this.hpconf}
 * @param conf A3C learning configuration
 */
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IActorCritic IActorCritic,
                       HistoryProcessor.Configuration hpconf, A3CLearningConfiguration conf) {
    super(mdp, IActorCritic, conf);
    this.hpconf = hpconf;
    // NOTE(review): this calls an instance method from a constructor; verify it is safe if overridden.
    setHistoryProcessor(hpconf);
}
 
示例16
/**
 * Legacy constructor that builds the actor-critic from a factory and also takes a data manager.
 *
 * <p>The actor-critic is obtained from
 * {@code factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize())} and
 * forwarded to the overload accepting an actor-critic and an {@code IDataManager}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the actor-critic
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf legacy A3C configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticFactoryCompGraph factory,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf, IDataManager dataManager) {
    this(mdp, factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf, dataManager);
}
 
示例17
/**
 * Legacy constructor that builds the actor-critic from a factory.
 *
 * <p>The actor-critic is obtained from
 * {@code factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize())} and
 * forwarded, with the remaining arguments, to the overload accepting an actor-critic.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the actor-critic
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf legacy A3C configuration
 */
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticFactoryCompGraph factory,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf) {
    this(mdp, factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf);
}
 
示例18
/**
 * Creates the learner with an actor-critic built by the given factory.
 *
 * <p>The actor-critic is obtained from
 * {@code factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize())} and
 * forwarded, with the remaining arguments, to the overload accepting an actor-critic.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the actor-critic
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf A3C learning configuration
 */
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticFactoryCompGraph factory,
                       HistoryProcessor.Configuration hpconf, A3CLearningConfiguration conf) {
    this(mdp, factory.buildActorCritic(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf);
}
 
示例19
/**
 * Legacy constructor taking an {@code ActorCriticFactoryCompGraphStdConv.Configuration}
 * and a data manager.
 *
 * <p>{@code netConf} is converted with {@code toNetworkConfiguration()}, wrapped in a new
 * {@code ActorCriticFactoryCompGraphStdConv}, and forwarded to the factory-based overload
 * that also accepts an {@code IDataManager}.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf legacy network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf legacy A3C configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticFactoryCompGraphStdConv.Configuration netConf,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf, IDataManager dataManager) {
    this(mdp, new ActorCriticFactoryCompGraphStdConv(netConf.toNetworkConfiguration()), hpconf, conf, dataManager);
}
 
示例20
/**
 * Legacy constructor taking an {@code ActorCriticFactoryCompGraphStdConv.Configuration}.
 *
 * <p>{@code netConf} is converted with {@code toNetworkConfiguration()}, wrapped in a new
 * {@code ActorCriticFactoryCompGraphStdConv}, and forwarded to the factory-based overload.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf legacy network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf legacy A3C configuration
 */
@Deprecated
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticFactoryCompGraphStdConv.Configuration netConf,
                       HistoryProcessor.Configuration hpconf, A3CConfiguration conf) {
    this(mdp, new ActorCriticFactoryCompGraphStdConv(netConf.toNetworkConfiguration()), hpconf, conf);
}
 
示例21
/**
 * Creates the learner from an {@code ActorCriticNetworkConfiguration}.
 *
 * <p>Wraps {@code netConf} in a new {@code ActorCriticFactoryCompGraphStdConv} and forwards
 * to the factory-based overload.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf A3C learning configuration
 */
public A3CDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, ActorCriticNetworkConfiguration netConf,
                       HistoryProcessor.Configuration hpconf, A3CLearningConfiguration conf) {
    this(mdp, new ActorCriticFactoryCompGraphStdConv(netConf), hpconf, conf);
}
 
示例22
@Deprecated
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IDQN dqn,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf, IDataManager dataManager) {
    this(mdp, dqn, hpconf, conf);
    addListener(new DataManagerTrainingListener(dataManager));
}
 
示例23
/**
 * Creates the learner from a prebuilt network and an {@code AsyncQLearningConfiguration}.
 *
 * <p>{@code mdp}, {@code dqn} and {@code conf} go to the superclass constructor.
 * {@code hpconf} is stored on this instance and also passed to {@code setHistoryProcessor}.
 *
 * @param mdp the MDP forwarded to the superclass constructor
 * @param dqn the network forwarded to the superclass constructor
 * @param hpconf history-processor configuration kept in {@code this.hpconf}
 * @param conf asynchronous Q-learning configuration
 */
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, IDQN dqn,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf) {
    super(mdp, dqn, conf);
    this.hpconf = hpconf;
    // NOTE(review): this calls an instance method from a constructor; verify it is safe if overridden.
    setHistoryProcessor(hpconf);
}
 
示例24
/**
 * Legacy constructor that builds the network from a factory and also takes a data manager.
 *
 * <p>The network is obtained from
 * {@code factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize())} and forwarded
 * to the overload accepting an {@code IDQN} and an {@code IDataManager}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the network
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf asynchronous Q-learning configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactory factory,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf, IDataManager dataManager) {
    this(mdp, factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf, dataManager);
}
 
示例25
/**
 * Creates the learner with a network built by the given factory.
 *
 * <p>The network is obtained from
 * {@code factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize())} and forwarded,
 * with the remaining arguments, to the overload accepting an {@code IDQN}.
 *
 * @param mdp the MDP; its action-space size is passed to the factory
 * @param factory factory used to build the network
 * @param hpconf history-processor configuration; its shape is passed to the factory
 * @param conf asynchronous Q-learning configuration
 */
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, DQNFactory factory,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf) {
    this(mdp, factory.buildDQN(hpconf.getShape(), mdp.getActionSpace().getSize()), hpconf, conf);
}
 
示例26
/**
 * Legacy constructor taking a {@code NetworkConfiguration} and a data manager.
 *
 * <p>Wraps {@code netConf} in a new {@code DQNFactoryStdConv} and forwards to the
 * factory-based overload that also accepts an {@code IDataManager}.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf asynchronous Q-learning configuration
 * @param dataManager data manager forwarded to the delegated constructor
 */
@Deprecated
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, NetworkConfiguration netConf,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf, IDataManager dataManager) {
    this(mdp, new DQNFactoryStdConv(netConf), hpconf, conf, dataManager);
}
 
示例27
/**
 * Creates the learner from a {@code NetworkConfiguration}.
 *
 * <p>Wraps {@code netConf} in a new {@code DQNFactoryStdConv} and forwards to the
 * factory-based overload.
 *
 * @param mdp the MDP forwarded to the delegated constructor
 * @param netConf network configuration used to create the factory
 * @param hpconf history-processor configuration
 * @param conf asynchronous Q-learning configuration
 */
public AsyncNStepQLearningDiscreteConv(MDP<OBSERVATION, Integer, DiscreteSpace> mdp, NetworkConfiguration netConf,
                                       HistoryProcessor.Configuration hpconf, AsyncQLearningConfiguration conf) {
    this(mdp, new DQNFactoryStdConv(netConf), hpconf, conf);
}
 
示例28
/**
 * Convenience overload: builds a {@code HistoryProcessor} from the given configuration and
 * delegates to the {@code play} overload that accepts a processor instance.
 *
 * @param mdp the MDP to play on, forwarded unchanged
 * @param conf configuration handed to the {@code HistoryProcessor} constructor
 * @return the value returned by the delegated {@code play} call
 */
public <O extends Encodable, AS extends ActionSpace<A>> double play(MDP<O, A, AS> mdp, HistoryProcessor.Configuration conf) {
    HistoryProcessor historyProcessor = new HistoryProcessor(conf);
    return play(mdp, historyProcessor);
}