package edu.uky.ai.rl;

import edu.uky.ai.OperationsBudgetExceededException;
import edu.uky.ai.SearchBudget;
import edu.uky.ai.TimeBudgetExceededException;

/**
 * A learner is an algorithm which solves a reinforcement learning problem
 * (i.e. a {@link StochasticProcess}) by finding a {@link Policy}.
 * 
 * @author Stephen G. Ware
 */
public abstract class Learner {

	/** The learner's name */
	public final String name;
	
	/**
	 * Constructs a new learner with the given name.
	 * 
	 * @param name the learner's name
	 */
	public Learner(String name) {
		this.name = name;
	}
	
	/**
	 * Returns the learner's name.
	 * 
	 * @return the learner's name
	 */
	@Override
	public String toString() {
		return name;
	}
	
	/**
	 * Learns and evaluates a {@link Policy} for a given
	 * {@link StochasticProcess}. Learning is performed on a budget-limited
	 * clone of the process; if learning produces a policy, that policy is then
	 * evaluated on a second budget-limited clone. Any exception or error
	 * thrown while learning is caught and reported as the result's reason
	 * rather than propagated to the caller.
	 * 
	 * @param process the process for which a policy should be learned
	 * @param learningNodeLimit the maximum number of calls to {@link StochasticProcess#transition(State, Action)} that can be made while learning the policy
	 * @param learningTimeLimit the maximum number of milliseconds that can be spent learning the policy
	 * @param evaluationNodeLimit the maximum number of calls to {@link StochasticProcess#transition(State, Action)} that can be made while evaluating the policy
	 * @param evaluationTimeLimit the maximum number of milliseconds that can be spent evaluating the policy
	 * @param frame the GUI that will visualize the learning process (ignored if null)
	 * @return a {@link Result}
	 */
	public final Result learn(StochasticProcess process, int learningNodeLimit, long learningTimeLimit, int evaluationNodeLimit, long evaluationTimeLimit, ProcessFrame frame) {
		Policy policy = null;
		String reason = "success";
		// Reserve block that is released in the OutOfMemoryError handler below,
		// presumably so that the handler itself has some memory to work with.
		Object[] extra = new Object[512];
		extra[0] = null;
		SearchBudget learning = new SearchBudget(learningNodeLimit, learningTimeLimit);
		StochasticProcess wrapper = new BudgetWrapper(process.clone(), learning);
		if(frame != null)
			frame.set(this, wrapper, "Learning");
		try {
			policy = learn(wrapper);
		}
		catch(OperationsBudgetExceededException ex) {
			reason = "transition limit exceeded";
		}
		catch(TimeBudgetExceededException ex) {
			reason = "time limit exceeded";
		}
		catch(OutOfMemoryError ex) {
			// Drop the reserve and request a collection before continuing.
			extra = null;
			System.gc();
			reason = "out of memory";
		}
		catch(Throwable ex) {
			// getMessage() is null for throwables created without a detail
			// message (e.g. many NullPointerExceptions); fall back to
			// toString() so the reported reason is never null.
			String message = ex.getMessage();
			reason = message != null ? message : ex.toString();
		}
		// Learning cost is read from the learning budget whether or not
		// learning succeeded.
		int transitions = learning.getOperations();
		long time = learning.getTime();
		double score = 0;		
		if(policy != null) {
			// Evaluation gets a fresh clone of the process and its own budget.
			wrapper = new BudgetWrapper(process.clone(), new SearchBudget(evaluationNodeLimit, evaluationTimeLimit));
			if(frame != null)
				frame.set(this, wrapper, "Evaluation");
			score = Learner.evaluate(policy, wrapper);
		}
		return new Result(this, process, policy, reason, transitions, time, score);
	}
	
	/**
	 * Given some {@link StochasticProcess}, this method tries to find a
	 * {@link Policy} which maximizes expected reward. This method may use up
	 * to (but not more than) the {@link StochasticProcess#budget process's
	 * search budget}.
	 * 
	 * @param process the process for which a policy should be found
	 * @return a policy that maximizes expected reward
	 */
	public abstract Policy learn(StochasticProcess process);
	
	/**
	 * This method evaluates a policy on a given {@link StochasticProcess}.
	 * The policy should have been produced by a call to
	 * {@link #learn(StochasticProcess)} and should have been learned from the
	 * same process on which it is evaluated. If the process's search budget
	 * is infinite, the policy is evaluated on exactly one episode; otherwise,
	 * the policy is continuously evaluated until the process's search budget
	 * is exhausted. Any {@link Exception} thrown during evaluation ends the
	 * evaluation, and the reward accumulated up to that point is returned.
	 * 
	 * @param policy the policy to be evaluated
	 * @param process the process on which to evaluate the policy
	 * @return the total reward earned by the policy
	 */
	public static final double evaluate(Policy policy, StochasticProcess process) {
		double score = 0;
		try {
			do {
				// Each pass of the outer loop is one episode, starting from
				// the process's initial state.
				State state = process.initial;
				while(!state.isTerminal()) {
					Transition result = process.transition(state, policy.choose(state));
					state = result.state;
					score += result.reward;
				}
			} while(!process.budget.isInfinite());
		}
		catch(Exception ex) {
			// Deliberately swallowed: with a finite budget the loop above only
			// stops when an exception is thrown, so an exception means that
			// evaluation is over.
		}
		return score;
	}
}
