import logist.simulation.Vehicle;
import logist.agent.Agent;
import logist.behavior.ReactiveBehavior;
import logist.plan.Action;
import logist.task.Task;
import logist.task.TaskDistribution;
import logist.topology.Topology;

/**
 * Reactive behavior that delegates every decision to a {@code QValueIteration}
 * instance built once in {@link #setup} and queried on each call to {@link #act}.
 */
public class ReactiveAgent implements ReactiveBehavior {

	/** Number of times {@link #act} has been invoked since {@link #setup}. */
	private int actionCount;

	/** Agent handed to {@link #setup}; read for its total profit when logging. */
	private Agent agent;

	/** Source of the action returned by {@link #act}. */
	private QValueIteration policy;

	/**
	 * Builds the {@code QValueIteration} object from the topology, the task
	 * distribution and the agent, then resets the action counter and keeps a
	 * reference to the agent.
	 *
	 * <p>NOTE(review): per the original author's note, the offline optimization
	 * is performed inside the {@code QValueIteration} constructor; confirm
	 * against that class.
	 *
	 * @param topology topology forwarded to the {@code QValueIteration} constructor
	 * @param td       task distribution forwarded to the {@code QValueIteration} constructor
	 * @param agent    agent forwarded to the constructor and stored for profit reporting
	 */
	@Override
	public void setup(Topology topology, TaskDistribution td, Agent agent) {
		this.policy = new QValueIteration(topology, td, agent);

		this.actionCount = 0;
		this.agent = agent;
	}

	/**
	 * Reports the running profit (skipped on the very first call), counts this
	 * invocation, and returns the action chosen by the {@code QValueIteration} object.
	 *
	 * @param vehicle       vehicle forwarded to {@code optimalAction}
	 * @param availableTask task forwarded to {@code optimalAction}
	 * @return whatever {@code optimalAction(vehicle, availableTask)} returns
	 */
	@Override
	public Action act(Vehicle vehicle, Task availableTask) {
		// Nothing to average before the first action; this also avoids dividing by zero.
		if (actionCount > 0) {
			printProfitSummary();
		}
		actionCount += 1;

		return policy.optimalAction(vehicle, availableTask);
	}

	/** Prints the total profit and the average profit per action taken so far. */
	private void printProfitSummary() {
		StringBuilder summary = new StringBuilder("The total profit after ");
		summary.append(actionCount);
		summary.append(" actions is ");
		summary.append(agent.getTotalProfit());
		summary.append(" (average profit: ");
		// Cast the divisor so the average is computed in floating point.
		summary.append(agent.getTotalProfit() / (double) actionCount);
		summary.append(")");
		System.out.println(summary.toString());
	}
}
