% Adapted from earlier work by C E Rasmussen, F Doshi and M P Deisenroth

% ----- Introduction ----- %

% The model is a gridworld park.  The goal is to travel from a specified
% start location to a specified final location to receive a large reward.
% Some cells may contain trees, which carry large penalties.  The aerial
% bot has four control actions: up, down, left, and right.  The execution
% of these actions is noisy, but once an action has completed, the bot
% knows its new state.

% Named action indices, provided for convenience so that code can refer to
% an action by name instead of by its raw index.
% Order of the outputs matches the order of the values: up, down, left, right.
[aUp, aDown, aLeft, aRight] = deal(1, 2, 3, 4);

% ----- Initialise a true model ----- %
% DO NOT CHANGE %
% Set the size of the gridworld (rows and columns) and specify which cells
% contain the trees.
paramSet.colCount = 5;
paramSet.rowCount = 5;
% Simulation ends on reaching a state in obsSet.
% NOTE(review): presumably one cell per matrix row, given as a pair of grid
% indices; whether a pair is [row col] or [col row] is decided by
% initGridworld / solveRL, which are not shown here -- confirm.
paramSet.obsSet = [3 3; 4 4];
% Goal cell (presumably the same index-pair convention as obsSet -- confirm).
paramSet.goalState = [1 5];
% Negative-reward states: trees.  Currently the same two cells as obsSet.
paramSet.badSet = [3 3; 4 4];



% Set the reward parameters: rStep is the cost of an action, rGoal is the
% reward associated with reaching the goal, and rBad is the reward for
% passing through a bad spot.
paramSet.rStep = -0.1;
paramSet.rGoal = 10;
paramSet.rBad = -5;
% NOTE(review): gamma is presumably the discount factor used by the solver;
% confirm against solveRL.
paramSet.gamma = 0.97;

% Parameter for controlling the number of iterations.
% Learning is assumed to have converged once 20 successive iterations each
% require only one step for Value Iteration to converge.
paramSet.END_LEARNING_THRESHOLD = 20;

% Display the parameter struct (the missing semicolon is intentional: it
% makes the interpreter echo paramSet to the console).
paramSet

% Create a model based on the parameters.  initGridworld is defined outside
% this script; see it for the fields of the returned model.
model = initGridworld( paramSet );

% Solve the gridworld MDP.
% PLEASE IMPLEMENT the solveRL function, and any other functions you may need.
% solveRL: takes the parameter struct and the model, and returns the optimal
% value and policy for all states.
[V_opt, policy_opt] = solveRL(paramSet, model);

% Display the final policy (the missing semicolon is intentional: it makes
% the interpreter echo policy_opt to the console).
policy_opt
