I have the following draft for a neural network class. This neural network should learn with TD-lambda. It is started by calling the getRating() function.
But unfortunately, there is an EInvalidOp (invalid floading point operation) error after about 1000 iterations in the following lines:
neuronsHidden[j] := neuronsHidden[j]+neuronsInput[t][i]*weightsInput[i][j]; // input -> hidden
weightsHidden[j][k] := weightsHidden[j][k]+LEARNING_RATE_HIDDEN*tdError[k]*eligibilityTraceOutput[j][k]; // adjust hidden->output weights according to TD-lambda
Why is this error? I can't find the mistake in my code :( Can you help me? Thank you very much in advance!
unit uNeuronalesNetz;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, ExtCtrls, StdCtrls, Grids, Menus, Math;
const NEURONS_INPUT = 43; // number of neurons in the input layer
NEURONS_HIDDEN = 60; // number of neurons in the hidden layer
NEURONS_OUTPUT = 1; // number of neurons in the output layer
NEURONS_TOTAL = NEURONS_INPUT+NEURONS_HIDDEN+NEURONS_OUTPUT; // total number of neurons in the network
MAX_TIMESTEPS = 42; // maximum number of timesteps possible (after 42 moves: board is full)
LEARNING_RATE_INPUT = 0.25; // in ideal case: decrease gradually in course of training
LEARNING_RATE_HIDDEN = 0.15; // in ideal case: decrease gradually in course of training
GAMMA = 0.9;
LAMBDA = 0.7; // decay parameter for eligibility traces
type
TFeatureVector = Array[1..43] of SmallInt; // definition of the array type TFeatureVector
TArtificialNeuralNetwork = class // definition of the class TArtificialNeuralNetwork
private
// GENERAL SETTINGS START
learningMode: Boolean; // does the network learn and change its weights?
// GENERAL SETTINGS END
// NETWORK CONFIGURATION START
neuronsInput: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_INPUT] of Extended; // array of all input neurons (their values) for every timestep
neuronsHidden: Array[1..NEURONS_HIDDEN] of Extended; // array of all hidden neurons (their values)
neuronsOutput: Array[1..NEURONS_OUTPUT] of Extended; // array of output neurons (their values)
weightsInput: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Extended; // array of weights: input->hidden
weightsHidden: Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // array of weights: hidden->output
// NETWORK CONFIGURATION END
// LEARNING SETTINGS START
outputBefore: Array[1..NEURONS_OUTPUT] of Extended; // the network's output value in the last timestep (the one before)
eligibilityTraceHidden: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // array of eligibility traces: hidden layer
eligibilityTraceOutput: Array[1..NEURONS_TOTAL] of Array[1..NEURONS_TOTAL] of Extended; // array of eligibility traces: output layer
reward: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_OUTPUT] of Extended; // the reward value for all output neurons in every timestep
tdError: Array[1..NEURONS_OUTPUT] of Extended; // the network's error value for every single output neuron
t: Byte; // current timestep
cyclesTrained: Integer; // number of cycles trained so far (learning rates could be decreased accordingly)
last50errors: Array[1..50] of Extended;
// LEARNING SETTINGS END
public
constructor Create; // create the network object and do the initialization
procedure UpdateEligibilityTraces; // update the eligibility traces for the hidden and output layer
procedure tdLearning; // learning algorithm: adjust the network's weights
procedure ForwardPropagation; // propagate the input values through the network to the output layer
function getRating(state: TFeatureVector; explorative: Boolean): Extended; // get the rating for a given state (feature vector)
function HyperbolicTangent(x: Extended): Extended; // calculate the hyperbolic tangent [-1;1]
procedure StartNewCycle; // start a new cycle with everything set to default except for the weights
procedure setLearningMode(activated: Boolean=TRUE); // switch the learning mode on/off
procedure setInputs(state: TFeatureVector); // transfer the given feature vector to the input layer (set input neurons' values)
procedure setReward(currentReward: SmallInt); // set the reward for the current timestep (with learning then or without)
procedure nextTimeStep; // increase timestep t
function getCyclesTrained(): Integer; // get the number of cycles trained so far
procedure Visualize(imgHidden: Pointer); // visualize the neural network's hidden layer
end;
implementation
procedure TArtificialNeuralNetwork.UpdateEligibilityTraces;
var i, j, k: Integer;
begin
// how worthy is a weight to be adjusted?
for j := 1 to NEURONS_HIDDEN do begin
for k := 1 to NEURONS_OUTPUT do begin
eligibilityTraceOutput[j][k] := LAMBDA*eligibilityTraceOutput[j][k]+(neuronsOutput[k]*(1-neuronsOutput[k]))*neuronsHidden[j];
for i := 1 to NEURONS_INPUT do begin
eligibilityTraceHidden[i][j][k] := LAMBDA*eligibilityTraceHidden[i][j][k]+(neuronsOutput[k]*(1-neuronsOutput[k]))*weightsHidden[j][k]*neuronsHidden[j]*(1-neuronsHidden[j])*neuronsInput[t][i];
end;
end;
end;
end;
procedure TArtificialNeuralNetwork.setReward;
VAR i: Integer;
begin
for i := 1 to NEURONS_OUTPUT do begin
// +1 = player A wins
// 0 = draw
// -1 = player B wins
reward[t][i] := currentReward;
end;
end;
procedure TArtificialNeuralNetwork.tdLearning;
var i, j, k: Integer;
begin
if learningMode then begin
for k := 1 to NEURONS_OUTPUT do begin
if reward[t][k] = 0 then begin
tdError[k] := GAMMA*neuronsOutput[k]-outputBefore[k]; // network's error value when reward is 0
end
else begin
tdError[k] := reward[t][k]-outputBefore[k]; // network's error value in the final state (reward received)
end;
for j := 1 to NEURONS_HIDDEN do begin
weightsHidden[j][k] := weightsHidden[j][k]+LEARNING_RATE_HIDDEN*tdError[k]*eligibilityTraceOutput[j][k]; // adjust hidden->output weights according to TD-lambda
for i := 1 to NEURONS_INPUT do begin
weightsInput[i][j] := weightsInput[i][j]+LEARNING_RATE_INPUT*tdError[k]*eligibilityTraceHidden[i][j][k]; // adjust input->hidden weights according to TD-lambda
end;
end;
end;
end;
end;
procedure TArtificialNeuralNetwork.ForwardPropagation;
var i, j, k: Integer;
begin
for j := 1 to NEURONS_HIDDEN do begin
neuronsHidden[j] := 0;
for i := 1 to NEURONS_INPUT do begin
neuronsHidden[j] := neuronsHidden[j]+neuronsInput[t][i]*weightsInput[i][j]; // input -> hidden
end;
neuronsHidden[j] := HyperbolicTangent(neuronsHidden[j]); // activation of hidden neuron j
end;
for k := 1 to NEURONS_OUTPUT do begin
neuronsOutput[k] := 0;
for j := 1 to NEURONS_HIDDEN do begin
neuronsOutput[k] := neuronsOutput[k]+neuronsHidden[j]*weightsHidden[j][k]; // hidden -> output
end;
neuronsOutput[k] := HyperbolicTangent(neuronsOutput[k]); // activation of output neuron k
end;
end;
procedure TArtificialNeuralNetwork.setLearningMode;
begin
learningMode := activated;
end;
constructor TArtificialNeuralNetwork.Create;
var i, j, k: Integer;
begin
inherited Create;
Randomize; // initialize random numbers generator
learningMode := TRUE;
cyclesTrained := -2; // only set to -2 because it will be increased twice in the beginning
StartNewCycle;
for j := 1 to NEURONS_HIDDEN do begin
for k := 1 to NEURONS_OUTPUT do begin
weightsHidden[j][k] := abs(Random-0.5); // initialize weights: 0 <= random < 0.5
end;
for i := 1 to NEURONS_INPUT do begin
weightsInput[i][j] := abs(Random-0.5); // initialize weights: 0 <= random < 0.5
end;
end;
for i := 1 to 50 do begin
last50errors[i] := 0;
end;
end;
procedure TArtificialNeuralNetwork.nextTimeStep;
begin
t := t+1;
end;
procedure TArtificialNeuralNetwork.StartNewCycle;
var i, j, k, m: Integer;
begin
t := 1; // start in timestep 1
cyclesTrained := cyclesTrained+1; // increase the number of cycles trained so far
for j := 1 to NEURONS_HIDDEN do begin
neuronsHidden[j] := 0;
for k := 1 to NEURONS_OUTPUT do begin
eligibilityTraceOutput[j][k] := 0;
outputBefore[k] := 0;
neuronsOutput[k] := 0;
for m := 1 to MAX_TIMESTEPS do begin
reward[m][k] := 0;
end;
end;
for i := 1 to NEURONS_INPUT do begin
for k := 1 to NEURONS_OUTPUT do begin
eligibilityTraceHidden[i][j][k] := 0;
end;
end;
end;
end;
function TArtificialNeuralNetwork.getCyclesTrained;
begin
result := cyclesTrained;
end;
procedure TArtificialNeuralNetwork.setInputs;
var k: Integer;
begin
for k := 1 to NEURONS_INPUT do begin
neuronsInput[t][k] := state[k];
end;
end;
function TArtificialNeuralNetwork.getRating;
begin
setInputs(state);
ForwardPropagation;
result := neuronsOutput[1];
if not explorative then begin
tdLearning; // adjust the weights according to TD-lambda
ForwardPropagation; // calculate the network's output again
outputBefore[1] := neuronsOutput[1]; // set outputBefore which will then be used in the next timestep
UpdateEligibilityTraces; // update the eligibility traces for the next timestep
nextTimeStep; // go to the next timestep
end;
end;
function TArtificialNeuralNetwork.HyperbolicTangent;
begin
if x > 5500 then // prevent overflow
result := 1
else
result := (Exp(2*x)-1)/(Exp(2*x)+1);
end;
end.