Delphi: EInvalidOp in neural network class (TD-lambda)

Posted by user89818 on Stack Overflow See other posts from Stack Overflow or by user89818
Published on 2011-02-15T23:46:45Z Indexed on 2011/02/16 7:25 UTC
Read the original article Hit count: 192

I have the following draft for a neural network class. This neural network should learn with TD-lambda. It is started by calling the getRating() function.

But unfortunately, there is an EInvalidOp (invalid floading point operation) error after about 1000 iterations in the following lines:

neuronsHidden[j] := neuronsHidden[j]+neuronsInput[t][i]*weightsInput[i][j]; // input -> hidden

weightsHidden[j][k] := weightsHidden[j][k]+LEARNING_RATE_HIDDEN*tdError[k]*eligibilityTraceOutput[j][k]; // adjust hidden->output weights according to TD-lambda

Why is this error? I can't find the mistake in my code :( Can you help me? Thank you very much in advance!

unit uNeuronalesNetz;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, ExtCtrls, StdCtrls, Grids, Menus, Math;

const NEURONS_INPUT = 43; // number of neurons in the input layer
      NEURONS_HIDDEN = 60; // number of neurons in the hidden layer
      NEURONS_OUTPUT = 1; // number of neurons in the output layer
      NEURONS_TOTAL = NEURONS_INPUT+NEURONS_HIDDEN+NEURONS_OUTPUT; // total number of neurons in the network
      MAX_TIMESTEPS = 42; // maximum number of timesteps possible (after 42 moves: board is full)
      LEARNING_RATE_INPUT = 0.25; // in ideal case: decrease gradually in course of training
      LEARNING_RATE_HIDDEN = 0.15; // in ideal case: decrease gradually in course of training
      GAMMA = 0.9;
      LAMBDA = 0.7; // decay parameter for eligibility traces

type
  TFeatureVector = Array[1..43] of SmallInt; // definition of the array type TFeatureVector
  TArtificialNeuralNetwork = class // definition of the class TArtificialNeuralNetwork
  private
    // GENERAL SETTINGS START
    learningMode: Boolean; // does the network learn and change its weights?
    // GENERAL SETTINGS END
    // NETWORK CONFIGURATION START
    neuronsInput: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_INPUT] of Extended; // array of all input neurons (their values) for every timestep
    neuronsHidden: Array[1..NEURONS_HIDDEN] of Extended; // array of all hidden neurons (their values)
    neuronsOutput: Array[1..NEURONS_OUTPUT] of Extended; // array of output neurons (their values)
    weightsInput: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Extended; // array of weights: input->hidden
    weightsHidden: Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // array of weights: hidden->output
    // NETWORK CONFIGURATION END
    // LEARNING SETTINGS START
    outputBefore: Array[1..NEURONS_OUTPUT] of Extended; // the network's output value in the last timestep (the one before)
    eligibilityTraceHidden: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // array of eligibility traces: hidden layer
    eligibilityTraceOutput: Array[1..NEURONS_TOTAL] of Array[1..NEURONS_TOTAL] of Extended; // array of eligibility traces: output layer
    reward: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_OUTPUT] of Extended; // the reward value for all output neurons in every timestep
    tdError: Array[1..NEURONS_OUTPUT] of Extended; // the network's error value for every single output neuron
    t: Byte; // current timestep
    cyclesTrained: Integer; // number of cycles trained so far (learning rates could be decreased accordingly)
    last50errors: Array[1..50] of Extended;
    // LEARNING SETTINGS END
  public
    constructor Create; // create the network object and do the initialization
    procedure UpdateEligibilityTraces; // update the eligibility traces for the hidden and output layer
    procedure tdLearning; // learning algorithm: adjust the network's weights
    procedure ForwardPropagation; // propagate the input values through the network to the output layer
    function getRating(state: TFeatureVector; explorative: Boolean): Extended; // get the rating for a given state (feature vector)
    function HyperbolicTangent(x: Extended): Extended; // calculate the hyperbolic tangent [-1;1]
    procedure StartNewCycle; // start a new cycle with everything set to default except for the weights
    procedure setLearningMode(activated: Boolean=TRUE); // switch the learning mode on/off
    procedure setInputs(state: TFeatureVector); // transfer the given feature vector to the input layer (set input neurons' values)
    procedure setReward(currentReward: SmallInt); // set the reward for the current timestep (with learning then or without)
    procedure nextTimeStep; // increase timestep t
    function getCyclesTrained(): Integer; // get the number of cycles trained so far
    procedure Visualize(imgHidden: Pointer); // visualize the neural network's hidden layer
  end;

implementation

procedure TArtificialNeuralNetwork.UpdateEligibilityTraces;
var i, j, k: Integer;
begin
    // how worthy is a weight to be adjusted?
    for j := 1 to NEURONS_HIDDEN do begin
        for k := 1 to NEURONS_OUTPUT do begin
            eligibilityTraceOutput[j][k] := LAMBDA*eligibilityTraceOutput[j][k]+(neuronsOutput[k]*(1-neuronsOutput[k]))*neuronsHidden[j];
            for i := 1 to NEURONS_INPUT do begin
        eligibilityTraceHidden[i][j][k] := LAMBDA*eligibilityTraceHidden[i][j][k]+(neuronsOutput[k]*(1-neuronsOutput[k]))*weightsHidden[j][k]*neuronsHidden[j]*(1-neuronsHidden[j])*neuronsInput[t][i];
            end;
        end;
    end;
end;

procedure TArtificialNeuralNetwork.setReward;
VAR i: Integer;
begin
  for i := 1 to NEURONS_OUTPUT do begin
    // +1 = player A wins
    // 0 = draw
    // -1 = player B wins
    reward[t][i] := currentReward;
  end;
end;

procedure TArtificialNeuralNetwork.tdLearning;
var i, j, k: Integer;
begin
  if learningMode then begin
    for k := 1 to NEURONS_OUTPUT do begin
      if reward[t][k] = 0 then begin
        tdError[k] := GAMMA*neuronsOutput[k]-outputBefore[k]; // network's error value when reward is 0
      end
      else begin
        tdError[k] := reward[t][k]-outputBefore[k]; // network's error value in the final state (reward received)
      end;
      for j := 1 to NEURONS_HIDDEN do begin
        weightsHidden[j][k] := weightsHidden[j][k]+LEARNING_RATE_HIDDEN*tdError[k]*eligibilityTraceOutput[j][k]; // adjust hidden->output weights according to TD-lambda
        for i := 1 to NEURONS_INPUT do begin
          weightsInput[i][j] := weightsInput[i][j]+LEARNING_RATE_INPUT*tdError[k]*eligibilityTraceHidden[i][j][k]; // adjust input->hidden weights according to TD-lambda
        end;
      end;
    end;
  end;
end;

procedure TArtificialNeuralNetwork.ForwardPropagation;
var i, j, k: Integer;
begin
    for j := 1 to NEURONS_HIDDEN do begin
        neuronsHidden[j] := 0;
        for i := 1 to NEURONS_INPUT do begin
      neuronsHidden[j] := neuronsHidden[j]+neuronsInput[t][i]*weightsInput[i][j]; // input -> hidden
        end;
    neuronsHidden[j] := HyperbolicTangent(neuronsHidden[j]); // activation of hidden neuron j
    end;
    for k := 1 to NEURONS_OUTPUT do begin
        neuronsOutput[k] := 0;
        for j := 1 to NEURONS_HIDDEN do begin
      neuronsOutput[k] := neuronsOutput[k]+neuronsHidden[j]*weightsHidden[j][k]; // hidden -> output
        end;
        neuronsOutput[k] := HyperbolicTangent(neuronsOutput[k]); // activation of output neuron k
    end;
end;

procedure TArtificialNeuralNetwork.setLearningMode;
begin
  learningMode := activated;
end;

constructor TArtificialNeuralNetwork.Create;
var i, j, k: Integer;
begin
    inherited Create;
    Randomize; // initialize random numbers generator
    learningMode := TRUE;
    cyclesTrained := -2; // only set to -2 because it will be increased twice in the beginning
    StartNewCycle;
    for j := 1 to NEURONS_HIDDEN do begin
        for k := 1 to NEURONS_OUTPUT do begin
            weightsHidden[j][k] := abs(Random-0.5); // initialize weights: 0 <= random < 0.5
        end;
        for i := 1 to NEURONS_INPUT do begin
            weightsInput[i][j] := abs(Random-0.5); // initialize weights: 0 <= random < 0.5
        end;
    end;
  for i := 1 to 50 do begin
    last50errors[i] := 0;
  end;
end;

procedure TArtificialNeuralNetwork.nextTimeStep;
begin
  t := t+1;
end;

procedure TArtificialNeuralNetwork.StartNewCycle;
var i, j, k, m: Integer;
begin
  t := 1; // start in timestep 1
  cyclesTrained := cyclesTrained+1; // increase the number of cycles trained so far
    for j := 1 to NEURONS_HIDDEN do begin
    neuronsHidden[j] := 0;
        for k := 1 to NEURONS_OUTPUT do begin
            eligibilityTraceOutput[j][k] := 0;
            outputBefore[k] := 0;
      neuronsOutput[k] := 0;
      for m := 1 to MAX_TIMESTEPS do begin
        reward[m][k] := 0;
      end;
        end;
        for i := 1 to NEURONS_INPUT do begin
            for k := 1 to NEURONS_OUTPUT do begin
                eligibilityTraceHidden[i][j][k] := 0;
            end;
        end;
    end;
end;

function TArtificialNeuralNetwork.getCyclesTrained;
begin
  result := cyclesTrained;
end;

procedure TArtificialNeuralNetwork.setInputs;
var k: Integer;
begin
  for k := 1 to NEURONS_INPUT do begin
    neuronsInput[t][k] := state[k];
  end;
end;

function TArtificialNeuralNetwork.getRating;
begin
  setInputs(state);
  ForwardPropagation;
  result := neuronsOutput[1];
  if not explorative then begin
    tdLearning; // adjust the weights according to TD-lambda
    ForwardPropagation; // calculate the network's output again
    outputBefore[1] := neuronsOutput[1]; // set outputBefore which will then be used in the next timestep
    UpdateEligibilityTraces; // update the eligibility traces for the next timestep
    nextTimeStep; // go to the next timestep
  end;
end;

function TArtificialNeuralNetwork.HyperbolicTangent;
begin
  if x > 5500 then // prevent overflow
    result := 1
  else
    result := (Exp(2*x)-1)/(Exp(2*x)+1);
end;

end.

© Stack Overflow or respective owner

Related posts about delphi

Related posts about exception