Delphi: EInvalidOp in neural network class (TD-lambda)

Posted by user89818 on Stack Overflow See other posts from Stack Overflow or by user89818
Published on 2011-02-15T23:46:45Z Indexed on 2011/02/16 7:25 UTC
Read the original article Hit count: 237

Filed under:

delphi

|

exception

|

neural-network

|

pascal

|

temporal-difference

I have the following draft for a neural network class. This neural network should learn with TD-lambda. It is started by calling the getRating() function.

Unfortunately, an EInvalidOp (invalid floating point operation) error occurs after about 1000 iterations in the following lines:

neuronsHidden[j] := neuronsHidden[j]+neuronsInput[t][i]*weightsInput[i][j]; // input -> hidden

weightsHidden[j][k] := weightsHidden[j][k]+LEARNING_RATE_HIDDEN*tdError[k]*eligibilityTraceOutput[j][k]; // adjust hidden->output weights according to TD-lambda

Why does this error occur? I can't find the mistake in my code :( Can you help me? Thank you very much in advance!

unit uNeuronalesNetz;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, ExtCtrls, StdCtrls, Grids, Menus, Math;

// Network topology and TD-lambda hyper-parameters shared by the whole unit.
const NEURONS_INPUT = 43; // number of neurons in the input layer (also the loop bound used when copying a feature vector into the input layer)
      NEURONS_HIDDEN = 60; // number of neurons in the hidden layer
      NEURONS_OUTPUT = 1; // number of neurons in the output layer
      NEURONS_TOTAL = NEURONS_INPUT+NEURONS_HIDDEN+NEURONS_OUTPUT; // total number of neurons in the network
      MAX_TIMESTEPS = 42; // maximum number of timesteps possible (after 42 moves: board is full); upper bound of the per-timestep arrays
      LEARNING_RATE_INPUT = 0.25; // step size for the input->hidden weights; in ideal case: decrease gradually in course of training
      LEARNING_RATE_HIDDEN = 0.15; // step size for the hidden->output weights; in ideal case: decrease gradually in course of training
      GAMMA = 0.9; // factor applied to the current output when the TD error is computed for a zero-reward step (discount factor)
      LAMBDA = 0.7; // decay parameter for eligibility traces

type
  // Feature vector handed to the network: one SmallInt per input neuron.
  // Sized from NEURONS_INPUT so it cannot drift away from the loop bound
  // used when the vector is copied into the input layer.
  TFeatureVector = Array[1..NEURONS_INPUT] of SmallInt;

  // Three-layer feed-forward network (input -> hidden -> output) trained
  // with TD-lambda. Typical use: call getRating() once per timestep and
  // StartNewCycle at the beginning of every new episode.
  TArtificialNeuralNetwork = class
  private
    // GENERAL SETTINGS START
    learningMode: Boolean; // does the network learn and change its weights?
    // GENERAL SETTINGS END
    // NETWORK CONFIGURATION START
    neuronsInput: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_INPUT] of Extended; // input neuron values, stored separately for every timestep
    neuronsHidden: Array[1..NEURONS_HIDDEN] of Extended; // hidden neuron values (after activation)
    neuronsOutput: Array[1..NEURONS_OUTPUT] of Extended; // output neuron values (after activation)
    weightsInput: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Extended; // weights: input->hidden
    weightsHidden: Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // weights: hidden->output
    // NETWORK CONFIGURATION END
    // LEARNING SETTINGS START
    outputBefore: Array[1..NEURONS_OUTPUT] of Extended; // the network's output value in the last timestep (the one before)
    eligibilityTraceHidden: Array[1..NEURONS_INPUT] of Array[1..NEURONS_HIDDEN] of Array[1..NEURONS_OUTPUT] of Extended; // eligibility traces for the input->hidden weights, one per (input, hidden, output) triple
    // Eligibility traces for the hidden->output weights. The methods of this
    // unit only touch [1..NEURONS_HIDDEN][1..NEURONS_OUTPUT]; the larger
    // declared bounds are kept unchanged in case other code relies on them.
    eligibilityTraceOutput: Array[1..NEURONS_TOTAL] of Array[1..NEURONS_TOTAL] of Extended;
    reward: Array[1..MAX_TIMESTEPS] of Array[1..NEURONS_OUTPUT] of Extended; // the reward value for all output neurons in every timestep
    tdError: Array[1..NEURONS_OUTPUT] of Extended; // the network's TD error for every single output neuron
    t: Byte; // current timestep (index into the per-timestep arrays, valid range 1..MAX_TIMESTEPS)
    cyclesTrained: Integer; // number of cycles trained so far (learning rates could be decreased accordingly)
    last50errors: Array[1..50] of Extended;
    // LEARNING SETTINGS END
  public
    constructor Create; // create the network object and do the initialization
    procedure UpdateEligibilityTraces; // update the eligibility traces for the hidden and output layer
    procedure tdLearning; // learning algorithm: adjust the network's weights
    procedure ForwardPropagation; // propagate the input values through the network to the output layer
    function getRating(state: TFeatureVector; explorative: Boolean): Extended; // get the rating for a given state (feature vector)
    function HyperbolicTangent(x: Extended): Extended; // calculate the hyperbolic tangent [-1;1]
    procedure StartNewCycle; // start a new cycle with everything set to default except for the weights
    procedure setLearningMode(activated: Boolean=TRUE); // switch the learning mode on/off
    procedure setInputs(state: TFeatureVector); // transfer the given feature vector to the input layer (set input neurons' values)
    procedure setReward(currentReward: SmallInt); // set the reward for the current timestep (with learning then or without)
    procedure nextTimeStep; // increase timestep t
    function getCyclesTrained(): Integer; // get the number of cycles trained so far
    procedure Visualize(imgHidden: Pointer); // visualize the neural network's hidden layer
  end;

implementation

// Decays every eligibility trace by LAMBDA and adds the gradient of the
// current output with respect to the corresponding weight, i.e. how much
// each weight contributed to the output at the current timestep t.
//
// Both layers are activated with the hyperbolic tangent, whose derivative
// expressed through the activation value y is (1 - y*y). The logistic form
// y*(1-y) must not be used here: for y in [-1;1] it is not the slope of
// tanh and becomes negative for y < 0, which drives the weights away from
// the target instead of towards it.
procedure TArtificialNeuralNetwork.UpdateEligibilityTraces;
var
  i, j, k: Integer;
  outputSlope, hiddenSlope: Extended;
begin
  for k := 1 to NEURONS_OUTPUT do begin
    // derivative of tanh at output neuron k (constant for the inner loops)
    outputSlope := 1-neuronsOutput[k]*neuronsOutput[k];
    for j := 1 to NEURONS_HIDDEN do begin
      // derivative of tanh at hidden neuron j
      hiddenSlope := 1-neuronsHidden[j]*neuronsHidden[j];
      // trace for weight hidden j -> output k
      eligibilityTraceOutput[j][k] := LAMBDA*eligibilityTraceOutput[j][k]+outputSlope*neuronsHidden[j];
      for i := 1 to NEURONS_INPUT do begin
        // trace for weight input i -> hidden j, as seen by output k (chain rule through hidden j)
        eligibilityTraceHidden[i][j][k] := LAMBDA*eligibilityTraceHidden[i][j][k]+outputSlope*weightsHidden[j][k]*hiddenSlope*neuronsInput[t][i];
      end;
    end;
  end;
end;

// Stores currentReward as the reward of every output neuron for the
// current timestep t.
// Reward convention: +1 = player A wins, 0 = draw, -1 = player B wins.
procedure TArtificialNeuralNetwork.setReward;
var
  outIdx: Integer;
begin
  for outIdx := 1 to NEURONS_OUTPUT do
    Self.reward[Self.t][outIdx] := currentReward;
end;

// TD-lambda weight update for the current timestep t.
// Does nothing while learning mode is switched off. Otherwise it computes
// the TD error of every output neuron and moves each weight along its
// eligibility trace, scaled by that error and the layer's learning rate.
procedure TArtificialNeuralNetwork.tdLearning;
var
  inIdx, hidIdx, outIdx: Integer;
begin
  if not learningMode then
    Exit;

  for outIdx := 1 to NEURONS_OUTPUT do begin
    if reward[t][outIdx] <> 0 then
      // a reward was received: the reward itself is the target
      tdError[outIdx] := reward[t][outIdx]-outputBefore[outIdx]
    else
      // reward is 0: the scaled current output is the target
      tdError[outIdx] := GAMMA*neuronsOutput[outIdx]-outputBefore[outIdx];

    for hidIdx := 1 to NEURONS_HIDDEN do begin
      // hidden -> output weight
      weightsHidden[hidIdx][outIdx] := weightsHidden[hidIdx][outIdx]+LEARNING_RATE_HIDDEN*tdError[outIdx]*eligibilityTraceOutput[hidIdx][outIdx];
      // input -> hidden weights feeding this hidden neuron
      for inIdx := 1 to NEURONS_INPUT do
        weightsInput[inIdx][hidIdx] := weightsInput[inIdx][hidIdx]+LEARNING_RATE_INPUT*tdError[outIdx]*eligibilityTraceHidden[inIdx][hidIdx][outIdx];
    end;
  end;
end;

// Feeds the input values of the current timestep t through the network:
// every hidden neuron becomes tanh(weighted sum of the inputs) and every
// output neuron becomes tanh(weighted sum of the hidden neurons).
procedure TArtificialNeuralNetwork.ForwardPropagation;
var
  inIdx, hidIdx, outIdx: Integer;
  weightedSum: Extended;
begin
  // input -> hidden
  for hidIdx := 1 to NEURONS_HIDDEN do begin
    weightedSum := 0;
    for inIdx := 1 to NEURONS_INPUT do
      weightedSum := weightedSum+neuronsInput[t][inIdx]*weightsInput[inIdx][hidIdx];
    neuronsHidden[hidIdx] := HyperbolicTangent(weightedSum);
  end;

  // hidden -> output
  for outIdx := 1 to NEURONS_OUTPUT do begin
    weightedSum := 0;
    for hidIdx := 1 to NEURONS_HIDDEN do
      weightedSum := weightedSum+neuronsHidden[hidIdx]*weightsHidden[hidIdx][outIdx];
    neuronsOutput[outIdx] := HyperbolicTangent(weightedSum);
  end;
end;

// Switches weight adjustment on (activated = TRUE) or off; the flag is
// checked by tdLearning before any weight is touched.
procedure TArtificialNeuralNetwork.setLearningMode;
begin
  Self.learningMode := activated;
end;

// Builds a network with learning enabled, resets all per-cycle state and
// fills both weight matrices with random values in [0; 0.5].
constructor TArtificialNeuralNetwork.Create;
var
  inIdx, hidIdx, outIdx, errIdx: Integer;
begin
  inherited Create;
  Randomize; // seed the random number generator
  Self.learningMode := TRUE;
  // Starts at -2 because, per the original author's note, the counter is
  // increased twice in the beginning; StartNewCycle below is the first bump.
  Self.cyclesTrained := -2;
  StartNewCycle;

  for hidIdx := 1 to NEURONS_HIDDEN do begin
    // hidden -> output weights of this hidden neuron
    for outIdx := 1 to NEURONS_OUTPUT do
      weightsHidden[hidIdx][outIdx] := Abs(Random-0.5);
    // input -> hidden weights leading into this hidden neuron
    for inIdx := 1 to NEURONS_INPUT do
      weightsInput[inIdx][hidIdx] := Abs(Random-0.5);
  end;

  for errIdx := 1 to 50 do
    last50errors[errIdx] := 0;
end;

// Advances the current timestep by one.
// NOTE(review): there is no upper bound check here; t indexes arrays that
// are declared 1..MAX_TIMESTEPS, so the caller is presumably expected to
// call StartNewCycle before t exceeds that range - confirm.
procedure TArtificialNeuralNetwork.nextTimeStep;
begin
  Self.t := Self.t+1;
end;

// Begins a new training cycle: rewinds the timestep to 1, counts the cycle
// and clears neuron values, previous outputs, rewards and all eligibility
// traces. The weights are deliberately left untouched.
procedure TArtificialNeuralNetwork.StartNewCycle;
var
  inIdx, hidIdx, outIdx, stepIdx: Integer;
begin
  t := 1; // start in timestep 1
  cyclesTrained := cyclesTrained+1; // one more cycle trained

  // per-output state and the rewards of every timestep
  for outIdx := 1 to NEURONS_OUTPUT do begin
    outputBefore[outIdx] := 0;
    neuronsOutput[outIdx] := 0;
    for stepIdx := 1 to MAX_TIMESTEPS do
      reward[stepIdx][outIdx] := 0;
  end;

  // hidden neurons and both sets of eligibility traces
  for hidIdx := 1 to NEURONS_HIDDEN do begin
    neuronsHidden[hidIdx] := 0;
    for outIdx := 1 to NEURONS_OUTPUT do begin
      eligibilityTraceOutput[hidIdx][outIdx] := 0;
      for inIdx := 1 to NEURONS_INPUT do
        eligibilityTraceHidden[inIdx][hidIdx][outIdx] := 0;
    end;
  end;
end;

// Returns the current value of the cycle counter.
function TArtificialNeuralNetwork.getCyclesTrained;
begin
  Result := Self.cyclesTrained;
end;

// Copies the given feature vector element by element into the input layer
// slot of the current timestep t.
procedure TArtificialNeuralNetwork.setInputs;
var
  inIdx: Integer;
begin
  for inIdx := 1 to NEURONS_INPUT do
    Self.neuronsInput[Self.t][inIdx] := state[inIdx];
end;

// Rates the given state (feature vector) and, unless the call is
// explorative, performs one TD-lambda learning step.
//
// state       - feature vector copied into the input layer for timestep t
// explorative - TRUE: only evaluate, leave weights, traces and t untouched;
//               FALSE: learn from this state and advance to the next timestep
// Returns the value of output neuron 1 as computed BEFORE any weight
// adjustment made by this call.
function TArtificialNeuralNetwork.getRating;
var
  k: Integer;
begin
  setInputs(state);
  ForwardPropagation;
  result := neuronsOutput[1];
  if not explorative then begin
    tdLearning; // adjust the weights according to TD-lambda
    ForwardPropagation; // calculate the network's output again with the new weights
    // Remember the outputs for the next timestep. tdLearning reads
    // outputBefore[k] for every output neuron, so all of them are stored,
    // not just the first one.
    for k := 1 to NEURONS_OUTPUT do begin
      outputBefore[k] := neuronsOutput[k];
    end;
    UpdateEligibilityTraces; // update the eligibility traces for the next timestep
    nextTimeStep; // go to the next timestep
  end;
end;

// Hyperbolic tangent of x, result in [-1;1].
//
// For |x| > SATURATION_LIMIT the quotient (e^2x - 1)/(e^2x + 1) already
// rounds to exactly +1 or -1 in Extended precision, so those inputs are
// answered directly. This keeps Exp() far away from its overflow range even
// on targets where Extended has less range than the 80-bit format, where
// the former one-sided limit of 5500 was too late to prevent INF/INF.
function TArtificialNeuralNetwork.HyperbolicTangent;
const
  SATURATION_LIMIT = 25;
var
  e2x: Extended;
begin
  if x > SATURATION_LIMIT then
    result := 1
  else if x < -SATURATION_LIMIT then
    result := -1
  else begin
    e2x := Exp(2*x); // evaluated once instead of twice
    result := (e2x-1)/(e2x+1);
  end;
end;

end.

Developer IT

Delphi: EInvalidOp in neural network class (TD-lambda) - Developer IT

Delphi: EInvalidOp in neural network class (TD-lambda)

delphi

exception

neural-network

pascal

temporal-difference

Related posts about delphi

TVirtualStringTree compatibility between Delphi 7 and Delphi 2010 - 'Parameter lists differ'

Convert Delphi 7 code to work with Delphi 2009

Book recommendation for moving from Delphi 6 to Delphi 2010

Compile Delphi component package (bpl) for different Delphi versions

Delphi Speech recognition delphi

Related posts about exception

Multiple Exception Handlers for one exception type

Delphi Exception handling problem with multiple Exception handling blocks

Delph Exception handling problem with multiple Exception handling blocks

ClickOnce manifest problem

.NET remoting exception: Exception in the Socket#33711845::DoBind - Only one usage of each socket ad

Categories cloud