/*
// Simple Genetic Program
//
// W. B. Langdon, cwi.nl, 21 February 1999
//
// Modifications:
// Author:  WBL
// Date  :  21 Feb 1999
// Change:  Increase portability by replacing drand48() by park-miller, 
//          and renaming functions to avoid name clashes

// Author:  WBL
// Date  :  18 Feb 1994, 21 April 1994
// Change:  Replace rand() with drand48(),
//          reduce POPULATION_SIZE from 500 to 16
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <float.h>
#include <assert.h>

/* SUNos v4 */
#define EXIT_FAILURE 1
#define FILENAME_MAX 80

#define POPULATION_SIZE  16
#define PCROSS            0.6
#define PMUT               .01
#define MAX_GEN          50
#define MAX_PROG_SIZE    25
#define STACK_SIZE       25
#define LINE_WIDTH      200
#define THRESHOLD          0.01

#define MAX_TEST_POINTS   10

float x[MAX_TEST_POINTS];
float y[MAX_TEST_POINTS];
int num_x;
char output_file[FILENAME_MAX];

struct prog
{
    char string[MAX_PROG_SIZE + 1];
    float fitness;
    int hits;
};

struct prog population[POPULATION_SIZE];
struct prog new_population[POPULATION_SIZE];

/*Data for display purposes only */
#define COPY_MUM -1
#define MUTATE   -1
struct par
{
    int mum, mumcross;
    int dad, dadcross;
};
struct par parents[POPULATION_SIZE];
int num_tests = 0;

int best_prog, max_hits, required_hits;

float new_sumfitness;
float total_fitness;

int generation;


/* start code */

int seed;

read_inputs ()
/*read pairs of numbers from stdin
//
//outputs:    required_hits
//            x, y, num_x
*/
{
    int i;
    time_t now;

    printf ("Enter up to %d x y pairs of numbers> ", MAX_TEST_POINTS);
    fflush (stdout);

    for (i = 0; i < MAX_TEST_POINTS; i++)
      {
	  if (scanf ("%e %e", &x[i], &y[i]) != 2)
	      break;
      }

    if (i <= 0)
      {
	  printf ("\nNo data pairs! Stopping!\n");
	  exit (EXIT_FAILURE);
      }

    num_x = i;
    required_hits = num_x;
    strcpy (output_file, "gp.c");


    printf ("\nGP regression on %d points\n", num_x);
    for (i = 0; i < num_x; i++)
      {
	  printf ("(%e,%e) ", x[i], y[i]);
	  if ((i % 2) == 1)
	      printf ("\n");
      }

    time (&now);
    seed = now;

    printf ("\nPop = %d, PCross %.5f, PMutate %.5f, Max gens %d, seed %d\n",
	    POPULATION_SIZE, PCROSS, PMUT, MAX_GEN, seed);

    printf ("\nGP starting\n");

}				/*end read_inputs() */


/*park-miller.cc   Park-Miller random number generator
 W. Langdon cs.ucl.ac.uk 5 May 1994*/
int
intrnd ()			/* 1<=seed<=m */
{
    double const a = 16807;	/* ie 7**5 */
    double const m = 2147483647;	/* ie 2**31-1 */

    double temp = seed * a;
    seed = (int) (temp - m * floor (temp / m));
    return seed;
}

float
rand_0to1 ()
{
    return (float) (intrnd ()) / 2147483647.0;
}

float
random_number ()		/* single precision ok */
{
    float r;

    for (; (r = rand_0to1 ()) >= 1.0;)
      {
	  printf ("random number >=1.0 %e\n", r);
      }				/* ensure we dont return 1.0 */

    return (r);

}				/* end random_number() */

int
random_point (char string[])
{
/*return a random point in the string, ie from pos zero to length of string -1
*/

    return ((int) (random_number () * (float) (strlen (string))));

}				/*end random_point () */


int
matching_point (char string[], int subtree)
/*Return: Point in string immediately after subtree starting at subtree*/
{
    int i;
    int dangling_limbs;

    if (string[subtree] == '\0')
	dangling_limbs = 0;	/*already at end of tree */
    else
	dangling_limbs = 1;

    for (i = subtree; dangling_limbs > 0; i++)
      {
	  switch (string[i])
	    {
	    case '+':
	    case '-':
	    case '*':
	    case '/':
		dangling_limbs++;	/*function (all have two limbs) */
		break;

	    case '\0':
		assert (1 == 0);	/*oh dear */
		break;

	    default:
		dangling_limbs--;	/*terminal */
		break;
	    }
      }				/*end for */

    return (i);

}				/*end matching_point() */



int
random_function ()
{
#define N_FUNCTIONS 4
    int function[N_FUNCTIONS] = { '+', '-', '*', '/' };

    return (function[(int) (random_number () * N_FUNCTIONS)]);

}				/*end random_function() */


int
random_terminal ()
{
    return ('x');

}				/*end random_terminal() */


int
random_tree (char buff[], int size)
/*generate random program, place in buff
//
//Inputs: size of buff
//
//Returns: length of tree
*/
{
    int dangling_limbs = 1;
    int i;

    for (i = 0; (dangling_limbs > 0) && (i <= size); i++)
      {				/*chose function or terminal at random */

	  if (random_number () >
	      (float) (dangling_limbs * dangling_limbs + 1) / (float) (size -
								       i))
	    {
		buff[i] = random_function ();
		dangling_limbs++;	/*all operators have two limbs */
	    }
	  else
	    {
		buff[i] = random_terminal ();
		dangling_limbs--;
	    }

      }				/*end for */

    assert (dangling_limbs == 0);	/*Opps... */

    return (i);

}				/*end random_tree() */

int
splice (char output[], int outsize,
	char buff1[], int end1,
	char buff2[], int end2, char buff3[], int end3)
/*returns: 0 if ok*/
{
    if ((end1 + end2 + end3) >= outsize)
	return (1);

    memcpy (output, buff1, end1);
    memcpy (&output[end1], buff2, end2);
    memcpy (&output[end1 + end2], buff3, end3);

    output[end1 + end2 + end3] = '\0';	/*terminate string */

    return (0);			/*ie ok */

}				/*end splice() */


mutate (int mum, int child)
/*make a single random change to mum from old population and store
//child in new_population */
{
    int mum_end1;
    int mum_start2, mum_end2;
    int mut_length;

    int sanity = 0;

    char buffer[MAX_PROG_SIZE];

    do
      {
	  assert (sanity++ < 1000);

	  mum_end1 = random_point (population[mum].string);
	  mum_start2 = matching_point (population[mum].string, mum_end1);
	  mum_end2 = strlen (population[mum].string);

	  mut_length = random_tree (buffer, MAX_PROG_SIZE);

      }
    while (splice (new_population[child].string, MAX_PROG_SIZE + 1,
		   population[mum].string, mum_end1,
		   buffer, mut_length,
		   &population[mum].string[mum_start2],
		   mum_end2 - mum_start2) != 0);

/*display data */

    parents[child].mum = mum;
    parents[child].mumcross = mum_end1;
    parents[child].dad = MUTATE;
    parents[child].dadcross = mut_length;


}				/*end mutate() */


int stack;
float value_stack[STACK_SIZE];
char op_stack[STACK_SIZE];
char output_stack[STACK_SIZE][LINE_WIDTH];


push_operator (char op)
{
    op_stack[stack++] = op;
    value_stack[stack] = 0.0;	/* keep it clean */
    output_stack[stack][0] = '\0';	/* and tidy      */

}				/*end push_operator() */


push_value (float value)
{
    float a;
    float result;

    if ((stack <= 0) || (op_stack[stack - 1] != 0))	/* operator on top of stack */
      {				/* or stack empty           */
	  op_stack[stack] = 0;	/* operand on top of stack */
	  value_stack[stack++] = value;
      }
    else
      {
	  a = value_stack[--stack];	/* pop first operand */
	  switch (op_stack[--stack])	/* pop operator */
	    {
	    case '+':
		result = a + value;
		break;

	    case '-':
		result = a - value;
		break;

	    case '*':
		result = a * value;
		break;

	    case '/':
		if (value == 0.0)
		    result = 1.0;
		else
		    result = a / value;
		break;

	    default:
		assert (10 == 0);	/* opps */
		break;
	    }

	  push_value (result);
      }

}				/*end push_value() */

float
prog_value (char string[], float input)
/* return value of program child in new_population with input*/
{
    int i;

    stack = 0;

    for (i = 0; string[i] != 0; i++)
      {
	  switch (string[i])
	    {
	    case '+':
	    case '-':
	    case '*':
	    case '/':
		push_operator (string[i]);
		break;

	    default:
		push_value (input);
		break;
	    }
      }				/*end for */

    assert (stack == 1);

/*printf("prog_value: %s = %e, input = %e\n", string, value_stack[0], input );
*/

    return (value_stack[--stack]);	/*pop value */

}				/*end prog_value() */



update_hits_etc (float fit, int n_hits, int child)
/*
//store fitness and hits of child in new_population
//update best program statistics.
//
//Nb best program is defined to be one with highest number of hits
//regardless of fitness
*/
{
    new_population[child].fitness = fit;

    new_sumfitness += new_population[child].fitness;

    new_population[child].hits = n_hits;

    if (max_hits < new_population[child].hits)
      {
	  max_hits = new_population[child].hits;
	  best_prog = child;
      }

}				/*end update_hits_etc() */

test_fitness (int child)
/*evaluate test_fitness of child in new_population, store answer with it.*/
{
    int i;
    int test_hits = 0;
    float error;
    float total_error = 0.0;

    for (i = 0; i < num_x; i++)
      {
	  error =
	      fabs (y[i] - prog_value (new_population[child].string, x[i]));

	  if (error <= THRESHOLD || error <= THRESHOLD * fabs (y[i]))
	      test_hits++;

	  total_error += error;

      }				/*end for each test switch */

    num_tests++;

    update_hits_etc (num_x / (total_error + num_x), test_hits, child);
/*
  update_hits_etc(1.0/(total_error+1.0), test_hits, child);
*/
}				/*end test_fitness() */

display_stats ()
/*display info on new_population */
{
    int i;
    time_t now;

    float min_fitness = FLT_MAX;
    float max_fitness = -FLT_MAX;

    int mi_hits = num_x;
    int mx_hits = 0;
    int sum_hits = 0;

    float sum_fitness = 0.0;
    float sum_squared = 0.0;
    float mean;


    time (&now);
    printf ("\n Generation %d on %s\n", generation, ctime (&now));

    for (i = 0; i < POPULATION_SIZE; i++)
      {
	  sum_fitness += new_population[i].fitness;
	  sum_squared +=
	      new_population[i].fitness * new_population[i].fitness;

	  if (min_fitness > new_population[i].fitness)
	      min_fitness = new_population[i].fitness;

	  if (max_fitness < new_population[i].fitness)
	      max_fitness = new_population[i].fitness;

	  if (mi_hits > new_population[i].hits)
	      mi_hits = new_population[i].hits;

	  if (mx_hits < new_population[i].hits)
	      mx_hits = new_population[i].hits;

	  sum_hits += new_population[i].hits;

      }				/*end for */

    mean = sum_fitness / POPULATION_SIZE;


    for (i = 0; i < POPULATION_SIZE; i++)
      {
	  if (parents[i].mumcross == COPY_MUM)
	      printf ("copy     [%3d]=%3d             ", i, parents[i].mum);
	  else if (parents[i].dad == MUTATE)
	      printf ("mutate   [%3d]=%3d(%2d)+(%2d)    ",
		      i, parents[i].mum, parents[i].mumcross,
		      parents[i].dadcross);
	  else
	      printf ("crossover[%3d]=%3d(%2d)*%3d(%2d) ",
		      i, parents[i].mum, parents[i].mumcross,
		      parents[i].dad, parents[i].dadcross);

	  printf ("fit = %.5f hits = %2d pselect = %.3f %s\n",
		  new_population[i].fitness,
		  new_population[i].hits,
		  new_population[i].fitness / mean, new_population[i].string);
      }

    printf ("Generation %d, new_sumfitness %e %s",
	    generation, new_sumfitness, ctime (&now));

    printf ("Mean fitness = %.5f, variance = %.5f, min = %.5f, max = %.5f \n",
	    mean,
	    (sum_squared - POPULATION_SIZE * mean * mean) / POPULATION_SIZE,
	    min_fitness, max_fitness);

    printf
	("Min hits = %2d, max = %2d, best_prog = %3d, fitness = %.5f hits = %2d %s\n",
	 mi_hits, mx_hits, best_prog, new_population[best_prog].fitness,
	 new_population[best_prog].hits, new_population[best_prog].string);

}				/*end display_stats() */


replace_old_population ()
{
    int i;

    total_fitness = new_sumfitness;
/* best_prog - retain current value until new generation*/

    for (i = 0; i < POPULATION_SIZE; i++)
      {
	  strcpy (population[i].string, new_population[i].string);
	  population[i].fitness = new_population[i].fitness;
	  population[i].hits = new_population[i].hits;
      }

}				/*end replace_old_population() */



initialise_population ()
{
    int i;

    for (i = 0; i < POPULATION_SIZE; i++)
      {
	  population[i].string[0] = '\0';
	  mutate (i, i);
	  test_fitness (i);
      }

    display_stats ();

    replace_old_population ();

}				/*end initialise_population() */


int
select_parent ()
/* return index or parent in old population*/
{
/*consider binary chop if POPULATION_SIZE becomes big*/

    int i;

    float cumlative_fitness = 0.0;
    float r;

    r = total_fitness * random_number ();

    for (i = 0; i < POPULATION_SIZE; i++)
      {
	  cumlative_fitness += population[i].fitness;
	  if (r <= cumlative_fitness)
	      return (i);
      }

    return (POPULATION_SIZE - 1);

}				/*end select_parent() */


crossover (int mum, int dad, int child)
/*choose a random point in program from old population and another
//also from old population to produce a child which is stored in the new
//population */
{
    int mum_end1;
    int mum_start2, mum_end2;
    int dad_start, dad_end;

    int sanity = 0;

    do
      {
	  assert (sanity++ < 1000);

	  mum_end1 = random_point (population[mum].string);
	  mum_start2 = matching_point (population[mum].string, mum_end1);
	  mum_end2 = strlen (population[mum].string);

	  dad_start = random_point (population[dad].string);
	  dad_end = matching_point (population[dad].string, dad_start);

      }
    while (splice (new_population[child].string, MAX_PROG_SIZE + 1,
		   population[mum].string, mum_end1,
		   &population[dad].string[dad_start], dad_end - dad_start,
		   &population[mum].string[mum_start2],
		   mum_end2 - mum_start2) != 0);

/*display data */
    parents[child].mum = mum;
    parents[child].mumcross = mum_end1;
    parents[child].dad = dad;
    parents[child].dadcross = dad_start;

}				/*end crossover () */


copy (int mum, int child)
/*copy from old population to new_population, including test_fitness*/
{
    strcpy (new_population[child].string, population[mum].string);

/*display data */
    parents[child].mum = mum;
    parents[child].mumcross = COPY_MUM;

    update_hits_etc (population[mum].fitness, population[mum].hits, child);

}				/*end copy() */


push_string (char value[])
{
    char a[LINE_WIDTH];
    char result[LINE_WIDTH];

    if ((stack <= 0) || (op_stack[stack - 1] != 0))	/*operator on top of stack */
      {				/*or stack empty          */
	  op_stack[stack] = 0;	/*operand on top of stack */
	  strcpy (output_stack[stack], value);
	  stack++;
      }
    else
      {
	  --stack;
	  strcpy (a, output_stack[stack]);	/*pop first operand */
	  switch (op_stack[--stack])	/*pop operator */
	    {
	    case '+':
		sprintf (result, "%s+%s", a, value);
		break;

	    case '-':
		sprintf (result, "(%s)-(%s)", a, value);
		break;

	    case '*':
		sprintf (result, "(%s)*(%s)", a, value);
		break;

	    case '/':
		sprintf (result, "div(%s,%s)", a, value);
		break;

	    default:
		assert (11 == 0);	/* opps! */
		break;
	    }

	  push_string (result);
      }

}				/*end push_string() */


output_program (char string[])
/* Convert string to c format*/
{
    char buff[] = " ";
    int i;
    FILE *stream;

    stack = 0;

    for (i = 0; string[i] != 0; i++)
      {
	  switch (string[i])
	    {
	    case '+':
	    case '-':
	    case '*':
	    case '/':
		push_operator (string[i]);
		break;

	    default:
		buff[0] = string[i];
		push_string (buff);
		break;
	    }
      }				/*end for */

    assert (stack == 1);

    if ((stream = fopen (output_file, "w")) == NULL)
      {
	  printf ("Failed to open file %s for output! Stopping!\n",
		  output_file);
	  exit (EXIT_FAILURE);
      }

    fprintf (stream, "\nfloat div ( float top, float bot )\n");
    fprintf (stream, "{if (bot == 0.0)\n");
    fprintf (stream, "   return (1.0);\n");
    fprintf (stream, " else\n");
    fprintf (stream, "   return ( top/bot );\n");
    fprintf (stream, "}\n");

    fprintf (stream, "\nfloat gp( float x )\n{\n");
    fprintf (stream, "  return (%s);\n}\n", output_stack[--stack]);

    fclose (stream);

}				/*end output_program() */



int
main ()
{
    int i, mum, dad;

    read_inputs ();

    generation = 0;

    new_sumfitness = 0.0;
    max_hits = -1;

    initialise_population ();

    while ((++generation <= MAX_GEN) && (max_hits < required_hits))
      {
	  new_sumfitness = 0.0;
	  max_hits = -1;

	  for (i = 0; i < POPULATION_SIZE; i++)
	    {
		mum = select_parent ();
		if (random_number () < PCROSS)
		  {
		      dad = select_parent ();
		      crossover (mum, dad, i);
		      test_fitness (i);
		  }
		else if (random_number () < PMUT)
		  {
		      mutate (mum, i);
		      test_fitness (i);
		  }
		else
		  {
		      copy (mum, i);
		  };
	    }			/*end for - create new_population */

	  display_stats ();

	  replace_old_population ();

      }				/*end while */

    output_program (population[best_prog].string);

    printf ("GP done. %d tests completed\n", num_tests);

    return (0);

}				/*end main() */


/*end program GP*/
