/*
 * C program for Conway's ``game of life''.
 *
 * Input is from command line and a file:
 * 
 * Command-line arguments are as follows:
 *   either the name of an input file or the keyword "random" and the size
 *     of the board and the seed for random-number generation
 *   number of steps
 *   how often to print results (P means print results every P steps)
 *
 * If an input file is specified, it contains a representation of the initial 
 *  board configuration:  N (size of board) and N*N values (each 0 or 1).
 *
 *
 * Parallel version using MPI (and distributing the board among processes)
 */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <mpi.h>

#include "timer.h"

/* data structure for two-dimensional array of ints */
typedef struct twoD_array {
    int rows;       /* number of rows, as passed to build_array */
    int cols;       /* number of columns, as passed to build_array */
    int ** elems;   /* elems[r] points at the first int of row r; build_array
                       carves all rows out of one allocation that starts at
                       elems[0] */
} twoD_array_t;

/* function declarations (comments with code below) */

/* allocation and release of the two-dimensional array */
twoD_array_t * build_array(twoD_array_t * a, int rows, int cols);
void free_array(twoD_array_t * a);

/* board set-up, update and output */
int read_board(FILE* infile, twoD_array_t *board);
int random_board(int size, int seed, twoD_array_t *board);
void update_board(twoD_array_t *board, twoD_array_t *new_board);
void print_board(FILE* outfile, twoD_array_t *board);
void clear_border(twoD_array_t *board);

/* mapping between absolute board rows and processes */
int rows_per_process(int size);
int local_rows(int proc_id, int size);
int local_start_row(int proc_id, int size);
int local_end_row(int proc_id, int size);
int process_owning_row(int row, int size);

/* global variables (both are filled in by main right after MPI_Init) */
int nprocs;     /* number of processes, from MPI_Comm_size */
int myid;       /* rank of this process, from MPI_Comm_rank */

/* message tags */
#define INITIALIZE_TAG  1   /* not referenced by the code in this file */
#define EXCHANGE_TAG    2   /* boundary-row exchange in update_board */
#define PRINT_TAG       3   /* rows sent to process 0 in print_board */

/*
 * parses text as a base-10 int.
 * returns 1 and stores the result in *value on success; returns 0 (and
 *   leaves *value untouched) if text is empty, has trailing characters,
 *   or does not fit in an int.
 */
static int parse_int_arg(const char *text, int *value) {
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if ((end == text) || (*end != '\0') || (errno == ERANGE) ||
            (parsed < INT_MIN) || (parsed > INT_MAX)) {
        return 0;
    }
    *value = (int) parsed;
    return 1;
}

/*
 * prints the usage message (process 0 only), shuts MPI down and exits
 * with EXIT_FAILURE.  does not return.
 * every process is expected to call this for the same reason, since the
 * decision depends only on the command line.
 */
static void usage_exit(const char *program_name) {
    if (myid == 0) {
        fprintf(stderr,
                "usage:  %s [ infile | 'random' boardsize seed ] num_steps print_interval\n",
                program_name);
    }
    MPI_Finalize();
    exit(EXIT_FAILURE);
}

/*
 * main:  parses the command line, builds the initial board (from a file
 * or randomly), then runs the requested number of steps, printing the
 * board every print_interval steps.
 *
 * a print_interval of 0 means "print only the initial board" (it used to
 * cause a division by zero in the step loop).
 */
int main(int argc, char* argv[]) {
    int steps = 0, print_interval = 0, size = 0;
    twoD_array_t board1, board2;
    twoD_array_t *board = &board1;
    twoD_array_t *new_board = &board2;
    double start_time, init_done_time, end_time;
    int return_val, max_return_val;

    if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
        fprintf(stderr, "MPI initialization error\n"); exit(EXIT_FAILURE);
    }
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    start_time = get_time();

    /* process command-line arguments */
    if (argc < 4) {
        usage_exit(argv[0]);
    }

    /* initialize board */
    if (strcmp(argv[1], "random") != 0) {

        /* from file */
        FILE* infile;

        /* check the numeric arguments before opening the file so that no
         * error path has an open file to clean up */
        if (!parse_int_arg(argv[2], &steps) ||
                !parse_int_arg(argv[3], &print_interval)) {
            usage_exit(argv[0]);
        }

        /* the open can fail in only some processes (e.g. no shared file
         * system), so agree on the outcome collectively; otherwise the
         * processes that succeeded would wait forever in the next
         * collective call */
        infile = fopen(argv[1], "r");
        return_val = (infile == NULL) ? 1 : 0;
        if (return_val != 0) {
            fprintf(stderr, "unable to open input file %s\n", argv[1]);
        }
        MPI_Allreduce(&return_val, &max_return_val, 1, MPI_INT, MPI_MAX, 
                MPI_COMM_WORLD);
        if (max_return_val != 0) {
            if (infile != NULL) {
                fclose(infile);
            }
            MPI_Finalize(); exit(EXIT_FAILURE);
        }

        return_val = read_board(infile, board);
        MPI_Allreduce(&return_val, &max_return_val, 1, MPI_INT, MPI_MAX, 
                MPI_COMM_WORLD);
        if (max_return_val != 0) {
            fclose(infile);
            MPI_Finalize(); exit(EXIT_FAILURE);
        }
        size = board->cols-2;
        fclose(infile);
        if (myid == 0) {
            /* print input information:
             * writes this to standard error so it goes with timing information
             * (see below)
             */
            fprintf(stderr, "\n\nInput:  board read from file %s\n", argv[1]);
            fprintf(stderr, "%d steps, print interval %d\n\n", 
                    steps, print_interval);
        }
    }
    else {

        /* with randomly-generated data */
        int seed = 0;
        if (argc < 6) {
            usage_exit(argv[0]);
        }
        if (!parse_int_arg(argv[2], &size) ||
                !parse_int_arg(argv[3], &seed) ||
                !parse_int_arg(argv[4], &steps) ||
                !parse_int_arg(argv[5], &print_interval)) {
            usage_exit(argv[0]);
        }
        /* a negative size cannot describe a board */
        if (size < 0) {
            usage_exit(argv[0]);
        }
        return_val = random_board(size, seed, board);
        MPI_Allreduce(&return_val, &max_return_val, 1, MPI_INT, MPI_MAX, 
                MPI_COMM_WORLD);
        if (max_return_val != 0) {
            MPI_Finalize(); exit(EXIT_FAILURE);
        }
        if (myid == 0) {
            /* print input information:
             * writes this to standard error so it goes with timing information
             * (see below)
             */
            fprintf(stderr, 
                    "\n\nInput:  board of size %d generated with seed %d\n",
                    size, seed);
            fprintf(stderr, 
                    "%d steps, print interval %d\n", steps, print_interval);
        }
    }

    /* create "new board" and clear borders */
    if (build_array(new_board, local_rows(myid, size)+2, size+2) == NULL) {
        fprintf(stderr, "unable to allocate space for board of size %d\n",
                size);
        /* MPI_Abort here since we could fail in only one process */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    clear_border(new_board);

    /* print initial configuration */
    if (myid == 0) {
        fprintf(stdout, "\nInitial board\n\n");
    }
    print_board(stdout, board);
    if (myid == 0) {
        fprintf(stdout, "\n\n");
    }

    init_done_time = get_time();

    /* loop to update board and print */
    for (int step = 0; step < steps; ++step) {
        /* update (results in new_board) */
        update_board(board, new_board);
        /* print; the explicit zero test keeps the modulo well defined */
        if ((print_interval != 0) && (((step+1) % print_interval) == 0)) {
            if (myid == 0) {
                fprintf(stdout, "Board after step %d\n\n", step+1);
            }
            print_board(stdout, new_board);
            if (myid == 0) {
                fprintf(stdout, "\n\n");
            }
        }
        /* swap old and new boards */
        {
            twoD_array_t *temp = board;
            board = new_board;
            new_board = temp;
        }
    }

    end_time = get_time();

    /* print timing information:
     * writes this to standard error so it can be easily separated from
     * rest of output (which could be long)
     */
    if (myid == 0) {
        fprintf(stderr, "\nTotal time with %d processes %g\n", nprocs,
                end_time - start_time);
        fprintf(stderr, "Time not counting initialization %g\n", 
                end_time - init_done_time);
    }

    /* tidy up and return */
    free_array(board);
    free_array(new_board);
    MPI_Finalize();
    return EXIT_SUCCESS;
}

/* 
 * constructs twoD_array structure with rows x cols ints, stored as one
 * contiguous block plus an array of row pointers.
 *
 * returns pointer to structure on success.  returns NULL if rows or cols
 * is not positive, if the requested size does not fit in a size_t, or if
 * space cannot be allocated; in that case a->elems is NULL and nothing
 * needs to be freed.
 *
 * a->rows and a->cols are always set to the values passed in.  the cell
 * contents are not initialized.
 */
twoD_array_t * build_array(twoD_array_t * a, int rows, int cols) {
    size_t nrows, ncols;
    int * cells;

    a->rows = rows;
    a->cols = cols;
    a->elems = NULL;

    /* free_array and clear_border both touch elems[0], so an array with
     * no rows or no columns is never usable */
    if ((rows <= 0) || (cols <= 0)) {
        return NULL;
    }
    nrows = (size_t) rows;
    ncols = (size_t) cols;

    /* do the size arithmetic in size_t and refuse sizes that would wrap;
     * rows * cols as int overflows for large boards */
    if ((nrows > SIZE_MAX / sizeof(int *)) ||
            (ncols > SIZE_MAX / sizeof(int) / nrows)) {
        return NULL;
    }

    if ((a->elems = malloc(nrows * sizeof *a->elems)) == NULL) {
        return NULL;
    }
    if ((cells = malloc(nrows * ncols * sizeof *cells)) == NULL) {
        free(a->elems);
        a->elems = NULL;
        return NULL;
    }
    /* point each row at its slice of the contiguous block */
    for (size_t row = 0; row < nrows; ++row) {
        a->elems[row] = cells + row * ncols;
    }
    return a;
}

/*
 * frees space pointed to by twoD_array structure.
 * does nothing if a is NULL or a->elems is NULL; a->elems is reset to NULL
 * afterwards so that a second call is harmless.
 * the structure must have been filled in by build_array: elems[0] is
 * assumed to be the start of the single block holding all cells.
 */
void free_array(twoD_array_t * a) {
    if ((a == NULL) || (a->elems == NULL)) {
        return;
    }
    free(a->elems[0]);
    free(a->elems);
    a->elems = NULL;
}

/*
 * zeroes the outermost ring of the array:  the first and last rows and
 * the first and last columns.
 */
void clear_border(twoD_array_t *board) {
    const int last_row = board->rows - 1;
    const int last_col = board->cols - 1;

    /* top and bottom rows */
    for (int col = 0; col <= last_col; ++col) {
        board->elems[0][col] = 0;
        board->elems[last_row][col] = 0;
    }
    /* left and right columns */
    for (int row = 0; row <= last_row; ++row) {
        int *cells = board->elems[row];
        cells[0] = 0;
        cells[last_col] = 0;
    }
}

/*
 * reads initial configuration from infile:  the board size N followed by
 *   N*N values, each 0 or 1.
 * returns 0 if all is well, otherwise prints error message and returns
 *   a positive non-zero value (1 for missing or invalid input, 2 if the
 *   board could not be allocated).
 * all processes read the whole file and store only "their" values.
 * on success the board has been built (local rows + 2 by N + 2) and its
 *   border cleared.  on failure no board memory is left allocated by this
 *   function and the board must not be used.
 */
int read_board(FILE* infile, twoD_array_t *board) {
    int size, value, start_row, end_row;

    if (fscanf(infile, "%d", &size) != 1) {
        if (myid == 0) {
            fprintf(stderr, "unable to read size of board\n");
        }
        return 1;
    }
    /* a negative size would give nonsensical row counts and allocations */
    if (size < 0) {
        if (myid == 0) {
            fprintf(stderr, "invalid board size %d\n", size);
        }
        return 1;
    }
    start_row = local_start_row(myid, size);
    end_row = local_end_row(myid, size);

    if (build_array(board, local_rows(myid, size)+2, size+2) == NULL) {
        fprintf(stderr, "unable to allocate space for board of size %d\n",
                size);
        return 2;
    }
    for (int i = 1; i <= size; ++i) {
        for (int j = 1; j <= size; ++j) {
            /* value is examined only when fscanf actually stored one */
            if ((fscanf(infile, "%d", &value) != 1) ||
                    ((value != 0) && (value != 1))) {
                fprintf(stderr, "unable to read values for board\n");
                free_array(board);
                return 1;
            }
            /* keep the value only if absolute row i belongs to this
             * process; local row 0 is the border row */
            if ((start_row <= i) && (i < end_row)) {
                board->elems[i-start_row+1][j] = value;
            }
        }
    }
    clear_border(board);
    return 0;
}

/*
 * fills the board with pseudo-random 0/1 values for the given size and
 *   seed.
 * returns 0 on success; prints an error message and returns 2 if the
 *   board cannot be allocated.
 * every process seeds rand() identically and draws one number per cell
 *   of the whole board, keeping only the cells in its own rows, so the
 *   sequence of draws is the same in every process.
 */
int random_board(int size, int seed, twoD_array_t *board) {
    const int first_row = local_start_row(myid, size);
    const int row_limit = local_end_row(myid, size);    /* one past last */

    if (build_array(board, local_rows(myid, size)+2, size+2) == NULL) {
        fprintf(stderr, "unable to allocate space for board of size %d\n",
                size);
        return 2;
    }

    srand(seed);
    for (int row = 1; row <= size; ++row) {
        const int mine = (row >= first_row) && (row < row_limit);
        for (int col = 1; col <= size; ++col) {
            /* the draw happens for every cell, owned or not */
            const int cell = (rand() >= (RAND_MAX/2)) ? 1 : 0;
            if (mine) {
                board->elems[row-first_row+1][col] = cell;
            }
        }
    }
    clear_border(board);
    return 0;
}

/*
 * computes the next generation:  exchanges boundary rows with the
 * neighboring processes, then applies the game-of-life rule to every
 * interior cell of board and stores the result in new_board.
 * board and new_board must have the same dimensions.
 */
void update_board(twoD_array_t *board, twoD_array_t *new_board) {
    const int size = board->cols - 2;
    const int has_above = (myid != 0);
    const int has_below = (myid != (nprocs-1));
    const int last_row = board->rows - 2;
    const int last_col = board->cols - 2;
    MPI_Status status;
    MPI_Request recv_from_above, recv_from_below;
    MPI_Request send_to_above, send_to_below;

    /* exchange boundary rows with neighbors -- nonblocking calls are used
     * (per the original author) to avoid possible deadlock if messages
     * are too big */

    /* post receives:  neighbor's bottom row goes into our row 0,
     * neighbor's top row goes into the row after our last owned row */
    if (has_above) {
        MPI_Irecv(&(board->elems[0][1]), 
                size, MPI_INT, myid-1, EXCHANGE_TAG, MPI_COMM_WORLD,
                &recv_from_above);
    }
    if (has_below) {
        MPI_Irecv(&(board->elems[local_rows(myid, size)+1][1]), 
                size, MPI_INT, myid+1, EXCHANGE_TAG, MPI_COMM_WORLD,
                &recv_from_below);
    }

    /* post sends:  our first owned row goes up, our last owned row down */
    if (has_above) {
        MPI_Isend(&(board->elems[1][1]), 
                size, MPI_INT, myid-1, EXCHANGE_TAG, MPI_COMM_WORLD,
                &send_to_above);
    }
    if (has_below) {
        MPI_Isend(&(board->elems[local_rows(myid, size)][1]), 
                size, MPI_INT, myid+1, EXCHANGE_TAG, MPI_COMM_WORLD,
                &send_to_below);
    }

    /* wait for all four operations, receives first */
    if (has_above) {
        MPI_Wait(&recv_from_above, &status);
    }
    if (has_below) {
        MPI_Wait(&recv_from_below, &status);
    }
    if (has_above) {
        MPI_Wait(&send_to_above, &status);
    }
    if (has_below) {
        MPI_Wait(&send_to_below, &status);
    }

    /* apply the rule to every interior cell */
    for (int r = 1; r <= last_row; ++r) {
        const int *up = board->elems[r-1];
        const int *mid = board->elems[r];
        const int *down = board->elems[r+1];
        int *out = new_board->elems[r];

        for (int c = 1; c <= last_col; ++c) {
            const int live_nbrs =
                up[c-1] + up[c] + up[c+1] +
                mid[c-1] + mid[c+1] +
                down[c-1] + down[c] + down[c+1];

            /* a cell is alive next step with exactly 3 live neighbors,
             * or if it is alive now and has exactly 2 */
            if ((live_nbrs == 3) || ((mid[c] == 1) && (live_nbrs == 2))) {
                out[c] = 1;
            }
            else {
                out[c] = 0;
            }
        }
    }
}

/*
 * prints current board configuration to outfile.
 * process 0 does all the printing, one absolute row at a time:  its own
 * rows come straight from board, every other row is received from the
 * process that owns it.  all other processes send their rows, in order,
 * to process 0.  dead cells print as ". " and live cells as "1 ".
 */
void print_board(FILE* outfile, twoD_array_t *board) {
    const int size = board->cols - 2;
    int *rowbuf;
    MPI_Status status;

    if (myid != 0) {
        /* not the printing process:  ship every owned row to process 0 */
        const int owned = local_rows(myid, size);
        for (int r = 1; r <= owned; ++r) {
            MPI_Send(&(board->elems[r][1]), 
                    size, MPI_INT, 0, PRINT_TAG, MPI_COMM_WORLD);
        }
        return;
    }

    /* process 0:  buffer for one row received from another process */
    rowbuf = malloc(size * sizeof(int));
    if (rowbuf == NULL) {
        fprintf(stderr, "unable to allocate space for printing board\n");
        /* FIXME do this more gracefully? */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    for (int row = 1; row <= size; ++row) {
        const int owner = process_owning_row(row, size);
        const int *cells;

        if (owner == 0) {
            /* for process 0 the absolute and local row numbers agree */
            cells = &(board->elems[row][1]);
        }
        else {
            MPI_Recv(rowbuf, size, MPI_INT, owner,
                    PRINT_TAG, MPI_COMM_WORLD, &status);
            cells = rowbuf;
        }
        for (int k = 0; k < size; ++k) {
            fputs((cells[k] == 0) ? ". " : "1 ", outfile);
        }
        fputs("\n", outfile);
    }
    free(rowbuf);
}

/* returns the nominal number of rows per process:  size divided by the
 * number of processes, rounded up (for non-negative size) */
int rows_per_process(int size) {
    const int round_up = nprocs - 1;
    return (size + round_up) / nprocs;
}
/*
 * returns the number of board rows owned by process proc_id.
 *
 * rows are dealt out in blocks of rows_per_process(size), lowest rank
 * first, so a process gets a full block, whatever is left over, or
 * nothing at all once the rows have run out.  the result is never
 * negative and never more than the rows that actually remain (the
 * unclamped formula went negative for the last process, and over-counted
 * for others, when there are many processes relative to size, e.g. size 5
 * on 4 processes).
 */
int local_rows(int proc_id, int size) {
    const int block = rows_per_process(size);
    /* rows still unassigned after all lower-ranked processes took theirs */
    const int remaining = size - (proc_id * block);

    if (remaining <= 0) {
        return 0;
    }
    return (remaining < block) ? remaining : block;
}
/* returns the absolute (1-based) number of the first row assigned to
 * process proc_id */
int local_start_row(int proc_id, int size) {
    const int rows_before = proc_id * rows_per_process(size);
    return rows_before + 1;
}
/* returns the absolute row number one past the last row of process
 * proc_id (callers test "row < end"); for the last process this is
 * size + 1 */
int local_end_row(int proc_id, int size) {
    const int is_last = (proc_id == (nprocs-1));
    return is_last
        ? (size + 1)
        : (local_start_row(proc_id, size) + rows_per_process(size));
}
/* returns ID of the process that owns absolute (1-based) row number row */
int process_owning_row(int row, int size) {
    const int zero_based_row = row - 1;
    return zero_based_row / rows_per_process(size);
}

