/******************************************************************************
* Tyler Simon 
* 4/21/2011
*
* FILE: openmp_mpi_work.c
* DESCRIPTION:
* Do some work, specified in cputest.c using OpenMP and MPI.
*
* COMPILE:
* mpicc -o openmp_mpi_work -fopenmp openmp_mpi_work.c cputest.c
*
* RUN:
* time mpiexec.hydra -f ~/imachinefile -np 8 ./openmp_mpi_work
*
* NOTES:
* Setting DEBUG will output per-node core usage statistics gathered from the
* getpinfo function in cputest.c. Also increase or decrease N to change the
* amount of work.
*
* PERFORMANCE: 
* Using 'np' Nodes, 8 core Nehalem nodes with N=100000000 and DEBUG=0
* Using 1 MPI Rank per node with 8 threads
  [MPI Ranks] [average runtime (sec)]
  1 20.9
  2 11.6 
  4 6.7
  8 4.2
******************************************************************************/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <mpi.h>
#include <sys/types.h>
#include <unistd.h>
/* Total amount of work for the whole job. main divides it by the number of
 * MPI ranks and then by the number of OpenMP threads to get each thread's
 * share. */
#define N       100000000

/* Debug switch: when nonzero, main prints one line per thread and calls
 * getpinfo(). Defaults to 0 unless already defined at compile time
 * (e.g. -DDEBUG=1). */
#ifndef DEBUG
#define DEBUG 0
#endif

/* External function declarations (the file header places them in cputest.c). */
/* Worker routine: main calls it once per OpenMP thread with the host name,
 * that thread's share of N, and the thread id. The return value is stored in
 * computeTime by main -- presumably the elapsed compute time; confirm against
 * cputest.c. */
double cpu_test(char *,long int, int);
/* Called by main with the host name when DEBUG is set; per the file header it
 * gathers per-node core usage statistics. */
void getpinfo(char *);

int main (int argc, char *argv[]) 
{
int nthreads, tid, chunk;
char hostname[255];
double computeTime;

/*MPI variables*/
int numRanks,myRank;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD,&numRanks);
MPI_Comm_rank(MPI_COMM_WORLD,&myRank);

/*if we want 1thread per proc*/
omp_set_num_threads(omp_get_num_procs());

gethostname(hostname,255);

/* OpenMP work sharing on the node */
#pragma omp parallel shared(nthreads) private(tid,chunk)
{
  tid = omp_get_thread_num();
  /*get number of threads currently in parallel loop*/
  nthreads = omp_get_num_threads();
  chunk=N/numRanks/nthreads;

  /*Here is our Acutual worker process, replace to suit*/
  computeTime=cpu_test(hostname,chunk, tid);

if(DEBUG)printf("[%s] [%d] %d %g\n",hostname,tid,chunk,computeTime);

  }  /* end of parallel OpenMP section */
if(DEBUG)getpinfo(hostname);
MPI_Finalize();
return 0;
}//end main