/* YOUR FILE-HEADER COMMENT HERE */ #define _POSIX_C_SOURCE 199309L #include #include #include #include #include /** * Number of times to execute target function. */ #define NUM_TRIALS 50 /** * Calculate the nth value in the Fibonacci sequence, unoptimized. * You do not need to modify this function. * @param[in] n Which Fibonacci number to return. * @return Value of the @a n-th Fibonacci number. */ static int __attribute__ ((optimize("O0"))) fibonacci_unoptimized(int n) { if (n <= 1) return n; return fibonacci_unoptimized(n - 1) + fibonacci_unoptimized(n - 2); } /** * Calculate the nth value in the Fibonacci sequence, optimized. * You do not need to modify this function. * @param[in] n Which Fibonacci number to return. * @return Value of the @a n-th Fibonacci number. */ static int __attribute__ ((optimize("O2"))) fibonacci_optimized(int n) { if (n <= 1) return n; return fibonacci_optimized(n - 1) + fibonacci_optimized(n - 2); } /** * Calculate the nth value in the "Tribonacci" sequence, optimized. * This sequence is defined as: * -# n if n is less than or equal to 2 * -# else trib(n - 1) + trib(n - 2) + trib(n - 3) * @param[in] n Which Tribonacci number to return. * @return Value of the @a n-th Tribonacci number. */ static int __attribute__ ((optimize("O2"))) tribonacci_optimized(int n) { /* PART 3: YOUR CODE HERE */ } /** * Let a = vals[0], b = vals[1], c = vals[2], and d = vals[3], using 4 * ldrs. Find "X" = a + (7 * b), using 7 adds. Then find "Y" = c - * (7 * d), using 7 subtracts. Then add "X" and Y", returning the sum. * * The incoming pointer will be in x0, and array length will be in * x1. As per the ARMv8-A calling convention, registers x9 through x15 * may be safely used within this routine. * * This assembly routine should be exactly 20 instructions long. * * @param[in] vals Array of at least 4 values from which to perform * calculations. * @param[in] num_elems Number of elements of the array, guaranteed to * be at least 4. * @return X + Y */ extern int asm_equation4(int vals[], int num_elems); /** * Given an array of at least 4 integers, calculate how many of those * elements are evenly divisible by seven. * * @param[in] vals Array of at least 4 values from which to perform * calculations. * @param[in] num_elems Number of elements of the array, guaranteed to * be at least 4. * @return Number of elements that are evenly divisible by 7 */ static int __attribute__ ((optimize("O2"))) num_divisible_by_7(int vals[], int num_elems) { /* PART 4: YOUR CODE HERE */ } /** * Given an array of at least 4 integers, calculate how many of those * elements are evenly divisible by seven. * * SPECIAL RESTRICTION: This function may not use multiplication, * division, or modulus instructions. It must use a loop to determine * if a value is divisible by seven. * * @param[in] vals Array of at least 4 values from which to perform * calculations. * @param[in] num_elems Number of elements of the array, guaranteed to * be at least 4. * @return Number of elements that are evenly divisible by 7 */ extern int asm_num_divisible_by_7(int vals[], int num_elems); /** * Calculate and return the difference between two clock values. * You do not need to modify this function. * @param[in] stop Minuend to subtract from * @param[in] start Subtrahend to subtract * @return Number of nanoseconds that elapsed */ static unsigned long clock_diff(const struct timespec *stop, const struct timespec *start) { time_t sec_diff = stop->tv_sec - start->tv_sec; unsigned long nsec_diff; if (stop->tv_nsec < start->tv_nsec) { nsec_diff = 1000000000lu + stop->tv_nsec - start->tv_nsec; sec_diff--; } else nsec_diff = stop->tv_nsec - start->tv_nsec; nsec_diff += 1000000000lu * sec_diff; return nsec_diff; } /** * This function does nothing, intentionally. */ static int __attribute__ ((optimize("O0"))) do_nothing(int val) { return val; } typedef int (*func1) (int); typedef int (*func4) (int[], int); /** * Lambda to qsort(), to sort a list of unsigned longs in ascending * order. */ static int ul_comp(const void *p1, const void *p2) { unsigned long a = *((unsigned long *)(p1)); unsigned long b = *((unsigned long *)(p2)); return b - a; } static unsigned long overhead_time; /** * Measure how long the system timing takes itself to execute. * You do not need to modify this function. * @return Overhead time, in nanoseconds */ static void measure_overhead(void) { unsigned long samples[NUM_TRIALS]; overhead_time = 0; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; clock_gettime(CLOCK_MONOTONIC, &start); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); } qsort(samples, NUM_TRIALS, sizeof(samples[0]), ul_comp); overhead_time = samples[NUM_TRIALS / 2]; if (overhead_time < 1) overhead_time = 1; } /** * Measure how long it takes to execute a function, passing it one * parameter. Call it multiple times, and return the median execution * time. * * You do not need to modify this function. * * @param[in] target_func Function to execute * @param[in] val Parameter to pass into function * @param[out] retval Return value from calling @a target_func * @return Median execution time, in nanoseconds */ static unsigned long measure_function1(func1 target_func, int val, int *retval) { unsigned long samples[NUM_TRIALS]; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; int r; clock_gettime(CLOCK_MONOTONIC, &start); r = target_func(val); clock_gettime(CLOCK_MONOTONIC, &stop); *retval = r; samples[i] = clock_diff(&stop, &start); } qsort(samples, NUM_TRIALS, sizeof(samples[0]), ul_comp); unsigned long median = samples[NUM_TRIALS / 2]; if (median > overhead_time) return median - overhead_time; return 1; } /** * Measure how long it takes to execute a function, passing it an * array of 4 parameters. Call it multiple times, and return the * median execution time. * * You do not need to modify this function. * * @param[in] target_func Function to execute * @param[in] part4_args Array of values to pass into @a target_func * @param[in] part4_len Number of elements in @a part4_args * @param[out] retval Return value from calling @a target_func * @return Median execution time, in nanoseconds */ static unsigned long measure_function4(func4 target_func, int *part4_args, int part4_len, int *retval) { unsigned long samples[NUM_TRIALS]; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; int *v = malloc(sizeof(*v) * part4_len); if (!v) { fprintf(stderr, "Could not create temp array\n"); exit(EXIT_FAILURE); } memcpy(v, part4_args, sizeof(*v) * part4_len); int r; clock_gettime(CLOCK_MONOTONIC, &start); r = target_func(v, part4_len); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); *retval = r; free(v); } qsort(samples, NUM_TRIALS, sizeof(samples[0]), ul_comp); unsigned long median = samples[NUM_TRIALS / 2]; if (median > overhead_time) return median - overhead_time; return 1; } int main(int argc, char *argv[]) { measure_overhead(); int retval; unsigned long time_do_nothing = measure_function1(do_nothing, 0, &retval); printf("do_nothing: execution time = %lu ns\n", time_do_nothing); if (argc < 2) { fprintf(stderr, "Need at least one argument to continue\n"); exit(EXIT_FAILURE); } int target = atoi(argv[1]); unsigned long time_fib_unopt = measure_function1(fibonacci_unoptimized, target, &retval); printf ("fibonacci (unoptimized): result = %d, execution time = %lu ns\n", retval, time_fib_unopt); unsigned long time_fib_opt = measure_function1(fibonacci_optimized, target, &retval); printf("fibonacci (optimized): result = %d, execution time = %lu ns\n", retval, time_fib_opt); /* ADD CODE FOR YOUR PART 2 HERE */ /* ADD CODE FOR YOUR PART 3 HERE */ if (argc < 5) { fprintf(stderr, "Need at least four arguments to continue\n"); exit(EXIT_FAILURE); } int part4_len = argc - 1; int *part4_args = malloc(sizeof(*part4_args) * part4_len); if (!part4_args) { fprintf(stderr, "Could not allocate part4_args\n"); exit(EXIT_FAILURE); } for (int i = 0; i < part4_len; i++) part4_args[i] = atoi(argv[i + 1]); /* ADD CODE FOR YOUR PART 4 HERE */ free(part4_args); return 0; }