/* YOUR FILE-HEADER COMMENT HERE */ /* PART 1: YOUR COMMENT HERE */ #define _POSIX_C_SOURCE 199309L #include #include #include #include #include #include #if defined(__clang__) #define OPTIMIZED minsize #define UNOPTIMIZED optnone #else #define OPTIMIZED optimize("O2") #define UNOPTIMIZED optimize("O0") #endif /** * Number of times to execute target function. */ #define NUM_TRIALS 50 /** * This function does nothing, intentionally. */ static int __attribute__((UNOPTIMIZED)) do_nothing(int val) { return val; } /** * Calculate the nth value in the Fibonacci sequence, unoptimized. * You do not need to modify this function. * @param[in] n Which Fibonacci number to return. * @return Value of the @a n-th Fibonacci number. */ static int __attribute__((UNOPTIMIZED)) fibonacci_unoptimized(int n) { if (n <= 1) return n; return fibonacci_unoptimized(n - 1) + fibonacci_unoptimized(n - 2); } /** * Calculate the nth value in the Fibonacci sequence, optimized. * You do not need to modify this function. * @param[in] n Which Fibonacci number to return. * @return Value of the @a n-th Fibonacci number. */ static int __attribute__((OPTIMIZED)) fibonacci_optimized(int n) { if (n <= 1) return n; return fibonacci_optimized(n - 1) + fibonacci_optimized(n - 2); } /** * Calculate the number of fizzes and the number of buzzes for a * target value, using a loop. * * Counting from one up to and including @a target, determine number * of fizzes (values that are evenly divisible by three) and buzzes * (values that are evenly divisible by five). If a value is divisible * by both, increment both @a num_fizz and @a num_buzz. * * SPECIAL RESTRICTION: This function may only be * written using an ITERATIVE loop. It may not use * multiplication or division. * * @param[in] target Target value to count towards * @param[out] num_fizz Number of fizzes; will be set to 0 before * entering this function * @param[out] num_buzz Number of buzzes; will be set to 0 before * entering this function */ static void __attribute((UNOPTIMIZED)) fizzbuzz_iterative(int target, int *num_fizz, int *num_buzz) { /* PART 3: YOUR CODE HERE */ } /** * Calculate the number of fizzes and the number of buzzes for a * target value, using recursion. * * Counting from one up to and including @a target, determine number * of fizzes (values that are evenly divisible by three) and buzzes * (values that are evenly divisible by five). If a value is divisible * by both, increment both @a num_fizz and @a num_buzz. * * SPECIAL RESTRICTION: This function may only be * written using an RECURSIVE loop. It may not use * multiplication or division. * * @param[in] target Target value to count towards * @param[out] num_fizz Number of fizzes; will be set to 0 before * entering this function * @param[out] num_buzz Number of buzzes; will be set to 0 before * entering this function */ static void __attribute((UNOPTIMIZED)) fizzbuzz_recursive(int target, int *num_fizz, int *num_buzz) { /* PART 3: YOUR CODE HERE */ } /** * Calculate the number of fizzes and the number of buzzes for a * target value, hand-written in assembly. * * Counting from one up to and including @a target, determine number * of fizzes (values that are evenly divisible by three) and buzzes * (values that are evenly divisible by five). If a value is divisible * by both, increment both @a num_fizz and @a num_buzz. * * SPECIAL RESTRICTION: This function may not use * multiplication or division. * * @param[in] target Target value to count towards * @param[out] num_fizz Number of fizzes; will be set to 0 before * entering this function * @param[out] num_buzz Number of buzzes; will be set to 0 before * entering this function */ extern void fizzbuzz_asm(int target, int *num_fizz, int *num_buzz); /** * Copy bytes from @a src to @a dst. * You do not need to modify this function. * @param[out] dst Starting address for destination buffer * @param[in] src Starting address for source buffer * @param[in] n Number of bytes to copy * @return Always @a dst */ void *memcpy_8bit(void *restrict dst, const void *restrict src, size_t n) { uint8_t *restrict d = (uint8_t * restrict)dst; const uint8_t *restrict s = (const uint8_t * restrict)src; while (n > 0) { *d = *s; d++; s++; n--; } return dst; } /** * Copy bytes from @a src to @a dst, in increments of 64 bits (8 * bytes) at a time. * You do not need to modify this function. * @param[out] dst Starting address for destination buffer * @param[in] src Starting address for source buffer * @param[in] n Number of bytes to copy. This must be evenly divisible * by 8. * @return Always @a dst */ void *memcpy_64bit(void *restrict dst, const void *restrict src, size_t n) { uint64_t *restrict d = (uint64_t * restrict)dst; const uint64_t *restrict s = (const uint64_t * restrict)src; while (n > 0) { *d = *s; d++; s++; n -= 8; } return dst; } /** * Copy bytes from @a src to @a dst, in increments of 256 bits (32 * bytes) at a time. * @param[out] dst Starting address for destination buffer * @param[in] src Starting address for source buffer * @param[in] n Number of bytes to copy. This must be evenly divisible * by 32. * @return Always @a dst */ extern void *asm_memcpy_256bit(void *restrict dst, const void *restrict src, size_t n); /** * Calculate and return the difference between two clock values. * You do not need to modify this function. * @param[in] stop Minuend to subtract from * @param[in] start Subtrahend to subtract * @return Number of nanoseconds that elapsed */ static unsigned long clock_diff(const struct timespec *stop, const struct timespec *start) { time_t sec_diff = stop->tv_sec - start->tv_sec; unsigned long nsec_diff; if (stop->tv_nsec < start->tv_nsec) { nsec_diff = 1000000000lu + stop->tv_nsec - start->tv_nsec; sec_diff--; } else nsec_diff = stop->tv_nsec - start->tv_nsec; nsec_diff += 1000000000lu * sec_diff; return nsec_diff; } typedef int (*func1)(int); typedef void (*fizzbuzz_func)(int, int *, int *); typedef void *(*func3)(void *restrict, const void *restrict, size_t); /** * Lambda to qsort(), to sort a list of unsigned longs in ascending * order. */ static int ul_comp(const void *p1, const void *p2) { unsigned long a = *((unsigned long *)(p1)); unsigned long b = *((unsigned long *)(p2)); return b - a; } static double overhead_time; /** * Calculate the execution time for a function, given a series of data * points. Compensate for how long it takes to measure time itself. * * You do not need to modify this function. * * @param[in] samples Array of raw execution times * @param[in] num_elems Number of elements in @a samples * @return Average execution time, in nanoseconds */ static double calc_execution_time(unsigned long *samples, unsigned num_elems) { qsort(samples, NUM_TRIALS, sizeof(samples[0]), ul_comp); double median; median = samples[NUM_TRIALS / 2]; if (median >= overhead_time) return median - overhead_time; return 0.1; } /** * Measure how long the system timing takes itself to execute. * You do not need to modify this function. * @return Overhead time, in nanoseconds */ static void measure_overhead(void) { unsigned long samples[NUM_TRIALS]; overhead_time = 0; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; clock_gettime(CLOCK_MONOTONIC, &start); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); } overhead_time = calc_execution_time(samples, NUM_TRIALS); } /** * Measure how long it takes to execute a function, passing it one * parameter. Call it multiple times, and return the median execution * time. * * You do not need to modify this function. * * @param[in] target_func Function to execute * @param[in] val Parameter to pass into function * @param[out] retval Return value from calling @a target_func * @return Median execution time, in nanoseconds */ static double measure_function1(func1 target_func, int val, int *retval) { unsigned long samples[NUM_TRIALS]; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; int r; clock_gettime(CLOCK_MONOTONIC, &start); r = target_func(val); clock_gettime(CLOCK_MONOTONIC, &stop); *retval = r; samples[i] = clock_diff(&stop, &start); } return calc_execution_time(samples, NUM_TRIALS); } /** * Measure how long it takes to execute a fizzbuzz function, passing * it three parameters. Call it multiple times, and return the median * execution time. * * You do not need to modify this function. * * @param[in] target_func Function to execute * @param[in] target_val Target value to pass into function * @param[out] num_fizz Number of fizzes calculated for @a target_val * @param[out] num_buzz Number of buzzes calculated for @a target_val * @return Median execution time, in nanoseconds */ static double measure_fizzbuzz(fizzbuzz_func target_func, int target_val, int *num_fizz, int *num_buzz) { unsigned long samples[NUM_TRIALS]; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; *num_fizz = 0; *num_buzz = 0; clock_gettime(CLOCK_MONOTONIC, &start); target_func(target_val, num_fizz, num_buzz); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); } return calc_execution_time(samples, NUM_TRIALS); } /** * Measure how long it takes to execute fizzbuzz_recursive() against * an array of 3 parameters. Return the median execution time. * * You do not need to modify this function. * * @param[in] part4_args Array of values to pass into * fizzbuzz_recursive() * @return Median execution time, in nanoseconds */ static double measure_fizzbuzz_calls(const int *part4_args) { unsigned long samples[NUM_TRIALS]; int num_fizz = 0; int num_buzz = 0; for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; clock_gettime(CLOCK_MONOTONIC, &start); fizzbuzz_recursive(part4_args[0], &num_fizz, &num_buzz); fizzbuzz_recursive(part4_args[1], &num_fizz, &num_buzz); fizzbuzz_recursive(part4_args[2], &num_fizz, &num_buzz); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); } return calc_execution_time(samples, NUM_TRIALS); } /** * Measure how long it takes to execute a memcpy()-like function, * passing three parameters. Call it multiple times, and return the * median execution time. * * You do not need to modify this function. * * @param[in] target_func Function to execute * @param[in] n Number of bytes to copy * @param[out] retval Return value from calling @a target_func * @return Median execution time, in nanoseconds */ static double measure_function3(func3 target_func, size_t n) { static unsigned seed = 0; unsigned long samples[NUM_TRIALS]; unsigned char *src = malloc(n); if (!src) { perror("malloc"); exit(EXIT_FAILURE); } unsigned char *dst = malloc(n); if (!dst) { perror("malloc"); exit(EXIT_FAILURE); } seed++; memset(src, 0xa5 + seed, n); memset(dst, 0x5a - seed, n); target_func(dst, src, n); unsigned char *s = src; unsigned char *d = dst; for (size_t i = 0; i < n; i++) { if (*s != *d) { fprintf(stderr, "WARNING! memcpy implementation is incorrect!\n"); fprintf(stderr, "First incorrect byte:\n"); fprintf(stderr, " offset %zu: is 0x%02x, should be 0x%02x\n", i, *d, *s); exit(EXIT_FAILURE); } s++; d++; } for (unsigned i = 0; i < NUM_TRIALS; i++) { struct timespec start, stop; clock_gettime(CLOCK_MONOTONIC, &start); target_func(dst, src, n); clock_gettime(CLOCK_MONOTONIC, &stop); samples[i] = clock_diff(&stop, &start); } free(src); free(dst); return calc_execution_time(samples, NUM_TRIALS); } int main(int argc, char *argv[]) { measure_overhead(); int retval; double time_do_nothing = measure_function1(do_nothing, 0, &retval); printf("PART 1:\n"); printf("do_nothing: execution time = %f ns\n", time_do_nothing); if (argc < 2) { fprintf(stderr, "Need at least one argument to continue\n"); exit(EXIT_FAILURE); } int target = atoi(argv[1]); printf("\nPART 2:\n"); double time_fib_unopt = measure_function1(fibonacci_unoptimized, target, &retval); printf ("fibonacci (unoptimized): result = %d, execution time = %f ns\n", retval, time_fib_unopt); double time_fib_opt = measure_function1(fibonacci_optimized, target, &retval); printf("fibonacci (optimized): result = %d, execution time = %f ns\n", retval, time_fib_opt); /* PART 2: YOUR CODE HERE */ if (argc < 3) { fprintf(stderr, "Need at least two arguments to continue\n"); exit(EXIT_FAILURE); } target = atoi(argv[2]); printf("\nPART 3:\n"); int num_fizz; int num_buzz; double time_fizzbuzz_iter = measure_fizzbuzz(fizzbuzz_iterative, target, &num_fizz, &num_buzz); printf("target fizzbuzz value is %d\n", target); printf(" iterative: num fizz: %d, num buzz: %d, execution time = %f ns\n", num_fizz, num_buzz, time_fizzbuzz_iter); double time_fizzbuzz_recursive = measure_fizzbuzz(fizzbuzz_recursive, target, &num_fizz, &num_buzz); printf(" recursive: num fizz: %d, num buzz: %d, execution time = %f ns\n", num_fizz, num_buzz, time_fizzbuzz_recursive); double time_fizzbuzz_asm = measure_fizzbuzz(fizzbuzz_asm, target, &num_fizz, &num_buzz); printf(" assembly: num fizz: %d, num buzz: %d, execution time = %f ns\n", num_fizz, num_buzz, time_fizzbuzz_asm); if (argc < 6) { fprintf(stderr, "Need at least five arguments to continue\n"); exit(EXIT_FAILURE); } int part4_args[3]; for (int i = 0; i < 3; i++) { part4_args[i] = atoi(argv[i + 3]); } printf("\nPART 4:\n"); /* PART 4: YOUR CODE HERE */ double actual_time = measure_fizzbuzz_calls(part4_args); printf(" actual = %f ns\n", actual_time); if (argc < 7) { fprintf(stderr, "Need at least six arguments to continue\n"); exit(EXIT_FAILURE); } target = atoi(argv[6]); if ((target <= 0) || ((target & 0x7f) != 0)) { fprintf(stderr, "Sixth argument must be positive and evenly divisible by 128\n"); exit(EXIT_FAILURE); } printf("\nPART 5:\n"); double time_memcpy = measure_function3(memcpy, target); printf("memcpy (built-in): execution time = %f ns\n", time_memcpy); double time_memcpy_8bit = measure_function3(memcpy_8bit, target); printf("memcpy (8 bits at a time): execution time = %f ns\n", time_memcpy_8bit); double time_memcpy_64bit = measure_function3(memcpy_64bit, target); printf("memcpy (64 bits at a time): execution time = %f ns\n", time_memcpy_64bit); double time_memcpy_256bit = measure_function3(asm_memcpy_256bit, target); printf("memcpy (256 bits at a time): execution time = %f ns\n", time_memcpy_256bit); return 0; }