#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include "em_device.h"
#include "em_chip.h"
#include "em_cmu.h"
#include "em_emu.h"
#include "em_rtc.h"
#include "bsp.h"
#include "bsp_trace.h"

// Core clock divider, applied in main() through
// CMU_ClockDivSet(cmuClock_CORE, CORE_CLKDIV).
// TODO: set to 1 (48 MHz), 2 (24 MHz), or 4 (12 MHz)
#define CORE_CLKDIV 1

///////////////////////////////////////////////////////////
// TASK 1

// Number of times main() calls workload() each time the ready flag is
// raised, i.e. the amount of work that makes up one workload period.
#define WORKLOAD_FACTOR 250

// Real-time counter properties.
// RTC_FREQ:   frequency in Hz of the clock feeding the RTC prescaler (32768);
//             presumably the LFXO selected in rtc_setup() -- confirm against
//             the board's crystal.
// RTC_CLKDIV: divider passed to CMU_ClockDivSet() for the RTC clock, so the
//             counter ticks at RTC_FREQ / RTC_CLKDIV = 64 Hz.
#define RTC_FREQ (32 * 1024)
#define RTC_CLKDIV 512

// Workload data: three square MAT_ROWS x MAT_ROWS matrices stored as flat
// arrays of MAT_SIZE elements; workload() indexes them as [row * n + col].
// mat_a and mat_b are the operands filled by init_workload(), mat_c receives
// the product computed in main()'s workload loop.
#define MAT_ROWS 8
#define MAT_SIZE (MAT_ROWS * MAT_ROWS)
uint32_t mat_a[MAT_SIZE];
uint32_t mat_b[MAT_SIZE];
uint32_t mat_c[MAT_SIZE];

// Ready flag, controls start of workload processing: set to 1 by
// RTC_IRQHandler(), polled and reset to 0 by the main loop. Declared volatile
// because it is written in the interrupt handler while main() polls it.
volatile uint32_t ready = 0;

// RTC interrupt handler: acknowledges the COMP0 interrupt enabled in
// rtc_setup() and tells the main loop that the next workload period may start.
void RTC_IRQHandler(void) {
  // clear interrupt source (COMP0 interrupt flag)
  RTC_IntClear(RTC_IFC_COMP0);
  // set ready flag; main() polls it and resets it to 0
  ready = 1;
}

// Setup of periodic RTC interrupts: selects and divides the RTC clock,
// programs compare channel 0 as the counter top value, enables the COMP0
// interrupt and finally starts the counter. Takes no arguments and returns
// nothing; the resulting interrupts are served by RTC_IRQHandler().
void rtc_setup(void) {
  RTC_Init_TypeDef rtcInit = RTC_INIT_DEFAULT;

  // enable LE domain registers
  CMU_ClockEnable(cmuClock_CORELE, true);

  // use LFXO as the LFA clock source for EM1 & EM2 (accurate osc.)
  CMU_ClockSelectSet(cmuClock_LFA, cmuSelect_LFXO);

  // divide the RTC clock by RTC_CLKDIV (512) to reduce power consumption
  CMU_ClockDivSet(cmuClock_RTC, RTC_CLKDIV);

  // enable RTC clock
  CMU_ClockEnable(cmuClock_RTC, true);

  // initialize RTC, starting from the library defaults
  rtcInit.enable   = false;  /* Do not start RTC after initialization is complete. */
  rtcInit.debugRun = false;  /* Halt RTC when debugging. */
  rtcInit.comp0Top = true;   /* Wrap around on COMP0 match. */
  RTC_Init(&rtcInit);

  // interrupt every second: compare value is RTC_FREQ / RTC_CLKDIV - 1 = 63,
  // so the counter wraps after 64 ticks of the 64 Hz RTC clock
  RTC_CompareSet(0, RTC_FREQ / RTC_CLKDIV - 1);

  // enable the RTC interrupt line in the NVIC and the COMP0 source in the RTC
  NVIC_EnableIRQ(RTC_IRQn);
  RTC_IntEnable(RTC_IEN_COMP0);

  // start counter (RTC_Init() above was told not to start it)
  RTC_Enable(true);
}

// Fill both operand matrices with pseudo-random values.
// Walks mat_a and mat_b in lock-step, drawing one rand() value for mat_a and
// then one for mat_b per element. rand() is never seeded here, so the data is
// whatever sequence the C library produces by default.
void init_workload(void) {
  uint32_t *dst_a = mat_a;
  uint32_t *dst_b = mat_b;
  uint32_t *const end_a = mat_a + MAT_SIZE;

  while (dst_a != end_a) {
    *dst_a++ = rand();
    *dst_b++ = rand();
  }
}

// Workload: dense matrix multiplication c = a * b.
//   n - number of rows/columns of the square matrices
//   a - left operand,  n * n elements, element (r, k) at a[r * n + k]
//   b - right operand, n * n elements, element (k, col) at b[k * n + col]
//   c - result,        n * n elements, element (r, col) at c[r * n + col]
// Performs n^3 multiplications and additions in uint32_t arithmetic, so the
// sums wrap modulo 2^32. Nothing is read or written when n <= 0.
void workload(int n, uint32_t *a, uint32_t *b, uint32_t *c) {
  int row;

  for (row = 0; row < n; row++) {
    // start of the current row in the left operand and in the result
    const uint32_t *lhs_row = a + row * n;
    uint32_t *out_row = c + row * n;
    int col;

    for (col = 0; col < n; col++) {
      uint32_t acc = 0;
      int k;

      // dot product of row `row` of a with column `col` of b
      for (k = 0; k < n; k++) {
        acc += lhs_row[k] * b[k * n + col];
      }
      out_row[col] = acc;
    }
  }
}

///////////////////////////////////////////////////////////
// TASK 2

// Number of loop iterations performed by each Task 2 micro-benchmark
// (add_in_register() and add_in_mem()).
#define RUN_LENGTH 1000000

// Receives the value returned by each benchmark call in task2(). Being
// volatile, every assignment to it is a real store the compiler must keep.
volatile int task2_result = 0;

// Task 2 micro-benchmark: repeated addition on a register-hinted accumulator.
// Adds 42 to `tmp` RUN_LENGTH times and returns the total (42 * RUN_LENGTH).
// Both the loop counter and the accumulator carry the `register` storage-class
// hint; the loop body is intentionally left as-is so the measured instruction
// mix does not change.
// Declared with an explicit (void) parameter list so the definition is a real
// prototype, like the other functions in this file.
int add_in_register(void) {
  register unsigned int i = 0;
  register int tmp = 0;
  for (i = 0; i < RUN_LENGTH; i++) {
    tmp = tmp + 42;
  }
  return tmp;
}

// Task 2 micro-benchmark: repeated addition on an accumulator kept in memory.
// Adds 42 to `tmp` RUN_LENGTH times and returns the total (42 * RUN_LENGTH).
// `tmp` is volatile, so every iteration performs an actual load and store of
// the variable instead of keeping it in a register; the loop body is
// intentionally left as-is so the measured instruction mix does not change.
// Declared with an explicit (void) parameter list so the definition is a real
// prototype, like the other functions in this file.
int add_in_mem(void) {
  register unsigned int i = 0;
  volatile int tmp = 0;  // forces tmp to be written to the stack
  for (i = 0; i < RUN_LENGTH; i++) {
    tmp = tmp + 42;
  }
  return tmp;
}

// Task 2 driver: runs the micro-benchmarks back to back in an endless loop,
// storing each result in the volatile task2_result. Never returns.
// Declared with an explicit (void) parameter list so the definition is a real
// prototype, like the other functions in this file.
void task2(void) {
  while (1) {
    // addition vs. multiplication in registers
    // ADD
    task2_result = add_in_register();
    // MULT
    // TODO: multiplication in registers

    // addition vs. multiplication in memory
    // ADD
    task2_result = add_in_mem();
    // MULT
    // TODO: multiplication in memory
  }
}

///////////////////////////////////////////////////////////

// Program entry point after reset.
// Applies the chip errata, enables profiler trace, selects HFXO as the core
// clock with divider CORE_CLKDIV, prepares the workload data and the periodic
// RTC interrupt, then loops forever: wait for the ready flag, clear it, and
// run WORKLOAD_FACTOR matrix multiplications.
int main(void) {
  int remaining;

  // chip errata
  CHIP_Init();

  // enable eA Profiler trace
  BSP_TraceProfilerSetup();

  // MCU core clock: 48 MHz, running off HFXO, divided by CORE_CLKDIV
  CMU_ClockSelectSet(cmuClock_HF, cmuSelect_HFXO);
  CMU_ClockDivSet(cmuClock_CORE, CORE_CLKDIV);

  // TODO: enable task 2
  //task2(); // does not return

  // fill the operand matrices
  init_workload();

  // setup RTC & enable regular interrupt
  rtc_setup();

  // workload loop, never left
  for (;;) {
    // hold here until RTC_IRQHandler() has raised the ready flag
    while (ready == 0) {
      // put MCU into sleep mode
      // TODO: choose EM1 or EM2
    }

    // consume the flag before starting this period's work
    ready = 0;

    // perform work: WORKLOAD_FACTOR identical matrix multiplications
    for (remaining = WORKLOAD_FACTOR; remaining > 0; remaining--) {
      workload(MAT_ROWS, mat_a, mat_b, mat_c);
    }
  }

  return 0;
}
