$ git clone https://github.com/google/CFU-Playground.git
$ cd CFU-Playground
$ ./scripts/setup
Download the August 2020 toolchain from freedom-tools and unpack the binaries to your home directory:
$ tar xvfz ~/Downloads/riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14.tar.gz
Add the toolchain to your PATH in your .bashrc script:
export PATH=$PATH:$HOME/riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14/bin
Change the target board
export TARGET ?= digilent_nexys4ddr
Make Your Project
$ cp -r proj/proj_template_v proj/my_first_cfu
$ cd proj/my_first_cfu
Test run
$ make prog USE_VIVADO=1 TTY=/dev/ttyUSB0
$ make load BUILD_JOBS=4 TTY=/dev/ttyUSB1
Press the “CPU_RESET” button on the board.
CFU-Playground doesn't have the following two audio operators, so we need to port them first:
Download kws_tflm_audio_op.patch
$ cd CFU-Playground
$ patch -p1 -i kws_tflm_audio_op.patch
proj/proj.mk
mkdir -p $(BUILD_DIR)/src/third_party/fft2d
$(COPY) $(TFLM_TP_DIR)/fft2d/fft.h $(BUILD_DIR)/src/third_party/fft2d
$(COPY) $(TFLM_TP_DIR)/fft2d/fft2d.h $(BUILD_DIR)/src/third_party/fft2d
$(COPY) $(TFLM_TP_DIR)/fft2d/fft4g.c $(BUILD_DIR)/src/third_party/fft2d
$ cd CFU-Playground/common/src/models/
$ mkdir ds_cnn_stream_fe
$ cd ds_cnn_stream_fe
Download ds_cnn_stream_fe.tflite
Put ds_cnn_stream_fe.tflite
in CFU-Playground/common/src/models/ds_cnn_stream_fe/
Download label.zip
Unzip label.zip
in CFU-Playground/common/src/models/
How to run inference using TensorFlow Lite for Microcontrollers
CFU-Playground/common/src/models/ds_cnn_stream_fe/ds_cnn.h
// Public interface of the ds_cnn_stream_fe model tests.
// Included from both C (models/model.c) and C++ (ds_cnn.cc) sources.
//
// The guard name deliberately avoids the "_" + uppercase form, which is
// reserved for the implementation.
#ifndef MODELS_DS_CNN_STREAM_FE_DS_CNN_H_
#define MODELS_DS_CNN_STREAM_FE_DS_CNN_H_

#ifdef __cplusplus
extern "C" {
#endif

// For integration into menu system.
// Takes no arguments; declared with (void) so that C callers get a real
// prototype instead of an unspecified parameter list.
void ds_cnn_stream_fe_menu(void);

#ifdef __cplusplus
}
#endif

#endif  // MODELS_DS_CNN_STREAM_FE_DS_CNN_H_
CFU-Playground/common/src/models/ds_cnn_stream_fe/ds_cnn.cc
Complete the following code to run inference on the model. Use the files in models/label/
as your inputs; they are already included in the code below. Then print all 12 output scores.
You can refer to the other models in common/src/models/
and use the functions in common/src/tflite.cc
#include "models/ds_cnn_stream_fe/ds_cnn.h"
#include <stdio.h>
#include "menu.h"
#include "models/ds_cnn_stream_fe/ds_cnn_stream_fe.h"
#include "tflite.h"
#include "models/label/label0_board.h"
#include "models/label/label1_board.h"
#include "models/label/label6_board.h"
#include "models/label/label8_board.h"
#include "models/label/label11_board.h"
// Loads the ds_cnn_stream_fe model by handing the model symbol and its length
// to tflite_load_model() (presumably the loader from common/src/tflite.cc).
// Called by ds_cnn_stream_fe_menu() right before the menu is run.
// NOTE(review): the previous comment here said "deallocate tensors when done";
// nothing in this function deallocates anything - confirm whether
// tflite_load_model() takes care of that.
static void ds_cnn_stream_fe_init(void) {
tflite_load_model(ds_cnn_stream_fe, ds_cnn_stream_fe_len);
}
// Implement your design here
// Menu definition for this model, passed to menu_run() by
// ds_cnn_stream_fe_menu(). The two strings are presumably the menu title and
// its short name - confirm against struct Menu in menu.h.
// The item list currently holds only MENU_END (presumably the list
// terminator), i.e. no test entries are registered yet.
static struct Menu MENU = {
"Tests for ds_cnn_stream_fe",
"ds_cnn_stream_fe",
{
MENU_END,
},
};
// For integration into menu system
// Entry point registered in models/model.c: loads the model first, then
// hands control to menu_run() with this model's MENU.
void ds_cnn_stream_fe_menu() {
ds_cnn_stream_fe_init();
menu_run(&MENU);
}
Add the code below:
CFU-Playground/common/src/models/model.c
#include "models/ds_cnn_stream_fe/ds_cnn.h"
#if defined(INCLUDE_MODEL_DS_CNN_STREAM_FE)
MENU_ITEM(AUTO_INC_CHAR, "Ds cnn stream fe", ds_cnn_stream_fe_menu),
#endif
CFU-Playground/common/src/tflite.cc
Set the kTensorArenaSize. You should set the "size" below.
#ifdef INCLUDE_MODEL_DS_CNN_STREAM_FE
3000 * 1024,
#endif
CFU-Playground/proj/my_first_cfu/Makefile
DEFINES += INCLUDE_MODEL_DS_CNN_STREAM_FE
#DEFINES += INCLUDE_MODEL_PDTI8
$ cd CFU-Playground/proj/my_first_cfu
$ make prog USE_VIVADO=1 TTY=/dev/ttyUSB0
$ make load BUILD_JOBS=4 TTY=/dev/ttyUSB1
Press a number to run a test.
CFU-Playground/common/src/tflite.cc
Add the code below:
// Report how many bytes of the tensor arena the interpreter used.
// arena_used_bytes() returns a size_t, so convert it explicitly and print it
// with an unsigned conversion instead of passing it to %d.
printf("DRAM: %u bytes\n", (unsigned)interpreter->arena_used_bytes());
We found that the KWS model uses 1934292 bytes of memory.
We can use the functions in CFU-Playground/common/src/perf.h
to count the cycles of MAC operations.
In CFU-Playground/common/src/models
create my_cycles.cc and my_cycles.h to record cycles.
my_cycles.cc
// Accumulated cycle count. Intentionally not static: the instrumented conv.h
// adds to it directly through an `extern` declaration.
unsigned long long my_cycles = 0ULL;

// Reports the cycles accumulated so far.
unsigned long long get_my_cycles() { return my_cycles; }

// Starts a fresh measurement by zeroing the accumulator.
void reset_my_cycles() { my_cycles = 0ULL; }
my_cycles.h
// Accessors for the cycle counter defined in my_cycles.cc.
// The include guard keeps repeated inclusion harmless (this header is pulled
// in by the instrumented conv.h as well as by ds_cnn.cc).
// No extern "C" wrapper on purpose: my_cycles.cc defines these functions with
// C++ linkage, so the declarations must match.
#ifndef MODELS_MY_CYCLES_H_
#define MODELS_MY_CYCLES_H_

// Returns the current value of the my_cycles counter.
long long unsigned get_my_cycles();

// Sets the my_cycles counter back to zero.
void reset_my_cycles();

#endif  // MODELS_MY_CYCLES_H_
$ mkdir -p src/tensorflow/lite/kernels/internal/reference/integer_ops/
$ cp \
../../third_party/tflite-micro/tensorflow/lite/kernels/internal/reference/conv.h \
src/tensorflow/lite/kernels/internal/reference/conv.h
This will create a copy of the convolution source code in your project directory. At build time your copy of the source code will replace the regular implementation.
conv.h
Open the newly created copy at proj/my_first_cfu/src/tensorflow/lite/kernels/internal/reference/conv.h
. Locate the innermost loop of the first function; it should look something like this:
for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) {
float input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel + group * filter_input_depth)];
float filter_value = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
total += (input_value * filter_value);
}
Add #include "perf.h",
#include "models/my_cycles.h",
and extern long long unsigned my_cycles;
at the top of the file, and then surround the inner loop with the perf functions to count how many cycles this inner loop takes.
#include "perf.h"
#include "models/my_cycles.h"
extern long long unsigned my_cycles;
/* ... */
unsigned my_start = perf_get_mcycle();
for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) {
float input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel + group * filter_input_depth)];
float filter_value = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
total += (input_value * filter_value);
}
unsigned my_finish = perf_get_mcycle();
my_cycles += (my_finish - my_start);
In CFU-Playground/common/src/models/ds_cnn_stream_fe/ds_cnn.cc,
add #include "models/my_cycles.h"
at the top of the file. Use the functions in models/my_cycles.h
to print the total cycles of MAC operations.
#include "models/ds_cnn_stream_fe/ds_cnn.h"
#include <stdio.h>
#include "menu.h"
#include "models/ds_cnn_stream_fe/ds_cnn_stream_fe.h"
#include "tflite.h"
#include "models/label/label0_board.h"
#include "models/label/label1_board.h"
#include "models/label/label6_board.h"
#include "models/label/label8_board.h"
#include "models/label/label11_board.h"
#include "models/my_cycles.h"
// Loads the ds_cnn_stream_fe model by handing the model symbol and its length
// to tflite_load_model() (presumably the loader from common/src/tflite.cc).
// Called by ds_cnn_stream_fe_menu() right before the menu is run.
// NOTE(review): the previous comment here said "deallocate tensors when done";
// nothing in this function deallocates anything - confirm whether
// tflite_load_model() takes care of that.
static void ds_cnn_stream_fe_init(void) {
tflite_load_model(ds_cnn_stream_fe, ds_cnn_stream_fe_len);
}
// Implement your design here
// Menu definition for this model, passed to menu_run() by
// ds_cnn_stream_fe_menu(). The two strings are presumably the menu title and
// its short name - confirm against struct Menu in menu.h.
// The item list currently holds only MENU_END (presumably the list
// terminator), i.e. no test entries are registered yet.
static struct Menu MENU = {
"Tests for ds_cnn_stream_fe",
"ds_cnn_stream_fe",
{
MENU_END,
},
};
// For integration into menu system
// Entry point registered in models/model.c: loads the model first, then
// hands control to menu_run() with this model's MENU.
void ds_cnn_stream_fe_menu() {
ds_cnn_stream_fe_init();
menu_run(&MENU);
}
You must run make clean first. To enable the performance counters, use the commands below.
$ make clean
$ make prog EXTRA_LITEX_ARGS="--cpu-variant=perf+cfu"
$ make load