Try   HackMD

rv32emu clock_gettime Implementation

tags: Computer Architecture

Problem

Original C code

/* Minimal timing harness: measure isAnagram() on one fixed test case by
 * sampling clock() immediately before and after the call. */
int main()
{
    char *test_1_s = "anagram";
    char *test_1_t = "anagram";

    clock_t time_start = clock();
    if (isAnagram(test_1_s, test_1_t) == 1) {
        ; // print something if needed
    } else {
        ; // print something if needed
    }
    clock_t time_end = clock();
}

In line 11 and line 29, I call the clock function to calculate the time elapsed while running my test case. However, I get the following error message when I use rv32emu to run the .elf file compiled by the xPack GNU RISC-V Embedded GCC:

unknown syscall 403

Some Problem with gettimeofday

Reading the file unistd.h, which defines the system call numbers for the RISC-V architecture, shows the following:

#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_gettimeofday 169
__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
//...
#endif
    
#if __BITS_PER_LONG == 32
#define __NR_clock_gettime64 403
__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
//...
#endif

If the C file is compiled with riscv-none-elf-gcc -march=rv32i -mabi=ilp32, then __BITS_PER_LONG is 32, so __NR_gettimeofday (169) is never defined (unless __ARCH_WANT_TIME32_SYSCALLS is defined) and __NR_clock_gettime64 (403) is used instead. As a result, an ELF file built with the GNU toolchain cannot run correctly on rv32emu and produces the error message above.

rv32emu only implements the gettimeofday system call with number 169, so I think it can still be used if the assembly code is written by hand.

  • However, there is a flag __ARCH_WANT_TIME32_SYSCALLS, which can optionally be defined in /(arch-type)/unistd.h. But we use the xPack GNU RISC-V Embedded GCC, whose riscv-none-elf-gcc is already a prebuilt executable, so we cannot change this flag.

So I try to implement the system call clock_gettime with number 403 in rv32emu, in order to directly run the .elf file compiled by riscv-none-elf-gcc.

I dump the object code of the executable file to figure out which system call the clock function actually performs.

Disassembly

0001ed50 <_gettimeofday>: 1ed50: fe010113 addi sp,sp,-32 1ed54: 00912a23 sw s1,20(sp) 1ed58: 00112e23 sw ra,28(sp) 1ed5c: 00050493 mv s1,a0 1ed60: 00812c23 sw s0,24(sp) 1ed64: 19300893 li a7,403 1ed68: 00000513 li a0,0 1ed6c: 00010593 mv a1,sp 1ed70: 00000073 ecall 1ed74: 00050413 mv s0,a0 1ed78: 04054463 bltz a0,1edc0 <_gettimeofday+0x70> 1ed7c: 00812703 lw a4,8(sp) 1ed80: 00012603 lw a2,0(sp) 1ed84: 00412683 lw a3,4(sp) 1ed88: 00571793 slli a5,a4,0x5 1ed8c: 40e787b3 sub a5,a5,a4 1ed90: 00279793 slli a5,a5,0x2 1ed94: 01c12083 lw ra,28(sp) 1ed98: 00040513 mv a0,s0 1ed9c: 00e787b3 add a5,a5,a4 1eda0: 01812403 lw s0,24(sp) 1eda4: 00379793 slli a5,a5,0x3 1eda8: 00c4a023 sw a2,0(s1) 1edac: 00d4a223 sw a3,4(s1) 1edb0: 00f4a423 sw a5,8(s1) 1edb4: 01412483 lw s1,20(sp) 1edb8: 02010113 addi sp,sp,32 1edbc: 00008067 ret 1edc0: 40800433 neg s0,s0 1edc4: a91fe0ef jal ra,1d854 <__errno> 1edc8: 00852023 sw s0,0(a0) 1edcc: fff00413 li s0,-1 1edd0: fadff06f j 1ed7c <_gettimeofday+0x2c>
  • In line 7, I notice that when I call the clock function, it actually uses system call number 403, which is clock_gettime, as mentioned in Juraj's Blog-RISC-V Linux syscall table
  • From line 16 to line 23, there are some operations on a4, which originally holds tv_nsec from the timespec. After these operations, a5 holds a4 * 1000:
slli	a5,a4,0x5
sub	a5,a5,a4
slli	a5,a5,0x2
add	a5,a5,a4
slli	a5,a5,0x3
  • a5 = \(((((a_4 \times 32) - a_4) \times 4) + a_4) \times 8 = a_4 \times 1000\)

So the values that the clock_gettime syscall should write back are:

  • tv_sec which is original defined in timespec
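  • tv_msec, which is tv_nsec / 1000000. Because the guest code multiplies this field by 1000 (see above), storing milliseconds makes the result come out in microseconds; the cost is that the resolution is reduced to milliseconds.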

Result

syscall.c

/* List of the supported system calls, written as _(name, number) entries.
 * The line prefixed with '+' is the entry this change adds: clock_gettime
 * with syscall number 403.
 * NOTE(review): the last three entries are wrapped in IIF(RV32_HAS(SDL)),
 * so they are presumably present only when SDL support is enabled --
 * confirm against the definitions of IIF and RV32_HAS.
 */
#define SUPPORTED_SYSCALLS       \
    _(close,            57)      \
    _(lseek,            62)      \
    _(read,             63)      \
    _(write,            64)      \
    _(fstat,            80)      \
    _(exit,             93)      \
    _(gettimeofday,     169)     \
    _(brk,              214)     \
+    _(clock_gettime,    403)     \
    _(open,             1024)    \
    IIF(RV32_HAS(SDL))(          \
        _(draw_frame,   0xBEEF)  \
        _(setup_queue,  0xC0DE)  \
        _(submit_queue, 0xFEED), \
    )
static void syscall_clock_gettime(struct riscv_t *rv) { state_t *s = rv_userdata(rv); /* access userdata */ /* get the parameters */ riscv_word_t tp = rv_get_reg(rv, rv_reg_a1); /* return the clock time */ if (tp) { #if defined(HAVE_POSIX_TIMER) struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); int32_t tv_sec = t.tv_sec; int32_t tv_msec = t.tv_nsec / 1000000; // resolution (ms) #elif defined(HAVE_MACH_TIMER) static mach_timebase_info_data_t info; /* If this is the first time we have run, get the timebase. * We can use denom == 0 to indicate that sTimebaseInfo is * uninitialized. */ if (info.denom == 0) (void) mach_timebase_info(&info); /* Hope that the multiplication doesn't overflow. */ uint64_t nsecs = mach_absolute_time() * info.numer / info.denom; int32_t tv_sec = nsecs / 1e9; int32_t tv_msec = (nsecs / 1e6) - (tv_sec * 1e9); #else /* low resolution timer */ clock_t t = clock(); int32_t tv_sec = t / CLOCKS_PER_SEC; int32_t tv_msec = ((t / 1000) % CLOCKS_PER_SEC) * (1000000 / CLOCKS_PER_SEC); #endif memory_write(s->mem, tp + 0, (const uint8_t *) &tv_sec, 4); memory_write(s->mem, tp + 8, (const uint8_t *) &tv_msec, 4); } /* success */ rv_set_reg(rv, rv_reg_a0, 0); }

Now the clock function can be used in a program that is compiled by riscv-none-elf-gcc and run on rv32emu:

$ riscv-none-elf-gcc -march=rv32i -mabi=ilp32	hw2.c -o hw2.elf
$ ../../build/rv32emu hw2.elf
test_1: correct
clock gettime time = 1 ms
inferior exit code 0

Test Code

Generate random string between a - z, the code with SWAR technique.

#include<limits.h>
#include<stdint.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>

#define MAX_MSG_LEN 1000

/* Clear bit 7 and set bits 5 and 6 of every byte, so each byte of the result
 * lies in 0x60..0x7F. The range 'a'..'z' is 0x61..0x7A, so the six values
 * 0x60 and 0x7B..0x7F still have to be rejected by the caller.
 * GENRAND8 is the single-byte version and must use the same 0x60 pattern:
 * OR-ing in 0x61 would force bit 0 and make every second letter unreachable.
 */
#define GENRAND64(X) (((X) & 0x7F7F7F7F7F7F7F7F) | 0x6060606060606060)
#define GENRAND8(X)  (((X) & 0x7F) | 0x60)

/* SWAR byte tests on a 64-bit word.
 * DETECT_NULL(X) is nonzero iff at least one byte of X is zero.
 * DETECT_CHAR(X, MASK) is nonzero iff at least one byte of X equals the byte
 * that is replicated eight times in MASK.
 */
#define DETECT_NULL(X)         (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#define DETECT_CHAR(X, MASK)   (DETECT_NULL((X) ^ (MASK)))

/**
 * Fill str with `length` random characters from 'a'..'z' followed by a
 * terminating '\0'.
 *
 * @param str     destination buffer; the caller must provide at least
 *                length + 1 bytes. A NULL pointer is ignored.
 * @param length  number of letters to generate; values <= 0 produce an
 *                empty string.
 *
 * Letters are produced eight at a time from two rand() calls while at least
 * eight remain, then one at a time. A candidate that contains any byte
 * outside 'a'..'z' is discarded and regenerated.
 */
static void GenRandString(char *str, int length)
{
    if (!str)
        return;

    char *out = str;
    int remaining = length > 0 ? length : 0;

    while (remaining >= 8) {
        uint64_t word = GENRAND64((uint64_t) rand() << 32 | (uint64_t) rand());

        /* reject the word if any byte is one of the six non-letters */
        if (DETECT_CHAR(word, 0x6060606060606060) ||
            DETECT_CHAR(word, 0x7B7B7B7B7B7B7B7B) ||
            DETECT_CHAR(word, 0x7C7C7C7C7C7C7C7C) ||
            DETECT_CHAR(word, 0x7D7D7D7D7D7D7D7D) ||
            DETECT_CHAR(word, 0x7E7E7E7E7E7E7E7E) ||
            DETECT_CHAR(word, 0x7F7F7F7F7F7F7F7F))
            continue;

        /* memcpy instead of a uint64_t store: str need not be 8-byte aligned */
        memcpy(out, &word, sizeof word);
        out += sizeof word;
        remaining -= (int) sizeof word;
    }

    while (remaining > 0) {
        int c = GENRAND8(rand());

        /* c is in 0x60..0x7F here; keep only 'a'..'z' */
        if (c < 'a' || c > 'z')
            continue;
        *out++ = (char) c;
        remaining--;
    }
    *out = '\0';
}



/**
 * Check whether t is an anagram of s, i.e. whether both strings contain
 * exactly the same bytes with the same multiplicities.
 *
 * @param s  first NUL-terminated string
 * @param t  second NUL-terminated string
 * @return   1 if the strings are anagrams of each other, 0 otherwise
 *
 * The histogram has one counter per possible byte value, so any input is
 * safe; indexing a 26-entry table with (ch - 'a') would run out of bounds
 * for every byte outside 'a'..'z'. Results for all-lowercase input are the
 * same as with the 26-entry table.
 */
int isAnagram(char * s, char * t)
{
    int freq[UCHAR_MAX + 1] = {0}; /* net count of every byte value */

    /* count up for every byte of s */
    for (const unsigned char *p = (const unsigned char *) s; *p; p++)
        freq[*p]++;

    /* count down for every byte of t */
    for (const unsigned char *p = (const unsigned char *) t; *p; p++)
        freq[*p]--;

    /* any non-zero counter means that byte occurs a different number of
     * times in s and t */
    for (int i = 0; i <= UCHAR_MAX; i++) {
        if (freq[i] != 0)
            return 0;
    }
    return 1; /* every byte value occurs equally often */
}

/* Benchmark driver: for every length from MIN_LEN to MAX_LEN in steps of
 * LEN_STEP, generate a random lowercase string, time isAnagram() on it with
 * clock(), and print "<length>\t<elapsed ms>".
 */
int main() {
    enum { MIN_LEN = 10000, MAX_LEN = 100000, LEN_STEP = 100 };

    srand((unsigned) time(NULL));

    /* GenRandString writes `length` letters plus the terminating '\0', so
     * the buffer must hold the longest string plus one byte. */
    char *str = malloc((size_t) MAX_LEN + 1);
    if (!str) {
        perror("malloc");
        return 1;
    }

    for (int i = MIN_LEN; i <= MAX_LEN; i += LEN_STEP) {
        GenRandString(str, i);
        clock_t time_start = clock();
        if (isAnagram(str, str)) {
            ;	// print something if needed
        }
        clock_t time_end = clock();

        /* clock_t is not necessarily int: convert explicitly and use a
         * matching conversion specifier */
        long elapsed_ms = (long) ((time_end - time_start) * 1000 / CLOCKS_PER_SEC);
        printf("%d\t%ld\n", i, elapsed_ms);
    }

    free(str);
    return 0;
}

Test code takes 2 same input strings, and the algorithm will go through whole string.

Now can use clock function in C code and compile with command riscv-none-elf-gcc -march=rv32i -mabi=ilp32, and directly run on rv32emu.

Trick
There is a problem if I don't allocate char *str; the associated code is shown below.

/* Fetch one 32-bit word from memory m at address addr.
 *
 * The chunk is chosen by the upper bits of the address (addr >> 16) and the
 * position inside the chunk by addr & mask_lo (mask_lo is defined elsewhere;
 * presumably 0xFFFF -- confirm). Asserts that the in-chunk offset is even
 * and that the selected chunk exists.
 */
uint32_t memory_ifetch(memory_t *m, uint32_t addr)
{
    const uint32_t offset = addr & mask_lo;
    assert(!(offset & 1));

    chunk_t *chunk = m->chunks[addr >> 16];
    assert(chunk);

    const uint32_t *word = (const uint32_t *) (chunk->data + offset);
    return *word;
}
/* Guest memory: a table of 65536 (0x10000) chunk pointers. memory_ifetch
 * indexes this table with addr >> 16 and asserts that the entry is non-NULL
 * before using it. */
typedef struct {
    chunk_t *chunks[1u << 16];
} memory_t;
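  • chunks has 0x10000 (65536) entries, but each entry is a pointer to a chunk, not a byte: memory_ifetch picks the chunk with addr >> 16 and asserts that it is not NULL, so an address whose chunk does not exist cannot be accessed. So I allocate the buffer with malloc (which I think ends up in the brk system call).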

TODO: clarify newlib internals