Try   HackMD

2016q3 Homework1 (clz)

目標

  • iteration version
  • recursive version
  • Harley’s algorithm
  • binary search technique
  • byte-shift version
  • benchmark suit gnuplot

程式解說

  • 定義unsigned int
    uint8_t為0~2^8-1 (0x00~0xFF)
    uint16_t為0~2^16-1 (0x0000~0xFFFF)
    uint32_t為0~2^32-1 (0x00000000~0xFFFFFFFF)
    uint64_t為0~2^64-1 (0x0000000000000000~0xFFFFFFFFFFFFFFFF)

  • 遞迴

原式

uint8_t clz(uint32_t x)
{
	/* shift upper half down, rest is filled up with 0s */
	uint16_t upper = (x >> 16); 
	// mask upper half away
	uint16_t lower = (x & 0xFFFF);
	return upper ? clz(upper) : 16 + clz(lower);
}

修改過後的程式碼,決定多一個參數 count 來計算目前切割的 bit 位數

uint8_t clz_recursive(uint32_t x,int count)
{
	count = count / 2;
	if (count < 1)
		return 0;
	
	uint16_t upper = (x >> count);
	uint16_t lower = (x & 0xFFFF);

	return upper ? clz_recursive(upper,count) : count + clz_recursive(lower,count);
}
  • Harley’s algorithm

原式

 static prog_uint8_t const Table[] = {
      0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
      16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
      0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
      0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
      14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
      18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
      26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
      0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
    };

    /* Propagate leftmost 1-bit to the right */
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
 
    /* x = x * 0x6EB14F9 */
    x = (x << 3) - x;   /* Multiply by 7. */
    x = (x << 8) - x;   /* Multiply by 255. */
    x = (x << 8) - x;   /* Again. */
    x = (x << 8) - x;   /* Again. */

    return pgm_read_byte(&Table[x >> 26]);
  • 研究pgm_read_byte
uint8_t clz_Harley(uint32_t x)
{
    static uint8_t const Table[] = {
      0xFF, 0, 0xFF, 15, 0xFF, 1, 28, 0xFF,
      16, 0xFF, 0xFF, 0xFF, 2, 21, 29, 0xFF,
      0xFF, 0xFF, 19, 17, 10, 0xFF, 12, 0xFF,
      0xFF, 3, 0xFF, 6, 0xFF, 22, 30, 0xFF,
      14, 0xFF, 27, 0xFF, 0xFF, 0xFF, 20, 0xFF,
      18, 9, 11, 0xFF, 5, 0xFF, 0xFF, 13,
      26, 0xFF, 0xFF, 8, 0xFF, 4, 0xFF, 25,
      0xFF, 7, 24, 0xFF, 23, 0xFF, 31, 0xFF,
    };

    /* Propagate leftmost 1-bit to the right */
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
 
    /* x = x * 0x6EB14F9 */
    x = (x << 3) - x;   /* Multiply by 7. */
    x = (x << 8) - x;   /* Multiply by 255. */
    x = (x << 8) - x;   /* Again. */
    x = (x << 8) - x;   /* Again. */

    return Table[x >> 26];
}
  • 這是loop 10000次的結果
    0.000434 0.0003200.000280 0.000241 0.000229