contributed by < hankchang805
>
linux2020
1
考慮到以下 bitcpy
實作,允許開發者對指定的記憶體區域,逐 bit 進行資料複製:
#include <stdint.h>
#include <stdio.h>
#include <string.h>
void bitcpy(void *_dest, /* Address of the buffer to write to */
size_t _write, /* Bit offset to start writing to */
const void *_src, /* Address of the buffer to read from */
size_t _read, /* Bit offset to start reading from */
size_t _count)
{
uint8_t data, original, mask;
size_t bitsize;
size_t read_lhs = _read & 7;
size_t read_rhs = 8 - read_lhs;
const uint8_t *source = _src + (_read / 8);
size_t write_lhs = _write & 7; //KK1
size_t write_rhs = 8 - write_lhs;
uint8_t *dest = _dest + (_write / 8);
static const uint8_t read_mask[] = {
0x00, /* == 0 00000000b */
0x80, /* == 1 10000000b */
0xC0, /* == 2 11000000b */
0xE0, /* == 3 11100000b */
0xF0, /* == 4 11110000b */
0xF8, /* == 5 11111000b */
0xFC, /* == 6 11111100b */
0xFE, /* == 7 11111110b */
0xFF /* == 8 11111111b */
};
static const uint8_t write_mask[] = {
0xFF, /* == 0 11111111b */
0x7F, /* == 1 01111111b */
0x3F, /* == 2 00111111b */
0x1F, /* == 3 00011111b */
0x0F, /* == 4 00001111b */
0x07, /* == 5 00000111b */
0x03, /* == 6 00000011b */
0x01, /* == 7 00000001b */
0x00 /* == 8 00000000b */
};
while (_count > 0) {
data = *source++;
bitsize = (_count > 8) ? 8 : _count;
if (read_lhs > 0) {
data <<= read_lhs;
if (bitsize > read_rhs)
data |= (*source >> read_rhs);
}
if (bitsize < 8)
data &= read_mask[bitsize];
original = *dest;
if (write_lhs > 0) {
mask = read_mask[write_lhs];
if (bitsize > write_rhs) {
*dest++ = (original & mask) | (data >> write_lhs);
original = *dest & write_mask[bitsize - write_rhs];
*dest = original | (data << write_rhs);
} else {
if ((bitsize - write_lhs) > 0)
mask = mask | write_mask[8 - (bitsize - write_lhs)];
*dest++ = (original & mask) | (data >> write_lhs);
}
} else {
if (bitsize < 8)
data = data | (original & write_mask[bitsize]);
*dest++ = data;
}
_count -= bitsize; //KK2
}
}
測試的程式碼:
static uint8_t output[8], input[8];
static inline void dump_8bits(uint8_t _data)
{
for (int i = 0; i < 8; ++i)
printf("%d", (_data & (0x80 >> i)) ? 1 : 0);
}
static inline void dump_binary(uint8_t *_buffer, size_t _length)
{
for (int i = 0; i < _length; ++i)
dump_8bits(*_buffer++);
}
int main(int _argc, char **_argv)
{
memset(&input[0], 0xFF, sizeof(input));
for (int i = 1; i <= 32; ++i) {
for (int j = 0; j < 16; ++j) {
for (int k = 0; k < 16; ++k) {
memset(&output[0], 0x00, sizeof(output));
printf("%2d:%2d:%2d ", i, k, j);
bitcpy(&output[0], k, &input[0], j, i);
dump_binary(&output[0], 8);
printf("\n");
}
}
}
return 0;
}
延伸問題:
提示: 觀察 linux/bitops.h 裡頭巨集在原始程式碼的運用狀況
void bitcpy(void *_dest, /* Address of the buffer to write to */
size_t _write, /* Bit offset to start writing to */
const void *_src, /* Address of the buffer to read from */
size_t _read, /* Bit offset to start reading from */
size_t _count){
uint8_t data, original, mask;
size_t bitsize;
size_t read_lhs = _read & 7;
size_t read_rhs = 8 - read_lhs;
const uint8_t *source = _src + (_read / 8);
size_t write_lhs = _write & 7; //KK1
size_t write_rhs = 8 - write_lhs;
uint8_t *dest = _dest + (_write / 8);
.....
.....
}
在這段程式碼中可以看見 read_lhs
、 read_rhs
,因為 _src
傳進來是 uint8_t
所以我們就以 8 bit 為一個單位來操作,&7
其實就是在 mod 8 ,算出來的 read_lhs
就是指要複製的那個 bit 左邊有幾個 bit , read_rhs
則反之;那 write
的部分也是如此
static const uint8_t read_mask[] = {
0x00, /* == 0 00000000b */
0x80, /* == 1 10000000b */
0xC0, /* == 2 11000000b */
0xE0, /* == 3 11100000b */
0xF0, /* == 4 11110000b */
0xF8, /* == 5 11111000b */
0xFC, /* == 6 11111100b */
0xFE, /* == 7 11111110b */
0xFF /* == 8 11111111b */
};
static const uint8_t write_mask[] = {
0xFF, /* == 0 11111111b */
0x7F, /* == 1 01111111b */
0x3F, /* == 2 00111111b */
0x1F, /* == 3 00011111b */
0x0F, /* == 4 00001111b */
0x07, /* == 5 00000111b */
0x03, /* == 6 00000011b */
0x01, /* == 7 00000001b */
0x00 /* == 8 00000000b */
};
從這邊可以看到 read
、 write
的遮罩,舉例來說 read_mask[3]
代表要複製的位元長度為 3 bit ,所以就對前三個位元做 &1
運算至於其他部分就做 &0
,如此一來便可以只保留前三位元的資料
while (_count > 0) {
data = *source++;
bitsize = (_count > 8) ? 8 : _count;
if (read_lhs > 0) {
data <<= read_lhs;
if (bitsize > read_rhs)
data |= (*source >> read_rhs);
}
if (bitsize < 8)
data &= read_mask[bitsize];
original = *dest;
if (write_lhs > 0) {
mask = read_mask[write_lhs];
if (bitsize > write_rhs) {
*dest++ = (original & mask) | (data >> write_lhs);
original = *dest & write_mask[bitsize - write_rhs];
*dest = original | (data << write_rhs);
} else {
if ((bitsize - write_lhs) > 0)
mask = mask | write_mask[8 - (bitsize - write_lhs)];
*dest++ = (original & mask) | (data >> write_lhs);
}
} else {
if (bitsize < 8)
data = data | (original & write_mask[bitsize]);
*dest++ = data;
}
_count -= bitsize; //KK2
}
}
這段程式碼在進行逐 bit 的資料複製, _count
代表的是總共要複製的資料長度 ( bit ) ,以 8 bit 為一段進行處理;考慮到我們所複製的位元資料需要位移所以必須利用上面算出來的 read_lhs
、 read_rhs
做位元的調整再透過遮罩處理以得到要複製進去的資料;在 write
的部分也是如此。
目前程式的 if - else
敘述太多,減少 branch 指令是我目前想到的改進
TODO : 把程式碼補上來
vector 的操作:
int main()
{
v(float, 3, vec1);
v(int, 2, vec2, 13, 42);
printf("pos(vec2,0)=%d, pos(vec2,1)=%d\n", vec_pos(vec2, 0),
vec_pos(vec2, 1));
vec_push_back(vec2, 88);
vec_reserve(vec2, 100);
printf("capacity(vec1)=%zu, capacity(vec2)=%zu\n", vec_capacity(vec1),
vec_capacity(vec2));
printf("pos(vec2,2)=%d\n", vec_pos(vec2, 2));
#define display(v) \
do { \
for (size_t i = 0; i < vec_size(v); i++) \
printf("%.2f ", vec_pos(v, i)); \
puts(v.on_heap ? "heap" : "stack"); \
} while (0)
display(vec1);
vec_push_back(vec1, 0.0);
display(vec1);
vec_push_back(vec1, 1.1);
display(vec1);
vec_push_back(vec1, 2.2);
display(vec1);
vec_push_back(vec1, 3.3);
display(vec1);
vec_push_back(vec1, 4.4);
display(vec1);
vec_push_back(vec1, 5.5);
display(vec1);
vec_pop_back(vec1);
display(vec1);
vec_pop_back(vec1);
display(vec1);
vec_pop_back(vec1);
display(vec1);
vec_pop_back(vec1);
display(vec1);
vec_pop_back(vec1);
display(vec1);
vec_pop_back(vec1);
display(vec1);
return 0;
}
程式碼本體如下:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* vector with small buffer optimization */
#define STRUCT_BODY(type) \
struct { \
size_t size : 54, on_heap : 1, capacity : 6, flag1 : 1, flag2 : 1, \
flag3 : 1; \
type *ptr; \
}
#define NEXT_POWER_OF_2(s) \
(s&(s-1)) == 0 \ // VV1
? s \
: (size_t) 1 << (64 - __builtin_clzl(s)) /* next power of 2 */
#define v(t, s, name, ...) \
union { \
STRUCT_BODY(t); \
struct { \
size_t filler; \
t buf[NEXT_POWER_OF_2(s)]; \
}; \
} name __attribute__((cleanup(vec_free))) = {.buf = {__VA_ARGS__}}; \
name.size = sizeof((__typeof__(name.buf[0])[]){0, __VA_ARGS__}) / \
sizeof(name.buf[0]) - 1; \
name.capacity = sizeof(name.buf) / sizeof(name.buf[0])
#define vec_size(v) v.size
#define vec_capacity(v) \
(v.on_heap ? (size_t) 1 << v.capacity : sizeof(v.buf) / sizeof(v.buf[0]))
#define vec_data(v) (v.on_heap ? v.ptr : v.buf) /* always contiguous buffer */
#define vec_elemsize(v) sizeof(v.buf[0])
#define vec_pos(v, n) vec_data(v)[n] /* lvalue */
#define vec_reserve(v, n) __vec_reserve(&v, n, vec_elemsize(v), vec_capacity(v))
#define vec_push_back(v, e) \
__vec_push_back(&v, &(__typeof__(v.buf[0])[]){e}, vec_elemsize(v), \
vec_capacity(v))
#define vec_pop_back(v) (void) (v.size -= 1) // VV2
/* This function attribute specifies function parameters that are not supposed
* to be null pointers. This enables the compiler to generate a warning on
* encountering such a parameter.
*/
#define NON_NULL __attribute__((nonnull))
static NON_NULL void vec_free(void *p)
{
STRUCT_BODY(void) *s = p;
if (s->on_heap)
free(s->ptr);
}
static inline int ilog2(size_t n)
{
return 64 - __builtin_clzl(n) - ((n & (n - 1)) == 0);
}
static NON_NULL void __vec_reserve(void *vec,
size_t n,
size_t elemsize,
size_t capacity)
{
union {
STRUCT_BODY(void);
struct {
size_t filler;
char buf[];
};
} *v = vec;
if (n > capacity) {
if (v->on_heap) {
v->ptr = realloc(v->ptr,
elemsize * (size_t) 1 << (v->capacity = ilog2(n)));
} else {
void *tmp =
malloc(elemsize * (size_t) 1 << (v->capacity = ilog2(n)));
memcpy(tmp, v->buf, elemsize * v->size);
v->ptr = tmp;
v->on_heap = 1;
}
}
}
static NON_NULL void __vec_push_back(void *restrict vec,
void *restrict e,
size_t elemsize,
size_t capacity)
{
union {
STRUCT_BODY(char);
struct {
size_t filler;
char buf[];
};
} *v = vec;
if (v->on_heap) {
if (v->size == capacity)
v->ptr = realloc(v->ptr, elemsize * (size_t) 1 << ++v->capacity);
memcpy(&v->ptr[v->size++ * elemsize], e, elemsize);
} else {
if (v->size == capacity) {
void *tmp =
malloc(elemsize * (size_t) 1 << (v->capacity = capacity + 1));
memcpy(tmp, v->buf, elemsize * v->size);
v->ptr = tmp;
v->on_heap = 1;
memcpy(&v->ptr[v->size++ * elemsize], e, elemsize);
} else
memcpy(&v->buf[v->size++ * elemsize], e, elemsize);
}
}
延伸問題:
#define NEXT_POWER_OF_2(s) \
(s&(s-1)) == 0 \ // VV1
? s \
: (size_t) 1 << (64 - __builtin_clzl(s)) /* next power of 2 */
看這個 Macro 的名字可以得知在求大於數字 s 的下一個 VV1
的解答為 (s&(s-1)) == 0
,判斷式成立的時候代表 s 是 __builtin_clzl(s)
把最高位元前面有幾個 0 找出來,這代表我們找出最高的 1 是在第幾個位元,再透過向左移動去求大於 s 的下一個
#define vec_pop_back(v) (void) (v.size -= 1) // VV2
這邊是做 vector 很常使用的操作 vec_pop_back
,這邊把 v.size
扣掉 1,會這樣做的目的是為了配合 void display(v)
這個 Macro
#define display(v) \
do { \
for (size_t i = 0; i < vec_size(v); i++) \
printf("%.2f ", vec_pos(v, i)); \
puts(v.on_heap ? "heap" : "stack"); \
} while (0)
可以看見裡面定義是 vector size 有多長就印出多長,如此一來就相當於把 vector 的最後一個元素 pop 掉
題目1 考慮以下仿效 Linux 核心 include/linux/list.h 的精簡實作: #include <stddef.h> /** * container_of() - Calculate address of object that contains address ptr * @ptr: pointer to member variable * @type: type of the structure containing ptr * @member: name of the member variable in struct @type
Mar 5, 2021contributed by < hankchang805 > 開發紀錄 queue.h queue_t 為了讓 q_insert_tail 以及 q_size 可以在 $O(1)$ 的時間複雜度內完成,所以增加 tail (指向 queue 中最後一個元素)以及 size (紀錄目前 queue 有幾個元素) typedef struct { list_ele_t *head; list_ele_t *tail;
Mar 28, 2020or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up