contributed by < KYG-yaya573142
>
首先觀察 xs 的資料結構定義
typedef union {
/* allow strings up to 15 bytes to stay on the stack
* use the last byte as a null terminator and to store flags
* much like fbstring:
* https://github.com/facebook/folly/blob/master/folly/docs/FBString.md
*/
char data[16];
struct {
uint8_t filler[15],
/* how many free bytes in this stack allocated string
* same idea as fbstring
*/
space_left : 4,
/* if it is on heap, set to 1 */
is_ptr : 1, flag1 : 1, flag2 : 1, flag3 : 1;
};
/* heap allocated */
struct {
char *ptr;
/* supports strings up to 2^54 - 1 bytes */
size_t size : 54,
/* capacity is always a power of 2 (unsigned)-1 */
capacity : 6;
/* the last 4 bits are important flags */
};
} xs;
space_left
,後 4 bits 的每一個 bit 都作為 flag 使用 (目前只有第一個 flag 有用途)space_left
共佔 4 bits,可以表示的無號整數範圍是 0~15\0
剛好也是 0,此時第 16 個 byte 的位置剛好可以同時表達 null terminator 以及剩餘空間是 0ilog2
使用xs *xs_new(xs *x, const void *p)
{
*x = xs_literal_empty();
size_t len = strlen(p) + 1;
if (len > AAA) {
x->capacity = ilog2(len) + 1;
x->size = len - 1;
x->is_ptr = true;
x->ptr = malloc((size_t) 1 << x->capacity);
memcpy(x->ptr, p, len);
} else {
memcpy(x->data, p, len);
x->space_left = 15 - (len - 1);
}
return x;
}
if (len > AAA)
的目的是根據字串長度決定是否需要動態配置記憶體,根據 xs 的資料結構知道最大可以使用的空間為 16 bytes,因此 AAA
應填入 16
觀察 xs_trim
的預期結果,可以了解其用途是根據 pattern 來去除原本字串的頭和尾,本例的 pattern 為 "\n "
,因此會將頭尾的空白以及 \n
去除
xs string = *xs_tmp(" foobarbar \n\n");
xs_trim(&string, "\n ");
printf("[%s] : %2zu\n", xs_data(&string), xs_size(&string));
xs prefix = *xs_tmp("((("), suffix = *xs_tmp(")))");
xs_concat(&string, &prefix, &suffix);
printf("[%s] : %2zu\n", xs_data(&string), xs_size(&string));
接著觀察 xs_trim
16~25 行的部分
trimset
,使用 set_bit
來對 mask
進行一些操作check_bit
來根據 mask 決定是否刪減頭尾的 bytemask
與 set_bit
及 check_bit
是 xs_trim
運作的關鍵
xs *xs_trim(xs *x, const char *trimset)
{
if (!trimset[0])
return x;
char *dataptr = xs_data(x), *orig = dataptr;
/* similar to strspn/strpbrk but it operates on binary data */
uint8_t mask[BBB] = {0};
#define check_bit(byte) (mask[(uint8_t) byte / 8] CCC 1 << (uint8_t) byte % 8)
#define set_bit(byte) (mask[(uint8_t) byte / 8] |= 1 << (uint8_t) byte % 8)
size_t i, slen = xs_size(x), trimlen = strlen(trimset);
for (i = 0; i < trimlen; i++)
set_bit(trimset[i]);
for (i = 0; i < slen; i++)
if (!check_bit(dataptr[i]))
break;
for (; slen > 0; slen--)
if (!check_bit(dataptr[slen - 1]))
break;
dataptr += i;
slen -= i;
/* reserved space as a buffer on the heap.
* Do not reallocate immediately. Instead, reuse it as possible.
* Do not shrink to in place if < 16 bytes.
*/
memmove(orig, dataptr, slen);
/* do not dirty memory unless it is needed */
if (orig[slen])
orig[slen] = 0;
if (xs_is_ptr(x))
x->size = slen;
else
x->space_left = 15 - slen;
return x;
#undef check_bit
#undef set_bit
}
觀察 set_bit
與 check_bit
mask[(uint8_t) byte / 8]
可以理解為將輸入的 byte 以 8 為倍數進行分組1 << (uint8_t) byte % 8
可以用來設置對應 mask
中的某個 bitmask
中的特定 bit,bit 被設置為 1 就代表對應的 ASCII 字元須被刪除,以 char *trimset = "a"
為例
char *trimset = "a";
byte = 0x61 = 97
mask[12] |= 1 << 1
mask[12] = 00000010
mask
的總 bit 數量至少要大於 256check_bit
返回 1 就會刪除一個字元 (藉由移動指標的方式),這同時表示 mask
中對應的 bit 有被 set_bit
設置,因此 check_bit
中的 CCC
應填入 &
xs_tmp
整個 xs.c 程式碼中我最看不懂的部分是 xs_tmp
/* Memory leaks happen if the string is too long but it is still useful for
* short strings.
* "" causes a compile-time error if x is not a string literal or too long.
*/
#define xs_tmp(x) \
((void) ((struct { \
_Static_assert(sizeof(x) <= 16, "it is too big"); \
int dummy; \
}){1}), \
xs_new(&xs_literal_empty(), "" x))
C11 [6.7.10] Static assertions
The constant expression shall be an integer constant expression. If the value of the constant expression compares unequal to 0, the declaration has no effect. Otherwise, the constraint is violated and the implementation shall produce a diagnostic message that includes the text of the string literal, except that characters not in the basic source character set are not required to appear in the message.
xs_tmp
會展開為((void) ((struct {int dummy;}){1}), xs_new(&xs_literal_empty(), "" x))
(struct {int dummy}){1}
的部分就是定義一個 struct 物件且值為 1,
,查閱規格書後知道它其實是一個運算子 comma perator,規格書中相關的定義如下
(void)
型態轉換_Static_assert
,會因為缺乏 left operand 而造成編譯錯誤C11 [6.5.17] Comma operator
The left operand of a comma operator is evaluated as a void expression; there is a sequence point between its evaluation and that of the right operand. Then the right operand is evaluated; the result has its type and value.
xs_tmp
實質上會變成xs_new(&xs_literal_empty(), "" x)
""
,理解這個部分的靈感來自 colinyoyo26 的共筆以及你所不知道的C語言:指標篇xs_tmpa(x)
的 x
代入 ptr
或是 string literal 都可以正常編譯xs_tmpb(x)
的 x
只能代入 string literal,代入 ptr
會 compilation error#include <stdio.h>
#define xs_tmpa(x) printf("%s\n", x)
#define xs_tmpb(x) printf("%s\n", "" x)
int main()
{
char *ptr = "foo";
xs_tmpa(ptr);
xs_tmpa("bar");
xs_tmpb(ptr); //compilation error
xs_tmpb("bar");
return 0;
}
xs_tmpb
進行測試,以 objdump
觀察可以確認 string literal 確實在 section .rodata$ gcc -g -c test.c -o test.o
$ objdump -s test.o | grep -B 1 bar
Contents of section .rodata:
0000 666f6f00 62617200 foo.bar.
首先觀察 xs_literal_empty
的實作,會發現使用此 macro 的部分在展開後其實等同定義一個無名的 xs 物件 (此物件在 stack 上)。因此 xs_new
做的事情是將 xs_literal_empty
中定義的物件的值複製給 x 指向的物件 (初始化),再更新 x 指向的 xs 物件的內容
#define xs_literal_empty() (xs) { .space_left = 15 }
xs *xs_new(xs *x, const void *p)
{
*x = xs_literal_empty();
size_t len = strlen(p) + 1;
if (len > 16) {
x->capacity = ilog2(len) + 1;
x->size = len - 1;
x->is_ptr = true;
x->ptr = malloc((size_t) 1 << x->capacity);
memcpy(x->ptr, p, len);
} else {
memcpy(x->data, p, len);
x->space_left = 15 - (len - 1);
}
return x;
}
接著觀察原本例題中的程式碼使用 xs 物件的方式
xs string = *xs_tmp(" foobarbar \n\n");
根據上面對xs_tmp
的討論,展開上述的表達
xs string = xs_new(&xs_literal_empty(), " foobarbar \n\n");
會發現這種使用的方式其實定義了兩個物件,一個是等號左方的 string,另一個是等號右方 xs_literal_empty
定義的無名物件,接著根據給定的 string literal 以 xs_new
初始化等號右方的物件,最後再將等號右方物件的值複製給等號左方的物件 string。
仔細想想會發現這樣根本多此一舉,應該直接定義一個物件然後直接用 xs_new
進行初始化就好,因此原本例題中的程式碼可以改成如下用法
int main()
{
xs *string = xs_tmp("\n foobarbar \n\n\n");
xs_trim(string, "\n ");
printf("[%s] : %2zu\n", xs_data(string), xs_size(string));
xs *prefix = xs_tmp("((("), *suffix = xs_tmp(")))");
xs_concat(string, prefix, suffix);
printf("[%s] : %2zu\n", xs_data(string), xs_size(string));
return 0;
}
xs_tmp
的位置定義的 (位於 stack)xs *xs_cpy(xs *dest, xs* src)
{
/* data on heap */
if (xs_is_ptr(src)) {
xs_grow(dest, xs_size(src) + 1);
memcpy(dest->ptr, src->ptr, xs_size(src) + 1);
dest->size = src->size;
} else { /* src data on stack */
if (xs_is_ptr(dest)) {
free(dest->ptr);
dest->is_ptr = false;
}
memcpy(dest->data, src->data, xs_size(src) + 1);
dest->space_left = src->space_left;
}
return dest;
}
src
字串在 stack (短字串),空間一定夠所以直接複製到 dest
dest
本來有動態配置記憶體,要釋放掉避免 memory leaksrc
字串在 heap,使用 xs_grow
來處理並確保 dest
有足夠的空間可以使用,再將 src
複製過去使用以下程式碼測試,結果符合預期
int main()
{
xs *src = xs_tmp("foobarbar");
xs *dest = xs_tmp("original");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
xs_cpy(dest, src);
printf("after xs_cpy(dest, src)\n");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("----\n");
xs *prefix = xs_tmp("I like "), *suffix = xs_tmp("!!!");
xs_concat(src, prefix, suffix);
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
xs_cpy(dest, src);
printf("after xs_cpy(dest, src)\n");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
return 0;
}
src: [foobarbar] size: 9
dest: [original] size: 8
after xs_cpy(dest, src)
src: [foobarbar] size: 9
dest: [foobarbar] size: 9
----
src: [I like foobarbar!!!] size: 19
dest: [foobarbar] size: 9
after xs_cpy(dest, src)
src: [I like foobarbar!!!] size: 19
dest: [I like foobarbar!!!] size: 19
這裡的參考 CS:APP 第 10 章 當中描述的 fork 後 child process 共用檔案的方式,將動態配置的記憶體前 4 bytes 用來記錄 reference count (refcnt)。
為了更簡易的存取 refcnt
,首先定義以下 3 個 macro 以及 2 個輔助函式 xs_is_ref
與 xs_cow
#define REFCNT_NUM(ptr) (*((int*)(ptr) - 1))
#define REFCNT_INCRE(ptr) (REFCNT_NUM(ptr)++)
#define REFCNT_DECRE(ptr) (REFCNT_NUM(ptr)--)
xs_is_ref
static inline size_t xs_is_ref(const xs *x)
{
if (xs_is_ptr(x)) {
if (REFCNT_NUM(x->ptr) > 1)
return true;
}
return false;
}
x->ptr
是否有被參照xs_cow
static void xs_cow(xs *x)
{
if (xs_is_ref(x)) {
REFCNT_DECRE(x->ptr);
xs_new(x, xs_data(x));
}
}
x->ptr
有被參照的話,另開一個新的空間給 x
xs_new
xs *xs_new(xs *x, const void *p)
{
*x = xs_literal_empty();
size_t len = strlen(p) + 1;
if (len > 16) {
x->capacity = ilog2(len) + 1;
x->size = len - 1;
x->is_ptr = true;
- x->ptr = malloc((size_t) 1 << x->capacity);
+ x->ptr = malloc((size_t) 1 << x->capacity + 4); /* additional 4 bytes for refcnt */
+ x->ptr += 4; /* leading 4 bytes = refcnt */
+ REFCNT_NUM(x->ptr) = 1;
memcpy(x->ptr, p, len);
} else {
memcpy(x->data, p, len);
x->space_left = 15 - (len - 1);
}
return x;
}
refcnt
使用refcnt
數值為 1xs_grow
xs *xs_grow(xs *x, size_t len)
{
+ xs_cow(x);
if (len <= xs_capacity(x))
return x;
len = ilog2(len) + 1;
if (xs_is_ptr(x)) {
+ x->ptr -=4;
x->ptr = realloc(x->ptr, (size_t) 1 << len + 4);
+ x->ptr +=4;
} else {
char buf[16];
memcpy(buf, x->data, 16);
x->ptr = malloc((size_t) 1 << len + 4);
+ x->ptr -=4;
+ REFCNT_NUM(x->ptr) = 1;
memcpy(x->ptr, buf, 16);
}
x->is_ptr = true;
x->capacity = len;
return x;
}
refcnt
,因此 malloc
和 free
使用前後需要特別處理代入的指標xs_cow
xs_free
static inline xs *xs_free(xs *x)
{
if (xs_is_ptr(x)) {
- free(xs_data(x));
+ if (REFCNT_NUM(x->ptr) > 1) {
+ REFCNT_DECRE(x->ptr);
+ x->ptr = NULL;
+ } else {
+ x->ptr -= 4; /* leading 4 bytes = refcnt */
+ free(x->ptr);
+ }
}
return xs_newempty(x);
}
refcnt
大於 1 代表還有別的物件會參照此動態記憶體,因此只需將 refcnt
減 1refcnt
為 1 的時候代表無其他物件參照此動態記憶體,所以需要 free
xs_concat
xs *xs_concat(xs *string, const xs *prefix, const xs *suffix)
{
size_t pres = xs_size(prefix), sufs = xs_size(suffix),
size = xs_size(string), capacity = xs_capacity(string);
char *pre = xs_data(prefix), *suf = xs_data(suffix),
*data = xs_data(string);
+ xs_cow(string);
if (size + pres + sufs <= capacity) {
memmove(data + pres, data, size);
memcpy(data, pre, pres);
memcpy(data + pres + size, suf, sufs + 1);
- string->space_left = 15 - (size + pres + sufs);
} else {
xs tmps = xs_literal_empty();
xs_grow(&tmps, size + pres + sufs);
char *tmpdata = xs_data(&tmps);
memcpy(tmpdata + pres, data, size);
memcpy(tmpdata, pre, pres);
memcpy(tmpdata + pres + size, suf, sufs + 1);
xs_free(string);
*string = tmps;
- string->size = size + pres + sufs;
}
+ if (xs_is_ptr(string)) {
+ string->size = size + pres + sufs;
+ } else {
+ string->space_left = 15 - (size + pres + sufs);
+ }
return string;
}
xs_grow
來執行 copy on write 的相關處理size
中的邏輯漏洞
size + pres + sufs <= capacity
以及 xs_is_ptr(string)
,就會錯誤的去修改 string->space_left
xs_trim
xs *xs_trim(xs *x, const char *trimset)
{
if (!trimset[0])
return x;
+ xs_cow(x);
...
}
xs_grow
來執行 copy on write 的相關處理xs_cpy
xs *xs_cpy(xs *dest, xs* src)
{
xs_free(dest);
memcpy(dest->data, src->data, 16);
if (xs_is_ptr(src))
REFCNT_INCRE(src->ptr);
return dest;
}
dest
可能原本就有使用或是參照的動態記憶體,需先 xs_free
來處理dest->ptr
指向 src->ptr
,並將 refcnt
增加 1memcpy
複製整個 src
內容
xs_cpy
用以下程式碼驗證,結果正確
int main()
{
printf("---original---\n");
xs *src = xs_tmp("foobarbar");
xs *dest = xs_tmp("original");
xs *prefix = xs_tmp("@I like "), *suffix = xs_tmp("!!!");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("prefix: [%s] suffix: [%s]\n", xs_data(prefix), xs_data(suffix));
xs_concat(src, prefix, suffix);
printf("---after xs_concat(src, prefix, suffix)---\n");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
xs_cpy(dest, src);
printf("---after xs_cpy(dest, src)---\n");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("src refcnt: %d dest refcnt: %d\n", REFCNT_NUM(src->ptr), REFCNT_NUM(dest->ptr));
printf("src: %p\ndest: %p\n", src->ptr, dest->ptr);
xs_trim(dest, "!@");
printf("---after xs_trim(dest, \"@!\")---\n");
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("src: %p\ndest: %p\n", src->ptr, dest->ptr);
return 0;
}
---original---
src: [foobarbar] size: 9
dest: [original] size: 8
prefix: [@I like ] suffix: [!!!]
---after xs_concat(src, prefix, suffix)---
src: [@I like foobarbar!!!] size: 20
dest: [original] size: 8
---after xs_cpy(dest, src)---
src: [@I like foobarbar!!!] size: 20
dest: [@I like foobarbar!!!] size: 20
src refcnt: 2 dest refcnt: 2
src: 0x5600d4a8366c
dest: 0x5600d4a8366c
---after xs_trim(dest, "@!")---
src: [@I like foobarbar!!!] size: 20
dest: [I like foobarbar] size: 16
src: 0x5600d4a8366c
dest: 0x5600d4a83884
使用以下程式碼進行測試
#include <stdio.h>
#include <time.h>
int main()
{
int sample_size = 500;
xs *src = xs_tmp("foobarbar");
xs *prefix = xs_tmp("@I like "), *suffix = xs_tmp("!!!");
xs_concat(src, prefix, suffix); /* let string on heap */
xs dest[sample_size];
for (int i = 0; i < sample_size; i++)
dest[i] = xs_literal_empty();
struct timespec t_start, t_end;
for (int i = 1; i < (sample_size + 1); i++) {
clock_gettime(CLOCK_MONOTONIC, &t_start);
for (int n = 0; n < i; n++) {
xs_cpy(&dest[n], src);
}
clock_gettime(CLOCK_MONOTONIC, &t_end);
long long tt = (long long)(t_end.tv_sec * 1e9 + t_end.tv_nsec)
- (long long)(t_start.tv_sec * 1e9 + t_start.tv_nsec);
printf("%d %lld\n", i, tt);
}
return 0;
}
使用 gnuplot
繪製結果
接著以不同長度的字串進行測試
#include <stdio.h>
#include <time.h>
int main()
{
int max_length = 1000;
xs *src = xs_tmp("f");
xs *prefix = xs_tmp(""), *suffix = xs_tmp("o");
xs dest = xs_literal_empty();
struct timespec t_start, t_end;
for (int i = 1; i < (max_length + 1); i++) {
clock_gettime(CLOCK_MONOTONIC, &t_start);
xs_cpy(&dest, src);
clock_gettime(CLOCK_MONOTONIC, &t_end);
long long tt = (long long)(t_end.tv_sec * 1e9 + t_end.tv_nsec)
- (long long)(t_start.tv_sec * 1e9 + t_start.tv_nsec);
printf("%d %lld\n", i, tt);
xs_concat(src, prefix, suffix); /* increase src length by 1 byte */
xs_free(&dest);
}
return 0;
}
使用 gnuplot
繪製結果
以下列實驗程式碼進行實驗,將 src
的字串複製給總共 1000 項的 dest
int main()
{
int sample_size = 1000;
xs *src = xs_tmp("foobarbar");
xs *prefix = xs_tmp("@I like "), *suffix = xs_tmp("!!!");
xs_concat(src, prefix, suffix); /* let string on heap */
xs dest[sample_size];
for (int i = 0; i < sample_size; i++)
dest[i] = xs_literal_empty();
for (int i = 0; i < sample_size; i++)
xs_cpy(&dest[i], src);
for (int i = 0; i < sample_size; i++)
xs_free(&dest[i]);
return 0;
}
使用 valgrind
來分析 heap 的用量
$ valgrind --tool=massif --time-unit=B ./xs
以 ms_print massif.out.<pid>
觀察結果
xs_cpy w/o CoW
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B)
--------------------------------------------------------------------------------
31 40,040 40,040 32,032 8,008 0
xs_cpy w/ CoW
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B)
--------------------------------------------------------------------------------
1 56 56 36 20 0
使用以下程式碼進行實驗,第 15~18 行的目的是追加取用字串的次數
int main()
{
int sample_size = 1000;
xs *src = xs_tmp("foobarbar");
xs *prefix = xs_tmp("@I like "), *suffix = xs_tmp("!!!");
xs_concat(src, prefix, suffix); /* let string on heap */
xs dest[sample_size];
for (int i = 0; i < sample_size; i++)
dest[i] = xs_literal_empty();
for (int i = 0; i < sample_size; i++)
xs_cpy(&dest[i], src);
for (int i = 0; i < 20; i++) {
for (int i = 0; i < sample_size; i++)
printf("%s", xs_data(&dest[i]));
}
return 0;
}
用 perf
進行分析
$ sudo perf stat --repeat 5 -e cache-misses,cache-references,instructions,cycles ./xs_old
xs_cpy
w/ CoW Performance counter stats for './xs' (5 runs):
3,371 cache-misses # 7.211 % of all cache refs ( +- 9.76% )
46,751 cache-references ( +- 11.18% )
24,062,939 instructions # 1.78 insn per cycle
# 0.00 stalled cycles per insn ( +- 0.04% )
13,518,560 cycles ( +- 3.82% )
0.006644 +- 0.000514 seconds time elapsed ( +- 7.73% )
xs_cpy
w/o CoW Performance counter stats for './xs_old' (5 runs):
4,575 cache-misses # 8.177 % of all cache refs ( +- 12.67% )
55,947 cache-references ( +- 4.58% )
24,502,092 instructions # 1.85 insn per cycle
# 0.00 stalled cycles per insn ( +- 0.02% )
13,280,095 cycles ( +- 2.08% )
0.006589 +- 0.000263 seconds time elapsed ( +- 3.99% )
dest[i]->ptr
位置都一樣,但是每個 dest[i]
仍是獨立的變數,因此系統較難善用 locality
Performance counter stats for './xs' (5 runs):
525 cache-misses # 4.377 % of all cache refs ( +- 83.11% )
11,990 cache-references ( +- 1.41% )
706,297 instructions # 0.85 insn per cycle
# 0.00 stalled cycles per insn ( +- 0.83% )
826,928 cycles ( +- 2.04% )
0.0007727 +- 0.0000104 seconds time elapsed ( +- 1.35% )
Performance counter stats for './xs_old' (5 runs):
616 cache-misses # 4.864 % of all cache refs ( +- 91.30% )
12,669 cache-references ( +- 1.04% )
1,152,005 instructions # 1.13 insn per cycle
# 0.00 stalled cycles per insn ( +- 0.12% )
1,017,068 cycles ( +- 1.49% )
0.0008488 +- 0.0000158 seconds time elapsed ( +- 1.86% )
直接仿造 glibc 實作的程式碼 來實作
/* Reentrant xs string tokenizer */
char *xs_strtok_r(xs *x, const char *delim, char **save_ptr)
{
int src_flag = 0;
char *s = NULL;
char *end = NULL;
if (x == NULL) {
s = *save_ptr;
} else { /* use the source x */
xs_cow(x);
s = xs_data(x);
src_flag = 1;
}
if (*s == '\0') {
*save_ptr = s;
return NULL;
}
/* Scan leading delimiters */
s += strspn(s, delim);
if (*s == '\0') {
*save_ptr = s;
return NULL;
}
/* Find the end of the token */
end = s + strcspn(s, delim);
if (*end == '\0') {
*save_ptr = end;
return s;
}
/* Terminate the token and make *SAVE_PTR point past it */
*end = '\0';
*save_ptr = end + 1;
/* contents after the first tok is no longer accessible for x */
if (src_flag) {
if (xs_is_ptr(x)) {
x->size = (size_t) (end - xs_data(x));
} else {
x->space_left = 15 - (size_t) (end - xs_data(x));
}
}
return s;
}
char *xs_strtok(xs *x, const char *delim)
{
static char *old;
return xs_strtok_r(x, delim, &old);
}
xs_strtok_r
xs_strtok
xs_cow
處理xs_strtok
會將字串中符合 delim
的部分以 '\0'
替換,因此原先字串的內容從第一個 tok 以後的部分會被 '\0'
截斷
xs_strtok
時 (也就是代入的參數有 xs
物件),會將 src_flag
設置為 1 並且更新 xs
中紀錄的字串長度以下列程式碼進行檢驗
int main()
{
printf("---original---\n");
printf("---after xs_cpy(dest, src)---\n");
xs *src = xs_new(&xs_literal_empty(), "|foo|bar|||bar|bar!!!|||");
xs *dest = xs_tmp("original");
xs_cpy(dest, src);
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("src refcnt: %d dest refcnt: %d\n", REFCNT_NUM(src->ptr), REFCNT_NUM(dest->ptr));
printf("src: %p\ndest: %p\n", src->ptr, dest->ptr);
printf("---after xs_strtok(dest, \"|\")---\n");
printf("tok: %s\n", xs_strtok(dest, "|"));
printf("src: [%s] size: %2zu\n", xs_data(src), xs_size(src));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
printf("src refcnt: %d dest refcnt: %d\n", REFCNT_NUM(src->ptr), REFCNT_NUM(dest->ptr));
printf("src: %p\ndest: %p\n", src->ptr, dest->ptr);
for (int i = 0; i < 5; i++) {
printf("---after xs_strtok(NULL, \"|\")---\n");
printf("tok: %s\n", xs_strtok(NULL, "|"));
printf("dest: [%s] size: %2zu\n", xs_data(dest), xs_size(dest));
}
return 0;
}
結果正確
---original---
---after xs_cpy(dest, src)---
src: [|foo|bar|||bar|bar!!!|||] size: 24
dest: [|foo|bar|||bar|bar!!!|||] size: 24
src refcnt: 2 dest refcnt: 2
src: 0x558e6d756674
dest: 0x558e6d756674
---after xs_strtok(dest, "|")---
tok: foo
src: [|foo|bar|||bar|bar!!!|||] size: 24
dest: [|foo] size: 4
src refcnt: 1 dest refcnt: 1
src: 0x558e6d756674
dest: 0x558e6d756884
---after xs_strtok(NULL, "|")---
tok: bar
dest: [|foo] size: 4
---after xs_strtok(NULL, "|")---
tok: bar
dest: [|foo] size: 4
---after xs_strtok(NULL, "|")---
tok: bar!!!
dest: [|foo] size: 4
---after xs_strtok(NULL, "|")---
tok: (null)
dest: [|foo] size: 4
---after xs_strtok(NULL, "|")---
tok: (null)
dest: [|foo] size: 4
之前閱讀 CS:APP 第 10 章就有提到類似的用法,打算從這裡進行探討
linux 2020