changed 7 years ago
Published Linked with GitHub

Go Slices Slide

tags: Golang, 簡報

Agenda

  • 起因
  • slice in GO
  • 回到問題上(起因)
  • 延伸閱讀 - slice 上的 "gotcha"
  • 延伸閱讀 - slice 當參數傳遞
  • 延伸閱讀 - Method receivers
  • 延伸閱讀 - slice 相關豆知識

起因


看到了一篇 medium

Go’s append is not always thread safe


這邊舉了兩個例子,在使用 goroutine 對同一個 slice append 會發生 race 的情況

p.s. 使用 go test -race 可以檢查是否會發生 race


範例一

package main

import (
	"sync"
	"testing"
)

// TestAppend has two goroutines append to the same slice x at the same time.
//
// x is built from a one-element literal, so it has no spare capacity: each
// append has to allocate its own, larger backing array and copy "start" into
// it. The goroutines therefore only read x and write to memory they own,
// which is why `go test -race` reports nothing for this version.
func TestAppend(t *testing.T) {
	x := []string{"start"}

	wg := sync.WaitGroup{}
	wg.Add(2)
	go func() {
		defer wg.Done()
		y := append(x, "hello", "world")
		t.Log(cap(y), len(y))
	}()
	go func() {
		defer wg.Done()
		z := append(x, "goodbye", "bob")
		t.Log(cap(z), len(z))
	}()
	wg.Wait()
}

結果

> go test -race .
ok  	_/Users/kais.lin/go/goTest/slice	1.015s

> go test -v
z=> [start goodbye bob]
y=> [start hello world]
[start]
--- PASS: TestAppend (0.00s)
	app_test.go:24: 3
	app_test.go:18: 3
PASS
ok  	_/Users/kais.lin/go/goTest/slice	0.006s

範例二

package main

import (
	"sync"
	"testing"
)

// TestAppend is the racy variant of the experiment and is racy on purpose.
//
// x is created with length 0 and capacity 6, so both appends fit into the
// existing backing array. Each goroutine writes its two strings into the same
// first two elements of that array, which is the pair of conflicting writes
// that `go test -race` reports.
func TestAppend(t *testing.T) {
	x := make([]string, 0, 6)

	wg := sync.WaitGroup{}
	wg.Add(2)
	go func() {
		defer wg.Done()
		y := append(x, "hello", "world") // writes into x's backing array
		t.Log(len(y))
	}()
	go func() {
		defer wg.Done()
		z := append(x, "goodbye", "bob") // writes into the same elements
		t.Log(len(z))
	}()
	wg.Wait()
}

結果

> go test -race .

==================

WARNING: DATA RACE

Write at 0x00c4200be060 by goroutine 8:

_/tmp.TestAppend.func2()

/tmp/main_test.go:20 +0xcb

Previous write at 0x00c4200be060 by goroutine 7:

_/tmp.TestAppend.func1()

/tmp/main_test.go:15 +0xcb

Goroutine 8 (running) created at:

_/tmp.TestAppend()

/tmp/main_test.go:18 +0x14f

testing.tRunner()

/usr/local/Cellar/go/1.10.2/libexec/src/testing/testing.go:777 +0x16d

Goroutine 7 (running) created at:

_/tmp.TestAppend()

/tmp/main_test.go:13 +0x105

testing.tRunner()

/usr/local/Cellar/go/1.10.2/libexec/src/testing/testing.go:777 +0x16d

==================

==================

WARNING: DATA RACE

Write at 0x00c4200be070 by goroutine 8:

_/tmp.TestAppend.func2()

/tmp/main_test.go:20 +0x11a

Previous write at 0x00c4200be070 by goroutine 7:

_/tmp.TestAppend.func1()

/tmp/main_test.go:15 +0x11a

Goroutine 8 (running) created at:

_/tmp.TestAppend()

/tmp/main_test.go:18 +0x14f

testing.tRunner()

/usr/local/Cellar/go/1.10.2/libexec/src/testing/testing.go:777 +0x16d

Goroutine 7 (finished) created at:

_/tmp.TestAppend()

/tmp/main_test.go:13 +0x105

testing.tRunner()

/usr/local/Cellar/go/1.10.2/libexec/src/testing/testing.go:777 +0x16d

==================

--- FAIL: TestAppend (0.00s)

main_test.go:16: 2

main_test.go:21: 2

testing.go:730: race detected during execution of test

FAIL

FAIL _/tmp 0.901s

> go test -v
=== RUN   TestAppend
z=> [hello world]
y=> [hello world]
[]
--- PASS: TestAppend (0.00s)
	app_test.go:24: 2
	app_test.go:18: 2
PASS
ok  	_/Users/kais.lin/go/goTest/slice	0.005s

SO WHY?


slice in GO


要瞭解 slice 前必須先了解 array

The slice type is an abstraction built on top of Go’s array type, and so to understand slices we must first understand arrays.


Array in GO

Array 宣告時會指定長度和元素類型,且不需另外初始化,每個元素的預設值為該型別的零值(int 為 0)
var a [4]int
a[0] = 1
i := a[0]
// i == 1
a[2] == 0
// true, the zero value of the int type
在記憶體中會長這樣


那 slice 呢?

為了可以更彈性的利用 Array,所以 slice 長這樣:
// sliceHeader is a simplified model of the small descriptor that a slice
// value carries; the elements themselves live in a separate array.
type sliceHeader struct {
  // Length is the number of elements currently in the slice.
  Length        int
  // Capacity is how far the slice can grow before a new array has to be
  // allocated.
  Capacity      int
  // ZerothElement points at the slice's first element inside the
  // underlying array.
  ZerothElement *byte
}
- Length: slice 長度
- Capacity: slice 最多可以成長到的長度,也就是從 slice 的第一個元素算到底層 array 結尾的元素個數
- ZerothElement: 參考(指向)的底層 array

所以當你從 slice 擷取部分值時:

s = s[2:4]

[]byte 上到下分別為: ZerothElement, len, cap


所以對 slice append 值進去時,其實是寫進了底層 array,但如果底層 array 不夠大時怎麼辦?


// AppendByte returns slice with data appended to it.
//
// When the result still fits within cap(slice), the bytes are written into
// the existing backing array and the returned slice shares it with the
// argument. Otherwise a new array with capacity (newLen+1)*2 is allocated,
// the old contents are copied over, and the result no longer aliases the
// argument.
func AppendByte(slice []byte, data ...byte) []byte {
  oldLen := len(slice)
  newLen := oldLen + len(data)

  // Fast path: enough room already, just extend the view and fill it in.
  if newLen <= cap(slice) {
    slice = slice[:newLen]
    copy(slice[oldLen:], data)
    return slice
  }

  // Slow path: over-allocate so that later appends can reuse the array.
  grown := make([]byte, newLen, (newLen+1)*2)
  copy(grown, slice)
  copy(grown[oldLen:], data)
  return grown
}

回到問題上(起因)


回到範例一

...

x := []string{"start"}

...

go func() {
    ...
    y := append(x, "hello", "world")
    ...
}

go func() {
    ...
    z := append(x, "goodbye", "bob")
    ...
}



範例二

...

x := make([]string, 0, 6)

...

go func() {
    ...
    y := append(x, "hello", "world")
    ...
}

go func() {
    ...
    z := append(x, "goodbye", "bob")
    ...
}



作者提供的解決方法

package main

import (
        "sync"
        "testing"
)

// TestAppend is the race-free variant: each goroutine first copies the shared
// slice into a freshly made slice of its own and only appends to that copy,
// so the shared backing array is read but never written concurrently.
func TestAppend(t *testing.T) {
        shared := append(make([]string, 0, 6), "start")

        var wg sync.WaitGroup
        wg.Add(2)

        go func() {
                defer wg.Done()
                // Private copy with room for exactly the two extra strings.
                own := append(make([]string, 0, len(shared)+2), shared...)
                own = append(own, "hello", "world")
                t.Log(cap(own), len(own), own[0])
        }()

        go func() {
                defer wg.Done()
                // Private copy with room for exactly the two extra strings.
                own := append(make([]string, 0, len(shared)+2), shared...)
                own = append(own, "goodbye", "bob")
                t.Log(cap(own), len(own), own[0])
        }()

        wg.Wait()
}

延伸閱讀 - slice 上的 "gotcha"


還記得我們說過
slice 指向(參考)著底層的 array


所以如果存了一大堆資料進 slice 而最後只為了取其中一小部分值的話,就會發生:
var digitRegexp = regexp.MustCompile("[0-9]+")

// FindDigits reads the whole file named filename and returns the first run of
// decimal digits that digitRegexp matches in it.
//
// The result is a sub-slice of the file contents, so keeping it also keeps
// the rest of the file's data alive in the underlying array.
func FindDigits(filename string) []byte {
  b, _ := ioutil.ReadFile(filename) // b is a slice holding the entire file; the read error is discarded
  return digitRegexp.Find(b) // the match is returned as a slice of b, not as a copy
}
會導致其他的 data 依然存在底層 array 中~

解決方法

// CopyDigits reads the file named filename and returns a copy of the first
// run of decimal digits that digitRegexp matches in it.
//
// Because the match is copied into a slice of exactly its own size, the
// result does not reference the file contents, which can then be released.
// The read error is discarded.
func CopyDigits(filename string) []byte {
  contents, _ := ioutil.ReadFile(filename)
  match := digitRegexp.Find(contents)
  // Detach the match from contents by copying it into fresh storage.
  return append(make([]byte, 0, len(match)), match...)
}

延伸閱讀 - slice 當參數傳遞


// AddOneToEachElement increments every element of slice in place.
//
// The function receives a copy of the slice header, but that copy still
// points at the caller's array, so the caller sees the incremented values.
func AddOneToEachElement(slice []int) {
  for idx := 0; idx < len(slice); idx++ {
    slice[idx] += 1
  }
}

func main() {
  buffer := []int {0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10}
  slice := buffer[10:20]
  fmt.Println("slice before ", slice)
  // slice before  [0 1 2 3 4 5 6 7 8 9]
  fmt.Println("buffer before", buffer)
  // buffer before [0 0 0 0 0 0 0 0 0 0 0 1 2 3 4 5 6 7 8 9 10]
  AddOneToEachElement(slice)
  fmt.Println("slice after  ", slice)
  // slice after [1 2 3 4 5 6 7 8 9 10]
  fmt.Println("buffer after ", buffer)
  // buffer after  [0 0 0 0 0 0 0 0 0 0 1 2 3 4 5 6 7 8 9 10 10]
}
slice 本身確實是 passed by value(複製的是 slice header),但因為 slice header 內含指向底層 array 的指標,所以函式內對元素的修改會反映到原始參照的 array

延伸閱讀 - Method receivers


想將這功能改用 receivers 的方式實作

func main() {
  pathName := path("/usr/bin/tso") // Conversion from string to path.
  i := bytes.LastIndex(pathName, []byte("/"))
  if i >= 0 {
      pathName = (pathName)[0:i]
  }

  fmt.Printf("%s\n", pathName)
  // /usr/bin
}

It is idiomatic to use a pointer receiver for a method that modifies a slice.

// path is a file-system style path stored as a byte slice.
type path []byte

// TruncateAtFinalSlash cuts the path just before its last '/'.
// A path without any '/' is left unchanged.
//
// The receiver is a pointer so that the shortened slice header is stored
// back into the caller's variable.
func (p *path) TruncateAtFinalSlash() {
    cut := bytes.LastIndexByte(*p, '/')
    if cut < 0 {
        return
    }
    *p = (*p)[:cut]
}

func main() {
    pathName := path("/usr/bin/tso") // Conversion from string to path.
    pathName.TruncateAtFinalSlash()
    fmt.Printf("%s\n", pathName)
    // /usr/bin
}

但如果這邊改用 value receiver 的話

// path is a file-system style path stored as a byte slice.
type path []byte

// TruncateAtFinalSlash shortens its own copy of the path to end just before
// the last '/' and prints that copy.
//
// With a value receiver, p is a copy of the caller's slice header, so the
// re-slice below is visible only inside this method; the caller's path keeps
// its original length.
func (p path) TruncateAtFinalSlash() {
  if cut := bytes.LastIndexByte(p, '/'); cut >= 0 {
    p = p[:cut]
  }
  fmt.Printf("%s\n", p)
  // prints /usr/bin when called on "/usr/bin/tso"
}

func main() {
  pathName := path("/usr/bin/tso") // Conversion from string to path.
  pathName.TruncateAtFinalSlash()
  fmt.Printf("%s\n", pathName)
  // /usr/bin/tso
}

範例二: 要將 path 全轉大寫

// path is a file-system style path stored as a byte slice.
type path []byte

// ToUpper converts the ASCII lower-case letters of the path to upper case,
// in place.
//
// A value receiver is enough here: the copied slice header still points at
// the caller's array, and the elements are written through their index.
func (p path) ToUpper() {
  const caseGap = 'a' - 'A'
  for i := 0; i < len(p); i++ {
    if c := p[i]; c >= 'a' && c <= 'z' {
      p[i] = c - caseGap
    }
  }
}

func main() {
  pathName := path("/usr/bin/tso")
  pathName.ToUpper()
  fmt.Printf("%s\n", pathName)
  // /USR/BIN/TSO
}

// path is a file-system style path stored as a byte slice.
type path []byte

// ToUpper is meant to upper-case the ASCII letters of the path, but as written
// it changes nothing, which is the point of this demonstration: the loop
// assigns to b, the range loop's copy of each element, so the path itself is
// never written even though the receiver is a pointer. It then prints the
// (unchanged) path.
func (p *path) ToUpper() {
  // Working form, for comparison: for i, b := range *p {
  for _, b := range *p {
    if 'a' <= b && b <= 'z' {
      // Working form, for comparison: (*p)[i] = b + 'A' - 'a'
      // b is only a copy of the element, so this assignment never reaches *p.
      b = b + 'A' - 'a'
    }
  }
  fmt.Printf("%s\n", *p)
  // prints /usr/bin/tso when called on "/usr/bin/tso"
}

func main() {
  pathName := path("/usr/bin/tso")
  pathName.ToUpper()
  fmt.Printf("%s\n", pathName)
  // /usr/bin/tso
}

※ go 在 range 時拿到的遍歷值是元素的複本而不是元素本身;即使使用 `&遍歷值`,拿到的也只是迴圈變數的位址,而不是 slice 元素的位址

而範例二的重點我覺得是 range 的小細節,而非將重點放在 slice 上


延伸閱讀 - slice 相關豆知識


nil slice 與空 slice

當你只宣告一個 slice(例如 var a []int)而沒有 make 時,它是 nil slice,header 會是

sliceHeader{
    Length:        0,
    Capacity:      0,
    ZerothElement: nil,
}

var a []int
fmt.Println(a == nil)
// true

b := make([]int,0)
fmt.Println(b == nil)
// false

String 也是 slice

Strings are actually very simple: they are just read-only slices of bytes with a bit of extra syntactic support from the language.


所以你也可以直接做互相轉換

str := string(slice)

// reverse

slice := []byte(str)

這種 slice-like 的設計,可以輕鬆地建立子字串,並且更改子字串後不會影響父字串
a := "1234567890"
b := a[1:5]
fmt.Println(b)
// 2345
b = "54321"
fmt.Println(a)
// 1234567890
fmt.Println(b)
// 54321
Select a repo