# 2020q3 Homework3 (quiz3)
contributed by < [stonelin](https://github.com/StoneLin0708) >
###### tags: `sysprog2020`
# asr_i
```c
int asr_i(signed int m, unsigned int n)
{
```
Test shift behavior, coubld be one of following.
||binary||logical|meaning|
|-|-|-|-|-|
|-1|111...111|||
|-1>>1|111...111|<0|0||
|-1>>1|011...111|>0|1|need to be fixed|
```c
const int logical = (((int) -1) >> 1) > 0;
```
Set fix to 0xFFFF when fix is needed.
```c
unsigned int fixu = -(logical & (m < 0));
int fix = *(int *) &fixu;
```
||binary|
|-|-|
|input|1xxxxx|
|shifted n=3|0001xx|
|fix|111111|
|fix>>n|000111|
|fix^(fix>>n)|111000|
|result|1111xx|
```c
return (m >> n) | (fix ^ (fix >> n));
}
```
refs
* msvc
> The result of a right-shift of a signed negative number is implementation-dependent. Although the Microsoft C++ compiler uses the sign bit to fill vacated bit positions, there is no guarantee that other implementations also do so.
> -- <cite>[microsoft doc](https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=vs-2019#right-shifts)</cite>
### The result of arm32 confused me a lot, and I couldn't found a compiler work as expected.
* x86 vs arm32 (clang, gcc)
<iframe width="1000px" height="600px" src="https://godbolt.org/e#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAM1QDsCBlZAQwBtMQBGAFlICsupVs1qgA%2BhOSkAzpnbICeOpUy10AYVSsArgFtag1egAyeWpgByegEaZiXXgAdU0wktqad%2Bwc9eK6puZWurb2PDJymAruDATMxASeegacEfL%2BtLHxBIGWNnYOMnEJSd6p0sU5ZnkhBTwAlDKo2sTIHADkAKQATADMZsg6WADUnb3qFfioAHQIY9idAAwAgkvLZgTDzNLEYngQrsDm6MMbw7qkw9q0h8en9MO09WudAOwAQmvD38NoN5tnVioYB4FisUa9AAiwygEA29WGAFpOAj5vNhiiIdhhosxp8Vj8rjc8EdMCczlQ8AAPbQQ6GIiBAkFg0bdABsMN0EPUOPqz16%2BOWhIp1LpwwAVHCHuLUezKTS8V8fsRMAQWrROVj0U9Rq8edRRZ0AKzYg1UrW9bFPfmCt6Ql4rCmoVAQZ4fJXfFVq4gaxEATjRls4ipWdod6we1nirregsJXvVWx2ewg/tIKJDq1ekPajVYIHaRvapAM7UWxdQBZ50marUwrN6nGLBAL5b5pAA1iBupxpt0/b1uq8%2Bq9Fn7x71uK8hAXuMXS%2BXSJX2sXpCBFqQW2Xc6Q4LAYFADxAkGhdI48OwyBQIKfz5eUMBOItuqRKawCHY1xBrK3i9YzPEACeBZNqQp66KoBAAPK0KwwHbqQWC6CIwDsL%2BiF4Cq0QAG6YGuCGYFSUTaB%2BIHFhscjoaweDWMQQGaFgZGbsQeC6GRjQ0PQTBsBw4QCC%2BwiiCAEhiFI1HWGukCNKgjgZPhiJQb0q6RNEygQEYpQpKQRi5MEoQ%2BC4bh0JpBl%2BO4un5GEaRRBkWQlFoySCLI6QxJUFm1FZFTZCZ5RudUel1I0NYtG0XB5gWRYluhy5UgAHGyiJstwwzAMgyAYosfYwrghAkA2qTDJoZ4XnYDbdAi6jNr%2B7ZdkaG75u0c6kGxdVRQhy6ruum7VbuR4nqgxWXuQlC3iV9hpYJj6LIsqRvh%2BxBfj%2BCH/rQQFMeBkEwXB6FIShaEIfgWGKLh%2BGLoRxGke0oEUQ1i7iXRxCAQxHSgQQLFsVdO6cYwYK8bwAi9EIKHCZIQg0ZJrpLrJ7jyYpykuWpGkOWU2lqO5%2BmpL4RkeMjWlYxk6N1NZqmZJUPnE7ZflBJZTlk7jtPZITYRBbWoUojOhbztFBZxQlSW/JNGXTJlnDZfgRDEPllxFXepU9L0vQVVV27tggmDMFg9iQ7V9Wzlz7UFp1G5bm24XtN0xYtRuC4Vob3Uq718B7ogKADbLV4jW7Y0oPEujALF3QbnNn6UEti4rWtn1gQNEH0Ft8GLrtoj7YnmE2XgJ3oedyAkS95H0JRCH3fRGB58xrHsa%2BdA/TxhQCKkk0g6JYMSfA0nQ3QsNKRT7gqGo5M6f5NOY4ZGTk/j5lDx5TkqZT3n0%2BUs%2BuYzU8Y0U89eFpXkJEzYVNCFvFm5FNtLgWywAEoALIpWlwyxVlEA5RLUuFV7l75Ur9um6QasawU2sgFag1JqVs2qLg6jILqJsdzO2PK7Qadhho3jfgUX22FXiIgmihZEQcLzzUWuhCOj11ox02rBBOxYk6oQ6KnI6Gc8JZyIjnS610C63WLMXR6z0mJvQrp9Di1duJoT4iAXgjcRJiXBm3KGckCwKW7s5dOiN%2B4L1RiYVeRMJ7GVUVo2gu9F4I1JhvRyBilFGJ3hozydNN4MwsdTae7Ngp1j3g1Y%2B3N2hoIwcwAWKEhaZUWGLXKkt5YFRlmNBs3BP7QNVurTWlBGg6w5k1E%2BEC1zG2qmbC2zVAHW3ccrb%2BXZXj31HGyYcrw2T9mmosNkRoObdxSXbaJmT9bgMaRk0guEFq924EAA"></iframe>
# isPowerOfFour
```c
bool isPowerOfFour(int num)
{
return num > 0 &&
```
Check there is only one bit is set.
|||
|-|-|
|a|0100|
|a-1|0011|
|a&(a-1)|0000|
|b|1100|
|b-1|1011|
|b&(b-1)|1000|
```c
(num & (num - 1))==0 &&
```
Check trailing zero is even to detect 4's power.
|||trailing zeros|
|-|-|-|
|1|0 00 01|0|
|4|0 01 **00**|2|
|16|1 **00 00**|4|
```c
!(__builtin_ctz(num) & 0x1);
// equal to __builtin_ctz(num) % 2 == 0
}
```
# numberOfSteps
```c
int numberOfSteps (int num)
{
return num ? __builtin_popcount(num) + 31 - __builtin_clz(num) : 0;
}
```
The original problem can be convert to `count number of 1 twice` plus `number of zero` in binary representation of `num` and MSB only count as once, so minus result for 1. Therefor
$result(num)=count\_one(num)+count\_zero(num)-1$
for 32 bit integer in c
$result(num)=popcount(num)+32-clz(num)-1$
$result(num)=popcount(num)+31-clz(num)$
# gcd64
```c
uint64_t gcd64(uint64_t u, uint64_t v) {
if (!u || !v) return u | v;
int shift;
for (shift = 0; !((u | v) & 1); shift++) {
u /= 2, v /= 2;
}
while (!(u & 1))
u /= 2;
do {
while (!(v & 1))
v /= 2;
if (u < v) {
v -= u;
} else {
uint64_t t = u - v;
u = v;
v = t;
}
} while (v);
return u<<shift;
}
```
### Improved version
```c
uint64_t gcd64(uint64_t u, uint64_t v) {
if (!u || !v) return u | v;
```
Shift `u` and `v` without loop (assume CPU supports LZCNT instruction).
$O(bits(int))$ -> $O(1)$
```c
int uz = __builtin_ctz(u);
int vz = __builtin_ctz(v);
int shift = uz > vz ? vz : uz;
u >>= shift;
v >>= shift;
```
```c
while (!(u & 1))
u /= 2;
do {
while (!(v & 1))
v /= 2;
if (u < v) {
v -= u;
} else {
uint64_t t = u - v;
u = v;
v = t;
}
} while (v);
return u<<shift;
}
```
# bit array
```c
size_t naive(uint64_t *bitmap, size_t bitmapsize, uint32_t *out)
{
size_t pos = 0;
for (size_t k = 0; k < bitmapsize; ++k) {
uint64_t bitset = bitmap[k];
size_t p = k * 64;
for (int i = 0; i < 64; i++) {
if ((bitset >> i) & 0x1)
out[pos++] = p + i;
}
}
return pos;
}
```
```c
size_t improved(uint64_t *bitmap, size_t bitmapsize, uint32_t *out)
{
size_t pos = 0;
uint64_t bitset;
for (size_t k = 0; k < bitmapsize; ++k) {
bitset = bitmap[k];
while (bitset != 0) {
uint64_t t = KKK;
int r = __builtin_ctzll(bitset);
out[pos++] = k * 64 + r;
bitset ^= t;
}
}
return pos;
}
```
The naive version is much easy to understand, the function output the position of every set bit.