```cpp=
#include <cassert>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <omp.h>
#include <random>
#include <vector>
using Matrix = std::vector<std::vector<double>>;
void usage(const char *name) {
std::cout << "usage: " << name << " matrix_size n_workers\n";
exit(-1);
}
void print_matrix(const Matrix &mat) {
std::cout << std::setprecision(4) << std::fixed;
int n = mat.size();
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
std::cout << mat[i][j] << (j == n - 1 ? '\n' : ' ');
}
}
}
void initialize_random_matrix(Matrix &mat) {
std::random_device rd; // Will be used to obtain a seed for the random number engine
std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
std::uniform_real_distribution<> dis(0.0, 1.0);
for (auto &row : mat) {
for (auto &elem : row) {
elem = dis(gen);
}
}
}
Matrix multiply(const Matrix &A, const Matrix &B) {
int m1 = A.size(), m2 = B.size();
assert(m1 > 0 && m2 > 0);
int n1 = A[0].size(), n2 = B[0].size();
assert(n1 > 0 && n2 > 0 && n1 == m2);
Matrix C(m1, std::vector<double>(n2));
for (int i = 0; i < m1; ++i) {
for (int j = 0; j < n2; ++j) {
for (int k = 0; k < n1; ++k) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
return C;
}
void lu_decomposition(Matrix A, Matrix &L, Matrix &U, std::vector<int> &p) {
int n = A.size();
for (int i = 0; i < n; ++i) {
L[i][i] = 1.0;
p[i] = i;
}
for (int k = 0; k < n; ++k) {
double pivot = 0.0;
int pivotIndex = -1;
for (int i = k; i < n; ++i) {
if (pivot < std::abs(A[i][k])) {
pivot = std::abs(A[i][k]);
pivotIndex = i;
}
}
if (pivot == 0.0) {
std::cout << "singular matrix!\n";
exit(-1);
}
if (pivotIndex != k) {
std::swap(p[k], p[pivotIndex]);
std::swap(A[k], A[pivotIndex]);
for (int j = 0; j < k; ++j) {
std::swap(L[k][j], L[pivotIndex][j]);
}
}
U[k][k] = A[k][k];
for (int i = k + 1; i < n; ++i) {
L[i][k] = A[i][k] / U[k][k];
U[k][i] = A[k][i];
}
for (int i = k + 1; i < n; ++i) {
for (int j = k + 1; j < n; ++j) {
A[i][j] -= L[i][k] * U[k][j];
}
}
}
}
double verify(const Matrix &A, const Matrix &L, const Matrix &U, const std::vector<int> &p) {
int n = A.size();
Matrix LU = multiply(L, U);
double residual = 0.0;
for (int j = 0; j < n; ++j) {
double col = 0.0;
for (int i = 0; i < n; ++i) {
col += pow(A[p[i]][j] - LU[i][j], 2);
}
residual += sqrt(col);
}
return residual;
}
int main(int argc, char **argv) {
const char *name = argv[0];
if (argc < 3) {
usage(name);
}
int matrix_size = atoi(argv[1]);
int n_workers = atoi(argv[2]);
std::cout << name << ": "
<< matrix_size << " " << n_workers
<< std::endl;
std::vector<double> row(matrix_size);
Matrix A(matrix_size, row);
initialize_random_matrix(A);
// std::cout << "A = " << '\n';
// print_matrix(A);
Matrix L(matrix_size, row);
Matrix U(matrix_size, row);
std::vector<int> p(matrix_size);
lu_decomposition(A, L, U, p);
// std::cout << "L = " << '\n';
// print_matrix(L);
// std::cout << "U = " << '\n';
// print_matrix(U);
double residual = verify(A, L, U, p);
std::cout << "residual = " << residual << '\n';
return 0;
}
```