Firedrake Offloading TODO

# Firedrake Offloading TODO

## Tests
- [ ] Mixed FS
  - Example:

<details>

```python
from firedrake import *
from pyop2.backends.cpu import cpu_backend
import firedrake_configuration

# ~ AVAILABLE_BACKENDS = [cpu_backend]
firedrake_config = firedrake_configuration.get_config()
if firedrake_config["options"].get("cuda"):
    from pyop2.backends.cuda import cuda_backend
    # ~ AVAILABLE_BACKENDS.append(cuda_backend)
    offloading_backend = cuda_backend
elif firedrake_config["options"].get("opencl"):
    from pyop2.backends.opencl import opencl_backend
    # ~ AVAILABLE_BACKENDS.append(opencl_backend)
    offloading_backend = opencl_backend
else:
    offloading_backend = cpu_backend

set_offloading_backend(offloading_backend)

baseN = 4
nref = 3
degree = 1
mesh = UnitCubeMesh(baseN, baseN, baseN)
hierarchy = MeshHierarchy(mesh, nref)
mesh = hierarchy[-1]

# LBB stable FEM pair?
Sigma = FunctionSpace(mesh, "RT", degree)
V = FunctionSpace(mesh, "DG", degree - 1)
W = Sigma * V

sigma, u = TrialFunctions(W)
tau, v = TestFunctions(W)

x, y, z = SpatialCoordinate(mesh)
# ~ f = 10*exp(-100*((x - 0.5)**2 + (y - 0.5)**2))
simplerhs = True
if simplerhs:
    # Very simple RHS
    k = [Constant(1.0), Constant(1.0), Constant(1.0)]
    exact = sin(k[0]*pi*x)*sin(k[1]*pi*y)*sin(k[2]*pi*z)
    f = ((k[0]**2 + k[1]**2 + k[2]**2)*(pi**2))*exact
else:
    # Less simple RHS
    a = Constant(1)
    b = Constant(2)
    exact = sin(pi*x)*tan(pi*x/4)*sin(a*pi*y)*sin(b*pi*z)
    f = -pi**2 / 2
    f *= 2*cos(pi*x) - cos(pi*x/2) - 2*(a**2 + b**2)*sin(pi*x)*tan(pi*x/4)
    f *= sin(a*pi*y)*sin(b*pi*z)

w_h = Function(W)

solver_parameters = {
    "pc_type": "none",
    "ksp_type": "minres",
    "ksp_monitor": None
}

# NOTE(review): indentation was lost in these notes; the extent of the
# offloading block below is reconstructed -- confirm.
with offloading():
    a = assemble(dot(sigma, tau)*dx + div(tau)*u*dx + div(sigma)*v*dx, mat_type="matfree")
    L = assemble(-f*v*dx)
    solve(a, w_h, L, solver_parameters=solver_parameters)
    sigma_h, u_h = w_h.subfunctions
    print(f"Norm on GPU: {errornorm(exact, u_h)}")

sigma_h, u_h = w_h.subfunctions
print(f"Norm on CPU: {errornorm(exact, u_h)}")
```

</details>

- [ ] Dirichlet BCs
  - Example:

<details>

```python
from firedrake import *
from pyop2.backends.cpu import cpu_backend
import firedrake_configuration

# ~ AVAILABLE_BACKENDS = [cpu_backend]
firedrake_config = firedrake_configuration.get_config()
if firedrake_config["options"].get("cuda"):
    from pyop2.backends.cuda import cuda_backend
    # ~ AVAILABLE_BACKENDS.append(cuda_backend)
    offloading_backend = cuda_backend
elif firedrake_config["options"].get("opencl"):
    from pyop2.backends.opencl import opencl_backend
    # ~ AVAILABLE_BACKENDS.append(opencl_backend)
    offloading_backend = opencl_backend
else:
    offloading_backend = cpu_backend

set_offloading_backend(offloading_backend)

baseN = 4
nref = 2
degree = 1
mesh = BoxMesh(baseN, baseN, baseN, 1, 1, 1)
hierarchy = MeshHierarchy(mesh, nref)
mesh = hierarchy[-1]

V = FunctionSpace(mesh, "CG", degree)
dofs = V.dim()
print('DOFs', dofs)

u = TrialFunction(V)
v = TestFunction(V)

bcs = DirichletBC(V, zero(), (1, 2, 3, 4, 5, 6))

x, y, z = SpatialCoordinate(mesh)
simplerhs = True
if simplerhs:
    # Very simple RHS
    k = [Constant(1.0), Constant(1.0), Constant(1.0)]
    exact = sin(k[0]*pi*x)*sin(k[1]*pi*y)*sin(k[2]*pi*z)
    f = ((k[0]**2 + k[1]**2 + k[2]**2)*(pi**2))*exact
else:
    # Less simple RHS
    a = Constant(1)
    b = Constant(2)
    exact = sin(pi*x)*tan(pi*x/4)*sin(a*pi*y)*sin(b*pi*z)
    f = -pi**2 / 2
    f *= 2*cos(pi*x) - cos(pi*x/2) - 2*(a**2 + b**2)*sin(pi*x)*tan(pi*x/4)
    f *= sin(a*pi*y)*sin(b*pi*z)

u_h = Function(V)

solver_parameters = {
    "pc_type": "none",
    "ksp_type": "cg",
    "ksp_monitor": None
}

a = assemble(dot(grad(u), grad(v))*dx, bcs=bcs, mat_type="matfree")

# NOTE(review): indentation was lost in these notes; the extent of the
# offloading block below is reconstructed -- confirm.
with offloading():
    L = assemble(f*v*dx)
    solve(a, u_h, L, solver_parameters=solver_parameters)
    print(f"Norm on GPU: {errornorm(exact, u_h)}")

print(f"Norm on CPU: {errornorm(exact, u_h)}")
```

</details>

- [ ] Parallel with multiple GPUs

## Suggestions in PR
- [ ] Use `compile_on_disk` that returns a callable object with the grid sizes baked in.
  (Connor: this overlaps a lot with the big `MirroredArray` refactor so I'd like to implement this after that's done)
- [ ] Mirrored Arrays
  - [x] petscvec state management
  - [x] `MirroredArray.data` SHOULD return backend-relevant ndarray.
    - Maybe lazy allocation of the (Connor: huh?)
- [ ] Kaushik: get this into the `gpu` branch and passing tests for the different backends

## Backburner
- [ ] Additional copies in `ImplicitMatrix.mult`
- [ ] GMG prolongation
- [ ] Test suite modification for GPUs (Connor)