# build_k8s.sh error log

```
wangyzh@ai-dev-10-71-109-135:~/jax_dist_gpu$ sh ./build_k8s.sh -c 16 -m 16G -g 2 -n music-ai -j jax-pmap -p jax-pmap INFO:absl:Launching kubernetes job INFO:absl:Ensuring that pvc is created INFO:absl:PVC OK INFO:absl:Taking file-copier up INFO:absl:{ "apiVersion": "v1", "kind": "Pod", "metadata": { "name": "file-copier" }, "spec": { "containers": [ { "name": "main", "image": "nginx:stable-alpine", "imagePullPolicy": "IfNotPresent", "resources": { "requests": { "cpu": "0.1", "memory": "1Gi" }, "limits": { "cpu": "1", "memory": "1Gi" } }, "command": null, "args": null, "workingDir": "/jobs", "volumeMounts": [ { "name": "jobs-pvc", "mountPath": "/jobs" } ] } ], "restartPolicy": "Never", "volumes": [ { "name": "jobs-pvc", "persistentVolumeClaim": { "claimName": "jobs" } } ] } } INFO:absl:file-copier up INFO:absl:Make dir for job dev-jax-pmap-vd2jt INFO:absl:Done make dir INFO:absl:Packing files in /mnt/home/wangyzh/jax_dist_gpu INFO:absl:requirements.txt not found INFO:absl:Generating requirements.txt INFO:absl:larrypage==0.2.5 INFO:absl:absl-py==0.13.0 INFO:absl:Done packing INFO:absl:Building docker image from ./Dockerfile INFO:absl:Step 1/14 : FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 INFO:absl: INFO:absl:---> 99bf323cb1c2 INFO:absl:Step 2/14 : ARG DEBIAN_FRONTEND=noninteractive INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 1499690a2efc INFO:absl:Step 3/14 : RUN ln -sf /user/local/cuda-11.2 /usr/local/cuda INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 64c6bcb92c26 INFO:absl:Step 4/14 : RUN apt-get update && apt-get -y purge python3.8 && apt-get -y autoremove INFO:absl: INFO:absl:---> Using cache INFO:absl:---> efa075f11091 INFO:absl:Step 5/14 : RUN apt-get install -y python3.9 python3.9-distutils tmux ffmpeg libsm6 libxext6 libxrender-dev git vim build-essential g++ cmake zlib1g-dev libboost-all-dev libsdl2-dev libfreetype6-dev libgl1-mesa-dev libglu1-mesa-dev libpng-dev libjpeg-dev libbz2-dev libfluidsynth-dev 
libgme-dev libopenal-dev zlib1g-dev timidity tar nasm libgtk2.0-dev wget curl INFO:absl: INFO:absl:---> Using cache INFO:absl:---> aff422727d5a INFO:absl:Step 6/14 : RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 30e325833613 INFO:absl:Step 7/14 : RUN python3.9 get-pip.py INFO:absl: INFO:absl:---> Using cache INFO:absl:---> ad31d4d16182 INFO:absl:Step 8/14 : RUN pip3 install absl-py tabulate numpy>=1.20.2 dm-env>=1.2 dm-tree>=0.1.1 packaging>=20.9 tensorflow-datasets>=4.2.0 tensorflow_probability==0.11.0 tf-nightly ml_collections dm-haiku rlax bsuite optax dm-launchpad[reverb] atari-py gym[atari] dm-acme atari-py gym[atari] dm-sonnet viztracer cupy-cuda112 INFO:absl: INFO:absl:---> Using cache INFO:absl:---> b550c5f0642b INFO:absl:Step 9/14 : RUN pip3 install --upgrade pip INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 66460791f26c INFO:absl:Step 10/14 : RUN pip3 install --upgrade jax INFO:absl: INFO:absl:---> Using cache INFO:absl:---> f947a408b7d7 INFO:absl:Step 11/14 : RUN wget https://storage.googleapis.com/jax-temp-releases/jaxlib-0.1.71%2Bcuda111-cp39-none-manylinux2010_x86_64.whl INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 220e18a1b301 INFO:absl:Step 12/14 : RUN pip3 install jaxlib-0.1.71+cuda111-cp39-none-manylinux2010_x86_64.whl INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 6da6f0a2816e INFO:absl:Step 13/14 : RUN pip3 install --upgrade --force-reinstall git+https://github.com/deepmind/dm-haiku INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 18e0d7642828 INFO:absl:Step 14/14 : COPY ./daydream.py . 
INFO:absl: INFO:absl:---> Using cache INFO:absl:---> 501c9728bd94 INFO:absl:Successfully built 501c9728bd94 INFO:absl:Tagging image as harbor.seacloud.garenanow.com/music-ai/dev-jax-pmap-vd2jt:20210831-141848 INFO:absl:Pusing harbor.seacloud.garenanow.com/music-ai/dev-jax-pmap-vd2jt:20210831-141848 INFO:absl:Docker image harbor.seacloud.garenanow.com/music-ai/dev-jax-pmap-vd2jt:20210831-141848 pushed INFO:absl:Sending files to k8s job dir INFO:absl:. INFO:absl:Dockerfile INFO:absl:build_k8s.sh INFO:absl:daydream.py INFO:absl:Done sending INFO:absl:apiVersion: batch/v1 kind: Job metadata: name: dev-jax-pmap-vd2jt spec: backoffLimit: 0 template: spec: containers: - args: - python3 daydream.py --expire_seconds=14400 command: - sh - -c image: harbor.seacloud.garenanow.com/music-ai/dev-jax-pmap-vd2jt:20210831-141848 imagePullPolicy: IfNotPresent name: main resources: limits: cpu: '16' memory: 16G nvidia.com/gpu: 2 requests: cpu: '16' memory: 16G nvidia.com/gpu: 2 volumeMounts: - mountPath: /jobs name: jobs-pvc workingDir: /jobs/dev-jax-pmap-vd2jt imagePullSecrets: - name: regcred priorityClassName: high restartPolicy: Never volumes: - name: jobs-pvc persistentVolumeClaim: claimName: jobs INFO:absl:Job dev-jax-pmap-vd2jt created job is dev-jax-pmap-vd2jt pod is dev-jax-pmap-vd2jt-x4v4b Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod 
dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running Waiting for pod dev-jax-pmap-vd2jt-x4v4b to be running
```