Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions charts/templates/compute-worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,51 @@ spec:
- bash
- -c
- >
watchmedo auto-restart -p '*.py' --recursive -- celery -A compute_worker worker -l info -Q compute-worker -n compute-worker{{ if not $isDefault }}-{{ .name }}{{ end }}@%n
workingDir: /app
celery -A compute_worker worker -l info -Q compute-worker -n compute-worker{{ if not $isDefault }}-{{ .name }}{{ end }}@%n
workingDir: /
env:
- name: CONTAINER_ENGINE_EXECUTABLE
value: '{{ $.Values.compute_worker.container_engine_executable }}'
- name: USE_GPU
value: '{{ .gpu.enabled }}'
- name: RESOURCE_LIMITS
value: '{{ toJson .gpu.resourceLimits }}'
- name: NODE_SELECTOR
value: '{{ toJson .gpu.nodeSelector }}'
- name: TOTAL_TIME_TO_WAIT_FOR_POD
value: '{{ $.Values.compute_worker.podCreationRetries.totalTimeToWaitForPod }}'
- name: SLEEP_TIME_BETWEEN_RETRIES
value: '{{ $.Values.compute_worker.podCreationRetries.sleepTimeBetweenRetries }}'
- name: USERID
value: '{{ $.Values.compute_worker.submissionPods.securityContext.runAsUser }}'
- name: GROUPID
value: '{{ $.Values.compute_worker.submissionPods.securityContext.runAsGroup }}'
- name: FSGROUP
value: '{{ $.Values.compute_worker.submissionPods.securityContext.fsGroup }}'
- name: COMPUTE_WORKER_LABELS
value: '{{ toJson $.Values.compute_worker.submissionPods.metadata.labels }}'
- name: CURRENT_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SHARED_JOB_PVC
value: '{{ $.Values.sharedJob.pvcName }}'
- name: KUBERNETES_VERIFY_SSL
value: '{{ $.Values.compute_worker.kubernetesVerifySsl }}'
- name: BROKER_URL
value: "{{ if .url }}{{ .url }}{{ else }}pyamqp://{{ $.Values.env.RABBITMQ_DEFAULT_USER }}:{{ $.Values.env.RABBITMQ_DEFAULT_PASS }}@{{ $.Values.env.RABBITMQ_HOST }}:{{ $.Values.env.RABBITMQ_PORT }}//{{ end }}"
- name: CODALAB_IGNORE_CLEANUP_STEP
value: "1"
value: '{{ $.Values.compute_worker.codalabIgnoreCleanupStep }}'
{{- range $key, $value := $.Values.env }}
- name: {{ $key }}
value: "{{ $value }}"
{{- end }}
resources:
{{- toYaml $.Values.compute_worker.resources | nindent 12 }}
volumeMounts:
- name: docker-socket
mountPath: /var/run/docker.sock
- name: codabench-storage
mountPath: /codabench
volumes:
- name: docker-socket
hostPath:
path: /var/run/docker.sock
type: Socket
- name: codabench-storage
persistentVolumeClaim:
claimName: {{ $.Values.compute_worker.volumes.pvcName }}
Expand Down
9 changes: 3 additions & 6 deletions charts/templates/compute-worker-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,12 @@ metadata:
name: compute-worker-role
namespace: {{ .Release.Namespace }}
rules:
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch", "delete"]
verbs: ["create", "get", "list", "watch", "delete", "deletecollection"]
- apiGroups: [""]
resources: ["pods/exec", "pods/log"]
verbs: ["create", "get", "list"]
resources: ["pods/log"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
Expand Down
9 changes: 7 additions & 2 deletions charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,26 @@ compute_worker:
repository: <compute-worker-image-repository>
tag: <compute-worker-image-tag>
pullPolicy: Always
container_engine_executable: "kubernetes"
kubernetesVerifySsl: "false"
podCreationRetries:
numberOfRetries: 30
sleepTimeBetweenRetries: 10
totalTimeToWaitForPod: 300
sleepTimeBetweenRetries: 0.5
submissionPods:
securityContext:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
metadata:
labels: {}
resources:
requests:
memory: 256Mi
limits:
memory: 512Mi
volumes:
pvcName: shared-job-pvc
codalabIgnoreCleanupStep: "0"
brokers:
- name: "default"
gpu:
Expand Down
Loading