# Back to top
# Full Kubernetes example
## This is the otel collector setup running on each node in agent mode
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-agent-conf
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-agent-conf
data:
  otel-agent-config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
          http:
            endpoint: ${env:MY_POD_IP}:4318
      filelog:
        exclude:
          - '**/*.gz'
          - '**/*.tmp'
        exclude_older_than: 24h
        include:
          - /var/log/pods/*/*/*.log
          - /var/log/kube-apiserver-audit.log
        start_at: end
        include_file_name: false
        include_file_path: true
        operators:
          - id: container-parser
            max_log_size: 102400
            type: container
        poll_interval: 200ms
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
      kubeletstats:
        collection_interval: 30s
        # NOTE(review): auth_type none + insecure_skip_verify implies the kubelet
        # read-only port (10255) — confirm it is enabled on the nodes.
        auth_type: none
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
    processors:
      batch:
      memory_limiter:
        check_interval: 10s
        limit_percentage: 80
        spike_limit_percentage: 15
      resource/pod:
        attributes:
          - action: upsert
            key: agent.source
            value: k8s-otel-agent
      # do minimal enrichment so gateway/standalone collector can pick up
      k8sattributes:
        passthrough: true
    # agent will just forward to the gateway/standalone deployment for further processing
    exporters:
      otlp:
        endpoint: "otel-collector.otel-k8s-monitoring.svc.cluster.local:4317"
        tls:
          insecure: true
        sending_queue:
          num_consumers: 4
          queue_size: 100
        retry_on_failure:
          enabled: true
    service:
      telemetry:
        metrics:
        logs:
          processors:
            - batch:
                exporter:
                  otlp:
                    protocol: grpc
                    endpoint: http://${env:MY_POD_IP}:4317
      pipelines:
        # Processor order fixed: memory_limiter first, k8sattributes before batch
        # (connection-based enrichment needs pre-batch context — same rule the
        # gateway config documents), batch last as recommended upstream.
        metrics:
          receivers: [otlp, kubeletstats]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]
        logs:
          receivers: [otlp, filelog]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]
        traces:
          receivers: [otlp]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: otel-agent
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-agent
spec:
  selector:
    matchLabels:
      app: opentelemetry
      component: otel-agent
  template:
    metadata:
      labels:
        app: opentelemetry
        component: otel-agent
    spec:
      shareProcessNamespace: true
      containers:
        - command:
            - "/otelcol-k8s"
            - "--config=/conf/otel-agent-config.yaml"
          # NOTE(review): pin a specific collector version instead of :latest to
          # avoid unreviewed upgrades on every node restart.
          image: otel/opentelemetry-collector-k8s:latest
          name: otel-agent
          resources:
            limits:
              cpu: 500m
              memory: 500Mi
            requests:
              cpu: 100m
              memory: 100Mi
          env:
            - name: MY_POD_IP
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
            # Fixed: was 1600MiB, which exceeds the 500Mi container memory limit
            # and would make the Go soft limit useless. ~80% of the limit is the
            # conventional setting.
            - name: GOMEMLIMIT
              value: 400MiB
          ports:
            - containerPort: 8888 # prom metrics
            - containerPort: 10255 # kubeletstats
          volumeMounts:
            - name: otel-agent-config-vol
              mountPath: /conf
            - name: varlogpods
              mountPath: /var/log/pods
              readOnly: true
            # container logs are normally symlinked to here, hence we need this too
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
      volumes:
        - name: varlogpods
          hostPath:
            path: /var/log/pods
            type: Directory
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
            type: Directory
        - configMap:
            name: otel-agent-conf
            items:
              - key: otel-agent-config
                path: otel-agent-config.yaml
          name: otel-agent-config-vol
## This is the otel collector setup running standalone/as gateway
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-collector-conf
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-collector-conf
data:
  otel-collector-config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      prometheus:
        config:
          scrape_configs:
            - honor_labels: true
              job_name: pod-metrics
              kubernetes_sd_configs:
                - role: pod
              scrape_interval: 60s
              relabel_configs:
                # regex values quoted: Prometheus expects a string here and an
                # unquoted `true` is a YAML boolean.
                - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
                  action: keep
                  regex: "true"
                - source_labels:
                    [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
                  action: drop
                  regex: "true"
                - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
                  action: replace
                  regex: (https?)
                  target_label: __scheme__
                - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
                  action: replace
                  target_label: __metrics_path__
                  regex: (.+)
                - source_labels:
                    [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
                  action: replace
                  regex: ([^:]+)(?::\d+)?;(\d+)
                  # NOTE: otel collector uses env var replacement. $$ is used as a literal $.
                  replacement: $$1:$$2
                  target_label: __address__
                - action: labelmap
                  regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
                  replacement: __param_$$1
                - action: labelmap
                  regex: __meta_kubernetes_pod_label_(.+)
    processors:
      memory_limiter:
        check_interval: 10s
        limit_percentage: 80
        spike_limit_percentage: 15
      batch/observe/metrics:
        send_batch_max_size: 20480
        send_batch_size: 16384
        timeout: 1s
      batch/observe/logs:
        send_batch_max_size: 4096
        send_batch_size: 4096
        timeout: 5s
      batch/observe/traces:
        send_batch_max_size: 4096
        send_batch_size: 4096
        timeout: 5s
      k8sattributes:
        extract:
          metadata:
            - k8s.namespace.name
            - k8s.deployment.name
            - k8s.replicaset.name
            - k8s.statefulset.name
            - k8s.daemonset.name
            - k8s.cronjob.name
            - k8s.job.name
            - k8s.node.name
            - k8s.node.uid
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.cluster.uid
            - k8s.container.name
            - container.id
            - service.namespace
            - service.name
            - service.version
            - service.instance.id
          otel_annotations: true
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
      resource/observe_common:
        attributes:
          - key: deployment.environment.name
            action: upsert
            value: sandbox
          - key: k8s.cluster.name
            action: upsert
            value: sandbox
    exporters:
      # the collector k8s distribution doesn't have prometheus exporter,
      # so we only set up otel metrics and send prometheus there
      # NOTE(review): the bearer tokens below are hard-coded credentials committed
      # to VCS. Move them into a Kubernetes Secret and inject via env var
      # substitution (${env:OBSERVE_TOKEN}) instead.
      otlphttp/observe/metrics:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Metrics # Otel metrics go in a common "Metrics" package
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true
      otlphttp/observe/logs:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Kubernetes Explorer
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true
      otlphttp/observe/traces:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Tracing # Traces go in a shared Tracing package
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true
    service:
      telemetry:
        metrics:
        logs:
          processors:
            - batch:
                exporter:
                  otlp:
                    protocol: grpc
                    endpoint: http://localhost:4317
      pipelines:
        traces/observe:
          exporters: [otlphttp/observe/traces]
          receivers: [otlp]
          processors:
            - memory_limiter
            - k8sattributes # needs to be before batch to allow for connection enrichment
            - batch/observe/traces
            - resource/observe_common # to add Observe common attributes
        metrics/observe:
          exporters: [otlphttp/observe/metrics]
          receivers: [otlp, prometheus]
          processors:
            - memory_limiter
            - k8sattributes # needs to be before batch to allow for connection enrichment
            - batch/observe/metrics
            - resource/observe_common # to add Observe common attributes
        logs/observe:
          exporters: [otlphttp/observe/logs]
          receivers: [otlp]
          processors:
            - memory_limiter
            - k8sattributes # needs to be before batch to allow for connection enrichment
            - batch/observe/logs
            - resource/observe_common # to add Observe common attributes
---
apiVersion: v1
kind: Service
metadata:
  name: otel-collector
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-collector
spec:
  ports:
    - name: otlp-grpc # Default endpoint for OpenTelemetry gRPC receiver.
      port: 4317
      protocol: TCP
      targetPort: 4317
    - name: otlp-http # Default endpoint for OpenTelemetry HTTP receiver.
      port: 4318
      protocol: TCP
      targetPort: 4318
    - name: metrics # Default endpoint for querying metrics.
      port: 8888
  selector:
    component: otel-collector
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-collector
spec:
  selector:
    matchLabels:
      app: opentelemetry
      component: otel-collector
  minReadySeconds: 5
  progressDeadlineSeconds: 120
  replicas: 1 # in case of more replicas, prom receiver needs to be sharded!
  template:
    metadata:
      labels:
        app: opentelemetry
        component: otel-collector
    spec:
      serviceAccountName: otel-service-account
      containers:
        - command:
            - "/otelcol-k8s"
            - "--config=/conf/otel-collector-config.yaml"
          # NOTE(review): pin a specific collector version instead of :latest to
          # avoid unreviewed upgrades on pod restart.
          image: otel/opentelemetry-collector-k8s:latest
          name: otel-collector
          resources:
            limits:
              cpu: 1
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 400Mi
          ports:
            - containerPort: 4317 # Default endpoint for OpenTelemetry grpc receiver.
            - containerPort: 4318 # Default endpoint for OpenTelemetry http receiver.
            - containerPort: 8888 # Default endpoint for querying metrics.
          env:
            # ~80% of the 2Gi memory limit — the conventional Go soft limit.
            - name: GOMEMLIMIT
              value: 1600MiB
          volumeMounts:
            - name: otel-collector-config-vol
              mountPath: /conf
      volumes:
        - configMap:
            name: otel-collector-conf
            items:
              - key: otel-collector-config
                path: otel-collector-config.yaml
          name: otel-collector-config-vol
---
# Source: agent/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: otel-service-account
  namespace: otel-k8s-monitoring
---
# Source: agent/templates/cluster-role.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # ClusterRole is cluster-scoped; the original `namespace:` field is ignored by
  # the API server and has been dropped.
  name: otel-k8s-cluster-role-k8smonitoring
  labels:
    app.kubernetes.io/name: otel-cluster-role-binding-k8smonitoring
    app.kubernetes.io/instance: otel-agent-and-collector
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  # NOTE(review): '*' in apiGroups combined with resources: '*' grants read
  # access to every resource in the cluster (including Secrets). Consider
  # narrowing to what the k8sattributes/kubeletstats/prometheus components
  # actually need (pods, nodes, namespaces, replicasets, ...).
  - apiGroups:
      - ""
      - '*'
      - apps
      - authorization.k8s.io
      - autoscaling
      - batch
      - networking.k8s.io
      - events.k8s.io
      - rbac.authorization.k8s.io
      - storage.k8s.io
      - vpcresources.k8s.aws
    resources:
      - '*'
    verbs:
      - get
      - list
      - watch
---
# Source: agent/templates/cluster-role.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-k8s-cluster-role-binding
  labels:
    app.kubernetes.io/name: otel-cluster-role-binding-k8smonitoring
    app.kubernetes.io/instance: otel-agent-and-collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-k8s-cluster-role-k8smonitoring
subjects:
  - kind: ServiceAccount
    name: otel-service-account
    namespace: otel-k8s-monitoring