# Full Kubernetes example

## This is the otel collector setup running on each node in agent mode
---
# Collector configuration for the per-node agent. The otel-agent DaemonSet
# mounts the `otel-agent-config` key as /conf/otel-agent-config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-agent-conf
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-agent-conf
data:
  otel-agent-config: |
    receivers:
      # OTLP listeners are bound to the pod IP only.
      otlp:
        protocols:
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
          http:
            endpoint: ${env:MY_POD_IP}:4318
      # Tails log files from the host.
      filelog:
        exclude:
        - '**/*.gz'
        - '**/*.tmp'
        exclude_older_than: 24h
        include:
        - /var/log/pods/*/*/*.log
        # NOTE(review): the otel-agent DaemonSet only mounts /var/log/pods and
        # /var/lib/docker/containers, so this path is presumably not visible
        # inside the container - confirm or add a matching hostPath mount.
        - /var/log/kube-apiserver-audit.log
        start_at: end
        include_file_name: false
        include_file_path: true
        operators:
        - id: container-parser
          max_log_size: 102400
          type: container
        poll_interval: 200ms
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
      # NOTE(review): no `endpoint` is configured. With `auth_type: none` the
      # receiver presumably falls back to <hostname>:10255, which resolves to
      # this pod rather than the node's kubelet unless hostNetwork is used -
      # confirm, and consider an explicit node endpoint.
      kubeletstats:
        collection_interval: 30s
        auth_type: none
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
    processors:
      batch:
      memory_limiter:
        check_interval: 10s
        limit_percentage: 80
        spike_limit_percentage: 15
      # Marks everything leaving this agent with its origin.
      resource/pod:
        attributes:
          - action: upsert
            key: agent.source
            value: k8s-otel-agent
      # Passthrough mode: do only minimal enrichment here (tag telemetry with
      # the source pod IP) so the gateway/standalone collector can look up the
      # full Kubernetes metadata.
      k8sattributes:
        passthrough: true

    # agent will just forward to the gateway/standalone deployment for further processing
    exporters:
      otlp:
        endpoint: "otel-collector.otel-k8s-monitoring.svc.cluster.local:4317"
        tls:
          insecure: true
        sending_queue:
          num_consumers: 4
          queue_size: 100
        retry_on_failure:
          enabled: true
    service:
      telemetry:
        metrics:
        # The agent's own logs are exported over OTLP/gRPC to the address its
        # otlp receiver listens on, so they travel through the logs pipeline.
        logs:
          processors:
            - batch:
                exporter:
                  otlp:
                    protocol: grpc
                    endpoint: http://${env:MY_POD_IP}:4317

      # Processor order matters: k8sattributes reads the client connection to
      # find the sender's pod IP, and batch discards that context, so
      # k8sattributes has to run before batch. memory_limiter stays first.
      pipelines:
        metrics:
          receivers: [otlp, kubeletstats]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]
        logs:
          receivers: [otlp, filelog]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]
        traces:
          receivers: [otlp]
          processors: [memory_limiter, k8sattributes, resource/pod, batch]
          exporters: [otlp]

---
# Runs one collector agent per node. It loads its configuration from the
# otel-agent-conf ConfigMap and reads pod logs from read-only host mounts.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: otel-agent
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-agent
spec:
  selector:
    matchLabels:
      app: opentelemetry
      component: otel-agent
  template:
    metadata:
      labels:
        app: opentelemetry
        component: otel-agent
    spec:
      shareProcessNamespace: true
      containers:
      - command:
          - "/otelcol-k8s"
          - "--config=/conf/otel-agent-config.yaml"
        image: otel/opentelemetry-collector-k8s:latest
        name: otel-agent
        resources:
          limits:
            cpu: 500m
            memory: 500Mi
          requests:
            cpu: 100m
            memory: 100Mi
        env:
          # Referenced as ${env:MY_POD_IP} in the agent's collector config.
          - name: MY_POD_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.podIP
          # Go runtime soft memory limit. It must sit below the container
          # memory limit (500Mi) to have any effect before the OOM killer
          # steps in; 400MiB is 80% of that limit.
          - name: GOMEMLIMIT
            value: 400MiB
        ports:
        - containerPort: 8888  # prom metrics
        # NOTE(review): nothing in the agent's collector config listens on
        # 10255; this looks like a leftover - confirm before relying on it.
        - containerPort: 10255 # kubeletstats
        volumeMounts:
        - name: otel-agent-config-vol
          mountPath: /conf
        - name: varlogpods
          mountPath: /var/log/pods
          readOnly: true
        # container logs are normally symlinked to here, hence we need this too
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
      volumes:
        - name: varlogpods
          hostPath:
            path: /var/log/pods
            type: Directory
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
            type: Directory
        # Projects the `otel-agent-config` key as otel-agent-config.yaml,
        # the file named in the --config argument above.
        - configMap:
            name: otel-agent-conf
            items:
              - key: otel-agent-config
                path: otel-agent-config.yaml
          name: otel-agent-config-vol


## This is the otel collector setup running standalone/as gateway
---
# Collector configuration for the gateway/standalone deployment. It receives
# OTLP, scrapes annotated pods, enriches the data with Kubernetes metadata
# and exports each signal over OTLP/HTTP.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-collector-conf
  namespace: otel-k8s-monitoring
  labels:
    app: opentelemetry
    component: otel-collector-conf
data:
  otel-collector-config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      # Scrapes pods that opt in through prometheus.io/* annotations.
      prometheus:
        config:
          scrape_configs:
          - honor_labels: true
            job_name: pod-metrics
            kubernetes_sd_configs:
            - role: pod
            scrape_interval: 60s
            relabel_configs:
              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
                action: keep
                regex: true
              - source_labels:
                  [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
                action: drop
                regex: true
              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
                action: replace
                regex: (https?)
                target_label: __scheme__
              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
                action: replace
                target_label: __metrics_path__
                regex: (.+)
              - source_labels:
                  [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
                action: replace
                regex: ([^:]+)(?::\d+)?;(\d+)
                # NOTE: otel collector uses env var replacement. $$ is used as a literal $.
                replacement: $$1:$$2
                target_label: __address__
              - action: labelmap
                regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
                replacement: __param_$$1
              - action: labelmap
                regex: __meta_kubernetes_pod_label_(.+)

    processors:
      memory_limiter:
        check_interval: 10s
        limit_percentage: 80
        spike_limit_percentage: 15
      # One batch processor per signal so sizes and timeouts can differ.
      batch/observe/metrics:
        send_batch_max_size: 20480
        send_batch_size: 16384
        timeout: 1s
      batch/observe/logs:
        send_batch_max_size: 4096
        send_batch_size: 4096
        timeout: 5s
      batch/observe/traces:
        send_batch_max_size: 4096
        send_batch_size: 4096
        timeout: 5s

      # Full (non-passthrough) enrichment. Pods are matched by the
      # k8s.pod.ip attribute, then by k8s.pod.uid, then by the connection.
      k8sattributes:
        extract:
          metadata:
          - k8s.namespace.name
          - k8s.deployment.name
          - k8s.replicaset.name
          - k8s.statefulset.name
          - k8s.daemonset.name
          - k8s.cronjob.name
          - k8s.job.name
          - k8s.node.name
          - k8s.node.uid
          - k8s.pod.name
          - k8s.pod.uid
          - k8s.cluster.uid
          - k8s.container.name
          - container.id
          - service.namespace
          - service.name
          - service.version
          - service.instance.id
          otel_annotations: true
        passthrough: false
        pod_association:
        - sources:
          - from: resource_attribute
            name: k8s.pod.ip
        - sources:
          - from: resource_attribute
            name: k8s.pod.uid
        - sources:
          - from: connection

      resource/observe_common:
        attributes:
          - key: deployment.environment.name
            action: upsert
            value: sandbox
          - key: k8s.cluster.name
            action: upsert
            value: sandbox

    exporters:
      # NOTE(security): the three exporters below carry a bearer token in
      # plain text. Credentials should not be committed with the manifest;
      # move the token into a Kubernetes Secret, expose it to the collector
      # as an environment variable, reference it here with ${env:...}, and
      # rotate the token that has been exposed.
      #
      # the collector k8s distribution doesn't have a prometheus exporter,
      # so we only set up otel metrics and send prometheus there
      otlphttp/observe/metrics:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Metrics # Otel metrics go in a common "Metrics" package
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true

      otlphttp/observe/logs:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Kubernetes Explorer
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true

      otlphttp/observe/traces:
        compression: zstd
        endpoint: https://108096787817.collect.observe-sandbox.com/v2/otel
        headers:
          authorization: Bearer ds1QLnUsrmVIA9D77WHk:OHjGX-qxIlpSPZjDqvj8dRN8anvAHliC
          x-observe-target-package: Tracing # Traces go in a shared Tracing package
        retry_on_failure:
          enabled: true
          initial_interval: 1s
          max_elapsed_time: 5m
          max_interval: 30s
        sending_queue:
          enabled: true

    service:
      telemetry:
        metrics:
        # The collector's own logs are exported over OTLP/gRPC to
        # localhost:4317, the port its otlp receiver listens on.
        logs:
          processors:
            - batch:
                exporter:
                  otlp:
                    protocol: grpc
                    endpoint: http://localhost:4317

      pipelines:
        traces/observe:
          exporters: [otlphttp/observe/traces]
          receivers: [otlp]
          processors:
          - memory_limiter
          - k8sattributes # needs to be before batch to allow for connection enrichment
          - batch/observe/traces
          - resource/observe_common # adds the common Observe resource attributes

        metrics/observe:
          exporters: [otlphttp/observe/metrics]
          receivers: [otlp, prometheus]
          processors:
          - memory_limiter
          - k8sattributes # needs to be before batch to allow for connection enrichment
          - batch/observe/metrics
          - resource/observe_common # adds the common Observe resource attributes

        logs/observe:
          exporters: [otlphttp/observe/logs]
          receivers: [otlp]
          processors:
          - memory_limiter
          - k8sattributes # needs to be before batch to allow for connection enrichment
          - batch/observe/logs
          - resource/observe_common # adds the common Observe resource attributes

---
# In-cluster Service in front of the gateway collector pods. The agents
# export to its DNS name on port 4317.
apiVersion: v1
kind: Service
metadata:
  labels:
    component: otel-collector
    app: opentelemetry
  namespace: otel-k8s-monitoring
  name: otel-collector
spec:
  # Pods are selected by the component label alone.
  selector:
    component: otel-collector
  ports:
    # OTLP over gRPC.
    - name: otlp-grpc
      protocol: TCP
      targetPort: 4317
      port: 4317
    # OTLP over HTTP.
    - name: otlp-http
      protocol: TCP
      targetPort: 4318
      port: 4318
    # Endpoint for querying the collector's metrics.
    - name: metrics
      port: 8888

---
# Gateway/standalone collector. It runs under otel-service-account and
# loads its configuration from the otel-collector-conf ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    component: otel-collector
    app: opentelemetry
  namespace: otel-k8s-monitoring
  name: otel-collector
spec:
  # in case of more replicas, prom receiver needs to be sharded!
  replicas: 1
  progressDeadlineSeconds: 120
  minReadySeconds: 5
  selector:
    matchLabels:
      component: otel-collector
      app: opentelemetry
  template:
    metadata:
      labels:
        component: otel-collector
        app: opentelemetry
    spec:
      serviceAccountName: otel-service-account
      volumes:
        # Projects the `otel-collector-config` key as the file named in the
        # container's --config argument.
        - name: otel-collector-config-vol
          configMap:
            name: otel-collector-conf
            items:
              - path: otel-collector-config.yaml
                key: otel-collector-config
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-k8s:latest
          command:
            - "/otelcol-k8s"
            - "--config=/conf/otel-collector-config.yaml"
          env:
            # Go runtime soft memory limit, below the 2Gi container limit.
            - name: GOMEMLIMIT
              value: 1600MiB
          resources:
            requests:
              memory: 400Mi
              cpu: 200m
            limits:
              memory: 2Gi
              cpu: 1
          ports:
            # OTLP gRPC receiver.
            - containerPort: 4317
            # OTLP HTTP receiver.
            - containerPort: 4318
            # Endpoint for querying metrics.
            - containerPort: 8888
          volumeMounts:
            - mountPath: /conf
              name: otel-collector-config-vol


---
# Source: agent/templates/serviceaccount.yaml
# Identity for the gateway collector pods; the ClusterRoleBinding in this
# file names it as its subject.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: otel-k8s-monitoring
  name: otel-service-account

---
# Source: agent/templates/cluster-role.yaml
# Cluster-wide read permissions for the collector. A ClusterRole is
# cluster-scoped, so it carries no metadata.namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-k8s-cluster-role-k8smonitoring
  labels:
    app.kubernetes.io/name: otel-cluster-role-binding-k8smonitoring
    app.kubernetes.io/instance: otel-agent-and-collector

rules:
  - apiGroups:
    - ""
    resources:
    - configmaps
    verbs:
    - get
  # NOTE(review): the '*' entry below grants get/list/watch on every
  # resource in every API group, which includes Secrets, and makes the other
  # listed groups redundant. The collector config in this file appears to
  # need far less - consider dropping '*' and naming the required resources
  # explicitly.
  - apiGroups:
    - ""
    - '*'
    - apps
    - authorization.k8s.io
    - autoscaling
    - batch
    - networking.k8s.io
    - events.k8s.io
    - rbac.authorization.k8s.io
    - storage.k8s.io
    - vpcresources.k8s.aws
    resources:
    - '*'
    verbs:
    - get
    - list
    - watch

---
# Source: agent/templates/cluster-role.yaml
# Grants the otel-k8s-cluster-role-k8smonitoring ClusterRole to the
# collector's service account.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    app.kubernetes.io/instance: otel-agent-and-collector
    app.kubernetes.io/name: otel-cluster-role-binding-k8smonitoring
  name: otel-k8s-cluster-role-binding
subjects:
  - namespace: otel-k8s-monitoring
    name: otel-service-account
    kind: ServiceAccount
roleRef:
  name: otel-k8s-cluster-role-k8smonitoring
  kind: ClusterRole
  apiGroup: rbac.authorization.k8s.io