---
# Docker Swarm stack for the monitoring components: Prometheus, Loki, Tempo,
# Grafana, Alloy, node-exporter and cAdvisor.
# Swarm configs, each loaded from a file under /srv/monitoring/config on the
# machine that deploys the stack. The config names carry a version suffix
# (v1/v3) that mirrors the suffix in the corresponding file name.
configs:
  alloy-config-v3:
    file: /srv/monitoring/config/alloy.v3.alloy
  loki-config-v1:
    file: /srv/monitoring/config/loki.v1.yml
  prometheus-config-v3:
    file: /srv/monitoring/config/prometheus.v3.yml
  tempo-config-v1:
    file: /srv/monitoring/config/tempo.v1.yml

# Named volumes for the stateful services, all on the `local` driver.
# NOTE(review): `local` volumes live on the node where the task runs; data does
# not follow a task that is rescheduled onto another node.
volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
  loki-data:
    driver: local
  alloy-data:
    driver: local
  tempo-data:
    driver: local

networks:
|
|
monitoring-net: # Internes Overlay-Netzwerk für die Monitoring-Komponenten
|
|
driver: overlay
|
|
attachable: true # Erlaubt anderen Containern/Stacks ggf. den Zugriff
|
|
traefik_public: # Das externe Netzwerk, auf dem Traefik lauscht
|
|
external: true # Wichtig: Dieses Netzwerk wird NICHT von diesem Stack erstellt
|
|
|
|
# Services of the monitoring stack. Traefik routing labels are placed under
# `deploy.labels` (service-level labels in Swarm), not on the containers.
services:
  prometheus:
    image: prom/prometheus:latest
    # NOTE(review): GID 988 presumably matches the host's docker group so the
    # read-only Docker socket below is readable — confirm on the target nodes.
    user: "65534:988"
    volumes:
      - prometheus-data:/prometheus
      - /var/run/docker.sock:/var/run/docker.sock:ro
    configs:
      - source: prometheus-config-v3  # Versioned config
        target: /etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.enable-lifecycle'
      - '--web.enable-remote-write-receiver'
    networks:
      - monitoring-net
      - traefik_public  # Only needed if Traefik should reach Prometheus directly (optional)
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager  # Optional: pin to manager nodes
      labels:
        - "traefik.enable=true"
        # --- Router for the Prometheus UI ---
        - "traefik.http.routers.prometheus.rule=Host(`prometheus.genius.ceo`)"
        - "traefik.http.routers.prometheus.entrypoints=https"  # Adjust the entrypoint if yours differs
        - "traefik.http.routers.prometheus.tls.certresolver=main"  # Adjust the cert resolver!
        # --- Service for the Prometheus UI ---
        - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
        # --- Middleware (optional, e.g. for authentication) ---
        # NOTE(review): with the lifecycle and remote-write-receiver flags
        # enabled above and no auth middleware on this router, those endpoints
        # are reachable by anyone who can reach this host name.
        # - "traefik.http.routers.prometheus.middlewares=my-auth-middleware"
        # --- Network for Traefik ---
        # IMPORTANT: the network must exist and Traefik must listen on it.
        - "traefik.swarm.network=traefik_public"  # Adjust the Traefik network name!

  loki:
    image: grafana/loki:latest
    volumes:
      - loki-data:/loki
    configs:
      - source: loki-config-v1
        target: /etc/loki/local-config.yaml
    command:
      - '-config.file=/etc/loki/local-config.yaml'
    networks:
      - monitoring-net
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager

  tempo:
    image: grafana/tempo:latest  # Current Tempo image
    volumes:
      - tempo-data:/tmp/tempo  # Persistent storage for traces (default path)
    configs:
      - source: tempo-config-v1
        target: /etc/tempo/tempo.yaml
    command:
      - '-config.file=/etc/tempo/tempo.yaml'
    user: root
    # Tempo listens internally on several ports for different protocols:
    # - 4317 (OTLP gRPC - used by Alloy)
    # - 4318 (OTLP HTTP)
    # - 14268 (Jaeger Thrift HTTP)
    # - 3200 (Tempo HTTP frontend/API - for Grafana & UI)
    # None of them are published outside the overlay network for now.
    networks:
      - monitoring-net
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager  # Optional: pin to manager nodes

  grafana:
    image: grafana/grafana:latest
    volumes:
      - grafana-data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # Overridable at deploy time via GRAFANA_ADMIN_PASSWORD; falls back to
      # the previous hard-coded value when the variable is unset.
      # Better still: provide it through a Docker secret.
      - "GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}"
      # Add further Grafana env vars as needed
    networks:
      - monitoring-net
      - traefik_public  # Only needed if Traefik should reach Grafana directly (optional)
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager  # Optional: pin to manager nodes
      labels:
        - "traefik.enable=true"
        # --- Router for Grafana ---
        - "traefik.http.routers.grafana.rule=Host(`grafana.genius.ceo`)"
        - "traefik.http.routers.grafana.entrypoints=https"  # Adjust the entrypoint if yours differs
        - "traefik.http.routers.grafana.tls.certresolver=main"  # Adjust the cert resolver!
        # --- Service for Grafana ---
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
        # --- Middleware (optional) ---
        # - "traefik.http.routers.grafana.middlewares=my-auth-middleware"
        # --- Network for Traefik ---
        - "traefik.swarm.network=traefik_public"  # Adjust the Traefik network name!

  alloy:
    image: grafana/alloy:latest  # Official Alloy image
    volumes:
      - alloy-data:/var/lib/alloy/data  # Persistent storage for Alloy (WAL etc.)
      - /var/run/docker.sock:/var/run/docker.sock:ro  # For Docker discovery
    configs:
      - source: alloy-config-v3
        target: /etc/alloy/config.alloy  # Config path passed to `run` below
    environment:
      # Swarm resolves this template per task to the name of the node the task
      # runs on. `${HOSTNAME}` would instead be interpolated once, on the
      # machine running `docker stack deploy`, for every node.
      - "HOSTNAME={{.Node.Hostname}}"
    # Runs with the image's default user for now because of Docker socket /
    # volume permissions; can be tightened later (socket proxy).
    # user: root
    command:
      - 'run'
      - '--server.http.listen-addr=0.0.0.0:12345'
      # Overriding `command` replaces the image's default arguments, so the
      # storage path must be given explicitly for the alloy-data volume
      # mounted above to actually be used.
      - '--storage.path=/var/lib/alloy/data'
      - '/etc/alloy/config.alloy'
    networks:
      - monitoring-net
      - traefik_public
    deploy:
      mode: global  # IMPORTANT: Alloy must run on every node!
      labels:  # Traefik labels for the Alloy UI
        - "traefik.enable=true"
        # --- Router for the Alloy UI ---
        - "traefik.http.routers.alloy-ui.rule=Host(`otlp.genius.ceo`)"
        - "traefik.http.routers.alloy-ui.entrypoints=https"
        - "traefik.http.routers.alloy-ui.tls.certresolver=main"
        - "traefik.http.routers.alloy-ui.service=alloy-ui@swarm"
        # --- Service for the Alloy UI ---
        - "traefik.http.services.alloy-ui.loadbalancer.server.port=12345"  # Target port 12345 (matches --server.http.listen-addr)
        # # --- Router for OTLP HTTP ---
        # - "traefik.http.routers.otlp-http.rule=Host(`alloy.genius.ceo`)"
        # - "traefik.http.routers.otlp-http.entrypoints=https"
        # - "traefik.http.routers.otlp-http.tls.certresolver=main"
        # - "traefik.http.routers.otlp-http.service=otlp-http@swarm"
        # # --- Service for OTLP HTTP ---
        # - "traefik.http.services.otlp-http.loadbalancer.server.port=4318"  # Target port 4318 (OTLP HTTP default)
        # --- Router for the Faro receiver ---
        - "traefik.http.routers.faro-receiver.rule=Host(`alloy.genius.ceo`)"
        - "traefik.http.routers.faro-receiver.entrypoints=https"
        - "traefik.http.routers.faro-receiver.tls.certresolver=main"
        - "traefik.http.routers.faro-receiver.service=faro-receiver@swarm"
        # --- Service for the Faro receiver ---
        # NOTE(review): 12347 must match the Faro receiver listener in the Alloy config — confirm.
        - "traefik.http.services.faro-receiver.loadbalancer.server.port=12347"  # Target port 12347 (Faro receiver)
        # # --- Middlewares ---
        # - "traefik.http.routers.otlp-http.middlewares=alloy-ratelimit@swarm"
        # - "traefik.http.middlewares.alloy-ratelimit.ratelimit.average=100"  # e.g. 100 requests per second
        # - "traefik.http.middlewares.alloy-ratelimit.ratelimit.burst=50"  # allow 50 more for short bursts
        # --- Network for Traefik ---
        - "traefik.swarm.network=traefik_public"  # Check/adjust the Traefik network name!

  node-exporter:
    image: quay.io/prometheus/node-exporter:latest  # Use the current image
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
    networks:
      - monitoring-net  # Only the internal network is needed
    deploy:
      mode: global  # Runs on EVERY node in the swarm

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest  # Google's cAdvisor image
    volumes:
      # cAdvisor needs access to host system information and Docker
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - monitoring-net  # Only the internal network is needed
    deploy:
      mode: global  # Runs on EVERY node in the swarm
      resources:  # Optional: caps resources, cAdvisor can be hungry
        limits:
          memory: 512M
        reservations:
          memory: 256M