add platform services

parent e25d8dd5d9, commit 193319fa52
@@ -0,0 +1,8 @@
.local/

leantime/

.vscode/

.DS_Store
@@ -0,0 +1,35 @@
██████╗ ███████╗███╗ ██╗██╗██╗ ██╗ ██████╗ ██████╗ ███████╗ ██████╗
██╔════╝ ██╔════╝████╗ ██║██║██║ ██║██╔════╝ ██╔════╝ ██╔════╝██╔═══██╗
██║ ███╗█████╗ ██╔██╗ ██║██║██║ ██║╚█████╗ ██║ █████╗ ██║ ██║
██║ ██║██╔══╝ ██║╚██╗██║██║██║ ██║ ╚═══██╗ ██║ ██╔══╝ ██║ ██║
╚██████╔╝███████╗██║ ╚████║██║╚█████╔╝██████╔╝ ╚██████╗ ███████╗╚██████╔╝
╚═════╝ ╚══════╝╚═╝ ╚═══╝╚═╝ ╚════╝ ╚═════╝ ██ ╚═════╝ ╚══════╝ ╚═════╝

---

# Genius.ceo repository

Ceph Dashboard is now available at:

URL: https://manager-node-1:8443/
User: admin
Password: g0uhtgv520

Enabling client.admin keyring and conf on hosts with "admin" label
Saving cluster configuration to /var/lib/ceph/6fb9d55b-5b20-11f0-9be6-920006043bcc/config directory
You can access the Ceph CLI as following in case of multi-cluster or non-default config:

sudo /usr/sbin/cephadm shell --fsid 6fb9d55b-5b20-11f0-9be6-920006043bcc -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring

Or, if you are only running a single cluster on this host:

sudo /usr/sbin/cephadm shell

Please consider enabling telemetry to help improve Ceph:

ceph telemetry on

For more information see:

https://docs.ceph.com/en/latest/mgr/telemetry/

Bootstrap complete.
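To verify the freshly bootstrapped cluster from a manager node, the cephadm shell shown above can also run one-off commands; a small sketch (the FSID and host names come from the bootstrap output above):

    sudo /usr/sbin/cephadm shell -- ceph -s            # overall cluster health
    sudo /usr/sbin/cephadm shell -- ceph orch host ls  # hosts known to the orchestrator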
@@ -0,0 +1,33 @@
- name: Initialize and harden nodes
  hosts: all
  become: true

  roles:
    - role: common
      tags: common
    - role: ssh_hardening
      tags: ssh
    - role: ufw_firewall
      tags: firewall
    - role: fail2ban
      tags: fail2ban

  handlers:
    - name: restart sshd
      ansible.builtin.service:
        name: ssh
        state: restarted
    - name: restart fail2ban
      ansible.builtin.service:
        name: fail2ban
        state: restarted

- name: Setup Ceph Cluster and CephFS
  hosts: all
  become: true
  roles:
    - role: ceph_setup

- name: Initialize Docker Swarm
  hosts: all
  become: true
  roles:
    - role: docker_swarm
@@ -0,0 +1,9 @@
- name: Deploy infrastructure services
  hosts: all
  gather_facts: true
  roles:
    - traefik
    - authentik
    - portainer
    - leantime
    - kestra
@@ -0,0 +1,4 @@
APT::Periodic::Update-Package-Lists "1";
APT::Periodic::Download-Upgradeable-Packages "1";
APT::Periodic::AutocleanInterval "7";
APT::Periodic::Unattended-Upgrade "1";
@@ -0,0 +1,9 @@
Unattended-Upgrade::Allowed-Origins {
    "${distro_id}:${distro_codename}-security";
};
Unattended-Upgrade::Package-Blacklist {
};
Unattended-Upgrade::DevRelease "false";
Unattended-Upgrade::Remove-Unused-Kernel-Packages "true";
Unattended-Upgrade::Remove-Unused-Dependencies "true";
Unattended-Upgrade::Automatic-Reboot "false";
@@ -0,0 +1,12 @@
admin_user: 'admin'
ssh_port: 22
cephfs_name: "shared-fs"
ceph_osd_device: "/dev/sdb"
public_interface: 'eth0'
private_interface: 'enp7s0'
authorized_keys:
  - 'ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKtYTptTN9ggoy0aUKXmxaPKpclEn86jM7s5UtTw1JJI' # Marcel's MacBook
main_domain: genius.ceo

ceph_volume: /mnt/cephfs
traefik_public_net: traefik_public
@ -0,0 +1,59 @@
|
|||
$ANSIBLE_VAULT;1.1;AES256
|
||||
34613362353964313436306439386661613364663666653265313937343239633365663836653030
|
||||
6262386661666364383961336461316139333262623034340a643434316632336132613264646437
|
||||
36376365613061353866383135353432303433353931633063313566613166303064316666613132
|
||||
3733623536643935370a656431646435626265666265666230356162656363663838636662313466
|
||||
61336237306332643032653766313036636163336431613236663864636438363832383231323362
|
||||
62666463336639303766356331353635323031636465616235663738333761653934346663386636
|
||||
63623361363164663663313966653939643462353638613464396466613931363662623763326535
|
||||
34376237353663656636363866373466346434666339646131396439653261373738636665613435
|
||||
65356330303863303236373933333163633964633061393136646632386137346434353365343763
|
||||
30343937656166303962653030366566616331666262343336343138623566353832313836643435
|
||||
62636333346235316562303061656166383135633464623734626336623565346336626134333933
|
||||
35363363376663333061663164623539363731613263376163306436636265336562396439356137
|
||||
30663431373131303437393166396539306133636264653733303762316363386438643536306338
|
||||
32303139303363316264393939326561393730396664343361393863303736343933636265633439
|
||||
65633765666362396439643863653531363366383866373939616333353430633530343262366138
|
||||
31663863663165653932653733623761613265383039336633383832393761666337336165613933
|
||||
63383934366662353038626539633132313939376231643133363739303235326433353733363437
|
||||
35626233613936626532326262646166363739666162353237323237383132333134343439336134
|
||||
33613462393237626432386462373439303439356666336630363536366233346438313039346530
|
||||
33393232333633663731393466653439623638316565346530306439326431323436356166633334
|
||||
66383034643834613133333265646338303463393035393266653832366434313636633730636436
|
||||
38353337633437656262623061666563646637626363353561323231376237623264373861376666
|
||||
66363265633638356133353933613664353934373634613662326437336562663766306364303538
|
||||
35623130616265623838353838396235386661666132623163383162373665313462663738303933
|
||||
63363764653561616162386139646130393439373066666437623236383238396233653165623032
|
||||
34316439376331356539626464313462616238623166623761626435303565653233386236656262
|
||||
62613935336661623862323833353265366533643830373634663266666332333463303666343366
|
||||
39653332346433306566316430656361363230343761613263393230366362363132663565636264
|
||||
65313633653464663963373561373532636235353331353237623635613034613337343730656632
|
||||
31656165666134333864353730363163623365393030333932393565666235643639303662663532
|
||||
38343734393135643039633664653966313536616533656635373535636434396333313536623536
|
||||
39623132326362656166366566373163386363336231633233353639313166333932656133363365
|
||||
66666665346331613638656562396463386637356539366539343232353061666531353166396536
|
||||
39623762633064323332653831643832303332396431633738396266633935656132323164613161
|
||||
61353663383532613763356630373063383161376165333736316466353231656534366636313636
|
||||
37616636383163616136643630363535346137636636633432643337393865393063626663333164
|
||||
36656537343231386333323637386539386364356266376433616636313239376666353066306363
|
||||
39376461323062393935613630656230346131373634363633393035346263663762623063356633
|
||||
36646664623230303761373138333164303363373365386266386138653764623030623630333631
|
||||
66363866633064656532336137613964653431663436333761666631656339646161636435343065
|
||||
37646164653937633962386631373236653064346438323664383933643738656536356562626532
|
||||
34663834363230303164626236393938643037363036613965373330636238633661346335336531
|
||||
62663461626365386362393061626266303463663735303539383937363965383234666337386165
|
||||
30366564363766623162306666656566353662633866396430396633623266383332303339666663
|
||||
38313536666336323366616432336161656434646463373963356331326364333038366337386638
|
||||
39396535386331663466323334613533383439343437363631363532313362663564353635343735
|
||||
37653063383163316366366335663537653134326564643062653065303337303333643961383837
|
||||
39393734326562616165313133643766303934336263326433366436623539633233643761616436
|
||||
33356234313538343635343630623337343436346638396539316131623861353630333964633839
|
||||
33316565326164386337623730623932313363306436316335336238333430626165663232343463
|
||||
36653038633632616335393262656638346434386639383131396233643932323931393264613134
|
||||
30336134343464373265636234656561653462356435383138323638613039623839373935326462
|
||||
32393430616438356332313766353337383035623137363233323664393833303464313162303833
|
||||
65383131313335353832343963636639346162353634306430353638393136623734623833306136
|
||||
32396130623065326636633235346630336435663261353866323862666231656261333839373162
|
||||
35623835663434356438653533623337363531353634663064303035633839656463656238636132
|
||||
66316333356633613130323438376530623634336632323365616239373865623334363635396331
|
||||
3263616336653336636666386632316564613331323431363935
|
||||
|
|
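The block above is Ansible Vault ciphertext and can only be read or changed with the vault password; a minimal sketch (the file path is an assumption):

    ansible-vault view group_vars/all/vault.yml
    ansible-vault edit group_vars/all/vault.yml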
@@ -0,0 +1,10 @@
[all:children]
managers
workers

[managers]
manager-node-1 ansible_host=37.27.215.220 ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=./.local/secure/private_key
manager-node-2 ansible_host=135.181.146.55 ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=./.local/secure/private_key
manager-node-3 ansible_host=65.109.135.85 ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=./.local/secure/private_key

[workers]
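With this inventory, the plays above can be run against all three manager nodes; a sketch, assuming the inventory file is named hosts and the playbook file names match the sections above:

    ansible-playbook -i hosts site.yml
    ansible-playbook -i hosts infra.yml --ask-vault-pass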
@@ -0,0 +1,10 @@
---
- name: Main-Playbook
  hosts: all
  gather_facts: true
  roles:
    # - traefik
    # - portainer
    # - kestra
    - gitea
@@ -0,0 +1,33 @@
networks:
  traefik_public:
    external: true

services:
  dockge:
    image: louislam/dockge:1
    environment:
      - DOCKGE_STACKS_DIR=/opt/stacks
      - DOCKGE_DATA_DIR=/app/data
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - '/mnt/cephfs/dockge/data:/app/data'
      - '/mnt/cephfs/dockge/stacks:/opt/stacks'
    networks:
      - traefik_public
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      restart_policy:
        condition: on-failure
      labels:
        - 'traefik.enable=true'
        - 'traefik.swarm.network=traefik_public'
        # --- Router for Dockge ---
        - 'traefik.http.routers.dockge.rule=Host(`dockge.genius.ceo`)'
        - 'traefik.http.routers.dockge.entrypoints=https'
        - 'traefik.http.routers.dockge.tls.certresolver=main'
        # --- Service for Dockge ---
        - 'traefik.http.services.dockge.loadbalancer.server.port=5001'
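A sketch of how a stack file like this is deployed on the Swarm (the stack and file names are assumptions; the external traefik_public network must already exist):

    docker network create --driver overlay --attachable traefik_public   # only if it does not exist yet
    docker stack deploy -c dockge.yml dockge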
@@ -0,0 +1,22 @@

services:
  sd_server:
    image: socheatsok78/dockerswarm_sd_server:latest
    networks:
      - sd_network
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

networks:
  monitoring:
    driver: overlay
    attachable: true
    ipam:
      config:
        - subnet: 172.16.201.0/24
  sd_network:
    driver: overlay
    attachable: true
    ipam:
      config:
        - subnet: 172.16.202.0/24
@@ -0,0 +1,139 @@
faro.receiver "stage_app_agent_receiver" {
  server {
    listen_address           = "0.0.0.0"
    listen_port              = 12347
    cors_allowed_origins     = ["*"]
    // cors_allowed_origins  = ["https://avicenna.genius.ceo"]
    api_key                  = "t3stK3y"
    max_allowed_payload_size = "10MiB"

    rate_limiting {
      rate = 100
    }
  }

  sourcemaps {}

  output {
    logs   = [loki.process.logs_process_client.receiver]
    traces = [otelcol.exporter.otlp.tempo.input]
  }
}

loki.process "logs_process_client" {
  forward_to = [loki.write.to_loki.receiver]

  stage.logfmt {
    mapping = { "kind" = "", "service_name" = "", "app_name" = "", "namespace" = "" }
  }

  stage.labels {
    values = { "kind" = "kind", "service_name" = "service_name", "app" = "app_name", "namespace" = "namespace" }
  }
}

otelcol.receiver.otlp "otel_collector" {
  grpc {
    endpoint = "0.0.0.0:4317"
  }
  http {
    endpoint = "0.0.0.0:4318"
    cors {
      allowed_origins = ["https://avicenna.genius.ceo/"]
    }
  }

  // Defines where the received data is forwarded to
  output {
    metrics = [otelcol.exporter.prometheus.otel_metrics.input]
    logs    = [otelcol.exporter.loki.otel_logs.input]
    traces  = [otelcol.exporter.otlp.tempo.input]
  }
}

loki.write "to_loki" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}

prometheus.remote_write "to_prometheus" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}

// Discover Docker containers on the host
discovery.docker "logs_integration_docker" {
  host             = "unix:///var/run/docker.sock"
  refresh_interval = "5s"
}
discovery.relabel "logs_integration_docker" {
  targets = []

  rule {
    action      = "labelmap"
    regex       = "__meta_docker_container_label_com_docker_swarm_node_id"
    replacement = "node_id"
  }

  rule {
    action      = "labelmap"
    regex       = "__meta_docker_container_label_com_docker_stack_namespace"
    replacement = "namespace"
  }

  rule {
    action      = "labelmap"
    regex       = "__meta_docker_container_label_com_docker_swarm_service_name"
    replacement = "service_name"
  }

  rule {
    action      = "labelmap"
    regex       = "__meta_docker_container_name"
    replacement = "container_name"
  }
}

loki.source.docker "logs_from_containers" {
  host    = "unix:///var/run/docker.sock"
  targets = discovery.docker.logs_integration_docker.targets // uses the discovered containers

  relabel_rules = discovery.relabel.logs_integration_docker.rules

  // Forwards the collected logs to the configured Loki endpoint
  forward_to = [loki.write.to_loki.receiver]
}

otelcol.exporter.otlp "tempo" { // name may vary
  client {
    endpoint = "tempo:4317" // target: the Tempo service on port 4317
    tls {
      insecure = true // internal traffic without TLS
    }
  }
}

otelcol.exporter.prometheus "otel_metrics" {
  forward_to = [prometheus.remote_write.to_prometheus.receiver]
}

otelcol.exporter.loki "otel_logs" {
  forward_to = [loki.write.to_loki.receiver]
}

// Configure logging for Alloy itself
logging {
  level  = "info"
  format = "logfmt"
}

// prometheus.scrape "alloy_self" {
//   targets = [
//     prometheus.target_group {
//       targets = [{"__address__" = "localhost:12345"}]
//     }
//   ]
//   forward_to = [...] // to Prometheus remote write or a local agent
// }
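Since Alloy's HTTP server is started on port 12345 (see the monitoring stack file below), a quick smoke test on any node is to hit its readiness endpoint; the path is an assumption based on Alloy's default endpoints:

    curl -s http://localhost:12345/-/ready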
@@ -0,0 +1,76 @@
auth_enabled: false # simplest setup, no authentication
analytics:
  reporting_enabled: false
server:
  http_listen_port: 3100
  grpc_listen_port: 9096 # default gRPC port for Loki

common:
  instance_addr: 127.0.0.1 # address the instance advertises itself with
  path_prefix: /loki # where Loki stores its data (inside the volume)
  storage:
    filesystem: # local filesystem for indexes and chunks
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1 # no replication for a single instance
  ring:
    kvstore:
      store: inmemory # simplest ring store for a single instance

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

pattern_ingester:
  enabled: true
  metric_aggregation:
    loki_address: localhost:3100

frontend:
  encoding: protobuf

limits_config:
  metric_aggregation_enabled: true
  reject_old_samples: true
  reject_old_samples_max_age: 168h # 7 days
  ingestion_rate_mb: 15 # allow 15 MiB/s per tenant (default was 4)
  ingestion_burst_size_mb: 30 # allow short bursts of up to 30 MiB (default was 6)
  # Optional: maximum number of active log streams per tenant (default is 10000)
  # max_global_streams_per_user: 10000
  # Optional: maximum size of a single log line (default 256 kB)
  # max_line_size: 262144

# --- Optional: compactor (cleans up old data) ---
# compactor:
#   working_directory: /loki/compactor
#   shared_store: filesystem
#   compaction_interval: 10m
#   retention_enabled: true
#   retention_delete_delay: 2h
#   retention_delete_worker_count: 150

# --- Optional: ruler (for alerts based on logs) ---
# ruler:
#   alertmanager_url: http://alertmanager:9093 # URL of your Alertmanager
#   storage:
#     type: local
#     local:
#       directory: /loki/rules
#   rule_path: /tmp/loki/rules-temp
#   ring:
#     kvstore:
#       store: inmemory
#   enable_api: true
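Once Loki is running, its HTTP API gives a quick way to confirm it is ready and that labels are arriving; a sketch from inside the monitoring network:

    curl -s http://loki:3100/ready
    curl -s http://loki:3100/loki/api/v1/labels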
@@ -0,0 +1,57 @@
global:
  scrape_interval: 15s # how often targets are scraped
  evaluation_interval: 15s # how often rules are evaluated

scrape_configs:
  - job_name: 'prometheus'
    # Prometheus monitors itself
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    # Docker Swarm service discovery for the node exporter
    dockerswarm_sd_configs:
      - host: unix:///var/run/docker.sock
        role: tasks
        port: 9100 # default node exporter port
    relabel_configs:
      # only keep tasks in the 'running' state
      - source_labels: [__meta_dockerswarm_task_desired_state]
        regex: running
        action: keep
      # only keep tasks of the 'node-exporter' service from this stack
      # adjust the regex if your stack is named differently (assumption here: the stack name contains 'monitoring')
      - source_labels: [__meta_dockerswarm_service_name]
        regex: ^monitoring_node-exporter$ # adjust the regex to your stack name!
        action: keep
      # use the Swarm node's hostname as the instance label
      - source_labels: [__meta_dockerswarm_node_hostname]
        target_label: instance
      # set the target address correctly to IP:port
      - source_labels: [__address__]
        regex: '(.*):.*' # extract the IP address
        replacement: '${1}:9100' # set the correct port (9100)
        target_label: __address__

  - job_name: 'cadvisor'
    dockerswarm_sd_configs:
      - host: unix:///var/run/docker.sock
        role: tasks
        port: 8080 # default cAdvisor port
    relabel_configs:
      # only keep tasks in the 'running' state
      - source_labels: [__meta_dockerswarm_task_desired_state]
        regex: running
        action: keep
      # only keep tasks of the 'cadvisor' service from this stack
      # adjust the regex to your stack name!
      - source_labels: [__meta_dockerswarm_service_name]
        regex: .*(monitoring|mon)_cadvisor.* # adjust the regex to your stack name!
        action: keep
      # use the Swarm node's hostname as the instance label
      - source_labels: [__meta_dockerswarm_node_hostname]
        target_label: instance
      # IMPORTANT: set the metrics path, since cAdvisor serves it under /metrics
      - action: replace
        target_label: __metrics_path__
        replacement: /metrics
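Before rolling the configuration out, it can be validated with promtool, which ships with Prometheus; a sketch using the config path from the stack file below:

    promtool check config /srv/monitoring/config/prometheus.v3.yml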
@@ -0,0 +1,38 @@
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /mnt/promtail/positions.yaml # path inside the mounted volume

clients:
  - url: http://loki:3100/loki/api/v1/push # sends logs to the Loki service

scrape_configs:
  - job_name: docker_containers
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      # extract the container name (without the leading '/')
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.*)'
        target_label: 'container_name'
      # keep the log stream (stdout/stderr) as a label
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: 'logstream'
      # extract the service name from the Swarm label
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_service_name']
        target_label: 'service_name'
      # extract the task name from the Swarm label
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_task_name']
        target_label: 'task_name'
      # add an 'instance' label with the task's hostname (approximates the node name)
      - action: replace
        source_labels: ['container_name'] # needs an existing label as source
        target_label: 'instance'
        replacement: ${HOSTNAME} # uses the Swarm HOSTNAME variable
      # drop logs from Promtail itself (adjust the regex to your stack name if needed)
      - source_labels: ['container_name']
        regex: 'monitoring_promtail.*' # adjust 'monitoring' to your stack name!
        action: drop
@@ -0,0 +1,36 @@
server:
  http_listen_port: 3200 # default API/UI port

distributor:
  receivers: # enable the OTLP receiver (Tempo can also ingest directly)
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317
        http:
          endpoint: 0.0.0.0:4318

# basic data-processing configuration (usually fine to start with)
ingester:
  trace_idle_period: 10s
  max_block_bytes: 1048576 # 1MB
  max_block_duration: 5m

compactor:
  compaction:
    block_retention: 1h # how long blocks are kept at minimum (low value for testing)

# IMPORTANT: define storage explicitly!
storage:
  trace:
    backend: local # backend type: local filesystem
    # Write Ahead Log (WAL) configuration.
    wal:
      path: /tmp/tempo/wal # Directory to store the WAL locally.
    # Local configuration for filesystem storage.
    local:
      path: /tmp/tempo/blocks # Directory to store the TSDB blocks.
    # Pool used for finding trace IDs.
    pool:
      max_workers: 100 # Worker pool determines the number of parallel requests to the object store backend.
      queue_depth: 10000 # Maximum depth for the querier queue jobs. A job is required for each block searched.
@@ -0,0 +1,226 @@
configs:
  alloy-config-v3:
    file: /srv/monitoring/config/alloy.v3.alloy
  loki-config-v1:
    file: /srv/monitoring/config/loki.v1.yml
  prometheus-config-v3:
    file: /srv/monitoring/config/prometheus.v3.yml
  tempo-config-v1:
    file: /srv/monitoring/config/tempo.v1.yml

volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
  loki-data:
    driver: local
  alloy-data:
    driver: local
  tempo-data:
    driver: local

networks:
  monitoring-net: # internal overlay network for the monitoring components
    driver: overlay
    attachable: true # allows other containers/stacks to attach if needed
  traefik_public: # the external network Traefik listens on
    external: true # important: this network is NOT created by this stack

services:
  prometheus:
    image: prom/prometheus:latest
    user: "65534:988"
    volumes:
      - prometheus-data:/prometheus
      - /var/run/docker.sock:/var/run/docker.sock:ro
    configs:
      - source: prometheus-config-v3 # versioned config
        target: /etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.enable-lifecycle'
      - '--web.enable-remote-write-receiver'
    networks:
      - monitoring-net
      - traefik_public # only needed if Traefik should reach Prometheus directly (optional)
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager # optional: pin to manager nodes
      labels:
        - "traefik.enable=true"
        # --- Router for the Prometheus UI ---
        - "traefik.http.routers.prometheus.rule=Host(`prometheus.genius.ceo`)"
        - "traefik.http.routers.prometheus.entrypoints=https" # adjust the entrypoint if yours differs
        - "traefik.http.routers.prometheus.tls.certresolver=main" # adjust the cert resolver!
        # --- Service for the Prometheus UI ---
        - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
        # --- Middleware (optional, e.g. for authentication) ---
        # - "traefik.http.routers.prometheus.middlewares=my-auth-middleware"
        # --- Network for Traefik ---
        # IMPORTANT: the network must exist and Traefik must listen on it.
        - "traefik.swarm.network=traefik_public" # adjust the Traefik network name!

  loki:
    image: grafana/loki:latest
    volumes:
      - loki-data:/loki
    configs:
      - source: loki-config-v1
        target: /etc/loki/local-config.yaml
    command: "-config.file=/etc/loki/local-config.yaml"
    networks:
      - monitoring-net
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager

  tempo:
    image: grafana/tempo:latest # current Tempo image
    volumes:
      - tempo-data:/tmp/tempo # persistent storage for traces (default path)
    configs:
      - source: tempo-config-v1
        target: /etc/tempo/tempo.yaml
    command: [ "-config.file=/etc/tempo/tempo.yaml" ]
    user: root
    # Tempo listens internally on several ports for different protocols:
    # - 4317 (OTLP gRPC - used by Alloy)
    # - 4318 (OTLP HTTP)
    # - 14268 (Jaeger gRPC)
    # - 3200 (Tempo HTTP frontend/API - for Grafana and the UI)
    # For now, none of them are published externally.
    networks:
      - monitoring-net
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager # optional: pin to manager nodes

  grafana:
    image: grafana/grafana:latest
    volumes:
      - grafana-data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin # better handled via Docker secrets!
      # more Grafana env vars as needed
    networks:
      - monitoring-net
      - traefik_public # only needed if Traefik should reach Grafana directly (optional)
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager # optional: pin to manager nodes
      labels:
        - "traefik.enable=true"
        # --- Router for Grafana ---
        - "traefik.http.routers.grafana.rule=Host(`grafana.genius.ceo`)"
        - "traefik.http.routers.grafana.entrypoints=https" # adjust the entrypoint if yours differs
        - "traefik.http.routers.grafana.tls.certresolver=main" # adjust the cert resolver!
        # --- Service for Grafana ---
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
        # --- Middleware (optional) ---
        # - "traefik.http.routers.grafana.middlewares=my-auth-middleware"
        # --- Network for Traefik ---
        - "traefik.swarm.network=traefik_public" # adjust the Traefik network name!

  alloy:
    image: grafana/alloy:latest # official Alloy image
    volumes:
      - alloy-data:/var/lib/alloy/data # persistent storage for Alloy (WAL etc.)
      - /var/run/docker.sock:/var/run/docker.sock:ro # for Docker discovery
    configs:
      - source: alloy-config-v3
        target: /etc/alloy/config.alloy # container path of the Alloy config
    environment:
      - HOSTNAME=${HOSTNAME}
    # started as root because of Docker socket / volume permissions; can be tightened later (socket proxy)
    # user: root
    command: [
      "run",
      "--server.http.listen-addr=0.0.0.0:12345",
      "/etc/alloy/config.alloy",
    ]
    networks:
      - monitoring-net
      - traefik_public
    deploy:
      mode: global # IMPORTANT: Alloy must run on every node!
      labels: # Traefik labels for the Alloy UI
        - "traefik.enable=true"
        # --- Router for the Alloy UI ---
        - "traefik.http.routers.alloy-ui.rule=Host(`otlp.genius.ceo`)"
        - "traefik.http.routers.alloy-ui.entrypoints=https"
        - "traefik.http.routers.alloy-ui.tls.certresolver=main"
        - "traefik.http.routers.alloy-ui.service=alloy-ui@swarm"
        # --- Service for the Alloy UI ---
        - "traefik.http.services.alloy-ui.loadbalancer.server.port=12345" # target port is 12345 (Alloy UI default)
        # # --- Router for OTLP HTTP ---
        # - "traefik.http.routers.otlp-http.rule=Host(`alloy.genius.ceo`)"
        # - "traefik.http.routers.otlp-http.entrypoints=https"
        # - "traefik.http.routers.otlp-http.tls.certresolver=main"
        # - "traefik.http.routers.otlp-http.service=otlp-http@swarm"
        # # --- Service for OTLP HTTP ---
        # - "traefik.http.services.otlp-http.loadbalancer.server.port=4318" # target port is 4318 (OTLP HTTP default)
        # --- Router for the Faro receiver ---
        - "traefik.http.routers.faro-receiver.rule=Host(`alloy.genius.ceo`)"
        - "traefik.http.routers.faro-receiver.entrypoints=https"
        - "traefik.http.routers.faro-receiver.tls.certresolver=main"
        - "traefik.http.routers.faro-receiver.service=faro-receiver@swarm"
        # --- Service for the Faro receiver ---
        - "traefik.http.services.faro-receiver.loadbalancer.server.port=12347" # target port is 12347 (Faro receiver default)
        # # --- Middlewares ---
        # - "traefik.http.routers.otlp-http.middlewares=alloy-ratelimit@swarm"
        # - "traefik.http.middlewares.alloy-ratelimit.ratelimit.average=100" # e.g. 100 requests per second
        # - "traefik.http.middlewares.alloy-ratelimit.ratelimit.burst=50" # allows 50 extra requests in short bursts
        # --- Network for Traefik ---
        - "traefik.swarm.network=traefik_public" # check/adjust the Traefik network name!

  node-exporter:
    image: quay.io/prometheus/node-exporter:latest # use a current image
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
    networks:
      - monitoring-net # only the internal network is needed
    deploy:
      mode: global # runs on EVERY node in the swarm

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest # Google's cAdvisor image
    volumes:
      # cAdvisor needs access to host system information and Docker
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - monitoring-net # only the internal network is needed
    deploy:
      mode: global # runs on EVERY node in the swarm
      resources: # optional: limit resources, cAdvisor can be hungry
        limits:
          memory: 512M
        reservations:
          memory: 256M
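A sketch of deploying and smoke-testing the stack above (the stack name is an assumption; the hostnames come from the Traefik labels):

    docker stack deploy -c monitoring.yml monitoring
    docker stack services monitoring
    curl -I https://grafana.genius.ceo
    curl -I https://prometheus.genius.ceo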
@ -0,0 +1,102 @@
|
|||
receivers:
|
||||
hostmetrics:
|
||||
collection_interval: 30s
|
||||
root_path: /hostfs
|
||||
scrapers:
|
||||
cpu: {}
|
||||
load: {}
|
||||
memory: {}
|
||||
disk: {}
|
||||
filesystem: {}
|
||||
network: {}
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
prometheus:
|
||||
config:
|
||||
global:
|
||||
scrape_interval: 60s
|
||||
scrape_configs:
|
||||
- job_name: otel-agent
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:8888
|
||||
labels:
|
||||
job_name: otel-agent
|
||||
tcplog/docker:
|
||||
listen_address: "0.0.0.0:2255"
|
||||
operators:
|
||||
- type: regex_parser
|
||||
regex: '^<([0-9]+)>[0-9]+ (?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?) (?P<container_id>\S+) (?P<container_name>\S+) [0-9]+ - -( (?P<body>.*))?'
|
||||
timestamp:
|
||||
parse_from: attributes.timestamp
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
||||
- type: move
|
||||
from: attributes["body"]
|
||||
to: body
|
||||
- type: remove
|
||||
field: attributes.timestamp
|
||||
# please remove names from below if you want to collect logs from them
|
||||
- type: filter
|
||||
id: signoz_logs_filter
|
||||
expr: 'attributes.container_name matches "^(signoz_(logspout|signoz|otel-collector|clickhouse|zookeeper))|(infra_(logspout|otel-agent|otel-metrics)).*"'
|
||||
processors:
|
||||
batch:
|
||||
send_batch_size: 10000
|
||||
send_batch_max_size: 11000
|
||||
timeout: 10s
|
||||
resourcedetection:
|
||||
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
|
||||
detectors:
|
||||
# - ec2
|
||||
# - gcp
|
||||
# - azure
|
||||
- env
|
||||
- system
|
||||
timeout: 2s
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
pprof:
|
||||
endpoint: 0.0.0.0:1777
|
||||
exporters:
|
||||
otlp:
|
||||
endpoint: ${env:SIGNOZ_COLLECTOR_ENDPOINT}
|
||||
tls:
|
||||
insecure: true
|
||||
headers:
|
||||
signoz-access-token: ${env:SIGNOZ_ACCESS_TOKEN}
|
||||
# debug: {}
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
encoding: json
|
||||
metrics:
|
||||
address: 0.0.0.0:8888
|
||||
extensions:
|
||||
- health_check
|
||||
- pprof
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
metrics/hostmetrics:
|
||||
receivers: [hostmetrics]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
metrics/prometheus:
|
||||
receivers: [prometheus]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
logs:
|
||||
receivers: [otlp, tcplog/docker]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
receivers:
|
||||
prometheus:
|
||||
config:
|
||||
global:
|
||||
scrape_interval: 60s
|
||||
scrape_configs:
|
||||
- job_name: otel-metrics
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:8888
|
||||
labels:
|
||||
job_name: otel-metrics
|
||||
# For Docker daemon metrics to be scraped, it must be configured to expose
|
||||
# Prometheus metrics, as documented here: https://docs.docker.com/config/daemon/prometheus/
|
||||
# - job_name: docker-daemon
|
||||
# dockerswarm_sd_configs:
|
||||
# - host: unix:///var/run/docker.sock
|
||||
# role: nodes
|
||||
# relabel_configs:
|
||||
# - source_labels: [__meta_dockerswarm_node_address]
|
||||
# target_label: __address__
|
||||
# replacement: $1:9323
|
||||
- job_name: "dockerswarm"
|
||||
dockerswarm_sd_configs:
|
||||
- host: unix:///var/run/docker.sock
|
||||
role: tasks
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
regex: running
|
||||
source_labels:
|
||||
- __meta_dockerswarm_task_desired_state
|
||||
- action: keep
|
||||
regex: true
|
||||
source_labels:
|
||||
- __meta_dockerswarm_service_label_signoz_io_scrape
|
||||
- regex: ([^:]+)(?::\d+)?
|
||||
replacement: $1
|
||||
source_labels:
|
||||
- __address__
|
||||
target_label: swarm_container_ip
|
||||
- separator: .
|
||||
source_labels:
|
||||
- __meta_dockerswarm_service_name
|
||||
- __meta_dockerswarm_task_slot
|
||||
- __meta_dockerswarm_task_id
|
||||
target_label: swarm_container_name
|
||||
- target_label: __address__
|
||||
source_labels:
|
||||
- swarm_container_ip
|
||||
- __meta_dockerswarm_service_label_signoz_io_port
|
||||
separator: ":"
|
||||
- source_labels:
|
||||
- __meta_dockerswarm_service_label_signoz_io_path
|
||||
target_label: __metrics_path__
|
||||
- source_labels:
|
||||
- __meta_dockerswarm_service_label_com_docker_stack_namespace
|
||||
target_label: namespace
|
||||
- source_labels:
|
||||
- __meta_dockerswarm_service_name
|
||||
target_label: service_name
|
||||
- source_labels:
|
||||
- __meta_dockerswarm_task_id
|
||||
target_label: service_instance_id
|
||||
- source_labels:
|
||||
- __meta_dockerswarm_node_hostname
|
||||
target_label: host_name
|
||||
processors:
|
||||
batch:
|
||||
send_batch_size: 10000
|
||||
send_batch_max_size: 11000
|
||||
timeout: 10s
|
||||
resourcedetection:
|
||||
detectors:
|
||||
- env
|
||||
- system
|
||||
timeout: 2s
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
pprof:
|
||||
endpoint: 0.0.0.0:1777
|
||||
exporters:
|
||||
otlp:
|
||||
endpoint: ${env:SIGNOZ_COLLECTOR_ENDPOINT}
|
||||
tls:
|
||||
insecure: true
|
||||
headers:
|
||||
signoz-access-token: ${env:SIGNOZ_ACCESS_TOKEN}
|
||||
# debug: {}
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
encoding: json
|
||||
metrics:
|
||||
address: 0.0.0.0:8888
|
||||
extensions:
|
||||
- health_check
|
||||
- pprof
|
||||
pipelines:
|
||||
metrics:
|
||||
receivers: [prometheus]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp]
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
version: "3"
|
||||
x-common: &common
|
||||
networks:
|
||||
- signoz-net
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
logging:
|
||||
options:
|
||||
max-size: 50m
|
||||
max-file: "3"
|
||||
deploy:
|
||||
mode: global
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
services:
|
||||
otel-agent:
|
||||
<<: *common
|
||||
image: otel/opentelemetry-collector-contrib:0.111.0
|
||||
command:
|
||||
- --config=/etc/otel-collector-config.yaml
|
||||
configs:
|
||||
- source: otel-agent-config-v1
|
||||
target: /etc/otel-collector-config.yaml
|
||||
volumes:
|
||||
- /:/hostfs:ro
|
||||
environment:
|
||||
- SIGNOZ_COLLECTOR_ENDPOINT=http://host.docker.internal:4317 # In case of external SigNoz or cloud, update the endpoint and access token
|
||||
- OTEL_RESOURCE_ATTRIBUTES=host.name={{.Node.Hostname}},os.type={{.Node.Platform.OS}}
|
||||
# - SIGNOZ_ACCESS_TOKEN="<your-access-token>"
|
||||
# Before exposing the ports, make sure the ports are not used by other services
|
||||
# ports:
|
||||
# - "4317:4317"
|
||||
# - "4318:4318"
|
||||
otel-metrics:
|
||||
<<: *common
|
||||
image: otel/opentelemetry-collector-contrib:0.111.0
|
||||
user: 0:0 # If you have security concerns, you can replace this with your `UID:GID` that has necessary permissions to docker.sock
|
||||
command:
|
||||
- --config=/etc/otel-collector-config.yaml
|
||||
configs:
|
||||
- source: otel-metrics-config-v1
|
||||
target: /etc/otel-collector-config.yaml
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
environment:
|
||||
- SIGNOZ_COLLECTOR_ENDPOINT=http://host.docker.internal:4317 # In case of external SigNoz or cloud, update the endpoint and access token
|
||||
- OTEL_RESOURCE_ATTRIBUTES=host.name={{.Node.Hostname}},os.type={{.Node.Platform.OS}}
|
||||
# - SIGNOZ_ACCESS_TOKEN="<your-access-token>"
|
||||
# Before exposing the ports, make sure the ports are not used by other services
|
||||
# ports:
|
||||
# - "4317:4317"
|
||||
# - "4318:4318"
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
logspout:
|
||||
<<: *common
|
||||
image: "gliderlabs/logspout:v3.2.14"
|
||||
command: syslog+tcp://otel-agent:2255
|
||||
user: root
|
||||
volumes:
|
||||
- /etc/hostname:/etc/host_hostname:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
depends_on:
|
||||
- otel-agent
|
||||
|
||||
networks:
|
||||
signoz-net:
|
||||
name: signoz-net
|
||||
external: true
|
||||
configs:
|
||||
otel-metrics-config-v1:
|
||||
file: /mnt/cephfs/signoz-infra/config/otel-metrics-config.v1.yaml
|
||||
otel-agent-config-v1:
|
||||
file: /mnt/cephfs/signoz-infra/config/otel-agent-config.v1.yaml
|
||||
|
|
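The signoz-net network is declared external, so it has to exist before this stack is deployed; a sketch (the stack and file names are assumptions):

    docker network create --driver overlay --attachable signoz-net
    docker stack deploy -c signoz-infra.yml signoz-infra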
@ -0,0 +1,75 @@
|
|||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
|
||||
Optional. If you don't use replicated tables, you could omit that.
|
||||
|
||||
See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
|
||||
-->
|
||||
<zookeeper>
|
||||
<node index="1">
|
||||
<host>zookeeper-1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<!-- <node index="2">
|
||||
<host>zookeeper-2</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="3">
|
||||
<host>zookeeper-3</host>
|
||||
<port>2181</port>
|
||||
</node> -->
|
||||
</zookeeper>
|
||||
|
||||
<!-- Configuration of clusters that could be used in Distributed tables.
|
||||
https://clickhouse.com/docs/en/operations/table_engines/distributed/
|
||||
-->
|
||||
<remote_servers>
|
||||
<cluster>
|
||||
<!-- Inter-server per-cluster secret for Distributed queries
|
||||
default: no secret (no authentication will be performed)
|
||||
|
||||
If set, then Distributed queries will be validated on shards, so at least:
|
||||
- such cluster should exist on the shard,
|
||||
- such cluster should have the same secret.
|
||||
|
||||
And also (and which is more important), the initial_user will
|
||||
be used as current user for the query.
|
||||
|
||||
Right now the protocol is pretty simple and it only takes into account:
|
||||
- cluster name
|
||||
- query
|
||||
|
||||
Also it will be nice if the following will be implemented:
|
||||
- source hostname (see interserver_http_host), but then it will depends from DNS,
|
||||
it can use IP address instead, but then the you need to get correct on the initiator node.
|
||||
- target hostname / ip address (same notes as for source hostname)
|
||||
- time-based security tokens
|
||||
-->
|
||||
<!-- <secret></secret> -->
|
||||
<shard>
|
||||
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
|
||||
<!-- <internal_replication>false</internal_replication> -->
|
||||
<!-- Optional. Shard weight when writing data. Default: 1. -->
|
||||
<!-- <weight>1</weight> -->
|
||||
<replica>
|
||||
<host>clickhouse</host>
|
||||
<port>9000</port>
|
||||
<!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
|
||||
<!-- <priority>1</priority> -->
|
||||
</replica>
|
||||
</shard>
|
||||
<!-- <shard>
|
||||
<replica>
|
||||
<host>clickhouse-2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>clickhouse-3</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard> -->
|
||||
</cluster>
|
||||
</remote_servers>
|
||||
</clickhouse>
|
||||
File diff suppressed because it is too large.
|
|
@ -0,0 +1,21 @@
|
|||
<functions>
|
||||
<function>
|
||||
<type>executable</type>
|
||||
<name>histogramQuantile</name>
|
||||
<return_type>Float64</return_type>
|
||||
<argument>
|
||||
<type>Array(Float64)</type>
|
||||
<name>buckets</name>
|
||||
</argument>
|
||||
<argument>
|
||||
<type>Array(Float64)</type>
|
||||
<name>counts</name>
|
||||
</argument>
|
||||
<argument>
|
||||
<type>Float64</type>
|
||||
<name>quantile</name>
|
||||
</argument>
|
||||
<format>CSV</format>
|
||||
<command>./histogramQuantile</command>
|
||||
</function>
|
||||
</functions>
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<default>
|
||||
<keep_free_space_bytes>10485760</keep_free_space_bytes>
|
||||
</default>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<!-- For S3 cold storage,
|
||||
if region is us-east-1, endpoint can be https://<bucket-name>.s3.amazonaws.com
|
||||
if region is not us-east-1, endpoint should be https://<bucket-name>.s3-<region>.amazonaws.com
|
||||
For GCS cold storage,
|
||||
endpoint should be https://storage.googleapis.com/<bucket-name>/data/
|
||||
-->
|
||||
<endpoint>https://BUCKET-NAME.s3-REGION-NAME.amazonaws.com/data/</endpoint>
|
||||
<access_key_id>ACCESS-KEY-ID</access_key_id>
|
||||
<secret_access_key>SECRET-ACCESS-KEY</secret_access_key>
|
||||
<!-- In case of S3, uncomment the below configuration in case you want to read
|
||||
AWS credentials from the Environment variables if they exist. -->
|
||||
<!-- <use_environment_credentials>true</use_environment_credentials> -->
|
||||
<!-- In case of GCS, uncomment the below configuration, since GCS does
|
||||
not support batch deletion and result in error messages in logs. -->
|
||||
<!-- <support_batch_delete>false</support_batch_delete> -->
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<tiered>
|
||||
<volumes>
|
||||
<default>
|
||||
<disk>default</disk>
|
||||
</default>
|
||||
<s3>
|
||||
<disk>s3</disk>
|
||||
<perform_ttl_move_on_insert>0</perform_ttl_move_on_insert>
|
||||
</s3>
|
||||
</volumes>
|
||||
</tiered>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
</clickhouse>
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- See also the files in users.d directory where the settings can be overridden. -->
|
||||
|
||||
<!-- Profiles of settings. -->
|
||||
<profiles>
|
||||
<!-- Default settings. -->
|
||||
<default>
|
||||
<!-- Maximum memory usage for processing single query, in bytes. -->
|
||||
<max_memory_usage>10000000000</max_memory_usage>
|
||||
|
||||
<!-- How to choose between replicas during distributed query processing.
|
||||
random - choose random replica from set of replicas with minimum number of errors
|
||||
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||
with minimum number of different symbols between replica's hostname and local hostname
|
||||
(Hamming distance).
|
||||
in_order - first live replica is chosen in specified order.
|
||||
first_or_random - if first replica one has higher number of errors, pick a random one from replicas with minimum number of errors.
|
||||
-->
|
||||
<load_balancing>random</load_balancing>
|
||||
</default>
|
||||
|
||||
<!-- Profile that allows only read queries. -->
|
||||
<readonly>
|
||||
<readonly>1</readonly>
|
||||
</readonly>
|
||||
</profiles>
|
||||
|
||||
<!-- Users and ACL. -->
|
||||
<users>
|
||||
<!-- If user name was not specified, 'default' user is used. -->
|
||||
<default>
|
||||
<!-- See also the files in users.d directory where the password can be overridden.
|
||||
|
||||
Password could be specified in plaintext or in SHA256 (in hex format).
|
||||
|
||||
If you want to specify password in plaintext (not recommended), place it in 'password' element.
|
||||
Example: <password>qwerty</password>.
|
||||
Password could be empty.
|
||||
|
||||
If you want to specify SHA256, place it in 'password_sha256_hex' element.
|
||||
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
|
||||
Restrictions of SHA256: impossibility to connect to ClickHouse using MySQL JS client (as of July 2019).
|
||||
|
||||
If you want to specify double SHA1, place it in 'password_double_sha1_hex' element.
|
||||
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
|
||||
|
||||
If you want to specify a previously defined LDAP server (see 'ldap_servers' in the main config) for authentication,
|
||||
place its name in 'server' element inside 'ldap' element.
|
||||
Example: <ldap><server>my_ldap_server</server></ldap>
|
||||
|
||||
If you want to authenticate the user via Kerberos (assuming Kerberos is enabled, see 'kerberos' in the main config),
|
||||
place 'kerberos' element instead of 'password' (and similar) elements.
|
||||
The name part of the canonical principal name of the initiator must match the user name for authentication to succeed.
|
||||
You can also place 'realm' element inside 'kerberos' element to further restrict authentication to only those requests
|
||||
whose initiator's realm matches it.
|
||||
Example: <kerberos />
|
||||
Example: <kerberos><realm>EXAMPLE.COM</realm></kerberos>
|
||||
|
||||
How to generate decent password:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
|
||||
In first line will be password and in second - corresponding SHA256.
|
||||
|
||||
How to generate double SHA1:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
|
||||
In first line will be password and in second - corresponding double SHA1.
|
||||
-->
|
||||
<password></password>
|
||||
|
||||
<!-- List of networks with open access.
|
||||
|
||||
To open access from everywhere, specify:
|
||||
<ip>::/0</ip>
|
||||
|
||||
To open access only from localhost, specify:
|
||||
<ip>::1</ip>
|
||||
<ip>127.0.0.1</ip>
|
||||
|
||||
Each element of list has one of the following forms:
|
||||
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||
<host> Hostname. Example: server01.clickhouse.com.
|
||||
To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
|
||||
To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||
Strongly recommended that regexp is ends with $
|
||||
All results of DNS requests are cached till server restart.
|
||||
-->
|
||||
<networks>
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
|
||||
<!-- Settings profile for user. -->
|
||||
<profile>default</profile>
|
||||
|
||||
<!-- Quota for user. -->
|
||||
<quota>default</quota>
|
||||
|
||||
<!-- User can create other users and grant rights to them. -->
|
||||
<!-- <access_management>1</access_management> -->
|
||||
</default>
|
||||
</users>
|
||||
|
||||
<!-- Quotas. -->
|
||||
<quotas>
|
||||
<!-- Name of quota. -->
|
||||
<default>
|
||||
<!-- Limits for time interval. You could specify many intervals with different limits. -->
|
||||
<interval>
|
||||
<!-- Length of interval. -->
|
||||
<duration>3600</duration>
|
||||
|
||||
<!-- No limits. Just calculate resource usage for time interval. -->
|
||||
<queries>0</queries>
|
||||
<errors>0</errors>
|
||||
<result_rows>0</result_rows>
|
||||
<read_rows>0</read_rows>
|
||||
<execution_time>0</execution_time>
|
||||
</interval>
|
||||
</default>
|
||||
</quotas>
|
||||
</clickhouse>
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
cors:
|
||||
allowed_origins:
|
||||
- https://*.genius.ceo
|
||||
- https://*.avicenna.hamburg
|
||||
prometheus:
|
||||
config:
|
||||
global:
|
||||
scrape_interval: 60s
|
||||
scrape_configs:
|
||||
- job_name: otel-collector
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:8888
|
||||
labels:
|
||||
job_name: otel-collector
|
||||
docker_stats:
|
||||
endpoint: unix:///var/run/docker.sock
|
||||
metrics:
|
||||
container.cpu.utilization:
|
||||
enabled: true
|
||||
container.memory.percent:
|
||||
enabled: true
|
||||
container.network.io.usage.rx_bytes:
|
||||
enabled: true
|
||||
container.network.io.usage.tx_bytes:
|
||||
enabled: true
|
||||
container.network.io.usage.rx_dropped:
|
||||
enabled: true
|
||||
container.network.io.usage.tx_dropped:
|
||||
enabled: true
|
||||
container.memory.usage.limit:
|
||||
enabled: true
|
||||
container.memory.usage.total:
|
||||
enabled: true
|
||||
container.blockio.io_service_bytes_recursive:
|
||||
enabled: true
|
||||
processors:
|
||||
batch:
|
||||
send_batch_size: 10000
|
||||
send_batch_max_size: 11000
|
||||
timeout: 10s
|
||||
resourcedetection:
|
||||
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
|
||||
detectors: [env, system]
|
||||
timeout: 2s
|
||||
resourcedetection/docker:
|
||||
detectors: [env, docker]
|
||||
timeout: 2s
|
||||
override: false
|
||||
signozspanmetrics/delta:
|
||||
metrics_exporter: clickhousemetricswrite, signozclickhousemetrics
|
||||
metrics_flush_interval: 60s
|
||||
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
|
||||
dimensions_cache_size: 100000
|
||||
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
||||
enable_exp_histogram: true
|
||||
dimensions:
|
||||
- name: service.namespace
|
||||
default: default
|
||||
- name: deployment.environment
|
||||
default: default
|
||||
# This is added to ensure the uniqueness of the timeseries
|
||||
# Otherwise, identical timeseries produced by multiple replicas of
|
||||
# collectors result in incorrect APM metrics
|
||||
- name: signoz.collector.id
|
||||
- name: service.version
|
||||
- name: browser.platform
|
||||
- name: browser.mobile
|
||||
- name: k8s.cluster.name
|
||||
- name: k8s.node.name
|
||||
- name: k8s.namespace.name
|
||||
- name: host.name
|
||||
- name: host.type
|
||||
- name: container.name
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
pprof:
|
||||
endpoint: 0.0.0.0:1777
|
||||
exporters:
|
||||
clickhousetraces:
|
||||
datasource: tcp://clickhouse:9000/signoz_traces
|
||||
low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
|
||||
use_new_schema: true
|
||||
clickhousemetricswrite:
|
||||
endpoint: tcp://clickhouse:9000/signoz_metrics
|
||||
resource_to_telemetry_conversion:
|
||||
enabled: true
|
||||
disable_v2: true
|
||||
clickhousemetricswrite/prometheus:
|
||||
endpoint: tcp://clickhouse:9000/signoz_metrics
|
||||
disable_v2: true
|
||||
signozclickhousemetrics:
|
||||
dsn: tcp://clickhouse:9000/signoz_metrics
|
||||
clickhouselogsexporter:
|
||||
dsn: tcp://clickhouse:9000/signoz_logs
|
||||
timeout: 10s
|
||||
use_new_schema: true
|
||||
otlp:
|
||||
endpoint: http://otel-collector:4317
|
||||
tls:
|
||||
insecure: true
|
||||
# debug: {}
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
encoding: json
|
||||
metrics:
|
||||
address: 0.0.0.0:8888
|
||||
extensions:
|
||||
- health_check
|
||||
- pprof
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [signozspanmetrics/delta, batch]
|
||||
exporters: [clickhousetraces]
|
||||
metrics/docker:
|
||||
receivers: [docker_stats]
|
||||
processors: [resourcedetection/docker]
|
||||
exporters: [otlp]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [clickhousemetricswrite, signozclickhousemetrics]
|
||||
metrics/prometheus:
|
||||
receivers: [prometheus]
|
||||
processors: [batch]
|
||||
exporters: [clickhousemetricswrite/prometheus, signozclickhousemetrics]
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [clickhouselogsexporter]
|
||||
|
|
@ -0,0 +1 @@
|
|||
server_endpoint: ws://signoz:4320/v1/opamp
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# my global config
|
||||
global:
|
||||
scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files: []
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
# - 'alerts.yml'
|
||||
|
||||
# A scrape configuration would normally list the endpoints to scrape;
|
||||
# none are configured here, metrics are read back via remote_read below.
|
||||
scrape_configs: []
|
||||
|
||||
remote_read:
|
||||
- url: tcp://clickhouse:9000/signoz_metrics
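If a target ever needs to be scraped directly by this Prometheus instance, an entry under scrape_configs would look roughly like the following; the job name and target are placeholders, not part of this deployment:

# scrape_configs:
#   - job_name: 'example'                       # placeholder job name
#     static_configs:
#       - targets: ['example-exporter:9100']    # placeholder target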
|
||||
|
|
@ -0,0 +1,243 @@
|
|||
version: '3'
|
||||
x-common: &common
|
||||
networks:
|
||||
- signoz-net
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
logging:
|
||||
options:
|
||||
max-size: 50m
|
||||
max-file: '3'
|
||||
x-clickhouse-defaults: &clickhouse-defaults
|
||||
!!merge <<: *common
|
||||
image: clickhouse/clickhouse-server:24.1.2-alpine
|
||||
tty: true
|
||||
user: "1000:1000"
|
||||
deploy:
|
||||
placement:
|
||||
constraints: [node.hostname == manager-node-3]
|
||||
labels:
|
||||
signoz.io/scrape: 'true'
|
||||
signoz.io/port: '9363'
|
||||
signoz.io/path: '/metrics'
|
||||
depends_on:
|
||||
- init-clickhouse
|
||||
- zookeeper-1
|
||||
healthcheck:
|
||||
test:
|
||||
- CMD
|
||||
- wget
|
||||
- --spider
|
||||
- -q
|
||||
- 0.0.0.0:8123/ping
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
ulimits:
|
||||
nproc: 65535
|
||||
nofile:
|
||||
soft: 262144
|
||||
hard: 262144
|
||||
x-zookeeper-defaults: &zookeeper-defaults
|
||||
!!merge <<: *common
|
||||
image: bitnami/zookeeper:3.7.1
|
||||
user: root
|
||||
deploy:
|
||||
placement:
|
||||
constraints: [node.hostname == manager-node-1]
|
||||
labels:
|
||||
signoz.io/scrape: 'true'
|
||||
signoz.io/port: '9141'
|
||||
signoz.io/path: '/metrics'
|
||||
healthcheck:
|
||||
test:
|
||||
- CMD-SHELL
|
||||
- curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
x-db-depend: &db-depend
|
||||
!!merge <<: *common
|
||||
depends_on:
|
||||
- clickhouse
|
||||
- schema-migrator
|
||||
services:
|
||||
init-clickhouse:
|
||||
!!merge <<: *common
|
||||
image: clickhouse/clickhouse-server:24.1.2-alpine
|
||||
command:
|
||||
- bash
|
||||
- -c
|
||||
- |
|
||||
version="v0.0.1"
|
||||
node_os=$$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)
|
||||
echo "Fetching histogram-binary for $${node_os}/$${node_arch}"
|
||||
cd /tmp
|
||||
wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz"
|
||||
tar -xvzf histogram-quantile.tar.gz
|
||||
mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
volumes:
|
||||
- /mnt/cephfs/signoz/data/clickhouse/user_scripts:/var/lib/clickhouse/user_scripts/
|
||||
zookeeper-1:
|
||||
!!merge <<: *zookeeper-defaults
|
||||
# ports:
|
||||
# - "2181:2181"
|
||||
# - "2888:2888"
|
||||
# - "3888:3888"
|
||||
volumes:
|
||||
- /mnt/cephfs/signoz/data/zookeeper-1:/bitnami/zookeeper
|
||||
environment:
|
||||
- ZOO_SERVER_ID=1
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
- ZOO_AUTOPURGE_INTERVAL=1
|
||||
- ZOO_ENABLE_PROMETHEUS_METRICS=yes
|
||||
- ZOO_PROMETHEUS_METRICS_PORT_NUMBER=9141
|
||||
clickhouse:
|
||||
!!merge <<: *clickhouse-defaults
|
||||
# TODO: needed for clickhouse TCP connection
|
||||
hostname: clickhouse
|
||||
# ports:
|
||||
# - "9000:9000"
|
||||
# - "8123:8123"
|
||||
# - "9181:9181"
|
||||
configs:
|
||||
- source: clickhouse-config-v1
|
||||
target: /etc/clickhouse-server/config.xml
|
||||
- source: clickhouse-users-v1
|
||||
target: /etc/clickhouse-server/users.xml
|
||||
- source: clickhouse-custom-function-v1
|
||||
target: /etc/clickhouse-server/custom-function.xml
|
||||
- source: clickhouse-cluster-v1
|
||||
target: /etc/clickhouse-server/config.d/cluster.xml
|
||||
volumes:
|
||||
- /mnt/cephfs/signoz/data/clickhouse/data/user_scripts:/var/lib/clickhouse/user_scripts/
|
||||
- /mnt/cephfs/signoz/data/clickhouse/data:/var/lib/clickhouse/
|
||||
# - ../common/clickhouse/storage.xml:/etc/clickhouse-server/config.d/storage.xml
|
||||
signoz:
|
||||
!!merge <<: *db-depend
|
||||
image: signoz/signoz:v0.86.1
|
||||
command:
|
||||
- --config=/root/config/prometheus.yml
|
||||
# ports:
|
||||
# - "8080:8080" # signoz port
|
||||
# - "6060:6060" # pprof port
|
||||
configs:
|
||||
- source: signoz-prometheus-config-v1
|
||||
target: /root/config/prometheus.yml
|
||||
volumes:
|
||||
- /mnt/cephfs/signoz/data/dashboards:/root/config/dashboards
|
||||
- /mnt/cephfs/signoz/data/sqlite:/var/lib/signoz/
|
||||
environment:
|
||||
- SIGNOZ_ALERTMANAGER_PROVIDER=signoz
|
||||
- SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://clickhouse:9000
|
||||
- SIGNOZ_SQLSTORE_SQLITE_PATH=/var/lib/signoz/signoz.db
|
||||
- DASHBOARDS_PATH=/root/config/dashboards
|
||||
- STORAGE=clickhouse
|
||||
- GODEBUG=netdns=go
|
||||
- TELEMETRY_ENABLED=true
|
||||
- DEPLOYMENT_TYPE=docker-swarm
|
||||
healthcheck:
|
||||
test:
|
||||
- CMD
|
||||
- wget
|
||||
- --spider
|
||||
- -q
|
||||
- localhost:8080/api/v1/health
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
networks:
|
||||
- signoz-net
|
||||
- traefik_public
|
||||
deploy:
|
||||
labels:
|
||||
- 'traefik.enable=true'
|
||||
# --- Router for the SigNoz UI ---
|
||||
- 'traefik.http.routers.signoz.rule=Host(`signoz.genius.ceo`)'
|
||||
- 'traefik.http.routers.signoz.entrypoints=https'
|
||||
- 'traefik.http.routers.signoz.tls.certresolver=main'
|
||||
# --- Service for the SigNoz UI ---
|
||||
- 'traefik.http.services.signoz.loadbalancer.server.port=8080'
|
||||
# --- Network for Traefik ---
|
||||
- 'traefik.swarm.network=traefik_public'
|
||||
otel-collector:
|
||||
!!merge <<: *db-depend
|
||||
image: signoz/signoz-otel-collector:v0.111.42
|
||||
user: root
|
||||
command:
|
||||
- --config=/etc/otel-collector-config.yaml
|
||||
- --manager-config=/etc/manager-config.yaml
|
||||
- --copy-path=/var/tmp/collector-config.yaml
|
||||
- --feature-gates=-pkg.translator.prometheus.NormalizeName
|
||||
configs:
|
||||
- source: otel-collector-config-v4
|
||||
target: /etc/otel-collector-config.yaml
|
||||
- source: otel-collector-manager-config-v1
|
||||
target: /etc/manager-config.yaml
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
environment:
|
||||
- OTEL_RESOURCE_ATTRIBUTES=host.name={{.Node.Hostname}},os.type={{.Node.Platform.OS}}
|
||||
- LOW_CARDINAL_EXCEPTION_GROUPING=false
|
||||
ports:
|
||||
# - "1777:1777" # pprof extension
|
||||
- '4317:4317' # OTLP gRPC receiver
|
||||
- '4318:4318' # OTLP HTTP receiver
|
||||
deploy:
|
||||
replicas: 3
|
||||
labels:
|
||||
- 'traefik.enable=true'
|
||||
# --- Router for the SigNoz Collector ---
|
||||
- 'traefik.http.routers.signoz-collector.rule=Host(`collector.genius.ceo`)'
|
||||
- 'traefik.http.routers.signoz-collector.entrypoints=https'
|
||||
- 'traefik.http.routers.signoz-collector.tls.certresolver=main'
|
||||
# --- Service for the SigNoz Collector ---
|
||||
- 'traefik.http.services.signoz-collector.loadbalancer.server.port=4318'
|
||||
# --- Network for Traefik ---
|
||||
- 'traefik.swarm.network=traefik_public'
|
||||
depends_on:
|
||||
- clickhouse
|
||||
- schema-migrator
|
||||
- signoz
|
||||
networks:
|
||||
- signoz-net
|
||||
- traefik_public
|
||||
schema-migrator:
|
||||
!!merge <<: *common
|
||||
image: signoz/signoz-schema-migrator:v0.111.42
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
entrypoint: sh
|
||||
command:
|
||||
- -c
|
||||
- '/signoz-schema-migrator sync --dsn=tcp://clickhouse:9000 --up= && /signoz-schema-migrator async --dsn=tcp://clickhouse:9000 --up='
|
||||
depends_on:
|
||||
- clickhouse
|
||||
networks:
|
||||
signoz-net:
|
||||
name: signoz-net
|
||||
attachable: true
|
||||
traefik_public:
|
||||
external: true
|
||||
configs:
|
||||
otel-collector-config-v4:
|
||||
file: /mnt/cephfs/signoz/config/otel-collector-config.v4.yaml
|
||||
otel-collector-manager-config-v1:
|
||||
file: /mnt/cephfs/signoz/config/signoz/otel-collector-opamp-config.yaml
|
||||
clickhouse-config-v1:
|
||||
file: /mnt/cephfs/signoz/config/clickhouse/config.v1.xml
|
||||
clickhouse-users-v1:
|
||||
file: /mnt/cephfs/signoz/config/clickhouse/users.v1.xml
|
||||
clickhouse-custom-function-v1:
|
||||
file: /mnt/cephfs/signoz/config/clickhouse/custom-function.v1.xml
|
||||
clickhouse-cluster-v1:
|
||||
file: /mnt/cephfs/signoz/config/clickhouse/cluster.v1.xml
|
||||
signoz-prometheus-config-v1:
|
||||
file: /mnt/cephfs/signoz/config/signoz/prometheus.v1.yml
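Swarm configs are immutable once created, which is presumably why every config name here carries a version suffix; changing a file means registering it under a new name and repointing the service at it. A sketch of such a bump (the -v5 name and file are hypothetical):

# configs:
#   otel-collector-config-v5:
#     file: /mnt/cephfs/signoz/config/otel-collector-config.v5.yaml
# ...and in the otel-collector service:
#   configs:
#     - source: otel-collector-config-v5
#       target: /etc/otel-collector-config.yaml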
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
# - name: AUTHENTIK | Verzeichnisse erstellen und Berechtigungen setzen
|
||||
# ansible.builtin.file:
|
||||
# path: "/mnt/cephfs/authentik/data/{{ item }}"
|
||||
# state: directory
|
||||
# owner: 1000
|
||||
# group: 1000
|
||||
# mode: '0755'
|
||||
# loop:
|
||||
# - cache
|
||||
# - certs
|
||||
# - db
|
||||
# - media
|
||||
# - templates
|
||||
# run_once: true
|
||||
# delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: AUTHENTIK | Generate Compose file
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: /mnt/cephfs/authentik/authentik.yml
|
||||
mode: 0644
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: AUTHENTIK | Deploy app stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: authentik
|
||||
compose:
|
||||
- /mnt/cephfs/authentik/authentik.yml
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
---
|
||||
networks:
|
||||
traefik_public:
|
||||
external: true
|
||||
internal:
|
||||
|
||||
services:
|
||||
postgresql:
|
||||
image: docker.io/library/postgres:16-alpine
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -d $POSTGRES_DB -U $POSTGRES_USER"]
|
||||
start_period: 20s
|
||||
interval: 30s
|
||||
retries: 5
|
||||
timeout: 5s
|
||||
volumes:
|
||||
- /mnt/cephfs/authentik/data/db:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_PASSWORD: "{{ pg_pass }}"
|
||||
POSTGRES_USER: "{{ pg_user | default('authentik') }}"
|
||||
POSTGRES_DB: "{{ pg_db | default('authentik') }}"
|
||||
networks:
|
||||
- internal
|
||||
|
||||
redis:
|
||||
image: docker.io/library/redis:alpine
|
||||
command: --save 60 1 --loglevel warning
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
|
||||
start_period: 20s
|
||||
interval: 30s
|
||||
retries: 5
|
||||
timeout: 3s
|
||||
volumes:
|
||||
- /mnt/cephfs/authentik/data/cache:/data
|
||||
networks:
|
||||
- internal
|
||||
|
||||
server:
|
||||
image: "{{ authentik_image | default('ghcr.io/goauthentik/server') }}:{{ authentik_tag | default('2025.6.3') }}"
|
||||
restart: unless-stopped
|
||||
command: server
|
||||
environment:
|
||||
AUTHENTIK_SECRET_KEY: "{{ authentik_secret_key }}"
|
||||
AUTHENTIK_REDIS__HOST: redis
|
||||
AUTHENTIK_POSTGRESQL__HOST: postgresql
|
||||
AUTHENTIK_POSTGRESQL__USER: "{{ pg_user | default('authentik') }}"
|
||||
AUTHENTIK_POSTGRESQL__NAME: "{{ pg_db | default('authentik') }}"
|
||||
AUTHENTIK_POSTGRESQL__PASSWORD: "{{ pg_pass }}"
|
||||
AUTHENTIK_ERROR_REPORTING__ENABLED: "false"
|
||||
volumes:
|
||||
- /mnt/cephfs/authentik/data/media:/media
|
||||
- /mnt/cephfs/authentik/data/templates:/templates
|
||||
networks:
|
||||
- traefik_public
|
||||
- internal
|
||||
deploy:
|
||||
labels:
|
||||
traefik.enable: "true"
|
||||
traefik.swarm.network: {{ traefik_net }}
|
||||
traefik.http.routers.authentik.rule: Host(`{{ traefik_route }}`) || HostRegexp(`{subdomain:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?}.genius.ceo`) && PathPrefix(`/outpost.goauthentik.io/`)
|
||||
traefik.http.routers.authentik.entrypoints: https
|
||||
traefik.http.routers.authentik.tls: "true"
|
||||
traefik.http.routers.authentik.tls.certresolver: main
|
||||
traefik.http.services.authentik.loadbalancer.server.port: 9000
|
||||
# - "traefik.enable=true"
|
||||
# - "traefik.swarm.network={{ traefik_net }}"
|
||||
# - "traefik.http.routers.authentik.rule=Host(`{{ traefik_route }}`) || HostRegexp(`{subdomain:[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?}.genius.ceo`) && PathPrefix(`/outpost.goauthentik.io/`)"
|
||||
# - "traefik.http.routers.authentik.entrypoints=https"
|
||||
# - "traefik.http.routers.authentik.tls=true"
|
||||
# - "traefik.http.routers.authentik.tls.certresolver=main"
|
||||
# - "traefik.http.services.authentik.loadbalancer.server.port=9000"
|
||||
|
||||
worker:
|
||||
image: "{{ authentik_image | default('ghcr.io/goauthentik/server') }}:{{ authentik_tag | default('2025.6.3') }}"
|
||||
restart: unless-stopped
|
||||
command: worker
|
||||
environment:
|
||||
AUTHENTIK_SECRET_KEY: "{{ authentik_secret_key }}"
|
||||
AUTHENTIK_REDIS__HOST: redis
|
||||
AUTHENTIK_POSTGRESQL__HOST: postgresql
|
||||
AUTHENTIK_POSTGRESQL__USER: "{{ pg_user | default('authentik') }}"
|
||||
AUTHENTIK_POSTGRESQL__NAME: "{{ pg_db | default('authentik') }}"
|
||||
AUTHENTIK_POSTGRESQL__PASSWORD: "{{ pg_pass }}"
|
||||
# `user: root` and the docker socket volume are optional.
|
||||
# See more for the docker socket integration here:
|
||||
# https://goauthentik.io/docs/outposts/integrations/docker
|
||||
# Removing `user: root` also prevents the worker from fixing the permissions
|
||||
# on the mounted folders, so when removing this make sure the folders have the correct UID/GID
|
||||
# (1000:1000 by default)
|
||||
user: root
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /mnt/cephfs/authentik/data/media:/media
|
||||
- /mnt/cephfs/authentik/data/certs:/certs
|
||||
- /mnt/cephfs/authentik/data/templates:/templates
|
||||
networks:
|
||||
- internal
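Because the worker runs as root only to fix permissions on the mounted folders (see the comments above), dropping user: root would require preparing the CephFS directories with UID/GID 1000 beforehand, essentially re-enabling the commented-out task at the top of this role. A minimal sketch under that assumption:

# - name: AUTHENTIK | Datenverzeichnisse mit UID/GID 1000 vorbereiten (sketch)
#   ansible.builtin.file:
#     path: "/mnt/cephfs/authentik/data/{{ item }}"
#     state: directory
#     owner: 1000
#     group: 1000
#     mode: '0755'
#   loop: [cache, certs, media, templates]
#   run_once: true
#   delegate_to: "{{ groups['managers'][0] }}"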
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
authentik_image: "ghcr.io/goauthentik/server"
|
||||
authentik_tag: "2025.6.3"
|
||||
authentik_secret_key: ""
|
||||
|
||||
pg_user: "authentik"
|
||||
pg_pass: ""
|
||||
pg_db: "authentik"
|
||||
|
||||
traefik_net: "traefik_public"
|
||||
traefik_route: "auth.genius.ceo"
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
---
|
||||
- name: CEPH | Private IP des ersten Managers ermitteln
|
||||
ansible.builtin.set_fact:
|
||||
ceph_bootstrap_ip: "{{ hostvars[inventory_hostname]['ansible_' + private_interface]['ipv4']['address'] }}"
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: CEPH | Cluster auf dem ersten Manager initialisieren (Bootstrap)
|
||||
ansible.builtin.command:
|
||||
cmd: "cephadm bootstrap --mon-ip {{ ceph_bootstrap_ip }}"
|
||||
creates: /etc/ceph/ceph.conf
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: CEPH | Öffentlichen SSH-Schlüssel von cephadm abrufen
|
||||
ansible.builtin.command: "cephadm shell -- ceph cephadm get-pub-key"
|
||||
register: cephadm_pub_key
|
||||
changed_when: false
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | Öffentlichen Schlüssel von cephadm auf allen Knoten für root verteilen
|
||||
ansible.posix.authorized_key:
|
||||
user: root
|
||||
key: "{{ hostvars[groups['managers'][0]]['cephadm_pub_key'].stdout }}"
|
||||
state: present
|
||||
key_options: 'no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty'
|
||||
|
||||
- name: CEPH | Andere Knoten zum Ceph-Cluster hinzufügen
|
||||
ansible.builtin.command:
|
||||
cmd: "ceph orch host add {{ item }} {{ hostvars[item]['ansible_' + private_interface]['ipv4']['address'] }}"
|
||||
loop: "{{ groups['all'] }}"
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | Prüfen, ob bereits OSDs (Speichergeräte) vorhanden sind
|
||||
ansible.builtin.command: "ceph osd ls"
|
||||
register: existing_osds
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | Spezifische Festplatte ({{ ceph_osd_device }}) auf jedem Knoten als OSD hinzufügen
|
||||
ansible.builtin.command: "ceph orch daemon add osd {{ item }}:{{ ceph_osd_device }}"
|
||||
loop: "{{ groups['all'] }}"
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
when: existing_osds.stdout | length == 0
|
||||
|
||||
- name: CEPH | Prüfen, ob CephFS bereits existiert
|
||||
ansible.builtin.command: "ceph fs ls -f json"
|
||||
register: cephfs_list
|
||||
changed_when: false
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | CephFS Pools und Dateisystem erstellen, falls nicht vorhanden
|
||||
block:
|
||||
- name: Metadaten-Pool für CephFS erstellen
|
||||
ansible.builtin.command: "ceph osd pool create {{ cephfs_name }}_metadata"
|
||||
- name: Daten-Pool für CephFS erstellen
|
||||
ansible.builtin.command: "ceph osd pool create {{ cephfs_name }}_data"
|
||||
- name: CephFS-Dateisystem erstellen
|
||||
ansible.builtin.command: "ceph fs new {{ cephfs_name }} {{ cephfs_name }}_metadata {{ cephfs_name }}_data"
|
||||
when: cephfs_list.stdout | from_json | length == 0
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | Metadaten-Server (MDS) für CephFS starten
|
||||
ansible.builtin.command: "ceph orch apply mds {{ cephfs_name }} --placement=2"
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
when: cephfs_list.stdout | from_json | length == 0
|
||||
|
||||
- name: CEPH | Ceph Admin-Schlüssel für das Mounten abrufen
|
||||
ansible.builtin.command: "ceph auth get-key client.admin"
|
||||
register: ceph_admin_key
|
||||
changed_when: false
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: CEPH | Mount-Punkt für CephFS erstellen
|
||||
ansible.builtin.file:
|
||||
path: /mnt/cephfs
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: CEPH | CephFS auf allen Knoten mounten (und in /etc/fstab eintragen)
|
||||
ansible.posix.mount:
|
||||
path: /mnt/cephfs
|
||||
src: "{{ hostvars[groups['managers'][0]]['ceph_bootstrap_ip'] }}:/"
|
||||
fstype: ceph
|
||||
opts: "name=admin,secret={{ ceph_admin_key.stdout }}"
|
||||
state: mounted
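The role ends with the mount; a follow-up health check can catch a cluster that bootstrapped but never reached a usable state. This is a sketch of such a check, not part of the committed role:

# - name: CEPH | Cluster-Gesundheit prüfen (sketch)
#   ansible.builtin.command: "ceph health"
#   register: ceph_health
#   changed_when: false
#   failed_when: "'HEALTH_ERR' in ceph_health.stdout"
#   delegate_to: "{{ groups['managers'][0] }}"
#   run_once: true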
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
---
|
||||
- name: COMMON | Systempakete aktualisieren und upgraden
|
||||
ansible.builtin.apt:
|
||||
update_cache: true
|
||||
upgrade: dist
|
||||
autoremove: true
|
||||
autoclean: true
|
||||
|
||||
- name: COMMON | Notwendige Pakete installieren
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- ufw
|
||||
- fail2ban
|
||||
- unattended-upgrades
|
||||
- apt-listchanges
|
||||
- docker-ce
|
||||
- python3-pip
|
||||
- chrony
|
||||
- lvm2
|
||||
- cephadm
|
||||
- ceph-common
|
||||
state: present
|
||||
|
||||
- name: COMMON | Chrony Dienst starten und aktivieren
|
||||
ansible.builtin.service:
|
||||
name: chronyd
|
||||
state: started
|
||||
enabled: true
|
||||
|
||||
- name: COMMON | Docker Dienst starten und aktivieren
|
||||
ansible.builtin.service:
|
||||
name: docker
|
||||
state: started
|
||||
enabled: true
|
||||
|
||||
- name: COMMON | Einen dedizierten Admin-Benutzer erstellen
|
||||
ansible.builtin.user:
|
||||
name: "{{ admin_user }}"
|
||||
password: "{{ admin_password}}"
|
||||
shell: /bin/bash
|
||||
groups: sudo,docker
|
||||
append: true
|
||||
state: present
|
||||
|
||||
- name: COMMON | SSH-Schlüssel für den Admin-Benutzer einrichten
|
||||
ansible.posix.authorized_key:
|
||||
user: "{{ admin_user }}"
|
||||
key: "{{ item }}"
|
||||
state: present
|
||||
with_items: "{{ authorized_keys }}"
|
||||
|
||||
- name: COMMON | cephadm-Benutzer erstellen
|
||||
ansible.builtin.user:
|
||||
name: "cephadm"
|
||||
password: "{{ cephadm_password }}"
|
||||
shell: /bin/bash
|
||||
groups: sudo,docker
|
||||
append: yes
|
||||
state: present
|
||||
|
||||
- name: COMMON | .ssh Verzeichnis für cephadm-Benutzer erstellen
|
||||
ansible.builtin.file:
|
||||
path: /home/cephadm/.ssh
|
||||
state: directory
|
||||
|
||||
- name: COMMON | Passwortloses Sudo für cephadm-Benutzer erlauben
|
||||
ansible.builtin.copy:
|
||||
dest: "/etc/sudoers.d/91-cephadm-nopasswd"
|
||||
content: "cephadm ALL=(ALL) NOPASSWD: ALL"
|
||||
mode: '0440'
|
||||
validate: 'visudo -cf %s'
|
||||
|
||||
- name: COMMON | ed25519 SSH-Schlüssel für cephadm-Benutzer generieren (nur auf dem ersten Manager)
|
||||
community.crypto.openssh_keypair:
|
||||
path: /home/cephadm/.ssh/id_ed25519
|
||||
type: ed25519
|
||||
owner: cephadm
|
||||
group: cephadm
|
||||
mode: '0600'
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: COMMON | Öffentlichen SSH-Schlüssel von cephadm abrufen
|
||||
ansible.builtin.slurp:
|
||||
src: /home/cephadm/.ssh/id_ed25519.pub
|
||||
register: cephadm_ssh_pub_key
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: COMMON | Öffentlichen SSH-Schlüssel von cephadm auf allen Knoten verteilen
|
||||
ansible.posix.authorized_key:
|
||||
user: cephadm
|
||||
key: "{{ hostvars[groups['managers'][0]]['cephadm_ssh_pub_key']['content'] | b64decode }}"
|
||||
state: present
|
||||
|
||||
- name: COMMON | Automatische Sicherheitsupdates konfigurieren
|
||||
ansible.builtin.copy:
|
||||
src: assets/50unattended-upgrades
|
||||
dest: /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
|
||||
- name: COMMON | Periodische Auto-Updates aktivieren
|
||||
ansible.builtin.copy:
|
||||
src: assets/20auto-upgrades
|
||||
dest: /etc/apt/apt.conf.d/20auto-upgrades
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
---
|
||||
- name: SWARM | Ensure Docker SDK for Python is installed
|
||||
ansible.builtin.apt:
|
||||
name: python3-docker
|
||||
state: present
|
||||
|
||||
- name: SWARM | Get interface IP address for the manager
|
||||
ansible.builtin.set_fact:
|
||||
manager_ip: "{{ hostvars[inventory_hostname]['ansible_' + private_interface]['ipv4']['address'] }}"
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: SWARM | Initialize the Docker Swarm
|
||||
community.docker.docker_swarm:
|
||||
state: present
|
||||
advertise_addr: "{{ manager_ip }}"
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
register: swarm_init_result
|
||||
|
||||
- name: SWARM | Get the join tokens
|
||||
community.docker.docker_swarm_info:
|
||||
register: swarm_info
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: SWARM | Verify that join tokens were fetched
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- swarm_info is defined
|
||||
- swarm_info.swarm_facts is defined
|
||||
- swarm_info.swarm_facts.JoinTokens.Manager is defined
|
||||
- swarm_info.swarm_facts.JoinTokens.Worker is defined
|
||||
fail_msg: "Konnte die Join-Tokens vom Swarm Manager nicht abrufen. Ist der Swarm korrekt initialisiert?"
|
||||
success_msg: "Join-Tokens erfolgreich abgerufen."
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: SWARM | Join manager nodes to the Swarm
|
||||
community.docker.docker_swarm:
|
||||
state: join
|
||||
remote_addrs: [ "{{ hostvars[groups['managers'][0]]['manager_ip'] }}:2377" ]
|
||||
join_token: "{{ hostvars[groups['managers'][0]]['swarm_info']['swarm_facts']['JoinTokens']['Manager'] }}"
|
||||
when: inventory_hostname in groups['managers']
|
||||
|
||||
- name: SWARM | Join worker nodes to the Swarm
|
||||
community.docker.docker_swarm:
|
||||
state: join
|
||||
remote_addrs: [ "{{ hostvars[groups['managers'][0]]['manager_ip'] }}:2377" ]
|
||||
join_token: "{{ hostvars[groups['managers'][0]]['swarm_info']['swarm_facts']['JoinTokens']['Worker'] }}"
|
||||
when: inventory_hostname in groups['workers']
|
||||
|
||||
- name: SWARM | Verify Swarm Cluster State (run on manager)
|
||||
ansible.builtin.command: docker node ls
|
||||
register: swarm_nodes
|
||||
changed_when: false
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
||||
- name: SWARM | Display cluster state
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ swarm_nodes.stdout_lines }}"
|
||||
when: inventory_hostname == groups['managers'][0]
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
dockge_stacks_dir: /mnt/cephfs/dockge/stacks
|
||||
dockge_data_dir: /mnt/cephfs/dockge/data
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
- name: DOCKGE | Copy Stack Files
|
||||
copy:
|
||||
directory_mode: true
|
||||
src: /Users/d3r0/dev/repositories/active/gc/iac/ansible/resources/dockge
|
||||
dest: /mnt/cephfs
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
# - name: DOCKGE | Sicherstellen, dass das Verzeichnis für die Anwendungs Daten existiert
|
||||
# ansible.builtin.file:
|
||||
# path: "{{ dockge_data_dir }}"
|
||||
# state: directory
|
||||
# owner: root
|
||||
# group: root
|
||||
# mode: '0755'
|
||||
# become: true
|
||||
|
||||
# - name: DOCKGE | Sicherstellen, dass das Verzeichnis für die Stacks existiert
|
||||
# ansible.builtin.file:
|
||||
# path: "{{ dockge_stacks_dir }}"
|
||||
# state: directory
|
||||
# owner: root
|
||||
# group: root
|
||||
# mode: '0755'
|
||||
# become: true
|
||||
|
||||
# - name: DOCKGE | Stack aus der Template-Datei bereitstellen
|
||||
# community.docker.docker_stack:
|
||||
# state: present
|
||||
# name: dockge
|
||||
# compose:
|
||||
# - "{{ lookup('template', '../../../resources/dockge/dockge.yml') }}"
|
||||
# delegate_to: "{{ groups['managers'][0] }}"
|
||||
# run_once: true
|
||||
- name: DOCKGE | Deploy app stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: dockge
|
||||
compose:
|
||||
- /mnt/cephfs/dockge/dockge.yml
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
- name: FAIL2BAN | Eine lokale Jail-Konfiguration erstellen
|
||||
ansible.builtin.template:
|
||||
src: jail.local.j2
|
||||
dest: /etc/fail2ban/jail.local
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
notify: restart fail2ban
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[DEFAULT]
|
||||
bantime = 1h
|
||||
findtime = 10m
|
||||
maxretry = 5
|
||||
|
||||
[sshd]
|
||||
enabled = true
|
||||
port = {{ ssh_port }}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
postgres_version: 16-alpine
|
||||
gitea_version: "1.21"
|
||||
gitea_domain: "{{ subdomain }}.{{ main_domain }}"
|
||||
gitea_http_port: 3000
|
||||
gitea_ssh_port: 2222
|
||||
|
||||
data_dir: "{{ ceph_volume }}/gitea"
|
||||
subdomain: git
|
||||
|
||||
gitea_db_type: "postgres"
|
||||
gitea_db_host: db:5432
|
||||
gitea_db_name: "gitea"
|
||||
gitea_db_user: "gitea"
|
||||
gitea_db_password: ""
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
- name: GITEA | Ensure data directories
|
||||
ansible.builtin.file:
|
||||
path: '{{ data_dir }}/data'
|
||||
state: directory
|
||||
owner: 1000
|
||||
group: 1000
|
||||
mode: '0750'
|
||||
recurse: yes
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: GITEA | Ensure DB data directories
|
||||
ansible.builtin.file:
|
||||
path: "{{ data_dir }}/data/db"
|
||||
state: directory
|
||||
# The Postgres Alpine image runs as UID 70 (postgres).
|
||||
# Debian-based images would use 999.
|
||||
owner: 70
|
||||
group: 70
|
||||
mode: '0700'
|
||||
recurse: yes
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: GITEA | Generate Compose file
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: '{{ data_dir }}/gitea.yml'
|
||||
mode: 0644
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: GITEA | Deploy stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: gitea
|
||||
compose:
|
||||
- '{{ data_dir }}/gitea.yml'
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
networks:
|
||||
{{ traefik_public_net }}:
|
||||
external: true
|
||||
internal:
|
||||
|
||||
services:
|
||||
server:
|
||||
image: gitea/gitea:{{ gitea_version }}
|
||||
environment:
|
||||
- USER_UID=1000
|
||||
- USER_GID=1000
|
||||
- GITEA__database__DB_TYPE={{ gitea_db_type }}
|
||||
- GITEA__database__HOST={{ gitea_db_host }}
|
||||
- GITEA__database__NAME={{ gitea_db_name }}
|
||||
- GITEA__database__USER={{ gitea_db_user }}
|
||||
- GITEA__database__PASSWD={{ gitea_db_password }}
|
||||
- GITEA__server__DOMAIN={{ gitea_domain }}
|
||||
- GITEA__server__SSH_DOMAIN={{ gitea_domain }}
|
||||
- GITEA__server__SSH_PORT={{ gitea_ssh_port }}
|
||||
- GITEA__server__ROOT_URL=https://{{ gitea_domain }}/
|
||||
volumes:
|
||||
- {{ data_dir }}/data:/data
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
networks:
|
||||
- internal
|
||||
- {{ traefik_public_net }}
|
||||
ports:
|
||||
- "{{ gitea_ssh_port }}:22"
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network={{ traefik_public_net }}"
|
||||
- "traefik.http.routers.gitea.rule=Host(`{{ gitea_domain }}`)"
|
||||
- "traefik.http.routers.gitea.entrypoints=https"
|
||||
- "traefik.http.routers.gitea.tls.certresolver=main"
|
||||
- "traefik.http.services.gitea.loadbalancer.server.port=3000"
|
||||
|
||||
db:
|
||||
image: postgres:{{ postgres_version }}
|
||||
restart: always
|
||||
environment:
|
||||
- POSTGRES_USER={{ gitea_db_user }}
|
||||
- POSTGRES_PASSWORD={{ gitea_db_password }}
|
||||
- POSTGRES_DB={{ gitea_db_name }}
|
||||
networks:
|
||||
- internal
|
||||
volumes:
|
||||
- {{ data_dir }}/data/db:/var/lib/postgresql/data
|
||||
command:
|
||||
- "postgres"
|
||||
- "-c"
|
||||
- "fsync=on"
|
||||
- "-c"
|
||||
- "full_page_writes=on"
|
||||
- "-c"
|
||||
- "synchronous_commit=on"
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
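The GITEA__<SECTION>__<KEY> environment variables above map onto sections of Gitea's app.ini, so further settings can be added in the same style; for example (illustrative only, not part of this template), disabling open registration would be:

#       - GITEA__service__DISABLE_REGISTRATION=true   # would map to [service] DISABLE_REGISTRATION in app.ini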
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
---
|
||||
- name: KESTRA | Ensure data directory
|
||||
ansible.builtin.file:
|
||||
path: '{{ data_dir }}/data/data'
|
||||
state: directory
|
||||
mode: '0755'
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: KESTRA | Ensure db directory
|
||||
ansible.builtin.file:
|
||||
path: '{{ data_dir }}/data/db'
|
||||
state: directory
|
||||
mode: '0755'
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: KESTRA | Konfigurationsdatei für tmpfiles.d erstellen
|
||||
ansible.builtin.copy:
|
||||
content: "d /tmp/kestra-wd 0755 root root -"
|
||||
dest: /etc/tmpfiles.d/kestra-wd.conf
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
|
||||
- name: KESTRA | Create Kestra working directory
|
||||
ansible.builtin.file:
|
||||
path: /tmp/kestra-wd
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: KESTRA | Generate Compose file
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: '{{ data_dir }}/kestra.yml'
|
||||
mode: 0644
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: KESTRA | Deploy stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: kestra
|
||||
compose:
|
||||
- /mnt/cephfs/kestra/kestra.yml
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
networks:
|
||||
internal:
|
||||
{{ traefik_public_net }}:
|
||||
external: true
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:17
|
||||
volumes:
|
||||
- {{ data_dir }}/data/db:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_DB: {{ kestra.db.name }}
|
||||
POSTGRES_USER: {{ kestra.db.user }}
|
||||
POSTGRES_PASSWORD: "{{ kestra.db.pass }}"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -d '$${POSTGRES_DB}' -U $${POSTGRES_USER}"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
networks:
|
||||
- internal
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
|
||||
kestra:
|
||||
image: kestra/kestra:v0.24.2
|
||||
entrypoint: /bin/bash
|
||||
# Note that this is meant for development only. Refer to the documentation for production deployments of Kestra which runs without a root user.
|
||||
user: "root"
|
||||
command:
|
||||
- -c
|
||||
- /app/kestra server standalone --worker-thread=128
|
||||
volumes:
|
||||
- {{ data_dir }}/data/data:/app/storage
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /tmp/kestra-wd:/tmp/kestra-wd
|
||||
environment:
|
||||
KESTRA_CONFIGURATION: |
|
||||
datasources:
|
||||
postgres:
|
||||
url: jdbc:postgresql://postgres:5432/kestra
|
||||
driverClassName: org.postgresql.Driver
|
||||
username: {{ kestra.db.user }}
|
||||
password: {{ kestra.db.pass }}
|
||||
kestra:
|
||||
tutorialFlows:
|
||||
enabled: false
|
||||
traces:
|
||||
root: DEFAULT
|
||||
micronaut:
|
||||
metrics:
|
||||
export:
|
||||
otlp:
|
||||
enabled: true
|
||||
url: http://signoz_otel-collector:4318/v1/metrics
|
||||
otel:
|
||||
traces:
|
||||
exporter: otlp
|
||||
exporter:
|
||||
otlp:
|
||||
endpoint: http://signoz_otel-collector:4318
|
||||
server:
|
||||
basic-auth:
|
||||
username: {{ kestra.basic_auth.user }}
|
||||
password: {{ kestra.basic_auth.pass }}
|
||||
repository:
|
||||
type: postgres
|
||||
storage:
|
||||
type: local
|
||||
local:
|
||||
base-path: "/app/storage"
|
||||
queue:
|
||||
type: postgres
|
||||
tasks:
|
||||
tmp-dir:
|
||||
path: /tmp/kestra-wd/tmp
|
||||
url: http://localhost:8080/
|
||||
networks:
|
||||
- {{ traefik_public_net }}
|
||||
- internal
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.swarm.network={{ traefik_public_net }}"
|
||||
- "traefik.http.routers.kestra.rule=Host(`{{ subdomain }}.{{ main_domain }}`)"
|
||||
- "traefik.http.routers.kestra.entrypoints=https"
|
||||
- "traefik.http.routers.kestra.tls=true"
|
||||
- "traefik.http.routers.kestra.tls.certresolver=main"
|
||||
- "traefik.http.services.kestra.loadbalancer.server.port=8080"
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
subdomain: kestra
|
||||
data_dir: "{{ ceph_volume }}/kestra"
|
||||
|
||||
kestra:
|
||||
basic_auth:
|
||||
user: "ma@coachhamburg.com"
|
||||
pass: "igyozi9B87yTeiQ6z2sbe8Y4aQLJV58jdaCNu"
|
||||
db:
|
||||
name: kestra
|
||||
user: kestra
|
||||
pass: ""
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
---
|
||||
- name: Copy Stack Files
|
||||
copy:
|
||||
directory_mode: true
|
||||
src: /Users/d3r0/dev/repositories/active/gc/iac/ansible/resources/monitoring
|
||||
dest: /srv
|
||||
- block:
|
||||
- name: Deploy Monitoring stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: monitoring
|
||||
compose:
|
||||
- /srv/monitoring/observability.yml
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
---
|
||||
- name: PORTAINER | Ensure data directories
|
||||
ansible.builtin.file:
|
||||
path: '{{ data_dir }}/data'
|
||||
state: directory
|
||||
mode: '0755'
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: PORTAINER | Generate Compose file
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: '{{ data_dir }}/portainer.yml'
|
||||
mode: 0644
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: PORTAINER | Deploy stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: portainer
|
||||
compose:
|
||||
- '{{ data_dir }}/portainer.yml'
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
version: '3.2'
|
||||
|
||||
services:
|
||||
agent:
|
||||
image: portainer/agent:{{ portainer_version }}
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /var/lib/docker/volumes:/var/lib/docker/volumes
|
||||
networks:
|
||||
- {{ traefik_public_net }}
|
||||
deploy:
|
||||
mode: global
|
||||
placement:
|
||||
constraints: [node.platform.os == linux]
|
||||
|
||||
portainer:
|
||||
image: portainer/portainer-ce:{{ portainer_version }}
|
||||
command: -H tcp://portainer_agent:9001 --tlsskipverify
|
||||
volumes:
|
||||
- {{ data_dir }}/data:/data
|
||||
networks:
|
||||
- {{ traefik_public_net }}
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 1
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.swarm.network={{ traefik_public_net }}"
|
||||
- "traefik.http.routers.portainer.rule=Host(`{{ subdomain }}.{{ main_domain }}`)"
|
||||
- "traefik.http.routers.portainer.entrypoints=https"
|
||||
- "traefik.http.routers.portainer.tls=true"
|
||||
- "traefik.http.routers.portainer.tls.certresolver=main"
|
||||
- "traefik.http.services.portainer.loadbalancer.server.port=9000"
|
||||
|
||||
networks:
|
||||
{{ traefik_public_net }}:
|
||||
external: true
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
subdomain: port
|
||||
data_dir: "{{ ceph_volume }}/portainer"
|
||||
|
||||
portainer_version: 2.33.5
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
- name: Copy Stack Files
|
||||
copy:
|
||||
directory_mode: true
|
||||
src: /Users/d3r0/dev/repositories/active/gc/iac/ansible/resources/signoz-infra
|
||||
dest: /mnt/cephfs
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: Deploy Signoz Infra stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: signoz-infra
|
||||
prune: true
|
||||
compose:
|
||||
- /mnt/cephfs/signoz-infra/signoz-infra.yml
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
- name: Copy Stack Files
|
||||
copy:
|
||||
directory_mode: true
|
||||
src: /Users/d3r0/dev/repositories/active/gc/iac/ansible/resources/signoz
|
||||
dest: /mnt/cephfs
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
||||
- name: Deploy Signoz stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: signoz
|
||||
prune: true
|
||||
compose:
|
||||
- /mnt/cephfs/signoz/signoz.yml
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
---
|
||||
- name: SSH | Ensure privilege separation directory exists
|
||||
ansible.builtin.file:
|
||||
path: /run/sshd
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: SSH | Root-Login nur mit Schlüssel erlauben
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/ssh/sshd_config
|
||||
regexp: '^#?PermitRootLogin'
|
||||
line: 'PermitRootLogin prohibit-password'
|
||||
validate: 'sshd -t -f %s'
|
||||
notify: restart sshd
|
||||
|
||||
- name: SSH | Passwort-Authentifizierung deaktivieren
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/ssh/sshd_config
|
||||
regexp: '^#?PasswordAuthentication'
|
||||
line: 'PasswordAuthentication no'
|
||||
validate: 'sshd -t -f %s'
|
||||
notify: restart sshd
|
||||
|
||||
- name: SSH | Leere Passwörter verbieten
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/ssh/sshd_config
|
||||
regexp: '^#?PermitEmptyPasswords'
|
||||
line: 'PermitEmptyPasswords no'
|
||||
validate: 'sshd -t -f %s'
|
||||
notify: restart sshd
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
[http]
|
||||
[http.middlewares]
|
||||
[http.middlewares.authentik.forwardAuth]
|
||||
address = "http://authentik_server:9000/outpost.goauthentik.io/auth/traefik"
|
||||
trustForwardHeader = true
|
||||
authResponseHeaders = [
|
||||
"X-authentik-username",
|
||||
"X-authentik-groups",
|
||||
"X-authentik-email",
|
||||
"X-authentik-name",
|
||||
"X-authentik-uid",
|
||||
"X-authentik-jwt",
|
||||
"X-authentik-meta-jwks",
|
||||
"X-authentik-meta-outpost",
|
||||
"X-authentik-meta-provider",
|
||||
"X-authentik-meta-app",
|
||||
"X-authentik-meta-version"
|
||||
]
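This file is picked up by the [providers.file] directory watcher in the static config that follows, so any router can opt into the forward-auth by referencing authentik@file; the Traefik dashboard router in the compose template further below does exactly that. A sketch for an arbitrary service (router name, host and port are placeholders):

#   deploy:
#     labels:
#       - "traefik.enable=true"
#       - "traefik.swarm.network=traefik_public"
#       - "traefik.http.routers.myapp.rule=Host(`myapp.genius.ceo`)"
#       - "traefik.http.routers.myapp.entrypoints=https"
#       - "traefik.http.routers.myapp.tls.certresolver=main"
#       - "traefik.http.routers.myapp.middlewares=authentik@file"
#       - "traefik.http.services.myapp.loadbalancer.server.port=8080"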
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
[global]
|
||||
checkNewVersion = true
|
||||
sendAnonymousUsage = false
|
||||
|
||||
[experimental]
|
||||
otlpLogs = true
|
||||
|
||||
[core]
|
||||
defaultRuleSyntax = "v2"
|
||||
|
||||
[accessLog]
|
||||
filePath = "/logs/access.log"
|
||||
format = "json"
|
||||
|
||||
# Enable the Dashboard
|
||||
[api]
|
||||
dashboard = true
|
||||
|
||||
# Write out Traefik logs
|
||||
[log]
|
||||
level = "INFO"
|
||||
format = "json"
|
||||
filePath = "/logs/traefik.log"
|
||||
# [log.otlp.http]
|
||||
# endpoint = "http://signoz_otel-collector:4318/v1/logs"
|
||||
|
||||
[entryPoints]
|
||||
[entryPoints.http]
|
||||
address = ":80"
|
||||
[entryPoints.http.http.redirections.entryPoint]
|
||||
to = "https"
|
||||
scheme = "https"
|
||||
|
||||
[entryPoints.https]
|
||||
address = ":443"
|
||||
# [entryPoints.https.http.tls]
|
||||
# certResolver = "main"
|
||||
|
||||
# OTel
|
||||
# [tracing]
|
||||
# serviceName = "traefik"
|
||||
# [tracing.otlp.http]
|
||||
# endpoint = "http://signoz_otel-collector:4318/v1/traces"
|
||||
# [tracing.otlp.http.tls]
|
||||
# insecureSkipVerify = true
|
||||
|
||||
# # Metrics
|
||||
# [metrics]
|
||||
# addInternals = false
|
||||
# [metrics.otlp]
|
||||
# serviceName = "traefik"
|
||||
# addEntryPointsLabels = true
|
||||
# addRoutersLabels = true
|
||||
# addServicesLabels = true
|
||||
# [metrics.otlp.http]
|
||||
# endpoint = "http://signoz_otel-collector:4318/v1/metrics"
|
||||
# [metrics.otlp.grpc]
|
||||
# endpoint = "monitoring_alloy:4317"
|
||||
# insecure = true
|
||||
|
||||
# Let's Encrypt
|
||||
[certificatesResolvers.main.acme]
|
||||
email = "ma@coachhamburg.com"
|
||||
storage = "acme.json"
|
||||
# uncomment to use staging CA for testing
|
||||
# caServer = "https://acme-staging-v02.api.letsencrypt.org/directory"
|
||||
# [certificatesResolvers.main.acme.tlsChallenge]
|
||||
[certificatesResolvers.main.acme.dnsChallenge]
|
||||
provider = "digitalocean"
|
||||
# Uncomment to use HTTP validation, like a caveman!
|
||||
# [certificatesResolvers.main.acme.httpChallenge]
|
||||
# entryPoint = "http"
|
||||
|
||||
[providers]
|
||||
[providers.swarm]
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
exposedByDefault = false
|
||||
[providers.file]
|
||||
directory = "/etc/traefik/dynamic"
|
||||
watch = true
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
---
|
||||
- name: TRAEFIK | Copy Stack Files
|
||||
copy:
|
||||
directory_mode: true
|
||||
src: traefik
|
||||
dest: "{{ ceph_volume }}"
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: TRAEFIK | Generate Compose file
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: "{{ data_dir }}/traefik.yml"
|
||||
mode: 0644
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: TRAEFIK | Ensure permissions on acme.json
|
||||
ansible.builtin.file:
|
||||
path: "{{ data_dir }}/data/acme.json"
|
||||
mode: '0600'
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: TRAEFIK | traefik_public Netzwerk erstellen
|
||||
community.docker.docker_network:
|
||||
name: traefik_public
|
||||
driver: overlay
|
||||
state: present
|
||||
attachable: yes
|
||||
ipam_config:
|
||||
- subnet: '172.16.200.0/24'
|
||||
gateway: '172.16.200.1'
|
||||
run_once: true
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
|
||||
- name: TRAEFIK | Deploy app stack
|
||||
community.docker.docker_stack:
|
||||
state: present
|
||||
name: traefik
|
||||
compose:
|
||||
- "{{ data_dir }}/traefik.yml"
|
||||
delegate_to: "{{ groups['managers'][0] }}"
|
||||
run_once: true
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
services:
|
||||
app:
|
||||
image: traefik:{{ traefik_version }}
|
||||
ports:
|
||||
- target: 80
|
||||
published: 80
|
||||
protocol: tcp
|
||||
mode: host
|
||||
- target: 443
|
||||
published: 443
|
||||
protocol: tcp
|
||||
mode: host
|
||||
- target: 8080
|
||||
published: 8080
|
||||
protocol: tcp
|
||||
environment:
|
||||
# - HETZNER_API_TOKEN={{ hetzner_api_key }}
|
||||
- DO_AUTH_TOKEN={{ do_api_key }}
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
- {{ data_dir }}/config:/etc/traefik
|
||||
- {{ data_dir }}/data/logs:/logs
|
||||
- {{ data_dir }}/data/acme.json:/acme.json
|
||||
# healthcheck:
|
||||
# test: ["CMD", "traefik", "healthcheck", "--ping"]
|
||||
# timeout: 1s
|
||||
# interval: 10s
|
||||
# retries: 3
|
||||
# start_period: 10s
|
||||
networks:
|
||||
- {{ traefik_public_net }}
|
||||
# Global mode makes an instance of traefik listen on _every_ node, so that regardless of which
|
||||
# node the request arrives on, it'll be forwarded to the correct backend service.
|
||||
deploy:
|
||||
mode: global
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.swarm.network={{ traefik_public_net }}"
|
||||
- "traefik.http.routers.api.rule=Host(`{{ subdomain }}.{{ main_domain }}`) && (PathPrefix(`/api`) || PathPrefix(`/dashboard`))"
|
||||
- "traefik.http.routers.api.entrypoints=https"
|
||||
{% if use_authentik %}
|
||||
- "traefik.http.routers.api.middlewares=authentik@file"
|
||||
{% endif %}
|
||||
- "traefik.http.routers.api.tls.domains[0].main={{ main_domain }}"
|
||||
- "traefik.http.routers.api.tls.domains[0].sans=*.{{ main_domain }}"
|
||||
- "traefik.http.routers.api.tls=true"
|
||||
- "traefik.http.routers.api.tls.certresolver=main"
|
||||
- "traefik.http.routers.api.service=api@internal"
|
||||
- "traefik.http.services.dummy.loadbalancer.server.port=9999"
|
||||
placement:
|
||||
constraints: [node.role == manager]
|
||||
|
||||
networks:
|
||||
{{ traefik_public_net }}:
|
||||
external: true
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
subdomain: router
|
||||
use_authentik: true
|
||||
data_dir: "{{ ceph_volume }}/traefik"
|
||||
|
||||
traefik_version: v3.6.2
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
---
|
||||
- name: FIREWALL | UFW auf Standardeinstellungen zurücksetzen
|
||||
community.general.ufw:
|
||||
state: reset
|
||||
|
||||
- name: FIREWALL | Standardmäßig allen ausgehenden Traffic erlauben
|
||||
community.general.ufw:
|
||||
direction: outgoing
|
||||
policy: allow
|
||||
|
||||
- name: FIREWALL | Standardmäßig allen eingehenden Traffic blockieren
|
||||
community.general.ufw:
|
||||
direction: incoming
|
||||
policy: deny
|
||||
|
||||
- name: FIREWALL | Eingehenden SSH-Traffic auf öffentlichem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "{{ ssh_port }}"
|
||||
proto: tcp
|
||||
interface: "{{ public_interface }}"
|
||||
direction: in
|
||||
|
||||
- name: FIREWALL | Eingehenden SSH-Traffic auf privatem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "{{ ssh_port }}"
|
||||
proto: tcp
|
||||
interface: "{{ private_interface }}"
|
||||
direction: in
|
||||
|
||||
- name: FIREWALL | Eingehenden HTTP/HTTPS-Traffic auf öffentlichem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "{{ item.port }}"
|
||||
proto: "{{ item.proto }}"
|
||||
interface: "{{ public_interface }}"
|
||||
direction: in
|
||||
with_items:
|
||||
- { port: '80', proto: 'tcp' }
|
||||
- { port: '443', proto: 'tcp' }
|
||||
|
||||
- name: FIREWALL | Ceph Monitor Ports auf privatem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "{{ item }}"
|
||||
proto: tcp
|
||||
interface: "{{ private_interface }}"
|
||||
direction: in
|
||||
with_items:
|
||||
- '3300'
|
||||
- '6789'
|
||||
|
||||
- name: FIREWALL | Ceph OSD/MGR Port-Range auf privatem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "6800:7568"
|
||||
proto: tcp
|
||||
interface: "{{ private_interface }}"
|
||||
direction: in
|
||||
|
||||
- name: FIREWALL | Docker Swarm Management Ports auf privatem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "2377"
|
||||
proto: tcp
|
||||
interface: "{{ private_interface }}"
|
||||
direction: in
|
||||
|
||||
- name: FIREWALL | Docker Swarm Discovery/Network Ports auf privatem Interface erlauben
|
||||
community.general.ufw:
|
||||
rule: allow
|
||||
port: "{{ item.port }}"
|
||||
proto: "{{ item.proto }}"
|
||||
interface: "{{ private_interface }}"
|
||||
direction: in
|
||||
with_items:
|
||||
- { port: '7946', proto: 'tcp' }
|
||||
- { port: '7946', proto: 'udp' }
|
||||
- { port: '4789', proto: 'udp' }
|
||||
|
||||
- name: FIREWALL | UFW aktivieren
|
||||
community.general.ufw:
|
||||
state: enabled
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
config:
|
||||
hcloud:token:
|
||||
secure: AAABAHkvxBXaEbrikY6bNyuwXehFp71LvsHTT2LOYHLiAaRCil5cSODn1EktYTYL+f4ryGJtN1j/wiyrAkbZBnyVC1QnSb84tTLYeKYXBtHo2fY87vReuyOwFZbFGylC
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
name: gc-infra
|
||||
description: A minimal Go Pulumi program
|
||||
runtime: go
|
||||
config:
|
||||
pulumi:tags:
|
||||
value:
|
||||
pulumi:template: go
|
||||
# hcloud:token:
|
||||
# value: xqb89P4vF2YlBjU75AAtyoQzNvTHaXyhB0J2UYR8dAmEQDKz5GWeKO7KgEyPzUu5
|
||||
|
|
@ -16,7 +16,7 @@ import (
|
|||
type Infrastructure struct {
|
||||
placementGroup *hcloud.PlacementGroup
|
||||
networkID *pulumi.IDOutput
|
||||
masterNodes []*hcloud.Server
|
||||
managerNodes []*hcloud.Server
|
||||
workerNodes []*hcloud.Server
|
||||
}
|
||||
|
||||
|
|
@ -55,30 +55,31 @@ func main() {
|
|||
panic(err.Error())
|
||||
}
|
||||
|
||||
infra.masterNodes, err = utils.CreateServer(ctx, utils.CreateServerArgs{
|
||||
infra.managerNodes, err = utils.CreateServer(ctx, utils.CreateServerArgs{
|
||||
PlacementGroupId: infra.placementGroup.ID(),
|
||||
NetworkId: infra.networkID,
|
||||
NetworkFirstIP: string(utils.IncrementIP(net.ParseIP("10.0.1.0"))),
|
||||
Basename: "master-node",
|
||||
Count: 1,
|
||||
SshKey: hkey,
|
||||
})
|
||||
if err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
infra.workerNodes, err = utils.CreateServer(ctx, utils.CreateServerArgs{
|
||||
PlacementGroupId: infra.placementGroup.ID(),
|
||||
NetworkId: infra.networkID,
|
||||
NetworkFirstIP: string(utils.IncrementIP(net.ParseIP("10.0.1.20"))),
|
||||
Basename: "worker-node",
|
||||
Count: 2,
|
||||
Basename: "manager-node",
|
||||
Count: 3,
|
||||
SshKey: hkey,
|
||||
ServerType: "ccx23",
|
||||
})
|
||||
if err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
// infra.workerNodes, err = utils.CreateServer(ctx, utils.CreateServerArgs{
|
||||
// PlacementGroupId: infra.placementGroup.ID(),
|
||||
// NetworkId: infra.networkID,
|
||||
// NetworkFirstIP: string(utils.IncrementIP(net.ParseIP("10.0.1.20"))),
|
||||
// Basename: "worker-node",
|
||||
// Count: 2,
|
||||
// SshKey: hkey,
|
||||
// })
|
||||
// if err != nil {
|
||||
// panic(err.Error())
|
||||
// }
|
||||
|
||||
for idx, s := range slices.Concat(infra.masterNodes, infra.workerNodes) {
|
||||
for idx, s := range slices.Concat(infra.managerNodes, infra.workerNodes) {
|
||||
err := utils.InstallAnsibleDependencies(ctx, remote.ConnectionArgs{
|
||||
Host: s.Ipv4Address,
|
||||
User: pulumi.String("root"),
|
||||
|
|
@ -89,22 +90,28 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
var advAddr = infra.masterNodes[0].Networks.ApplyT(func(net []hcloud.ServerNetworkType) string {
|
||||
return *net[0].Ip
|
||||
}).(pulumi.StringOutput)
|
||||
// var advAddr = infra.managerNodes[0].Networks.ApplyT(func(net []hcloud.ServerNetworkType) string {
|
||||
// return *net[0].Ip
|
||||
// }).(pulumi.StringOutput)
|
||||
|
||||
tokens, err := utils.InitDockerSwarm(ctx, remote.ConnectionArgs{
|
||||
Host: infra.masterNodes[0].Ipv4Address,
|
||||
User: pulumi.String("root"),
|
||||
PrivateKey: pk.PrivateKeyOpenssh}, advAddr)
|
||||
if err != nil {
|
||||
panic(err.Error())
|
||||
}
|
||||
// tokens, err := utils.InitDockerSwarm(ctx, remote.ConnectionArgs{
|
||||
// Host: infra.managerNodes[0].Ipv4Address,
|
||||
// User: pulumi.String("root"),
|
||||
// PrivateKey: pk.PrivateKeyOpenssh}, advAddr)
|
||||
// if err != nil {
|
||||
// panic(err.Error())
|
||||
// }
|
||||
|
||||
ctx.Export("SwarmTokens", tokens)
|
||||
// ctx.Export("SwarmTokens", tokens)
|
||||
|
||||
// inventory, err := utils.CreateAnsibleInventory(infra.managerNodes, infra.workerNodes)
|
||||
// if err != nil {
|
||||
// panic(err.Error())
|
||||
// }
|
||||
// ctx.Export("inventory", inventory)
|
||||
|
||||
sm := map[string]pulumi.Input{}
|
||||
for idx, s := range slices.Concat(infra.masterNodes, infra.workerNodes) {
|
||||
for idx, s := range slices.Concat(infra.managerNodes, infra.workerNodes) {
|
||||
sm[fmt.Sprintf("node-%d-ip", idx)] = s.Ipv4Address
|
||||
}
|
||||
ctx.Export("server-ips", pulumi.Map(sm))
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"github.com/pulumi/pulumi-command/sdk/go/command/remote"
|
||||
"github.com/pulumi/pulumi-hcloud/sdk/go/hcloud"
|
||||
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
|
||||
)
|
||||
|
||||
|
|
@ -14,6 +17,11 @@ type SwarmJoinTokens struct {
|
|||
WorkerToken string
|
||||
}
|
||||
|
||||
type ServerInfo struct {
|
||||
Name pulumi.StringOutput
|
||||
IP pulumi.StringOutput
|
||||
}
|
||||
|
||||
func InstallAnsibleDependencies(ctx *pulumi.Context, connArgs remote.ConnectionArgs, uniqueness string) error {
|
||||
_, err := remote.NewCommand(ctx, strings.Join([]string{uniqueness, "Install Ansible Dependencies"}, ": "),
|
||||
&remote.CommandArgs{
|
||||
|
|
@ -26,7 +34,7 @@ func InstallAnsibleDependencies(ctx *pulumi.Context, connArgs remote.ConnectionA
|
|||
return nil
|
||||
}
|
||||
|
||||
func InitDockerSwarm(ctx *pulumi.Context, connArgs remote.ConnectionArgs, advertiseAddr pulumi.StringOutput) (pulumi.StringOutput, error) {
|
||||
func InitDockerSwarm(ctx *pulumi.Context, connArgs remote.ConnectionArgs, advertiseAddr pulumi.StringOutput) (pulumi.Output, error) {
|
||||
var tokens SwarmJoinTokens
|
||||
|
||||
fullCommand := advertiseAddr.ApplyT(func(addr string) *string {
|
||||
|
|
@ -44,17 +52,103 @@ func InitDockerSwarm(ctx *pulumi.Context, connArgs remote.ConnectionArgs, advert
|
|||
return pulumi.StringOutput{}, err
|
||||
}
|
||||
|
||||
return out.Stdout.ApplyT(func(output string) string {
|
||||
return out.Stdout.ApplyT(func(output string) SwarmJoinTokens {
|
||||
searchWorker := "Worker Token: "
|
||||
pattern := regexp.MustCompile(searchWorker + `(\S+)`)
|
||||
patternWorker := regexp.MustCompile(searchWorker + `(\S+)`)
|
||||
searchManager := "Manager Token: "
|
||||
patternManager := regexp.MustCompile(searchManager + `(\S+)`)
|
||||
|
||||
matches := pattern.FindStringSubmatch(output)
|
||||
matches := patternWorker.FindStringSubmatch(output)
|
||||
if len(matches) > 1 {
|
||||
extracted := matches[1]
|
||||
tokens.WorkerToken = extracted
|
||||
return extracted
|
||||
}
|
||||
fmt.Println(tokens.WorkerToken)
|
||||
return ""
|
||||
}).(pulumi.StringOutput), nil
|
||||
matches = patternManager.FindStringSubmatch(output)
|
||||
if len(matches) > 1 {
|
||||
extracted := matches[1]
|
||||
tokens.ManagerToken = extracted
|
||||
}
|
||||
return tokens
|
||||
}), nil
|
||||
}
|
||||
|
||||
func CreateAnsibleInventory(managerNodes, workerNodes []*hcloud.Server) (pulumi.Output, error) {
|
||||
serverInfos := toServerInfo(managerNodes)
|
||||
return pulumi.All(pulumi.ToOutput(serverInfos)).ApplyT(func(results []interface{}) (string, error) {
|
||||
var serverInfos = results[0].([]ServerInfo)
|
||||
// var workerSlice = results[1].([]*hcloud.Server)
|
||||
|
||||
serverData := make(map[string][]ServerInfo)
|
||||
|
||||
for _, s := range serverInfos {
|
||||
serverData["Manager"] = append(serverData["Manager"], ServerInfo{
|
||||
Name: s.Name,
|
||||
IP: s.IP,
|
||||
})
|
||||
}
|
||||
// for _, result := range workerSlice {
|
||||
// server := result.(map[string]interface{})
|
||||
// serverData["Worker"] = append(serverData["Worker"], ServerInfo{
|
||||
// Name: server["name"].(string),
|
||||
// IP: server["ipv4_address"].(string),
|
||||
// })
|
||||
// }
|
||||
fmt.Println(serverData["Manager"])
|
||||
fmt.Println(results[0])
|
||||
return generateInventoryFile(serverData)
|
||||
}).(pulumi.Output), nil
|
||||
}
|
||||
|
||||
func toServerInfo(server []*hcloud.Server) pulumi.ArrayOutput {
|
||||
serverInfo := []ServerInfo{}
|
||||
for _, s := range server {
|
||||
serverInfo = append(serverInfo, ServerInfo{
|
||||
Name: s.Name,
|
||||
IP: s.Ipv4Address,
|
||||
})
|
||||
}
|
||||
return pulumi.All(serverInfo).ApplyT(func(args []interface{}) []interface{} {
|
||||
var serverInfo []interface{}
|
||||
|
||||
for _, s := range args {
|
||||
val := s.(map[string]interface{})
|
||||
serverInfo = append(serverInfo, map[string]interface{}{
|
||||
"Name": val["Name"].(string),
|
||||
"IP": val["IP"].(string),
|
||||
})
|
||||
}
|
||||
return serverInfo
|
||||
}).(pulumi.ArrayOutput)
|
||||
}
|
||||
|
||||
func generateInventoryFile(inventory map[string][]ServerInfo) (string, error) {
|
||||
const inventoryTmpl = `
|
||||
[all]
|
||||
{{ range .Manager }}
|
||||
{{ .Name }} ansible_host={{ .IP }} ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=../infra-base/private_key
|
||||
{{ end }}
|
||||
{{ range .Worker }}
|
||||
{{ .Name }} ansible_host={{ .IP }} ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=../infra-base/private_key
|
||||
{{ end }}
|
||||
|
||||
[manager]
|
||||
{{ range .Manager }}
|
||||
{{ .Name }} ansible_host={{ .IP }} ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=../infra-base/private_key
|
||||
{{ end }}
|
||||
|
||||
[worker]
|
||||
{{ range .Worker }}
|
||||
{{ .Name }} ansible_host={{ .IP }} ansible_connection=ssh ansible_user=root ansible_ssh_private_key_file=../infra-base/private_key
|
||||
{{ end }}
|
||||
`
|
||||
tmpl, err := template.New("inventory").Parse(inventoryTmpl)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
err = tmpl.Execute(&buf, inventory)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ type CreateServerArgs struct {
|
|||
Basename string
|
||||
Count int
|
||||
SshKey *hcloud.SshKey
|
||||
ServerType string
|
||||
}
|
||||
|
||||
func CreateServer(ctx *pulumi.Context, cfg CreateServerArgs) ([]*hcloud.Server, error) {
|
||||
|
|
@ -64,9 +65,8 @@ func CreateServer(ctx *pulumi.Context, cfg CreateServerArgs) ([]*hcloud.Server,
|
|||
s, err := hcloud.NewServer(ctx, sn, &hcloud.ServerArgs{
|
||||
Name: pulumi.String(sn),
|
||||
Image: pulumi.String("docker-ce"),
|
||||
ServerType: pulumi.String("cpx21"),
|
||||
Location: pulumi.StringPtr("fsn1"),
|
||||
// Datacenter: pulumi.StringPtr("fsn1"),
|
||||
ServerType: pulumi.String(cfg.ServerType),
|
||||
Location: pulumi.StringPtr("hel1"),
|
||||
Networks: hcloud.ServerNetworkTypeArray{
|
||||
&hcloud.ServerNetworkTypeArgs{
|
||||
NetworkId: IDtoIntOutput(cfg.NetworkId),
|
||||
|
|
@ -85,6 +85,24 @@ func CreateServer(ctx *pulumi.Context, cfg CreateServerArgs) ([]*hcloud.Server,
|
|||
if err != nil {
|
||||
return nodes, err
|
||||
}
|
||||
|
||||
cephVolume, err := hcloud.NewVolume(ctx, fmt.Sprintf("ceph-%s", sn), &hcloud.VolumeArgs{
|
||||
Name: pulumi.Sprintf("%s-ceph-vol-0%d", s.Name, i+1),
|
||||
Size: pulumi.Int(100),
|
||||
Location: s.Location,
|
||||
})
|
||||
if err != nil {
|
||||
return nodes, fmt.Errorf("couldn't create volume: %w", err)
|
||||
}
|
||||
|
||||
_, err = hcloud.NewVolumeAttachment(ctx, fmt.Sprintf("ceph-vol-attach-%s", sn), &hcloud.VolumeAttachmentArgs{
|
||||
VolumeId: IDtoIntOutput(cephVolume.ID()),
|
||||
ServerId: IDtoIntOutput(s.ID()),
|
||||
})
|
||||
if err != nil {
|
||||
return nodes, fmt.Errorf("couldn't attach volume to node %d", i)
|
||||
}
|
||||
|
||||
nodes = append(nodes, s)
|
||||
nextIp = IncrementIP(net.ParseIP(nextIp)).String()
|
||||
}
|
||||
|
|
|
|||