Compare commits

...

5 Commits

Author        SHA1        Message                             Date
Marcel Arndt  497562f8a0  optimize gitea tasks and variables  2026-01-07 15:09:10 +01:00
Marcel Arndt  0496728700  add infrastructure monitoring       2026-01-07 15:08:22 +01:00
                          (Add Clickstack (HyperDX) as the aggregation platform; configure the OTel Collector to collect host metrics.)
Marcel Arndt  4eeaf483bc  add platform application fizzy      2026-01-07 15:06:18 +01:00
Marcel Arndt  0d83594127  ignore playbook.yml                 2026-01-07 14:44:18 +01:00
Marcel Arndt  a2accebf3c  add tags to platform playbook       2026-01-07 14:42:11 +01:00
26 changed files with 898 additions and 98 deletions

iac/ansible/.gitignore (new file)
View File

@@ -0,0 +1 @@
playbook.yml
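
Since playbook.yml was previously tracked (its deletion appears later in this compare), the ignore rule only takes full effect once the file is also removed from the index, for example:

    git rm --cached iac/ansible/playbook.yml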

View File

@@ -2,8 +2,28 @@
  hosts: all
  gather_facts: true
  roles:
    - traefik
    - authentik
    - portainer
    - leantime
    - kestra
    # --- Infrastructure / Networking ---
    - role: traefik
      tags: ['infra', 'proxy', 'traefik']
    - role: hyperdx
      tags: ["ops", "observability", "logging", "hyperdx"]
    # --- Operations / Observability ---
    - role: infra-monitoring
      tags: ['ops', 'monitoring', 'otel']
    - role: portainer
      tags: ['ops', 'management', 'portainer']
    # --- Identity / Security ---
    - role: authentik
      tags: ['infra', 'auth', 'authentik']
    # --- Applications / Dev Tools ---
    - role: gitea
      tags: ['apps', 'git', 'gitea']
    - role: kestra
      tags: ['apps', 'workflow', 'kestra']
    # - leantime
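
With each role tagged, individual layers of the platform can be deployed selectively. A usage sketch, assuming the playbook file is named platform.yml and an inventory file exists (both names are assumptions, not shown in this diff):

    # deploy only the operations/observability layer
    ansible-playbook -i inventory platform.yml --tags ops

    # deploy the reverse proxy and identity roles together
    ansible-playbook -i inventory platform.yml --tags "proxy,auth"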

View File

@@ -1,59 +1,74 @@
$ANSIBLE_VAULT;1.1;AES256
[encrypted vault payload (AES256 hex ciphertext) omitted]
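
The payload stays encrypted at rest and is only decrypted at runtime with the vault password. It can be inspected or edited with the ansible-vault CLI; a sketch (the vault file path is an assumption, it is not shown in this diff):

    ansible-vault view group_vars/all/vault.yml
    ansible-vault edit group_vars/all/vault.yml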

View File

@@ -1,10 +0,0 @@
---
- name: Main-Playbook
  hosts: all
  gather_facts: true
  roles:
    # - traefik
    # - portainer
    # - kestra
    - gitea

View File

@@ -17,7 +17,7 @@ services:
    volumes:
      - /mnt/cephfs/authentik/data/db:/var/lib/postgresql/data
    environment:
      POSTGRES_PASSWORD: "{{ pg_pass }}"
      POSTGRES_PASSWORD: "{{ authentik_pg_pass }}"
      POSTGRES_USER: "{{ pg_user | default('authentik') }}"
      POSTGRES_DB: "{{ pg_db | default('authentik') }}"
    networks:
@@ -48,7 +48,7 @@ services:
      AUTHENTIK_POSTGRESQL__HOST: postgresql
      AUTHENTIK_POSTGRESQL__USER: "{{ pg_user | default('authentik') }}"
      AUTHENTIK_POSTGRESQL__NAME: "{{ pg_db | default('authentik') }}"
      AUTHENTIK_POSTGRESQL__PASSWORD: "{{ pg_pass }}"
      AUTHENTIK_POSTGRESQL__PASSWORD: "{{ authentik_pg_pass }}"
      AUTHENTIK_ERROR_REPORTING__ENABLED: "false"
    volumes:
      - /mnt/cephfs/authentik/data/media:/media
@@ -83,7 +83,7 @@ services:
      AUTHENTIK_POSTGRESQL__HOST: postgresql
      AUTHENTIK_POSTGRESQL__USER: "{{ pg_user | default('authentik') }}"
      AUTHENTIK_POSTGRESQL__NAME: "{{ pg_db | default('authentik') }}"
      AUTHENTIK_POSTGRESQL__PASSWORD: "{{ pg_pass }}"
      AUTHENTIK_POSTGRESQL__PASSWORD: "{{ authentik_pg_pass }}"
      # `user: root` and the docker socket volume are optional.
      # See more for the docker socket integration here:
      # https://goauthentik.io/docs/outposts/integrations/docker

View File

@@ -1,10 +1,8 @@
---
authentik_image: "ghcr.io/goauthentik/server"
authentik_tag: "2025.6.3"
authentik_secret_key: ""
pg_user: "authentik"
pg_pass: ""
pg_db: "authentik"
traefik_net: "traefik_public"

View File

@@ -90,4 +90,19 @@
    src: "{{ hostvars[groups['managers'][0]]['ceph_bootstrap_ip'] }}:/"
    fstype: ceph
    opts: "name=admin,secret={{ ceph_admin_key.stdout }}"
    state: mounted
    state: mounted
# Enable metrics
- name: CEPH | Check whether the Prometheus module is already enabled
  ansible.builtin.command: "ceph mgr module ls --format json"
  register: ceph_modules_status
  changed_when: false
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true
- name: CEPH | Enable the Prometheus module
  ansible.builtin.command: "ceph mgr module enable prometheus"
  # Only run if 'prometheus' is missing from 'enabled_modules' in the JSON output
  when: "'prometheus' not in (ceph_modules_status.stdout | from_json).enabled_modules"
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true
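
Once enabled, the Ceph mgr serves Prometheus metrics on port 9283 (the same port the firewall rule further down in this compare opens). A quick spot check on the manager node, assuming curl and jq are installed:

    ceph mgr module ls --format json | jq '.enabled_modules'
    curl -s http://127.0.0.1:9283/metrics | head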

View File

@@ -0,0 +1,6 @@
fizzy_secret_key_base: ""
fizzy_from_address: "system@avicenna.hamburg"
fizzy_smtp_address: "smtp.postmarkapp.com"
fizzy_smtp_username: ""
fizzy_smtp_password: ""

View File

@@ -0,0 +1,42 @@
- name: FIZZY | Ensure data directories
  ansible.builtin.file:
    path: '{{ data_dir }}/{{ item.path }}'
    state: directory
    owner: 1000
    group: 1000
    mode: '0750'
    recurse: no
  loop:
    - { path: 'data' }
    - { path: 'data/storage' }
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true
# - name: FIZZY | Ensure DB data directories
#   ansible.builtin.file:
#     path: "{{ data_dir }}/data/db"
#     state: directory
#     # The Postgres Alpine image uses UID 70 (postgres).
#     # Debian-based images would use 999.
#     owner: 70
#     group: 70
#     mode: '0700'
#     recurse: no
#   delegate_to: "{{ groups['managers'][0] }}"
- name: FIZZY | Generate Compose file
  ansible.builtin.template:
    src: docker-compose.yml.j2
    dest: '{{ data_dir }}/fizzy.yml'
    mode: '0644'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: FIZZY | Deploy stack
  community.docker.docker_stack:
    state: present
    name: fizzy
    compose:
      - '{{ data_dir }}/fizzy.yml'
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true
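
After the play runs, the stack and its replica state can be verified on a manager node (Swarm names the service from the stack and compose service names):

    docker stack services fizzy
    docker service ps fizzy_web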

View File

@@ -0,0 +1,32 @@
networks:
  {{ traefik_public_net }}:
    external: true
services:
  web:
    image: ghcr.io/basecamp/fizzy:main
    restart: unless-stopped
    environment:
      - SECRET_KEY_BASE={{ fizzy_secret_key_base }}
      # - TLS_DOMAIN={{ fizzy_domain }}
      - BASE_URL=https://{{ fizzy_domain }}
      - MAILER_FROM_ADDRESS={{ fizzy_from_address }}
      - SMTP_ADDRESS={{ fizzy_smtp_address }}
      - SMTP_USERNAME={{ fizzy_smtp_username }}
      - SMTP_PASSWORD={{ fizzy_smtp_password }}
      - VAPID_PRIVATE_KEY=myvapidprivatekey
      - VAPID_PUBLIC_KEY=myvapidpublickey
    volumes:
      - {{ data_dir }}/data/storage:/rails/storage
    networks:
      - {{ traefik_public_net }}
    deploy:
      mode: replicated
      replicas: 1
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network={{ traefik_public_net }}"
        - "traefik.http.routers.fizzy.rule=Host(`{{ fizzy_domain }}`)"
        - "traefik.http.routers.fizzy.entrypoints=https"
        - "traefik.http.routers.fizzy.tls.certresolver=main"
        - "traefik.http.services.fizzy.loadbalancer.server.port=80"

View File

@@ -0,0 +1,3 @@
data_dir: "{{ ceph_volume }}/fizzy"
subdomain: fizzy
fizzy_domain: "{{ subdomain }}.{{ main_domain }}"

View File

@@ -1,12 +1,8 @@
postgres_version: 16-alpine
gitea_version: "1.21"
gitea_domain: "{{ subdomain }}.{{ main_domain }}"
gitea_http_port: 3000
gitea_ssh_port: 2222
data_dir: "{{ ceph_volume }}/gitea"
subdomain: git
gitea_db_type: "postgres"
gitea_db_host: db:5432
gitea_db_name: "gitea"

View File

@@ -5,7 +5,8 @@
    owner: 1000
    group: 1000
    mode: '0750'
    recurse: yes
    recurse: no
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: GITEA | Ensure DB data directories
@@ -17,7 +18,8 @@
    owner: 70
    group: 70
    mode: '0700'
    recurse: yes
    recurse: no
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: GITEA | Generate Compose file

View File

@@ -0,0 +1,3 @@
data_dir: "{{ ceph_volume }}/gitea"
subdomain: git
gitea_domain: "{{ subdomain }}.{{ main_domain }}"

View File

@@ -0,0 +1,175 @@
<?xml version="1.0"?>
<clickhouse>
    <logger>
        <level>debug</level>
        <console>true</console>
        <log remove="remove" />
        <errorlog remove="remove" />
    </logger>
    <listen_host>0.0.0.0</listen_host>
    <http_port>8123</http_port>
    <tcp_port>9000</tcp_port>
    <interserver_http_host>ch-server</interserver_http_host>
    <interserver_http_port>9009</interserver_http_port>
    <max_connections>4096</max_connections>
    <keep_alive_timeout>64</keep_alive_timeout>
    <max_concurrent_queries>100</max_concurrent_queries>
    <uncompressed_cache_size>8589934592</uncompressed_cache_size>
    <mark_cache_size>5368709120</mark_cache_size>
    <path>/var/lib/clickhouse/</path>
    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
    <user_directories>
        <users_xml>
            <path>users.xml</path>
        </users_xml>
    </user_directories>
    <!-- <users_config>users.xml</users_config> -->
    <default_profile>default</default_profile>
    <default_database>default</default_database>
    <timezone>UTC</timezone>
    <mlock_executable>false</mlock_executable>
    <!-- Prometheus exporter -->
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>9363</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
        <errors>true</errors>
    </prometheus>
    <!-- Query log. Used only for queries with setting log_queries = 1. -->
    <query_log>
        <database>system</database>
        <table>query_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>
    <!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected
         with "collect_interval_milliseconds" interval. -->
    <metric_log>
        <database>system</database>
        <table>metric_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
    </metric_log>
    <!-- Asynchronous metric log contains values of metrics from system.asynchronous_metrics. -->
    <asynchronous_metric_log>
        <database>system</database>
        <table>asynchronous_metric_log</table>
        <!-- Asynchronous metrics are updated once a minute, so there is no need to flush more often. -->
        <flush_interval_milliseconds>7000</flush_interval_milliseconds>
    </asynchronous_metric_log>
    <!-- OpenTelemetry log contains OpenTelemetry trace spans. -->
    <opentelemetry_span_log>
        <!--
            The default table creation code is insufficient, this <engine> spec
            is a workaround. There is no 'event_time' for this log, but two times,
            start and finish. It is sorted by finish time, to avoid inserting
            data too far away in the past (probably we can sometimes insert a span
            that is seconds earlier than the last span in the table, due to a race
            between several spans inserted in parallel). This gives the spans a
            global order that we can use to e.g. retry insertion into some external
            system.
        -->
        <engine>
            engine MergeTree
            partition by toYYYYMM(finish_date)
            order by (finish_date, finish_time_us, trace_id)
        </engine>
        <database>system</database>
        <table>opentelemetry_span_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </opentelemetry_span_log>
    <!-- Crash log. Stores stack traces for fatal errors. This table is normally empty. -->
    <crash_log>
        <database>system</database>
        <table>crash_log</table>
        <partition_by />
        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
    </crash_log>
    <!-- Profiling on Processors level. -->
    <processors_profile_log>
        <database>system</database>
        <table>processors_profile_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </processors_profile_log>
    <!-- Part log contains information about all actions with parts in MergeTree tables
         (creation, deletion, merges, downloads). -->
    <part_log>
        <database>system</database>
        <table>part_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </part_log>
    <!-- Trace log. Stores stack traces collected by query profilers.
         See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings. -->
    <trace_log>
        <database>system</database>
        <table>trace_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </trace_log>
    <!-- Query thread log. Has information about all threads participated in query execution.
         Used only for queries with setting log_query_threads = 1. -->
    <query_thread_log>
        <database>system</database>
        <table>query_thread_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_thread_log>
    <!-- Query views log. Has information about all dependent views associated with a query.
         Used only for queries with setting log_query_views = 1. -->
    <query_views_log>
        <database>system</database>
        <table>query_views_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_views_log>
    <remote_servers>
        <hdx_cluster>
            <shard>
                <replica>
                    <host>ch-server</host>
                    <port>9000</port>
                </replica>
            </shard>
        </hdx_cluster>
    </remote_servers>
    <distributed_ddl>
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
</clickhouse>
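
With the <prometheus> block above, ClickHouse itself exposes scrapeable metrics on port 9363. A spot check from any container attached to the stack's internal network (the ch-server hostname comes from the compose service name below):

    curl -s http://ch-server:9363/metrics | head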

View File

@@ -0,0 +1,51 @@
<?xml version="1.0"?>
<clickhouse>
    <profiles>
        <default>
            <max_memory_usage>10000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>in_order</load_balancing>
            <log_queries>1</log_queries>
        </default>
    </profiles>
    <users>
        <default>
            <password></password>
            <profile>default</profile>
            <networks>
                <ip>::/0</ip>
            </networks>
            <quota>default</quota>
        </default>
        <api>
            <password>api</password>
            <profile>default</profile>
            <networks>
                <ip>::/0</ip>
            </networks>
            <quota>default</quota>
        </api>
        <worker>
            <password>worker</password>
            <profile>default</profile>
            <networks>
                <ip>::/0</ip>
            </networks>
            <quota>default</quota>
        </worker>
    </users>
    <quotas>
        <default>
            <interval>
                <duration>3600</duration>
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</clickhouse>
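
The api and worker users above ship with fixed, trivially guessable passwords. A quick authentication check, run from inside the ClickHouse container (container lookup omitted):

    clickhouse-client --user api --password api --query "SELECT 1"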

View File

@@ -0,0 +1,66 @@
---
- name: HYPERDX | Create directories
  ansible.builtin.file:
    path: "{{ data_dir }}/{{ item.path }}"
    state: directory
    owner: "{{ item.uid }}"
    group: "{{ item.gid }}"
    mode: '0755'
    recurse: no
  loop:
    - { path: 'mongo', uid: 999, gid: 999 }            # MongoDB default
    - { path: 'clickhouse/data', uid: 101, gid: 101 }  # ClickHouse default
    - { path: 'clickhouse/logs', uid: 101, gid: 101 }
    - { path: 'clickhouse/config', uid: 101, gid: 101 }
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: HYPERDX | Copy ClickHouse configuration
  ansible.builtin.copy:
    src: "{{ item }}"
    dest: "/mnt/cephfs/hyperdx/clickhouse/config/"
    owner: 101
    group: 101
    mode: '0644'
  loop:
    - files/config.xml  # local to the Ansible repo
    - files/users.xml
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: HYPERDX | Create shared-observability network
  community.docker.docker_network:
    name: shared-observability
    driver: overlay
    state: present
    attachable: yes
    ipam_config:
      - subnet: '172.16.116.0/24'
        gateway: '172.16.116.1'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: HYPERDX | Generate OTel Collector config
  ansible.builtin.template:
    src: otel-collector-config.yaml.j2
    dest: "{{ data_dir }}/data/otel-collector-config.yaml"
    mode: '0644'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: HYPERDX | Generate Compose file
  ansible.builtin.template:
    src: docker-compose.yml.j2
    dest: '{{ data_dir }}/hyperdx.yml'
    mode: '0644'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: HYPERDX | Deploy stack
  community.docker.docker_stack:
    state: present
    name: hyperdx
    compose:
      - '{{ data_dir }}/hyperdx.yml'
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true

View File

@@ -0,0 +1,99 @@
version: '3.9'
services:
  db:
    image: mongo:5.0.14-focal
    volumes:
      - "{{ data_dir }}/mongo:/data/db"
    networks:
      - internal
    deploy:
      mode: replicated
      replicas: 1
      # placement:
      #   constraints: [node.role == worker] # prefer keeping DBs on workers when possible
  otel-collector:
    image: "clickhouse/clickstack-otel-collector:2"
    environment:
      CLICKHOUSE_ENDPOINT: 'tcp://ch-server:9000?dial_timeout=10s'
      HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE: "default"
      HYPERDX_LOG_LEVEL: "{{ hyperdx_log_level | default('info') }}"
      OPAMP_SERVER_URL: 'http://app:{{ hyperdx_opamp_port | default(4320) }}'
    ports:
      - "4317:4317" # OTLP gRPC
      - "4318:4318" # OTLP HTTP
      # - "8888:8888" # metrics (optional)
    networks:
      - internal
      - shared-observability
      - traefik_public
    deploy:
      mode: replicated
      replicas: 3
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=traefik_public"
        - "traefik.http.routers.otel-collector.rule=Host(`{{ otlp_domain }}`)"
        - "traefik.http.routers.otel-collector.entrypoints=https"
        - "traefik.http.routers.otel-collector.tls.certresolver=main"
        - "traefik.http.services.otel-collector.loadbalancer.server.port=4318"
  app:
    image: "hyperdx/hyperdx:2"
    environment:
      # URLs adjusted so the app is reachable through Traefik
      FRONTEND_URL: "https://{{ hdx_domain }}"
      HYPERDX_APP_URL: "https://{{ hdx_domain }}"
      HYPERDX_API_KEY: "{{ hyperdx_api_key }}"
      HYPERDX_API_PORT: "{{ hyperdx_api_port | default(8000) }}"
      HYPERDX_APP_PORT: "{{ hyperdx_app_port | default(8080) }}"
      HYPERDX_LOG_LEVEL: "{{ hyperdx_log_level | default('info') }}"
      MINER_API_URL: 'http://miner:5123' # in case the miner is needed (not defined in the original compose?)
      MONGO_URI: 'mongodb://db:27017/hyperdx'
      SERVER_URL: "http://127.0.0.1:{{ hyperdx_api_port | default(8000) }}"
      OPAMP_PORT: "{{ hyperdx_opamp_port | default(4320) }}"
      OTEL_EXPORTER_OTLP_ENDPOINT: 'http://otel-collector:4318'
      OTEL_SERVICE_NAME: 'hdx-oss-app'
      USAGE_STATS_ENABLED: "{{ usage_stats_enabled | default('false') }}"
      # ClickHouse connection string (default user/pass from the ClickHouse image)
      DEFAULT_CONNECTIONS: >-
        [{"name":"Local ClickHouse","host":"http://ch-server:8123","username":"default","password":""}]
      DEFAULT_SOURCES: '{{ hyperdx_default_sources | to_json }}'
    networks:
      - internal
      - traefik_public
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=traefik_public"
        - "traefik.http.routers.hyperdx.rule=Host(`{{ subdomain }}.{{ main_domain }}`)"
        - "traefik.http.routers.hyperdx.entrypoints=https"
        - "traefik.http.routers.hyperdx.tls.certresolver=main"
        - "traefik.http.services.hyperdx.loadbalancer.server.port={{ hyperdx_app_port | default(8080) }}"
  ch-server:
    image: clickhouse/clickhouse-server:25.6-alpine
    environment:
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
    volumes:
      - "{{ data_dir }}/clickhouse/config/config.xml:/etc/clickhouse-server/config.xml"
      - "{{ data_dir }}/clickhouse/config/users.xml:/etc/clickhouse-server/users.xml"
      - "{{ data_dir }}/clickhouse/data:/var/lib/clickhouse"
      - "{{ data_dir }}/clickhouse/logs:/var/log/clickhouse-server"
    deploy:
      mode: replicated
      replicas: 1
      # placement:
      #   constraints: [node.role == worker]
    networks:
      - internal
networks:
  internal:
    driver: overlay
  traefik_public:
    external: true
  shared-observability:
    external: true
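
Since the collector is published through Traefik at the rendered otlp_domain, any workload can ship telemetry to it over OTLP/HTTP. A smoke test against the ingest endpoint (the domain is a placeholder for the rendered value; an empty resourceLogs payload should be accepted with a 2xx response):

    curl -s -X POST "https://otlp.example.com/v1/logs" \
      -H "content-type: application/json" \
      -H "authorization: <ingestion api key>" \
      -d '{"resourceLogs":[]}'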

View File

@@ -0,0 +1,103 @@
data_dir: "{{ ceph_volume }}/hyperdx"
subdomain: "hdx"
hdx_domain: "{{ subdomain }}.{{ main_domain }}"
otlp_domain: "otlp.{{ main_domain }}"
# Generate a secure key: `openssl rand -hex 16`
hyperdx_api_key: ""
hyperdx_api_port: 8000
hyperdx_app_port: 8080
hyperdx_log_level: "info"
hyperdx_opamp_port: 4320
usage_stats_enabled: "false"
# Data source definitions for the frontend
hyperdx_default_sources:
  - name: "Logs"
    kind: "log"
    from:
      databaseName: "default"
      tableName: "otel_logs"
    timestampValueExpression: "TimestampTime"
    displayedTimestampValueExpression: "Timestamp"
    implicitColumnExpression: "Body"
    serviceNameExpression: "ServiceName"
    bodyExpression: "Body"
    eventAttributesExpression: "LogAttributes"
    resourceAttributesExpression: "ResourceAttributes"
    defaultTableSelectExpression: "Timestamp,ServiceName,SeverityText,Body"
    severityTextExpression: "SeverityText"
    traceIdExpression: "TraceId"
    spanIdExpression: "SpanId"
    connection: "Local ClickHouse"
    traceSourceId: "Traces"
    sessionSourceId: "Sessions"
    metricSourceId: "Metrics"
  - name: "Traces"
    kind: "trace"
    from:
      databaseName: "default"
      tableName: "otel_traces"
    timestampValueExpression: "Timestamp"
    displayedTimestampValueExpression: "Timestamp"
    implicitColumnExpression: "SpanName"
    serviceNameExpression: "ServiceName"
    bodyExpression: "SpanName"
    eventAttributesExpression: "SpanAttributes"
    resourceAttributesExpression: "ResourceAttributes"
    defaultTableSelectExpression: "Timestamp,ServiceName,StatusCode,round(Duration/1e6),SpanName"
    traceIdExpression: "TraceId"
    spanIdExpression: "SpanId"
    durationExpression: "Duration"
    durationPrecision: 9
    parentSpanIdExpression: "ParentSpanId"
    spanNameExpression: "SpanName"
    spanKindExpression: "SpanKind"
    statusCodeExpression: "StatusCode"
    statusMessageExpression: "StatusMessage"
    connection: "Local ClickHouse"
    logSourceId: "Logs"
    sessionSourceId: "Sessions"
    metricSourceId: "Metrics"
  - name: "Metrics"
    kind: "metric"
    from:
      databaseName: "default"
      tableName: ""
    timestampValueExpression: "TimeUnix"
    resourceAttributesExpression: "ResourceAttributes"
    metricTables:
      gauge: "otel_metrics_gauge"
      histogram: "otel_metrics_histogram"
      sum: "otel_metrics_sum"
    _id: "682586a8b1f81924e628e808"
    id: "682586a8b1f81924e628e808"
    connection: "Local ClickHouse"
    logSourceId: "Logs"
    traceSourceId: "Traces"
    sessionSourceId: "Sessions"
  - name: "Sessions"
    kind: "session"
    from:
      databaseName: "default"
      tableName: "hyperdx_sessions"
    timestampValueExpression: "TimestampTime"
    displayedTimestampValueExpression: "Timestamp"
    implicitColumnExpression: "Body"
    serviceNameExpression: "ServiceName"
    bodyExpression: "Body"
    eventAttributesExpression: "LogAttributes"
    resourceAttributesExpression: "ResourceAttributes"
    defaultTableSelectExpression: "Timestamp,ServiceName,SeverityText,Body"
    severityTextExpression: "SeverityText"
    traceIdExpression: "TraceId"
    spanIdExpression: "SpanId"
    connection: "Local ClickHouse"
    logSourceId: "Logs"
    traceSourceId: "Traces"
    metricSourceId: "Metrics"

View File

@@ -0,0 +1,32 @@
- name: MONITORING | Ensure data directories
  ansible.builtin.file:
    path: "{{ data_dir }}/data"
    state: directory
    mode: '0755'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: MONITORING | Generate config
  ansible.builtin.template:
    src: otel-agent-config.yaml.j2
    dest: "{{ data_dir }}/otel-agent-config.yaml"
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true
- name: MONITORING | Generate Compose file
  ansible.builtin.template:
    src: docker-compose.yml.j2
    dest: "{{ data_dir }}/monitoring.yml"
    mode: '0644'
  run_once: true
  delegate_to: "{{ groups['managers'][0] }}"
- name: MONITORING | Deploy stack
  community.docker.docker_stack:
    state: present
    name: infra-monitoring
    compose:
      - "{{ data_dir }}/monitoring.yml"
  delegate_to: "{{ groups['managers'][0] }}"
  run_once: true

View File

@@ -0,0 +1,38 @@
version: '3.9'
services:
  otel-agent:
    image: otel/opentelemetry-collector-contrib:0.143.0
    user: "0:0" # root for hardware access
    command: ["--config=/etc/otel-agent-config.yaml"]
    security_opt:
      - apparmor:unconfined
    volumes:
      - {{ data_dir }}/otel-agent-config.yaml:/etc/otel-agent-config.yaml
      - /:/hostfs:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /sys:/hostfs/sys:ro
      - /proc:/hostfs/proc:ro
    environment:
      - GOMEMLIMIT=180MiB
      - HOST_PROC=/hostfs/proc
      - HOST_SYS=/hostfs/sys
      - HOST_ETC=/hostfs/etc
      - HOST_VAR=/hostfs/var
      - HOST_RUN=/hostfs/run
      - HOST_DEV=/hostfs/dev
    deploy:
      mode: global
      update_config:
        parallelism: 1
        delay: 10s
      resources:
        limits:
          memory: 200M
    networks:
      - host
networks:
  host:
    name: host
    external: true

View File

@@ -0,0 +1,117 @@
extensions:
  # watches Docker containers
  docker_observer:
    endpoint: "unix:///var/run/docker.sock"
    cache_sync_interval: 30s
receivers:
  hostmetrics:
    root_path: /hostfs
    collection_interval: 15s
    scrapers:
      cpu:
        metrics:
          system.cpu.time:
            enabled: true
          system.cpu.utilization:
            enabled: true
      memory:
        metrics:
          system.memory.usage:
            enabled: true
          system.memory.utilization:
            enabled: true
      filesystem:
        metrics:
          system.filesystem.usage:
            enabled: true
          system.filesystem.utilization:
            enabled: true
      paging:
        metrics:
          system.paging.usage:
            enabled: true
          system.paging.utilization:
            enabled: true
          system.paging.faults:
            enabled: true
      load:
      disk:
      network:
  docker_stats:
    endpoint: unix:///var/run/docker.sock
    collection_interval: 30s
    timeout: 20s
  # receiver_creator:
  #   watch_observers: [docker_observer]
  #   receivers:
  #     filelog:
  #       rule: type == "container" # containers only
  #       config:
  #         include:
  #           - /hostfs/var/lib/docker/containers/*/*.log
  #         operators:
  #           - type: container
  #             format: docker
  #             add_metadata_from_filepath: true
  #           - type: json_parser
  #             timestamp:
  #               parse_from: time
  #               layout: '%Y-%m-%dT%H:%M:%S.%LZ'
  #             severity:
  #               parse_from: stream
  #               mapping:
  #                 info: stdout
  #                 error: stderr
  # Ceph scraping (only works on nodes where the Ceph mgr is running)
  prometheus:
    config:
      scrape_configs:
        - job_name: 'ceph-local'
          scrape_interval: 30s
          scrape_timeout: 10s
          static_configs:
            - targets: ['127.0.0.1:9283']
          metric_relabel_configs:
            - source_labels: [__name__]
              regex: 'ceph_cluster_total_.*|ceph_health_status|ceph_osd_.*|ceph_pool_.*'
              action: keep
processors:
  batch:
    timeout: 5s
  resourcedetection:
    detectors: [env, system]
  resourcedetection/docker:
    detectors: [env, docker]
    timeout: 2s
    override: false
exporters:
  debug:
    verbosity: detailed
  otlp:
    endpoint: "127.0.0.1:4317"
    headers:
      authorization: "{{ hyperdx_api_ingestion_key }}"
    compression: gzip
    tls:
      insecure: true
service:
  extensions: [docker_observer]
  pipelines:
    metrics:
      receivers: [hostmetrics, docker_stats, prometheus]
      # receivers: [hostmetrics]
      processors: [resourcedetection, batch]
      exporters: [otlp]
    # logs:
    #   receivers: [receiver_creator]
    #   processors: [resourcedetection/docker, batch]
    #   exporters: [otlp, debug]
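
If host metrics don't show up in HyperDX, the agent's own logs are the first place to look; since the agent runs globally as part of the infra-monitoring stack, Swarm names the service accordingly:

    docker service logs -f infra-monitoring_otel-agent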

View File

@@ -0,0 +1 @@
data_dir: "{{ ceph_volume }}/infra-monitoring"

View File

@@ -1,13 +0,0 @@
---
- name: Copy Stack Files
  copy:
    directory_mode: true
    src: /Users/d3r0/dev/repositories/active/gc/iac/ansible/resources/monitoring
    dest: /srv
- block:
    - name: Deploy Monitoring stack
      community.docker.docker_stack:
        state: present
        name: monitoring
        compose:
          - /srv/monitoring/observability.yml

View File

@@ -59,6 +59,14 @@
    interface: "{{ private_interface }}"
    direction: in
- name: FIREWALL | Allow the Ceph Prometheus exporter on the private interface
  community.general.ufw:
    rule: allow
    port: "9283"
    proto: tcp
    interface: "{{ private_interface }}"
    direction: in
- name: FIREWALL | Allow Docker Swarm management ports on the private interface
  community.general.ufw:
    rule: allow