From 4b702a776286ef62861ec4e18b2b90d6de3024db Mon Sep 17 00:00:00 2001 From: Joey Hafner Date: Thu, 12 May 2022 21:36:21 -0700 Subject: [PATCH] Consolidate Grafana-stack and prometheus into Monitoring --- homelab/server/config/grafana-stack/.env | 2 - .../config/grafana-stack/.forgetps.json | 1 - .../config/grafana-stack/docker-compose.yml | 65 ---- .../server/config/grafana-stack/influxdb.conf | 305 ------------------ .../config/grafana-stack/scripts/.forgetps | 0 .../grafana-stack/scripts/.forgetps.json | 1 - .../server/config/grafana-stack/telegraf.conf | 32 -- homelab/server/config/monitoring/.env | 1 + .../{prometheus => monitoring}/README.md | 0 .../docker-compose.yml | 16 + .../grafana-dashboards/Hosts.json | 0 .../{grafana-stack => monitoring}/grafana.ini | 0 .../{prometheus => monitoring}/prometheus.yml | 0 .../scripts/diskstatus.sh | 0 .../scripts/forgepc.sh | 0 .../scripts/forgetps-to-json.sh | 0 homelab/server/config/prometheus/.env | 0 17 files changed, 17 insertions(+), 406 deletions(-) delete mode 100644 homelab/server/config/grafana-stack/.env delete mode 100644 homelab/server/config/grafana-stack/.forgetps.json delete mode 100644 homelab/server/config/grafana-stack/docker-compose.yml delete mode 100644 homelab/server/config/grafana-stack/influxdb.conf delete mode 100644 homelab/server/config/grafana-stack/scripts/.forgetps delete mode 100644 homelab/server/config/grafana-stack/scripts/.forgetps.json delete mode 100644 homelab/server/config/grafana-stack/telegraf.conf create mode 100644 homelab/server/config/monitoring/.env rename homelab/server/config/{prometheus => monitoring}/README.md (100%) rename homelab/server/config/{prometheus => monitoring}/docker-compose.yml (80%) rename homelab/server/config/{grafana-stack => monitoring}/grafana-dashboards/Hosts.json (100%) rename homelab/server/config/{grafana-stack => monitoring}/grafana.ini (100%) rename homelab/server/config/{prometheus => monitoring}/prometheus.yml (100%) rename homelab/server/config/{grafana-stack => monitoring}/scripts/diskstatus.sh (100%) rename homelab/server/config/{grafana-stack => monitoring}/scripts/forgepc.sh (100%) rename homelab/server/config/{grafana-stack => monitoring}/scripts/forgetps-to-json.sh (100%) delete mode 100644 homelab/server/config/prometheus/.env diff --git a/homelab/server/config/grafana-stack/.env b/homelab/server/config/grafana-stack/.env deleted file mode 100644 index 77fc4181..00000000 --- a/homelab/server/config/grafana-stack/.env +++ /dev/null @@ -1,2 +0,0 @@ -DOCKER_DATA=/home/joey/data/grafana-stack -MINECRAFT_DIR=/home/joey/data/minecraft diff --git a/homelab/server/config/grafana-stack/.forgetps.json b/homelab/server/config/grafana-stack/.forgetps.json deleted file mode 100644 index fe51488c..00000000 --- a/homelab/server/config/grafana-stack/.forgetps.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/homelab/server/config/grafana-stack/docker-compose.yml b/homelab/server/config/grafana-stack/docker-compose.yml deleted file mode 100644 index 75999c88..00000000 --- a/homelab/server/config/grafana-stack/docker-compose.yml +++ /dev/null @@ -1,65 +0,0 @@ -version: '3' -services: - influxdb: - image: influxdb:2.0 - container_name: grafana_influxdb - restart: unless-stopped - networks: - - monitoring - ports: - - 8086:8086 - - 8089:8089/udp - volumes: - - ./influxdb.conf:/etc/influxdb/influxdb.conf:ro - - "${DOCKER_DATA}/influxdb:/var/lib/influxdb" - environment: - - TZ=America/Los_Angeles - - DOCKER_INFLUXDB_INIT_MODE=setup - - DOCKER_INFLUXDB_INIT_USERNAME=jafner - - DOCKER_INFLUXDB_INIT_PASSWORD=***REMOVED*** - - "DOCKER_INFLUXDB_INIT_ORG=Jafner Industries" - - DOCKER_INFLUXDB_INIT_BUCKET=Bucket - - DOCKER_INFLUXDB_INIT_ADMIN_TOKEN=***REMOVED*** - - telegraf: - image: telegraf:latest - container_name: grafana_telegraf - restart: unless-stopped - depends_on: - - influxdb - networks: - - monitoring - volumes: - - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro - - ./scripts/.forgetps.json:/.forgetps.json:ro - - /sys:/rootfs/sys:ro - - /proc:/rootfs/proc:ro - - /etc:/rootfs/etc:ro - - grafana: - image: mbarmem/grafana-render:latest - container_name: grafana_grafana - restart: unless-stopped - depends_on: - - influxdb - - telegraf - networks: - - monitoring - - web - user: "0" - volumes: - - ${DOCKER_DATA}/grafana:/var/lib/grafana - - ./grafana.ini:/etc/grafana/grafana.ini - environment: - - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-worldmap-panel,grafana-piechart-panel - labels: - - traefik.http.routers.grafana.rule=Host(`grafana.jafner.net`) - - traefik.http.routers.grafana.tls.certresolver=lets-encrypt - #- traefik.http.routers.grafana.middlewares=authelia@file - -networks: - monitoring: - external: true - web: - external: true - diff --git a/homelab/server/config/grafana-stack/influxdb.conf b/homelab/server/config/grafana-stack/influxdb.conf deleted file mode 100644 index 10f94fe0..00000000 --- a/homelab/server/config/grafana-stack/influxdb.conf +++ /dev/null @@ -1,305 +0,0 @@ -### Welcome to the InfluxDB configuration file. - -# Once every 24 hours InfluxDB will report usage data to usage.influxdata.com -# The data includes a random ID, os, arch, version, the number of series and other -# usage data. No data from user databases is ever transmitted. -# Change this option to true to disable reporting. -reporting-disabled = false - -# we'll try to get the hostname automatically, but if it the os returns something -# that isn't resolvable by other servers in the cluster, use this option to -# manually set the hostname -# hostname = "localhost" - -### -### [meta] -### -### Controls the parameters for the Raft consensus group that stores metadata -### about the InfluxDB cluster. -### - -[meta] - # Where the metadata/raft database is stored - dir = "/var/lib/influxdb/meta" - - retention-autocreate = true - - # If log messages are printed for the meta service - logging-enabled = true - pprof-enabled = false - - # The default duration for leases. - lease-duration = "1m0s" - -### -### [data] -### -### Controls where the actual shard data for InfluxDB lives and how it is -### flushed from the WAL. "dir" may need to be changed to a suitable place -### for your system, but the WAL settings are an advanced configuration. The -### defaults should work for most systems. -### - -[data] - # Controls if this node holds time series data shards in the cluster - enabled = true - - dir = "/var/lib/influxdb/data" - - # These are the WAL settings for the storage engine >= 0.9.3 - wal-dir = "/var/lib/influxdb/wal" - wal-logging-enabled = true - - # Trace logging provides more verbose output around the tsm engine. Turning - # this on can provide more useful output for debugging tsm engine issues. - # trace-logging-enabled = false - - # Whether queries should be logged before execution. Very useful for troubleshooting, but will - # log any sensitive data contained within a query. - # query-log-enabled = true - - # Settings for the TSM engine - - # CacheMaxMemorySize is the maximum size a shard's cache can - # reach before it starts rejecting writes. - # cache-max-memory-size = 524288000 - - # CacheSnapshotMemorySize is the size at which the engine will - # snapshot the cache and write it to a TSM file, freeing up memory - # cache-snapshot-memory-size = 26214400 - - # CacheSnapshotWriteColdDuration is the length of time at - # which the engine will snapshot the cache and write it to - # a new TSM file if the shard hasn't received writes or deletes - # cache-snapshot-write-cold-duration = "1h" - - # MinCompactionFileCount is the minimum number of TSM files - # that need to exist before a compaction cycle will run - # compact-min-file-count = 3 - - # CompactFullWriteColdDuration is the duration at which the engine - # will compact all TSM files in a shard if it hasn't received a - # write or delete - # compact-full-write-cold-duration = "24h" - - # MaxPointsPerBlock is the maximum number of points in an encoded - # block in a TSM file. Larger numbers may yield better compression - # but could incur a performance penalty when querying - # max-points-per-block = 1000 - -### -### [coordinator] -### -### Controls the clustering service configuration. -### - -[coordinator] - write-timeout = "10s" - max-concurrent-queries = 0 - query-timeout = "0" - log-queries-after = "0" - max-select-point = 0 - max-select-series = 0 - max-select-buckets = 0 - -### -### [retention] -### -### Controls the enforcement of retention policies for evicting old data. -### - -[retention] - enabled = true - check-interval = "30m" - -### -### [shard-precreation] -### -### Controls the precreation of shards, so they are available before data arrives. -### Only shards that, after creation, will have both a start- and end-time in the -### future, will ever be created. Shards are never precreated that would be wholly -### or partially in the past. - -[shard-precreation] - enabled = true - check-interval = "10m" - advance-period = "30m" - -### -### Controls the system self-monitoring, statistics and diagnostics. -### -### The internal database for monitoring data is created automatically if -### if it does not already exist. The target retention within this database -### is called 'monitor' and is also created with a retention period of 7 days -### and a replication factor of 1, if it does not exist. In all cases the -### this retention policy is configured as the default for the database. - -[monitor] - store-enabled = true # Whether to record statistics internally. - store-database = "_internal" # The destination database for recorded statistics - store-interval = "10s" # The interval at which to record statistics - -### -### [admin] -### -### Controls the availability of the built-in, web-based admin interface. If HTTPS is -### enabled for the admin interface, HTTPS must also be enabled on the [http] service. -### - -[admin] - enabled = true - bind-address = ":8083" - https-enabled = false - https-certificate = "/etc/ssl/influxdb.pem" - -### -### [http] -### -### Controls how the HTTP endpoints are configured. These are the primary -### mechanism for getting data into and out of InfluxDB. -### - -[http] - enabled = true - bind-address = ":8086" - auth-enabled = false - log-enabled = true - write-tracing = false - pprof-enabled = false - https-enabled = false - https-certificate = "/etc/ssl/influxdb.pem" - ### Use a separate private key location. - # https-private-key = "" - max-row-limit = 10000 - realm = "InfluxDB" - -### -### [subsciber] -### -### Controls the subscriptions, which can be used to fork a copy of all data -### received by the InfluxDB host. -### - -[subsciber] - enabled = true - http-timeout = "30s" - - -### -### [[graphite]] -### -### Controls one or many listeners for Graphite data. -### - -[[graphite]] - enabled = false - # database = "graphite" - # bind-address = ":2003" - # protocol = "tcp" - # consistency-level = "one" - - # These next lines control how batching works. You should have this enabled - # otherwise you could get dropped metrics or poor performance. Batching - # will buffer points in memory if you have many coming in. - - # batch-size = 5000 # will flush if this many points get buffered - # batch-pending = 10 # number of batches that may be pending in memory - # batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit - # udp-read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. - - ### This string joins multiple matching 'measurement' values providing more control over the final measurement name. - # separator = "." - - ### Default tags that will be added to all metrics. These can be overridden at the template level - ### or by tags extracted from metric - # tags = ["region=us-east", "zone=1c"] - - ### Each template line requires a template pattern. It can have an optional - ### filter before the template and separated by spaces. It can also have optional extra - ### tags following the template. Multiple tags should be separated by commas and no spaces - ### similar to the line protocol format. There can be only one default template. - # templates = [ - # "*.app env.service.resource.measurement", - # # Default template - # "server.*", - # ] - -### -### [collectd] -### -### Controls one or many listeners for collectd data. -### - -[[collectd]] - enabled = false - # bind-address = "" - # database = "" - # typesdb = "" - - # These next lines control how batching works. You should have this enabled - # otherwise you could get dropped metrics or poor performance. Batching - # will buffer points in memory if you have many coming in. - - # batch-size = 1000 # will flush if this many points get buffered - # batch-pending = 5 # number of batches that may be pending in memory - # batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit - # read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. - -### -### [opentsdb] -### -### Controls one or many listeners for OpenTSDB data. -### - -[[opentsdb]] - enabled = false - # bind-address = ":4242" - # database = "opentsdb" - # retention-policy = "" - # consistency-level = "one" - # tls-enabled = false - # certificate= "" - # log-point-errors = true # Log an error for every malformed point. - - # These next lines control how batching works. You should have this enabled - # otherwise you could get dropped metrics or poor performance. Only points - # metrics received over the telnet protocol undergo batching. - - # batch-size = 1000 # will flush if this many points get buffered - # batch-pending = 5 # number of batches that may be pending in memory - # batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit - -### -### [[udp]] -### -### Controls the listeners for InfluxDB line protocol data via UDP. -### - -[[udp]] - enabled = true - bind-address = "0.0.0.0:8089" - database = "host" - # retention-policy = "" - - # These next lines control how batching works. You should have this enabled - # otherwise you could get dropped metrics or poor performance. Batching - # will buffer points in memory if you have many coming in. - - batch-size = 1000 # will flush if this many points get buffered - # batch-pending = 5 # number of batches that may be pending in memory - batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit - # read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. - - # set the expected UDP payload size; lower values tend to yield better performance, default is max UDP size 65536 - # udp-payload-size = 65536 - -### -### [continuous_queries] -### -### Controls how continuous queries are run within InfluxDB. -### - -[continuous_queries] - log-enabled = true - enabled = true - # run-interval = "1s" # interval for how often continuous queries will be checked if they need to run diff --git a/homelab/server/config/grafana-stack/scripts/.forgetps b/homelab/server/config/grafana-stack/scripts/.forgetps deleted file mode 100644 index e69de29b..00000000 diff --git a/homelab/server/config/grafana-stack/scripts/.forgetps.json b/homelab/server/config/grafana-stack/scripts/.forgetps.json deleted file mode 100644 index 5f89234b..00000000 --- a/homelab/server/config/grafana-stack/scripts/.forgetps.json +++ /dev/null @@ -1 +0,0 @@ -[{"dim":"minecraft:overworld","tpt":0.000,"tps":20.000},{"dim":"minecraft:the_nether","tpt":0.000,"tps":20.000},{"dim":"minecraft:the_end","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:coaldim","tpt":0.000,"tps":20.000},{"dim":"compactmachines:compact_world","tpt":0.000,"tps":20.000},{"dim":"rats:ratlantis","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim1","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:fluoritedim","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim2","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim3","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim4","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:uraniumtendrils","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim5","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim6","tpt":0.000,"tps":20.000},{"dim":"atum:atum","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:uraniumdim","tpt":0.000,"tps":20.000},{"dim":"mythicbotany:alfheim","tpt":0.000,"tps":20.000},{"dim":"undergarden:undergarden","tpt":0.000,"tps":20.000},{"dim":"Overall","tpt":0.000,"tps":20.000}] diff --git a/homelab/server/config/grafana-stack/telegraf.conf b/homelab/server/config/grafana-stack/telegraf.conf deleted file mode 100644 index b09b0ebd..00000000 --- a/homelab/server/config/grafana-stack/telegraf.conf +++ /dev/null @@ -1,32 +0,0 @@ -[global_tags] -[agent] - interval = "10s" - round_interval = true - metric_batch_size = 1000 - metric_buffer_limit = 10000 - collection_jitter = "0s" - flush_interval = "10s" - flush_jitter = "0s" - precision = "" - hostname = "" - omit_hostname = false -[[outputs.influxdb_v2]] - urls = ["http://influxdb:8086"] - organization = "Jafner Industries" - bucket = "Bucket" - token = "***REMOVED***" -[[inputs.cpu]] - percpu = true - totalcpu = true - collect_cpu_time = false - report_active = false -[[inputs.disk]] - ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] -[[inputs.mem]] -[[inputs.system]] -[[inputs.sensors]] -[[inputs.file]] - files = ["/.forgetps.json"] - data_format = "json" - name_override = "tickinfo" - tag_keys = ["dim"] diff --git a/homelab/server/config/monitoring/.env b/homelab/server/config/monitoring/.env new file mode 100644 index 00000000..32c75a00 --- /dev/null +++ b/homelab/server/config/monitoring/.env @@ -0,0 +1 @@ +DOCKER_DATA=/home/joey/data/monitoring \ No newline at end of file diff --git a/homelab/server/config/prometheus/README.md b/homelab/server/config/monitoring/README.md similarity index 100% rename from homelab/server/config/prometheus/README.md rename to homelab/server/config/monitoring/README.md diff --git a/homelab/server/config/prometheus/docker-compose.yml b/homelab/server/config/monitoring/docker-compose.yml similarity index 80% rename from homelab/server/config/prometheus/docker-compose.yml rename to homelab/server/config/monitoring/docker-compose.yml index e66d2ee1..3204888f 100644 --- a/homelab/server/config/prometheus/docker-compose.yml +++ b/homelab/server/config/monitoring/docker-compose.yml @@ -1,5 +1,21 @@ version: '3' services: + grafana: + image: grafana/grafana-oss:latest + container_name: grafana_grafana + restart: unless-stopped + networks: + - monitoring + - web + user: "0" + volumes: + - ${DOCKER_DATA}/grafana:/var/lib/grafana + - ./grafana.ini:/etc/grafana/grafana.ini + labels: + - traefik.http.routers.grafana.rule=Host(`grafana.jafner.net`) + - traefik.http.routers.grafana.tls.certresolver=lets-encrypt + #- traefik.http.routers.grafana.middlewares=authelia@file + prometheus: image: prom/prometheus:latest container_name: monitoring_prometheus diff --git a/homelab/server/config/grafana-stack/grafana-dashboards/Hosts.json b/homelab/server/config/monitoring/grafana-dashboards/Hosts.json similarity index 100% rename from homelab/server/config/grafana-stack/grafana-dashboards/Hosts.json rename to homelab/server/config/monitoring/grafana-dashboards/Hosts.json diff --git a/homelab/server/config/grafana-stack/grafana.ini b/homelab/server/config/monitoring/grafana.ini similarity index 100% rename from homelab/server/config/grafana-stack/grafana.ini rename to homelab/server/config/monitoring/grafana.ini diff --git a/homelab/server/config/prometheus/prometheus.yml b/homelab/server/config/monitoring/prometheus.yml similarity index 100% rename from homelab/server/config/prometheus/prometheus.yml rename to homelab/server/config/monitoring/prometheus.yml diff --git a/homelab/server/config/grafana-stack/scripts/diskstatus.sh b/homelab/server/config/monitoring/scripts/diskstatus.sh similarity index 100% rename from homelab/server/config/grafana-stack/scripts/diskstatus.sh rename to homelab/server/config/monitoring/scripts/diskstatus.sh diff --git a/homelab/server/config/grafana-stack/scripts/forgepc.sh b/homelab/server/config/monitoring/scripts/forgepc.sh similarity index 100% rename from homelab/server/config/grafana-stack/scripts/forgepc.sh rename to homelab/server/config/monitoring/scripts/forgepc.sh diff --git a/homelab/server/config/grafana-stack/scripts/forgetps-to-json.sh b/homelab/server/config/monitoring/scripts/forgetps-to-json.sh similarity index 100% rename from homelab/server/config/grafana-stack/scripts/forgetps-to-json.sh rename to homelab/server/config/monitoring/scripts/forgetps-to-json.sh diff --git a/homelab/server/config/prometheus/.env b/homelab/server/config/prometheus/.env deleted file mode 100644 index e69de29b..00000000