Consolidate Grafana-stack and prometheus into Monitoring

This commit is contained in:
Joey Hafner 2022-05-12 21:36:21 -07:00
parent 77899d1225
commit 4b702a7762
17 changed files with 17 additions and 406 deletions

View File

@@ -1,2 +0,0 @@
DOCKER_DATA=/home/joey/data/grafana-stack
MINECRAFT_DIR=/home/joey/data/minecraft

View File

@@ -1,65 +0,0 @@
version: '3'
services:
  influxdb:
    image: influxdb:2.0
    container_name: grafana_influxdb
    restart: unless-stopped
    networks:
      - monitoring
    ports:
      - 8086:8086
      - 8089:8089/udp
    volumes:
      - ./influxdb.conf:/etc/influxdb/influxdb.conf:ro
      - "${DOCKER_DATA}/influxdb:/var/lib/influxdb"
    environment:
      - TZ=America/Los_Angeles
      - DOCKER_INFLUXDB_INIT_MODE=setup
      - DOCKER_INFLUXDB_INIT_USERNAME=jafner
      - DOCKER_INFLUXDB_INIT_PASSWORD=***REMOVED***
      - "DOCKER_INFLUXDB_INIT_ORG=Jafner Industries"
      - DOCKER_INFLUXDB_INIT_BUCKET=Bucket
      - DOCKER_INFLUXDB_INIT_ADMIN_TOKEN=***REMOVED***
  telegraf:
    image: telegraf:latest
    container_name: grafana_telegraf
    restart: unless-stopped
    depends_on:
      - influxdb
    networks:
      - monitoring
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      - ./scripts/.forgetps.json:/.forgetps.json:ro
      - /sys:/rootfs/sys:ro
      - /proc:/rootfs/proc:ro
      - /etc:/rootfs/etc:ro
  grafana:
    image: mbarmem/grafana-render:latest
    container_name: grafana_grafana
    restart: unless-stopped
    depends_on:
      - influxdb
      - telegraf
    networks:
      - monitoring
      - web
    user: "0"
    volumes:
      - ${DOCKER_DATA}/grafana:/var/lib/grafana
      - ./grafana.ini:/etc/grafana/grafana.ini
    environment:
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-worldmap-panel,grafana-piechart-panel
    labels:
      - traefik.http.routers.grafana.rule=Host(`grafana.jafner.net`)
      - traefik.http.routers.grafana.tls.certresolver=lets-encrypt
      #- traefik.http.routers.grafana.middlewares=authelia@file
networks:
  monitoring:
    external: true
  web:
    external: true
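
For reference, the port mapping above (8086:8086) exposed the InfluxDB 2.x HTTP API on the host. A minimal sketch of a health probe against it, assuming a local deployment ("localhost" and the running stack are assumptions, not part of the commit):

import urllib.request

# InfluxDB 2.x serves a /health endpoint; through the published 8086 port
# it answers with HTTP 200 and a small JSON status body once ready.
with urllib.request.urlopen("http://localhost:8086/health") as resp:
    print(resp.status, resp.read().decode())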

View File

@@ -1,305 +0,0 @@
### Welcome to the InfluxDB configuration file.
# Once every 24 hours InfluxDB will report usage data to usage.influxdata.com
# The data includes a random ID, os, arch, version, the number of series and other
# usage data. No data from user databases is ever transmitted.
# Change this option to true to disable reporting.
reporting-disabled = false
# we'll try to get the hostname automatically, but if the OS returns something
# that isn't resolvable by other servers in the cluster, use this option to
# manually set the hostname
# hostname = "localhost"
###
### [meta]
###
### Controls the parameters for the Raft consensus group that stores metadata
### about the InfluxDB cluster.
###
[meta]
# Where the metadata/raft database is stored
dir = "/var/lib/influxdb/meta"
retention-autocreate = true
# If log messages are printed for the meta service
logging-enabled = true
pprof-enabled = false
# The default duration for leases.
lease-duration = "1m0s"
###
### [data]
###
### Controls where the actual shard data for InfluxDB lives and how it is
### flushed from the WAL. "dir" may need to be changed to a suitable place
### for your system, but the WAL settings are an advanced configuration. The
### defaults should work for most systems.
###
[data]
# Controls if this node holds time series data shards in the cluster
enabled = true
dir = "/var/lib/influxdb/data"
# These are the WAL settings for the storage engine >= 0.9.3
wal-dir = "/var/lib/influxdb/wal"
wal-logging-enabled = true
# Trace logging provides more verbose output around the tsm engine. Turning
# this on can provide more useful output for debugging tsm engine issues.
# trace-logging-enabled = false
# Whether queries should be logged before execution. Very useful for troubleshooting, but will
# log any sensitive data contained within a query.
# query-log-enabled = true
# Settings for the TSM engine
# CacheMaxMemorySize is the maximum size a shard's cache can
# reach before it starts rejecting writes.
# cache-max-memory-size = 524288000
# CacheSnapshotMemorySize is the size at which the engine will
# snapshot the cache and write it to a TSM file, freeing up memory
# cache-snapshot-memory-size = 26214400
# CacheSnapshotWriteColdDuration is the length of time at
# which the engine will snapshot the cache and write it to
# a new TSM file if the shard hasn't received writes or deletes
# cache-snapshot-write-cold-duration = "1h"
# MinCompactionFileCount is the minimum number of TSM files
# that need to exist before a compaction cycle will run
# compact-min-file-count = 3
# CompactFullWriteColdDuration is the duration at which the engine
# will compact all TSM files in a shard if it hasn't received a
# write or delete
# compact-full-write-cold-duration = "24h"
# MaxPointsPerBlock is the maximum number of points in an encoded
# block in a TSM file. Larger numbers may yield better compression
# but could incur a performance penalty when querying
# max-points-per-block = 1000
###
### [coordinator]
###
### Controls the clustering service configuration.
###
[coordinator]
write-timeout = "10s"
max-concurrent-queries = 0
query-timeout = "0"
log-queries-after = "0"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
###
### [retention]
###
### Controls the enforcement of retention policies for evicting old data.
###
[retention]
enabled = true
check-interval = "30m"
###
### [shard-precreation]
###
### Controls the precreation of shards, so they are available before data arrives.
### Only shards that, after creation, will have both a start- and end-time in the
### future, will ever be created. Shards are never precreated that would be wholly
### or partially in the past.
[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"
###
### [monitor]
###
### Controls the system self-monitoring, statistics and diagnostics.
###
### The internal database for monitoring data is created automatically if
### it does not already exist. The target retention within this database
### is called 'monitor' and is also created with a retention period of 7 days
### and a replication factor of 1, if it does not exist. In all cases,
### this retention policy is configured as the default for the database.
[monitor]
store-enabled = true # Whether to record statistics internally.
store-database = "_internal" # The destination database for recorded statistics
store-interval = "10s" # The interval at which to record statistics
###
### [admin]
###
### Controls the availability of the built-in, web-based admin interface. If HTTPS is
### enabled for the admin interface, HTTPS must also be enabled on the [http] service.
###
[admin]
enabled = true
bind-address = ":8083"
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
###
### [http]
###
### Controls how the HTTP endpoints are configured. These are the primary
### mechanism for getting data into and out of InfluxDB.
###
[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = false
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
### Use a separate private key location.
# https-private-key = ""
max-row-limit = 10000
realm = "InfluxDB"
###
### [subscriber]
###
### Controls the subscriptions, which can be used to fork a copy of all data
### received by the InfluxDB host.
###
[subscriber]
enabled = true
http-timeout = "30s"
###
### [[graphite]]
###
### Controls one or many listeners for Graphite data.
###
[[graphite]]
enabled = false
# database = "graphite"
# bind-address = ":2003"
# protocol = "tcp"
# consistency-level = "one"
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
# batch-size = 5000 # will flush if this many points get buffered
# batch-pending = 10 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# udp-read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
### This string joins multiple matching 'measurement' values providing more control over the final measurement name.
# separator = "."
### Default tags that will be added to all metrics. These can be overridden at the template level
### or by tags extracted from metric
# tags = ["region=us-east", "zone=1c"]
### Each template line requires a template pattern. It can have an optional
### filter before the template and separated by spaces. It can also have optional extra
### tags following the template. Multiple tags should be separated by commas and no spaces
### similar to the line protocol format. There can be only one default template.
# templates = [
# "*.app env.service.resource.measurement",
# # Default template
# "server.*",
# ]
###
### [[collectd]]
###
### Controls one or many listeners for collectd data.
###
[[collectd]]
enabled = false
# bind-address = ""
# database = ""
# typesdb = ""
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
###
### [[opentsdb]]
###
### Controls one or many listeners for OpenTSDB data.
###
[[opentsdb]]
enabled = false
# bind-address = ":4242"
# database = "opentsdb"
# retention-policy = ""
# consistency-level = "one"
# tls-enabled = false
# certificate= ""
# log-point-errors = true # Log an error for every malformed point.
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Only
# metrics received over the telnet protocol undergo batching.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
###
### [[udp]]
###
### Controls the listeners for InfluxDB line protocol data via UDP.
###
[[udp]]
enabled = true
bind-address = "0.0.0.0:8089"
database = "host"
# retention-policy = ""
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
# set the expected UDP payload size; lower values tend to yield better performance, default is max UDP size 65536
# udp-payload-size = 65536
###
### [continuous_queries]
###
### Controls how continuous queries are run within InfluxDB.
###
[continuous_queries]
log-enabled = true
enabled = true
# run-interval = "1s" # interval for how often continuous queries will be checked if they need to run
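
The [[udp]] listener above accepts InfluxDB line protocol on port 8089, which the compose file published as 8089:8089/udp. A minimal sketch of writing a single point to it; the measurement and values are illustrative, and "localhost" assumes a local deployment:

import socket

# Send one line-protocol point to the UDP listener configured above
# (bind-address "0.0.0.0:8089", database "host"). UDP returns no
# acknowledgement, which is why the listener batches points in memory.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.sendto(b"cpu,host=server01 usage_idle=99.1", ("localhost", 8089))
sock.close()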

View File

@@ -1 +0,0 @@
[{"dim":"minecraft:overworld","tpt":0.000,"tps":20.000},{"dim":"minecraft:the_nether","tpt":0.000,"tps":20.000},{"dim":"minecraft:the_end","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:coaldim","tpt":0.000,"tps":20.000},{"dim":"compactmachines:compact_world","tpt":0.000,"tps":20.000},{"dim":"rats:ratlantis","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim1","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:fluoritedim","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim2","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim3","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim4","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:uraniumtendrils","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim5","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:dim6","tpt":0.000,"tps":20.000},{"dim":"atum:atum","tpt":0.000,"tps":20.000},{"dim":"rftoolsdim:uraniumdim","tpt":0.000,"tps":20.000},{"dim":"mythicbotany:alfheim","tpt":0.000,"tps":20.000},{"dim":"undergarden:undergarden","tpt":0.000,"tps":20.000},{"dim":"Overall","tpt":0.000,"tps":20.000}]

View File

@@ -1,32 +0,0 @@
[global_tags]
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
hostname = ""
omit_hostname = false
[[outputs.influxdb_v2]]
urls = ["http://influxdb:8086"]
organization = "Jafner Industries"
bucket = "Bucket"
token = "***REMOVED***"
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
[[inputs.mem]]
[[inputs.system]]
[[inputs.sensors]]
[[inputs.file]]
files = ["/.forgetps.json"]
data_format = "json"
name_override = "tickinfo"
tag_keys = ["dim"]
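
The [[inputs.file]] block reads the .forgetps.json file shown earlier. With data_format = "json", each array entry becomes one metric: name_override sets the measurement to tickinfo, "dim" becomes a tag via tag_keys, and the numeric tpt/tps values become fields. A rough sketch of that mapping (illustrative only, not Telegraf's actual parser):

import json

# Reproduce the shape of the metrics Telegraf emits from .forgetps.json:
# measurement "tickinfo", tag "dim", numeric fields "tpt" and "tps".
with open(".forgetps.json") as f:
    entries = json.load(f)

for entry in entries:
    dim = entry.pop("dim")  # tag_keys = ["dim"]
    fields = ",".join(f"{k}={v}" for k, v in entry.items())
    print(f"tickinfo,dim={dim} {fields}")
# e.g. tickinfo,dim=minecraft:overworld tpt=0.0,tps=20.0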

View File

@@ -0,0 +1 @@
DOCKER_DATA=/home/joey/data/monitoring
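
Compose reads this .env from the project directory and substitutes ${DOCKER_DATA} into the compose file below. A minimal sketch of the same expansion, using the path defined above:

import os

# Compose-style variable substitution: .env values fill ${...} references.
os.environ["DOCKER_DATA"] = "/home/joey/data/monitoring"
print(os.path.expandvars("${DOCKER_DATA}/grafana:/var/lib/grafana"))
# -> /home/joey/data/monitoring/grafana:/var/lib/grafana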

View File

@@ -1,5 +1,21 @@
version: '3'
services:
  grafana:
    image: grafana/grafana-oss:latest
    container_name: grafana_grafana
    restart: unless-stopped
    networks:
      - monitoring
      - web
    user: "0"
    volumes:
      - ${DOCKER_DATA}/grafana:/var/lib/grafana
      - ./grafana.ini:/etc/grafana/grafana.ini
    labels:
      - traefik.http.routers.grafana.rule=Host(`grafana.jafner.net`)
      - traefik.http.routers.grafana.tls.certresolver=lets-encrypt
      #- traefik.http.routers.grafana.middlewares=authelia@file
  prometheus:
    image: prom/prometheus:latest
    container_name: monitoring_prometheus