prometheus服务的客户端
环境
服务端 192.168.164.110
客户端 192.168.164.111
1 添加主机到prometheus服务
# 客户端运行node-exporter
docker run -itd \
-p 9100:9100 \
-v /:/rootfs:ro \
-v /sys:/sys:ro \
--name mon_node-exporter \
--hostname mon_node-exporter \
--restart always \
prom/node-exporter:latest
# 客户端检查是否收集了数据
http://192.168.164.111:9100/metrics
# 服务器端添加配置链接客户端配置
vim prometheus.yml
cat >prometheus.yml<<EOF
# 全局配置
global:
  scrape_interval: 15s # 采集数据间隔时间
  evaluation_interval: 15s # 评估规则间隔时间
# 报警配置
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['192.168.164.110:9093']
# 规则文件
rule_files:
  - "node_down.yml"
# 定义监控的目标 每个 'job' 代表一组具有共同目的的目标。
scrape_configs:
  # 1 监控 Prometheus 服务自身
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  # 2 监控 nodes node_exporter,node_exporter 提供操作系统和硬件相关的指标
  - job_name: 'mon_node-exporter'
    static_configs:
      - targets: ['192.168.164.110:9100']
      - targets: ['192.168.164.111:9100']
  # 3 监控 nodes docker节点容器
  - job_name: 'mon_docker-exporter'
    static_configs:
      - targets: ['192.168.164.110:8080']
      - targets: ['192.168.164.111:8080']
  # 4 监控 GPU 资源
  - job_name: 'mon_gpu-exporter'
    static_configs:
      - targets: ['192.168.164.110:9835']
  # 5 监控 mysql服务
  - job_name: 'mon_mysql-exporter'
    static_configs:
      - targets: ['192.168.164.111:9104']
EOF
# 重启服务端prometheus容器
docker restart mon_prometheus
# 服务器端检查链接信息状态
http://192.168.164.110:9090/targets

# 下载主机模板:打开 https://grafana.com/grafana/dashboards ,搜索 Node Exporter 并下载 json 文件
# 在服务端的 Grafana 中导入模板:Dashboards -- Manage -- Import -- Upload json file
2 添加docker主机到prometheus
# 服务器端检查链接信息状态,确认是否有docker主机
http://192.168.164.110:9090/targets
# 下载docker模板:打开 https://grafana.com/grafana/dashboards ,搜索 Docker monitoring with node selection 并下载json文件
# 在服务端的 Grafana 中导入模板:Dashboards -- Manage -- Import -- Upload json file
3 添加GPU主机到prometheus服务
# 前提安装gpu驱动
# 在有gpu的主机上安装 NVIDIA 容器运行时(根据 docker 版本二选一)
# 安装 nvidia-docker2 (docker-version < 19.03)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list |\
tee /etc/apt/sources.list.d/nvidia-docker.list
apt-get update
apt-get install -y nvidia-docker2
pkill -SIGHUP dockerd
# 安装 nvidia-container-toolkit (docker-version >= 19.03)
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list |\
tee /etc/apt/sources.list.d/nvidia-docker.list
apt update
apt install -y nvidia-container-toolkit
# 在GPU客户端 安装 nvidia_gpu_exporter
# 下载对应系统发行版的包
wget https://github.com/utkuozdemir/nvidia_gpu_exporter/releases/download/v1.2.0/nvidia_gpu_exporter_1.2.0_linux_x86_64.tar.gz
# 解压并运行
tar xf nvidia_gpu_exporter_1.2.0_linux_x86_64.tar.gz
mv nvidia_gpu_exporter /usr/local/gpu-exporter/
/usr/local/gpu-exporter/nvidia_gpu_exporter &
# 验证是否运行成功
curl localhost:9835/metrics
# 配置 Prometheus 抓取 GPU 数据
vim prometheus.yml
# 下载展示GPU的模板 json 文件:打开 https://grafana.com/grafana/dashboards ,搜索 Nvidia GPU Metrics 并下载json文件
# 在服务端的 Grafana 中导入模板:Dashboards -- Manage -- Import -- Upload json file
4 添加mysql服务监控
# mysql主机运行监控容器
docker run -itd \
-p 9104:9104 \
--name mon_mysqld-exporter \
--hostname mon_mysqld-exporter \
--restart="always" \
-e DATA_SOURCE_NAME="root:Kc@123456@(192.168.164.111:3306)/" \
prom/mysqld-exporter
# 客户端检查
http://192.168.164.111:9104/metrics
# 服务端检查
# 下载模板 导入
搜索 MySQL Exporter Quickstart and Dashboard
本文来自博客园,站在巨人的肩膀上,坚持开源精神,遵循开源协议:Apache License 2.0协议。
