K8S集群中部署Redis集群
大约 4 分钟 · 约 1329 字
K8S集群中部署Redis集群
0 参考资料
- https://artifacthub.io/packages/helm/softonic/redis-operator
- https://github.com/OT-CONTAINER-KIT/redis-operator/tree/master/example
- https://github.com/OT-CONTAINER-KIT/helm-charts/tree/main/charts/redis-cluster
- https://github.com/ot-container-kit/redis-operator
- https://github.com/OT-CONTAINER-KIT/redis-operator
- https://ot-container-kit.github.io/redis-operator/guide/redis-config.html
1 部署Redis-Operator
# Deploy the operator: mirror the upstream redis-operator image into the private Harbor registry
docker pull quay.io/opstree/redis-operator:v0.13.0
docker tag quay.io/opstree/redis-operator:v0.13.0 harbor.leadchina.cn/opstree/redis-operator:v0.13.0
docker push harbor.leadchina.cn/opstree/redis-operator:v0.13.0
# Add the ot-helm chart repository
helm repo add ot-helm https://ot-container-kit.github.io/helm-charts/
helm pull ot-helm/redis-operator
# Install/upgrade redis-operator into the ot-operators namespace (created if missing)
helm upgrade redis-operator ot-helm/redis-operator --install --namespace ot-operators --create-namespace
# Create the secret holding the Redis instance password
kubectl create secret generic redis-secret --from-literal=password=123456 -n ot-operators
# Mirror the redis-exporter image into the private registry as well
docker pull quay.io/opstree/redis-exporter:v1.44.0
docker tag quay.io/opstree/redis-exporter:v1.44.0 harbor.leadchina.cn/opstree/redis-exporter:v1.44.0
docker push harbor.leadchina.cn/opstree/redis-exporter:v1.44.0
2 部署Redis standalone
# Deploy a standalone Redis instance via Helm (plain, and with a custom values file)
helm upgrade redis ot-helm/redis --install --namespace myredis
helm upgrade redis ot-helm/redis -f custom-values.yaml --install --namespace myredis
# See the upstream examples at https://github.com/OT-CONTAINER-KIT/redis-operator/tree/master/example for writing a Redis custom resource that the operator turns into a standalone instance tailored to your needs.
3 部署Redis Cluster集群
# Deploy a Redis cluster via Helm (two examples below)
helm upgrade redis-cluster ot-helm/redis-cluster --set redisCluster.clusterSize=3 --install --namespace myredis
# custom-values.yaml can be written following the examples in the official docs
helm upgrade redis-cluster ot-helm/redis-cluster -f custom-values.yaml --set redisCluster.clusterSize=3 --install --namespace myredis
# Alternatively, create a RedisCluster custom resource directly (manifest shown below)
[root@salt-master-50 ~/redis-operator/example]# cat cluster.yaml
---
# RedisCluster custom resource for the OT-CONTAINER-KIT redis-operator.
# Creates a 6-node (3 leaders + 3 followers) Redis 7 cluster with
# persistence, auth via the redis-secret Secret, and a metrics exporter sidecar.
apiVersion: redis.redis.opstreelabs.in/v1beta1
kind: RedisCluster
metadata:
  name: redis-cluster
  namespace: dev
spec:
  clusterSize: 6
  clusterVersion: v7
  persistenceEnabled: true
  securityContext:
    runAsUser: 1000
    fsGroup: 1000
  kubernetesConfig:
    image: harbor.leadchina.cn/opstree/redis:v7.0.5
    imagePullPolicy: IfNotPresent
    resources:
      requests:
        cpu: 101m
        memory: 128Mi
      limits:
        cpu: 101m
        memory: 128Mi
    # Password is read from the pre-created Secret (see the operator setup step)
    redisSecret:
      name: redis-secret
      key: password
    # imagePullSecrets:
    #   - name: regcred
  redisExporter:
    enabled: true
    image: harbor.leadchina.cn/opstree/redis-exporter:v1.44.0
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 100m
        memory: 128Mi
    # Environment variables for the redis-exporter sidecar
    env:
      - name: REDIS_EXPORTER_INCL_SYSTEM_METRICS
        value: "true"
      # - name: UI_PROPERTIES_FILE_NAME
      #   valueFrom:
      #     configMapKeyRef:
      #       name: game-demo
      #       key: ui_properties_file_name
      # - name: SECRET_USERNAME
      #   valueFrom:
      #     secretKeyRef:
      #       name: mysecret
      #       key: username
  # Extra redis.conf settings come from the redis-external-config ConfigMap
  redisLeader:
    redisConfig:
      additionalRedisConfig: redis-external-config
  redisFollower:
    redisConfig:
      additionalRedisConfig: redis-external-config
  storage:
    volumeClaimTemplate:
      spec:
        storageClassName: managed-nfs-storage
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
  # nodeSelector:
  #   kubernetes.io/hostname: minikube
  # priorityClassName:
  # affinity:
  # tolerations: []
4 Redis监控配置
监控相关配置可参考文档 https://ot-container-kit.github.io/redis-operator/guide/monitoring.html
# redis-exporter服务监控配置
[root@salt-master-50 ~/olm/redis/monitoring]# cat redis-servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor  # CRD defined by prometheus-operator
metadata:
  name: redis-metrics
  namespace: monitoring
  labels:
    prometheus: service
spec:
  jobLabel: app  # use the value of the service's "app" label as the job label, i.e. job=redis-exporter
  selector:
    matchLabels:
      app: redis-exporter  # auto-discover services labeled app=redis-exporter
  namespaceSelector:
    matchNames:  # namespaces to discover services in; multiple entries allowed
      - default
  endpoints:
    - port: redis-exporter  # service port *name* to scrape (spec.ports[].name in the Service)
      interval: 15s  # scrape interval
# redis监控告警配置
[root@salt-master-50 ~/olm/redis/monitoring]# cat rules.yaml
---
# PrometheusRule with Redis alerting rules, evaluated by prometheus-operator.
# Alert names / severities / annotations are intentionally kept in Chinese.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    monitor-svc: redis-exporter
    role: alert-rules
  name: redis-metrics-rules
  namespace: monitoring
spec:
  groups:
    - name: Redis-监控告警
      rules:
        # Redis instance is down
        - alert: 警报!Redis应用不可用
          expr: redis_up == 0
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis应用不可用"
            description: "Redis应用不可达\n 当前值 = {{ $value }}"
        # No master node left in the cluster
        - alert: 警报!丢失Master节点
          expr: (count(redis_instance_info{role="master"}) ) < 1
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} 丢失Redis master"
            description: "Redis集群当前没有主节点\n 当前值 = {{ $value }}"
        # Split brain: more than one master
        - alert: 警报!脑裂,主节点太多
          expr: count(redis_instance_info{role="master"}) > 1
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis脑裂,主节点太多"
            description: "{{ $labels.instance }} 主节点太多\n 当前值 = {{ $value }}"
        # Slaves disconnected from their master
        - alert: 警报!Slave连接不可达
          expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis丢失slave节点"
            description: "Redis slave不可达.请确认主从同步状态\n 当前值 = {{ $value }}"
        # Replica count dropped within the last minute
        - alert: 警报!Redis副本不一致
          expr: delta(redis_connected_slaves[1m]) < 0
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis 副本不一致"
            description: "Redis集群丢失一个slave节点\n 当前值 = {{ $value }}"
        # Replica count flapping
        - alert: 警报!Redis集群抖动
          expr: changes(redis_connected_slaves[1m]) > 1
          for: 2m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis集群抖动"
            description: "Redis集群抖动,请检查.\n 当前值 = {{ $value }}"
        # No successful RDB save in over 24 hours
        - alert: 警报!持久化失败
          expr: (time() - redis_rdb_last_save_timestamp_seconds) / 3600 > 24
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} Redis持久化失败"
            description: "Redis持久化失败(>24小时)\n 当前值 = {{ printf \"%.1f\" $value }}小时"
        # Redis using > 90% of system memory
        - alert: 警报!内存不足
          expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
          for: 2m
          labels:
            severity: 一般告警
          annotations:
            summary: "{{ $labels.instance }}系统内存不足"
            description: "Redis占用系统内存(> 90%)\n 当前值 = {{ printf \"%.2f\" $value }}%"
        # Usage above 80% of configured maxmemory (only when maxmemory is set)
        - alert: 警报!Maxmemory不足
          expr: redis_config_maxmemory !=0 and redis_memory_used_bytes / redis_memory_max_bytes * 100 > 80
          for: 2m
          labels:
            severity: 一般告警
          annotations:
            summary: "{{ $labels.instance }} Maxmemory设置太小"
            description: "超出设置最大内存(> 80%)\n 当前值 = {{ printf \"%.2f\" $value }}%"
        # Too many connected clients
        - alert: 警报!连接数太多
          expr: redis_connected_clients > 200
          for: 2m
          labels:
            severity: 一般告警
          annotations:
            summary: "{{ $labels.instance }} 实时连接数太多"
            description: "连接数太多(>200)\n 当前值 = {{ $value }}"
        # Suspiciously few connected clients
        - alert: 警报!连接数太少
          expr: redis_connected_clients < 1
          for: 2m
          labels:
            severity: 一般告警
          annotations:
            summary: "{{ $labels.instance }} 实时连接数太少"
            description: "连接数(<1)\n 当前值 = {{ $value }}"
        # Connections being rejected (maxclients likely reached)
        - alert: 警报!拒绝连接数
          expr: increase(redis_rejected_connections_total[1m]) > 0
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} 拒绝连接"
            description: "Redis有拒绝连接,请检查连接数配置\n 当前值 = {{ printf \"%.0f\" $value }}"
        # Command throughput above 1000 ops/s
        - alert: 警报!执行命令数大于1000
          expr: rate(redis_commands_processed_total[1m]) > 1000
          for: 0m
          labels:
            severity: 严重告警
          annotations:
            summary: "{{ $labels.instance }} 执行命令次数太多"
            description: "Redis执行命令次数太多\n 当前值 = {{ printf \"%.0f\" $value }}"
5 grafana dashboard配置
可以直接到grafana官方搜索redis相关的dashboard,https://grafana.com/grafana/dashboards/?pg=graf&plcmt=dashboard-below-txt&search=redis, 选择中意的dashboard导入到自有的grafana实例中即可。
官方推荐的grafana模板: https://ot-container-kit.github.io/redis-operator/guide/grafana.html