跳至主要內容

K8S集群中部署Redis集群

大约 4 分钟约 1329 字

K8S集群中部署Redis集群

0 参考资料

  • https://artifacthub.io/packages/helm/softonic/redis-operator
  • https://github.com/OT-CONTAINER-KIT/redis-operator/tree/master/example
  • https://github.com/OT-CONTAINER-KIT/helm-charts/tree/main/charts/redis-cluster
  • https://github.com/ot-container-kit/redis-operator
  • https://github.com/OT-CONTAINER-KIT/redis-operator
  • https://ot-container-kit.github.io/redis-operator/guide/redis-config.html

1 部署Redis-Operator

# operator镜像私有化配置(拉取官方镜像并推送到私有harbor仓库)
docker pull quay.io/opstree/redis-operator:v0.13.0
docker tag quay.io/opstree/redis-operator:v0.13.0 harbor.leadchina.cn/opstree/redis-operator:v0.13.0
docker push harbor.leadchina.cn/opstree/redis-operator:v0.13.0
# 添加chart仓库配置
helm repo add ot-helm https://ot-container-kit.github.io/helm-charts/
helm pull ot-helm/redis-operator

# 部署redis-operator
helm upgrade redis-operator ot-helm/redis-operator --install --namespace ot-operators --create-namespace
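# 设置redis实例密码(注意:Secret是命名空间级资源,Redis实例只能引用自身所在命名空间中的Secret;若实例部署在其他命名空间,如下文的dev,需将 -n 改为对应的命名空间。123456仅为示例,请替换为强密码)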
kubectl create secret generic redis-secret --from-literal=password=123456 -n ot-operators

# redis-exporter镜像私有化配置
docker pull quay.io/opstree/redis-exporter:v1.44.0
docker tag quay.io/opstree/redis-exporter:v1.44.0 harbor.leadchina.cn/opstree/redis-exporter:v1.44.0
docker push harbor.leadchina.cn/opstree/redis-exporter:v1.44.0

2 部署Redis standalone

# 使用helm方式部署redis单实例(myredis命名空间需已存在,否则请追加 --create-namespace 参数)
helm upgrade redis ot-helm/redis --install --namespace myredis

helm upgrade redis ot-helm/redis -f custom-values.yaml --install --namespace myredis

# 也可以参考官方仓库中的示例 https://github.com/OT-CONTAINER-KIT/redis-operator/tree/master/example ,直接编写Redis类型的资源,由operator自动创建符合自身需求的redis单实例。

3 部署Redis Cluster集群

# 通过helm方式部署redis集群(下面是两个示例)

helm upgrade redis-cluster ot-helm/redis-cluster   --set redisCluster.clusterSize=3 --install --namespace myredis

# custom-values.yaml文件内容可以参考官方文档中的示例编写
helm upgrade redis-cluster ot-helm/redis-cluster -f custom-values.yaml --set redisCluster.clusterSize=3 --install --namespace myredis
# 直接配置RedisCluster类型资源来创建redis集群(编写完成后执行 kubectl apply -f cluster.yaml 使其生效)
[root@salt-master-50 ~/redis-operator/example]# cat cluster.yaml 
---
apiVersion: redis.redis.opstreelabs.in/v1beta1
kind: RedisCluster
metadata:
  name: redis-cluster
  namespace: dev
spec:
  clusterSize: 6
  clusterVersion: v7
  persistenceEnabled: true
  securityContext:
    runAsUser: 1000
    fsGroup: 1000
  kubernetesConfig:
    image: harbor.leadchina.cn/opstree/redis:v7.0.5
    imagePullPolicy: IfNotPresent
    resources:
      requests:
        cpu: 101m
        memory: 128Mi
      limits:
        cpu: 101m
        memory: 128Mi
    redisSecret:
      name: redis-secret
      key: password
    # imagePullSecrets:
    #   - name: regcred
  redisExporter:
    enabled: true
    image: harbor.leadchina.cn/opstree/redis-exporter:v1.44.0
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 100m
        memory: 128Mi
# Environment Variables for Redis Exporter
    env:
    - name: REDIS_EXPORTER_INCL_SYSTEM_METRICS
      value: "true"
#    - name: UI_PROPERTIES_FILE_NAME
#      valueFrom:
#        configMapKeyRef:
#          name: game-demo
#          key: ui_properties_file_name
#    - name: SECRET_USERNAME
#      valueFrom:
#        secretKeyRef:
#          name: mysecret
#          key: username
  redisLeader:
    redisConfig:
      additionalRedisConfig: redis-external-config
  redisFollower:
    redisConfig:
      additionalRedisConfig: redis-external-config
  storage:
    volumeClaimTemplate:
      spec:
        storageClassName: managed-nfs-storage
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
  # nodeSelector:
  #   kubernetes.io/hostname: minikube
  # priorityClassName:
  # Affinity:
  # Tolerations: []

4 Redis监控配置

监控相关配置可参考文档 https://ot-container-kit.github.io/redis-operator/guide/monitoring.html

# redis-exporter服务监控配置
[root@salt-master-50 ~/olm/redis/monitoring]# cat redis-servicemonitor.yaml 
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor # prometheus-operator 定义的CRD
metadata:
  name: redis-metrics
  namespace: monitoring
  labels:
    prometheus: service
spec:
  jobLabel: app # 取Service上app标签的值作为监控数据的job标签,即加上数据标签job=redis-exporter
  selector:
    matchLabels:
      app: redis-exporter # 自动发现label中有 app=redis-exporter 的service
  namespaceSelector:
    matchNames: # 配置需要自动发现的命名空间(应填写redis实际所在的命名空间,例如上文的dev或myredis),可以配置多个
    - default
  endpoints:
  - port: redis-exporter # 拉取metric的端口,这里写的是service的端口名称,即service yaml的spec.ports[].name
    interval: 15s # 拉取metric的时间间隔
# redis监控告警配置
[root@salt-master-50 ~/olm/redis/monitoring]# cat rules.yaml 
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    monitor-svc: redis-exporter
    role: alert-rules
  name: redis-metrics-rules
  namespace: monitoring
spec:
  groups:
  - name: Redis-监控告警
    rules:
    - alert: 警报!Redis应用不可用
      expr: redis_up == 0
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} Redis应用不可用"
        description: "Redis应用不可达\n  当前值 = {{ $value }}"

    - alert: 警报!丢失Master节点
      expr: (count(redis_instance_info{role="master"}) ) < 1
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} 丢失Redis master"
        description: "Redis集群当前没有主节点\n  当前值 = {{ $value }}"

    - alert: 警报!脑裂,主节点太多
      expr: count(redis_instance_info{role="master"}) > 1
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} Redis脑裂,主节点太多"
        description: "{{ $labels.instance }} 主节点太多\n  当前值 = {{ $value }}"

    - alert: 警报!Slave连接不可达
      expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} Redis丢失slave节点"
        description: "Redis slave不可达.请确认主从同步状态\n  当前值 = {{ $value }}"

    - alert: 警报!Redis副本不一致
      expr: delta(redis_connected_slaves[1m]) < 0
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }}  Redis 副本不一致"
        description: "Redis集群丢失一个slave节点\n  当前值 = {{ $value }}"

    - alert: 警报!Redis集群抖动
      expr: changes(redis_connected_slaves[1m]) > 1
      for: 2m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }}  Redis集群抖动"
        description: "Redis集群抖动,请检查.\n  当前值 = {{ $value }}"

    - alert: 警报!持久化失败
      expr: (time() - redis_rdb_last_save_timestamp_seconds) / 3600 > 24
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }}  Redis持久化失败"
        description: "Redis持久化失败(>24小时)\n  当前值 = {{ printf \"%.1f\" $value }}小时"

    - alert: 警报!内存不足
      expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
      for: 2m
      labels:
        severity: 一般告警
      annotations:
        summary: "{{ $labels.instance }}系统内存不足"
        description: "Redis占用系统内存(> 90%)\n  当前值 = {{ printf \"%.2f\" $value }}%"

    - alert: 警报!Maxmemory不足
      expr: redis_config_maxmemory !=0 and redis_memory_used_bytes / redis_memory_max_bytes * 100 > 80
      for: 2m
      labels:
        severity: 一般告警
      annotations:
        summary: "{{ $labels.instance }} Maxmemory设置太小"
        description: "超出设置最大内存(> 80%)\n  当前值 = {{ printf \"%.2f\" $value }}%"

    - alert: 警报!连接数太多
      expr: redis_connected_clients > 200
      for: 2m
      labels:
        severity: 一般告警
      annotations:
        summary: "{{ $labels.instance }} 实时连接数太多"
        description: "连接数太多(>200)\n  当前值 = {{ $value }}"

    - alert: 警报!连接数太少
      expr: redis_connected_clients < 1
      for: 2m
      labels:
        severity: 一般告警
      annotations:
        summary: "{{ $labels.instance }}  实时连接数太少"
        description: "连接数(<1)\n  当前值 = {{ $value }}"

    - alert: 警报!拒绝连接数
      expr: increase(redis_rejected_connections_total[1m]) > 0
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} 拒绝连接"
        description: "Redis有拒绝连接,请检查连接数配置\n  当前值 = {{ printf \"%.0f\" $value }}"

    - alert: 警报!执行命令数大于1000
      expr: rate(redis_commands_processed_total[1m])  > 1000
      for: 0m
      labels:
        severity: 严重告警
      annotations:
        summary: "{{ $labels.instance }} 执行命令次数太多"
        description: "Redis执行命令次数太多\n  当前值 = {{ printf \"%.0f\" $value }}"

5 grafana dashboard配置

可以直接到grafana官方搜索redis相关的dashboard: https://grafana.com/grafana/dashboards/?pg=graf&plcmt=dashboard-below-txt&search=redis ,选择中意的dashboard导入到自有的grafana实例中即可。

官方推荐的grafana模板: https://ot-container-kit.github.io/redis-operator/guide/grafana.html