Added multi-cluster monitoring

This commit is contained in:
Marcus Noble 2021-06-14 10:09:27 +01:00
parent 59477f604a
commit 9baf2ead15
10 changed files with 2251 additions and 0 deletions

View File

@ -0,0 +1,24 @@
# Argo CD Application that syncs the manifests under manifests/monitoring
# into the "monitoring" namespace of the "cluster-fun (scaleway)" destination.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: monitoring
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: cluster.fun
  source:
    repoURL: 'https://git.cluster.fun/AverageMarcus/cluster.fun.git'
    path: manifests/monitoring
    targetRevision: HEAD
  destination:
    name: cluster-fun (scaleway)
    namespace: monitoring
  syncPolicy:
    # Automated sync with default options; the target namespace is created
    # on demand.
    automated: {}
    syncOptions:
      - CreateNamespace=true
  ignoreDifferences:
    # Changes under /data of Secrets are not reported as differences.
    - kind: Secret
      jsonPointers:
        - /data

View File

@ -212,3 +212,17 @@ spec:
targetPort: 8000
selector:
app: inlets
---
# ClusterIP Service "loki-local" in the inlets namespace: port 80 is forwarded
# to port 8000 on pods labelled app=inlets.
apiVersion: v1
kind: Service
metadata:
  name: loki-local
  namespace: inlets
spec:
  type: ClusterIP
  selector:
    app: inlets
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000

View File

@ -0,0 +1,182 @@
# PodSecurityPolicy for the blackbox exporter: unprivileged, read-only root
# filesystem, no host namespaces, configMap/secret volumes only, and the
# NET_RAW capability allowed.
# NOTE(review): PodSecurityPolicy is presumably cluster-scoped, in which case
# metadata.namespace has no effect here - confirm.
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: blackbox-exporter-psp
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
spec:
  privileged: false
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  allowedCapabilities:
    - NET_RAW
  hostNetwork: false
  hostIPC: false
  hostPID: false
  volumes:
    - configMap
    - secret
  runAsUser:
    rule: RunAsAny
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
  fsGroup:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
---
# ServiceAccount used by the blackbox-exporter Deployment and bound to the
# blackbox-exporter Role by the RoleBinding of the same name.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
---
# Blackbox exporter configuration, mounted at /config by the blackbox-exporter
# Deployment. Defines two probe modules:
#   http_2xx  - HTTP probe over IPv4, follows redirects, skips TLS verification
#   icmp_ping - ICMP probe over IPv4
# NOTE(review): icmp_ping sets source_ip_address to 127.0.0.1; binding probes
# to the loopback address presumably prevents pinging non-local targets -
# confirm this is intended.
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
data:
  blackbox.yaml: |
    modules:
      http_2xx:
        http:
          follow_redirects: true
          preferred_ip_protocol: ip4
          tls_config:
            insecure_skip_verify: true
          valid_http_versions:
            - HTTP/1.1
            - HTTP/2.0
        prober: http
        timeout: 5s
      icmp_ping:
        icmp:
          preferred_ip_protocol: ip4
          source_ip_address: 127.0.0.1
        prober: icmp
        timeout: 5s
---
# Role granting "use" on the blackbox-exporter-psp PodSecurityPolicy within
# the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
rules:
  - apiGroups: [policy]
    resources: [podsecuritypolicies]
    resourceNames: [blackbox-exporter-psp]
    verbs: [use]
---
# Binds the blackbox-exporter Role to the blackbox-exporter ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
subjects:
  - kind: ServiceAccount
    name: blackbox-exporter
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: blackbox-exporter
---
# ClusterIP Service exposing the blackbox exporter on port 9115; traffic goes
# to the container port named "http" on the matching pods.
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
  ports:
    - name: http
      protocol: TCP
      port: 9115
      targetPort: http
---
# Single-replica Deployment of the Prometheus blackbox exporter. The config
# file is read from the blackbox-exporter ConfigMap mounted at /config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: blackbox-exporter
    app.kubernetes.io/name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: blackbox-exporter
      app.kubernetes.io/name: prometheus
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Start one extra pod before removing the old one.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app.kubernetes.io/component: blackbox-exporter
        app.kubernetes.io/name: prometheus
    spec:
      serviceAccountName: blackbox-exporter
      restartPolicy: Always
      volumes:
        - name: config
          configMap:
            name: blackbox-exporter
      containers:
        - name: blackbox-exporter
          image: 'prom/blackbox-exporter:v0.19.0'
          imagePullPolicy: IfNotPresent
          args:
            - '--config.file=/config/blackbox.yaml'
          securityContext:
            readOnlyRootFilesystem: true
            capabilities:
              add:
                - NET_RAW
          ports:
            - name: http
              containerPort: 9115
          # Liveness and readiness both probe /health on the "http" port.
          livenessProbe:
            httpGet:
              path: /health
              port: http
          readinessProbe:
            httpGet:
              path: /health
              port: http
          volumeMounts:
            - name: config
              mountPath: /config

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,222 @@
# Opaque Secret declared without data. The oauth2-proxy Deployments in this
# file read its "username" and "password" keys.
# NOTE(review): the kube-1password annotations presumably make an external
# controller fill in the data from the referenced vault item - confirm.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: proxy-auth
  namespace: monitoring
  annotations:
    kube-1password: mr6spkkx7n3memkbute6ojaarm
    kube-1password/vault: Kubernetes
---
# oauth2-proxy in front of Prometheus: authenticates via OIDC (Auth0) and
# proxies to the prometheus-server Service. Listens on the pod IP, port 8080.
# NOTE(review): --cookie-secret is committed in plain text and is the same
# value used by the grafana-auth Deployment; consider sourcing it from a
# Secret. --cookie-secure=false is set although the redirect URL is https -
# confirm this is intended.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: server-auth
      app.kubernetes.io/name: prometheus
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server-auth
        app.kubernetes.io/name: prometheus
    spec:
      containers:
        - name: oauth-proxy
          image: quay.io/oauth2-proxy/oauth2-proxy:v5.1.1
          args:
            - --cookie-secure=false
            - --provider=oidc
            - --provider-display-name=Auth0
            - --upstream=http://prometheus-server.monitoring.svc.cluster.local
            - --http-address=$(HOST_IP):8080
            - --redirect-url=https://prometheus.cluster.fun/oauth2/callback
            - --email-domain=marcusnoble.co.uk
            - --pass-basic-auth=false
            - --pass-access-token=false
            - --oidc-issuer-url=https://marcusnoble.eu.auth0.com/
            - --cookie-secret=KDGD6rrK6cBmryyZ4wcJ9xAUNW9AQN
          env:
            # Pod IP, substituted into --http-address above.
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
            # OAuth client credentials come from the proxy-auth Secret.
            - name: OAUTH2_PROXY_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: proxy-auth
                  key: username
            - name: OAUTH2_PROXY_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: proxy-auth
                  key: password
          ports:
            - containerPort: 8080
              protocol: TCP
---
# ClusterIP Service for the prometheus-auth proxy: port 80 forwards to
# container port 8080.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: prometheus
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: prometheus
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
---
# Ingress for prometheus.cluster.fun, routed to the prometheus-auth Service
# on port 80. TLS uses the prometheus-ingress secret; the cert-manager
# annotation names the "letsencrypt" cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: prometheus
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    traefik.ingress.kubernetes.io/frontend-entry-points: http,https
    traefik.ingress.kubernetes.io/redirect-entry-point: https
    traefik.ingress.kubernetes.io/redirect-permanent: 'true'
spec:
  tls:
    - secretName: prometheus-ingress
      hosts:
        - prometheus.cluster.fun
  rules:
    - host: prometheus.cluster.fun
      http:
        paths:
          - path: /
            pathType: ImplementationSpecific
            backend:
              service:
                name: prometheus-auth
                port:
                  number: 80
---
# oauth2-proxy in front of Grafana: authenticates via OIDC (Auth0) and
# proxies to the grafana Service. Listens on the pod IP, port 8080.
# NOTE(review): --cookie-secret is committed in plain text and is the same
# value used by the prometheus-auth Deployment; consider sourcing it from a
# Secret. --cookie-secure=false is set although the redirect URL is https -
# confirm this is intended.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: server-auth
      app.kubernetes.io/name: grafana
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server-auth
        app.kubernetes.io/name: grafana
    spec:
      containers:
        - name: oauth-proxy
          image: quay.io/oauth2-proxy/oauth2-proxy:v5.1.1
          args:
            - --cookie-secure=false
            - --provider=oidc
            - --provider-display-name=Auth0
            - --upstream=http://grafana.monitoring.svc.cluster.local
            - --http-address=$(HOST_IP):8080
            - --redirect-url=https://grafana.cluster.fun/oauth2/callback
            - --email-domain=marcusnoble.co.uk
            - --pass-basic-auth=false
            - --pass-access-token=false
            - --oidc-issuer-url=https://marcusnoble.eu.auth0.com/
            - --cookie-secret=KDGD6rrK6cBmryyZ4wcJ9xAUNW9AQN
          env:
            # Pod IP, substituted into --http-address above.
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
            # OAuth client credentials come from the proxy-auth Secret.
            - name: OAUTH2_PROXY_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: proxy-auth
                  key: username
            - name: OAUTH2_PROXY_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: proxy-auth
                  key: password
          ports:
            - containerPort: 8080
              protocol: TCP
---
# ClusterIP Service for the grafana-auth proxy: port 80 forwards to
# container port 8080.
apiVersion: v1
kind: Service
metadata:
  name: grafana-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: grafana
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: grafana
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
---
# Ingress for grafana.cluster.fun, routed to the grafana-auth Service on
# port 80. TLS uses the grafana-ingress secret; the cert-manager annotation
# names the "letsencrypt" cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana-auth
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server-auth
    app.kubernetes.io/name: grafana
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    traefik.ingress.kubernetes.io/frontend-entry-points: http,https
    traefik.ingress.kubernetes.io/redirect-entry-point: https
    traefik.ingress.kubernetes.io/redirect-permanent: 'true'
spec:
  tls:
    - secretName: grafana-ingress
      hosts:
        - grafana.cluster.fun
  rules:
    - host: grafana.cluster.fun
      http:
        paths:
          - path: /
            pathType: ImplementationSpecific
            backend:
              service:
                name: grafana-auth
                port:
                  number: 80

View File

@ -0,0 +1,255 @@
# ServiceAccount used by the kube-state-metrics Deployment and referenced by
# the kube-state-metrics ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
---
# Read-only (list/watch) access to every resource kind that the
# kube-state-metrics Deployment enables through its --resources flags.
# One rule per resource, in the same order as those flags.
#
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22, and the other RBAC objects in these
# manifests already use v1.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
rules:
  - apiGroups: [certificates.k8s.io]
    resources: [certificatesigningrequests]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [configmaps]
    verbs: [list, watch]
  - apiGroups: [batch]
    resources: [cronjobs]
    verbs: [list, watch]
  - apiGroups: [extensions, apps]
    resources: [daemonsets]
    verbs: [list, watch]
  - apiGroups: [extensions, apps]
    resources: [deployments]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [endpoints]
    verbs: [list, watch]
  - apiGroups: [autoscaling]
    resources: [horizontalpodautoscalers]
    verbs: [list, watch]
  - apiGroups: [extensions, networking.k8s.io]
    resources: [ingresses]
    verbs: [list, watch]
  - apiGroups: [batch]
    resources: [jobs]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [limitranges]
    verbs: [list, watch]
  - apiGroups: [admissionregistration.k8s.io]
    resources: [mutatingwebhookconfigurations]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [namespaces]
    verbs: [list, watch]
  - apiGroups: [networking.k8s.io]
    resources: [networkpolicies]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [nodes]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [persistentvolumeclaims]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [persistentvolumes]
    verbs: [list, watch]
  - apiGroups: [policy]
    resources: [poddisruptionbudgets]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [pods]
    verbs: [list, watch]
  - apiGroups: [extensions, apps]
    resources: [replicasets]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [replicationcontrollers]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [resourcequotas]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [secrets]
    verbs: [list, watch]
  - apiGroups: ['']
    resources: [services]
    verbs: [list, watch]
  - apiGroups: [apps]
    resources: [statefulsets]
    verbs: [list, watch]
  - apiGroups: [storage.k8s.io]
    resources: [storageclasses]
    verbs: [list, watch]
  - apiGroups: [admissionregistration.k8s.io]
    resources: [validatingwebhookconfigurations]
    verbs: [list, watch]
  - apiGroups: [storage.k8s.io]
    resources: [volumeattachments]
    verbs: [list, watch]
---
# Binds the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the monitoring namespace.
#
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: monitoring
---
# ClusterIP Service for kube-state-metrics on port 8080. Carries the
# prometheus.io/scrape annotation that the Prometheus
# "kubernetes-service-endpoints" job keeps on.
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app.kubernetes.io/name: kube-state-metrics
  annotations:
    prometheus.io/scrape: "true"
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: kube-state-metrics
  ports:
    - name: http
      port: 8080
      targetPort: 8080
      protocol: TCP
---
# Single-replica kube-state-metrics Deployment, running as uid/gid 65534.
# Each --resources flag names a resource kind; the kube-state-metrics
# ClusterRole grants list/watch on the same set.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app.kubernetes.io/name: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      securityContext:
        runAsUser: 65534
        runAsGroup: 65534
        fsGroup: 65534
      containers:
        - name: kube-state-metrics
          image: 'k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.1.0'
          imagePullPolicy: IfNotPresent
          args:
            - --resources=certificatesigningrequests
            - --resources=configmaps
            - --resources=cronjobs
            - --resources=daemonsets
            - --resources=deployments
            - --resources=endpoints
            - --resources=horizontalpodautoscalers
            - --resources=ingresses
            - --resources=jobs
            - --resources=limitranges
            - --resources=mutatingwebhookconfigurations
            - --resources=namespaces
            - --resources=networkpolicies
            - --resources=nodes
            - --resources=persistentvolumeclaims
            - --resources=persistentvolumes
            - --resources=poddisruptionbudgets
            - --resources=pods
            - --resources=replicasets
            - --resources=replicationcontrollers
            - --resources=resourcequotas
            - --resources=secrets
            - --resources=services
            - --resources=statefulsets
            - --resources=storageclasses
            - --resources=validatingwebhookconfigurations
            - --resources=volumeattachments
          ports:
            - containerPort: 8080
          # Liveness checks /healthz; readiness checks the root path.
          livenessProbe:
            initialDelaySeconds: 5
            timeoutSeconds: 5
            httpGet:
              path: /healthz
              port: 8080
          readinessProbe:
            initialDelaySeconds: 5
            timeoutSeconds: 5
            httpGet:
              path: /
              port: 8080
---

View File

@ -0,0 +1,232 @@
# PodSecurityPolicy for Loki: unprivileged, must run as non-root, read-only
# root filesystem, no host namespaces, and all capabilities dropped.
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: loki
  labels:
    app.kubernetes.io/name: loki
spec:
  privileged: false
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  requiredDropCapabilities:
    - ALL
  hostNetwork: false
  hostIPC: false
  hostPID: false
  volumes:
    - configMap
    - emptyDir
    - persistentVolumeClaim
    - secret
    - projected
    - downwardAPI
  runAsUser:
    rule: MustRunAsNonRoot
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
  fsGroup:
    rule: MustRunAs
    ranges:
      - min: 1
        max: 65535
---
# ServiceAccount used by the loki StatefulSet and bound to the loki Role.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: loki
  labels:
    app.kubernetes.io/name: loki
---
# Loki configuration, mounted at /etc/loki by the loki StatefulSet.
# Single-instance setup: in-memory ring with replication factor 1,
# boltdb-shipper index and filesystem chunk storage under /data/loki,
# HTTP on port 3100, table-manager retention of 720h.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki
  namespace: monitoring
  labels:
    app.kubernetes.io/name: loki
data:
  loki.yaml: |
    auth_enabled: false
    chunk_store_config:
      max_look_back_period: 0s
    compactor:
      shared_store: filesystem
      working_directory: /data/loki/boltdb-shipper-compactor
    ingester:
      chunk_block_size: 262144
      chunk_idle_period: 3m
      chunk_retain_period: 1m
      lifecycler:
        ring:
          kvstore:
            store: inmemory
          replication_factor: 1
      max_transfer_retries: 0
    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h
    schema_config:
      configs:
        - from: "2020-10-24"
          index:
            period: 24h
            prefix: index_
          object_store: filesystem
          schema: v11
          store: boltdb-shipper
    server:
      http_listen_port: 3100
    storage_config:
      boltdb_shipper:
        active_index_directory: /data/loki/boltdb-shipper-active
        cache_location: /data/loki/boltdb-shipper-cache
        cache_ttl: 24h
        shared_store: filesystem
      filesystem:
        directory: /data/loki/chunks
    table_manager:
      retention_deletes_enabled: true
      retention_period: 720h
---
# Role granting "use" on the loki PodSecurityPolicy within the monitoring
# namespace.
#
# The rule targets the "policy" API group, matching the policy/v1beta1
# apiVersion the loki PodSecurityPolicy is declared with and the
# blackbox-exporter Role; the previous "extensions" group is the legacy
# location of PodSecurityPolicy.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: loki
  namespace: monitoring
  labels:
    app.kubernetes.io/name: loki
rules:
  - apiGroups: [policy]
    resources: [podsecuritypolicies]
    resourceNames: [loki]
    verbs: [use]
---
# Binds the loki Role to the loki ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: monitoring
  name: loki
  labels:
    app.kubernetes.io/name: loki
subjects:
  - kind: ServiceAccount
    name: loki
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: loki
---
# Headless Service (clusterIP: None) for Loki; it is the serviceName of the
# loki StatefulSet.
apiVersion: v1
kind: Service
metadata:
  name: loki-headless
  namespace: monitoring
  labels:
    variant: headless
    app.kubernetes.io/name: loki
spec:
  clusterIP: None
  selector:
    app.kubernetes.io/name: loki
  ports:
    - name: http-metrics
      protocol: TCP
      port: 3100
      targetPort: http-metrics
---
# ClusterIP Service exposing Loki on port 3100, targeting the container port
# named "http-metrics".
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: loki
  labels:
    app.kubernetes.io/name: loki
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: loki
  ports:
    - name: http-metrics
      protocol: TCP
      port: 3100
      targetPort: http-metrics
---
# Single-replica Loki StatefulSet. Configuration comes from the loki
# ConfigMap (mounted at /etc/loki); data lives on a 10Gi volume claimed per
# pod from the scw-bssd storage class and mounted at /data.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: loki
  namespace: monitoring
  labels:
    app.kubernetes.io/name: loki
spec:
  replicas: 1
  serviceName: loki-headless
  podManagementPolicy: OrderedReady
  selector:
    matchLabels:
      app.kubernetes.io/name: loki
  template:
    metadata:
      labels:
        app.kubernetes.io/name: loki
      annotations:
        prometheus.io/port: http-metrics
        prometheus.io/scrape: "true"
    spec:
      serviceAccountName: loki
      terminationGracePeriodSeconds: 4800
      securityContext:
        runAsNonRoot: true
        runAsUser: 10001
        runAsGroup: 10001
        fsGroup: 10001
      volumes:
        - name: config
          configMap:
            name: loki
      containers:
        - name: loki
          image: 'grafana/loki:2.2.1'
          imagePullPolicy: IfNotPresent
          args:
            - '-config.file=/etc/loki/loki.yaml'
          securityContext:
            readOnlyRootFilesystem: true
          ports:
            - name: http-metrics
              containerPort: 3100
              protocol: TCP
          # Liveness and readiness both probe /ready after a 45s delay.
          livenessProbe:
            initialDelaySeconds: 45
            httpGet:
              path: /ready
              port: http-metrics
          readinessProbe:
            initialDelaySeconds: 45
            httpGet:
              path: /ready
              port: http-metrics
          volumeMounts:
            - name: config
              mountPath: /etc/loki
            - name: storage
              mountPath: /data
              # Explicit empty string (mount the volume root) instead of a
              # bare "subPath:" key, which YAML parses as null.
              subPath: ""
  volumeClaimTemplates:
    - metadata:
        name: storage
      spec:
        storageClassName: scw-bssd
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi

View File

@ -0,0 +1,87 @@
# ServiceAccount used by the prometheus-node-exporter DaemonSet.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: node-exporter
    app.kubernetes.io/name: prometheus
---
# Headless Service (clusterIP: None) for the node exporter pods on port 9100.
# Carries the prometheus.io/scrape annotation that the Prometheus
# "kubernetes-service-endpoints" job keeps on.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: node-exporter
    app.kubernetes.io/name: prometheus
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    app.kubernetes.io/component: node-exporter
    app.kubernetes.io/name: prometheus
  ports:
    - name: metrics
      protocol: TCP
      port: 9100
      targetPort: 9100
---
# Node exporter DaemonSet. Pods use the host network and PID namespace and
# mount the host's /proc and /sys read-only at /host/proc and /host/sys,
# which the --path.procfs / --path.sysfs flags point at.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app.kubernetes.io/component: node-exporter
    app.kubernetes.io/name: prometheus
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      app.kubernetes.io/component: node-exporter
      app.kubernetes.io/name: prometheus
  template:
    metadata:
      labels:
        app.kubernetes.io/component: node-exporter
        app.kubernetes.io/name: prometheus
    spec:
      serviceAccountName: prometheus-node-exporter
      hostNetwork: true
      hostPID: true
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
      containers:
        - name: prometheus-node-exporter
          image: 'prom/node-exporter:v1.1.2'
          imagePullPolicy: IfNotPresent
          args:
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
            # wifi, hwmon and netclass collectors are switched off.
            - --no-collector.wifi
            - --no-collector.hwmon
            - --no-collector.netclass
            - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
            - --web.listen-address=:9100
          ports:
            - name: metrics
              containerPort: 9100
              hostPort: 9100
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
---

View File

@ -0,0 +1,501 @@
# ServiceAccount used by the prometheus-server Deployment and referenced by
# the prometheus-server ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
---
# Prometheus server configuration, mounted at /etc/config by the
# prometheus-server Deployment. prometheus.yml defines the scrape jobs; the
# four rule/alert files are empty placeholders.
#
# Fix: the "kubernetes-services" probe job now rewrites __address__ to
# blackbox-exporter:9115 - the Service name and port the blackbox exporter is
# exposed on in these manifests, and the address the two
# "prometheus-blackbox-exporter-*" jobs already use. It previously pointed at
# "blackbox", with no port.
#
# NOTE(review): the "node-exporter" job keeps only endpoints named exactly
# "node-exporter", but the node exporter Service in these manifests is named
# "prometheus-node-exporter" (which is also scraped through its
# prometheus.io/scrape annotation). Confirm whether this job is meant to
# match anything.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
data:
  alerting_rules.yml: |
    {}
  alerts: |
    {}
  prometheus.yml: |
    global:
      evaluation_interval: 1m
      scrape_interval: 1m
      scrape_timeout: 10s
    rule_files:
      - /etc/config/recording_rules.yml
      - /etc/config/alerting_rules.yml
      - /etc/config/rules
      - /etc/config/alerts
    scrape_configs:
      - job_name: prometheus
        static_configs:
          - targets:
              - localhost:9090
      - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        job_name: kubernetes-apiservers
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: default;kubernetes;https
            source_labels:
              - __meta_kubernetes_namespace
              - __meta_kubernetes_service_name
              - __meta_kubernetes_endpoint_port_name
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
      - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        job_name: kubernetes-nodes
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - replacement: kubernetes.default.svc:443
            target_label: __address__
          - regex: (.+)
            replacement: /api/v1/nodes/$1/proxy/metrics
            source_labels:
              - __meta_kubernetes_node_name
            target_label: __metrics_path__
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
      - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        job_name: kubernetes-nodes-cadvisor
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - replacement: kubernetes.default.svc:443
            target_label: __address__
          - regex: (.+)
            replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
            source_labels:
              - __meta_kubernetes_node_name
            target_label: __metrics_path__
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
      - job_name: kubernetes-service-endpoints
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: kubernetes_node
      - job_name: kubernetes-service-endpoints-slow
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow
          - action: replace
            regex: (https?)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: kubernetes_node
        scrape_interval: 5m
        scrape_timeout: 30s
      - honor_labels: true
        job_name: prometheus-pushgateway
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - action: keep
            regex: pushgateway
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_probe
      - job_name: kubernetes-services
        kubernetes_sd_configs:
          - role: service
        metrics_path: /probe
        params:
          module:
            - http_2xx
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_probe
          - source_labels:
              - __address__
            target_label: __param_target
          - replacement: blackbox-exporter:9115
            target_label: __address__
          - source_labels:
              - __param_target
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - source_labels:
              - __meta_kubernetes_service_name
            target_label: kubernetes_name
      - job_name: kubernetes-pods
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_scrape
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name
          - action: drop
            regex: Pending|Succeeded|Failed
            source_labels:
              - __meta_kubernetes_pod_phase
      - job_name: kubernetes-pods-slow
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name
          - action: drop
            regex: Pending|Succeeded|Failed
            source_labels:
              - __meta_kubernetes_pod_phase
        scrape_interval: 5m
        scrape_timeout: 30s
      - job_name: 'prometheus-blackbox-exporter-ping'
        metrics_path: /probe
        params:
          module: [icmp_ping]
        static_configs:
          - targets: []
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: blackbox-exporter:9115
      - job_name: 'prometheus-blackbox-exporter-http'
        metrics_path: /probe
        params:
          module: [http_2xx]
        static_configs:
          - targets: []
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: blackbox-exporter:9115
      - job_name: 'node-exporter'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_endpoints_name]
            regex: 'node-exporter'
            action: keep
      - job_name: 'federated-clusters'
        scrape_interval: 15s
        honor_labels: true
        params:
          'match[]':
            - '{job="prometheus"}'
            - '{job="node"}'
            - '{job="node_exporter"}'
            - '{job="zfs_exporter"}'
            - '{job=~"kubernetes.*"}'
        metrics_path: '/federate'
        static_configs:
          - targets:
              - 'prometheus-local.inlets.svc:80'
  recording_rules.yml: |
    {}
  rules: |
    {}
---
# 8Gi ReadWriteOnce claim backing the prometheus-server Deployment's
# storage-volume. No storage class is named here.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 8Gi
---
# Cluster-wide read access (get/list/watch) for the Prometheus server on
# the listed core and ingress resources, plus "get" on the /metrics
# non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-server
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
rules:
  - apiGroups: ['']
    resources:
      - nodes
      - nodes/proxy
      - nodes/metrics
      - services
      - endpoints
      - pods
      - ingresses
      - configmaps
    verbs: [get, list, watch]
  - apiGroups: [extensions, networking.k8s.io]
    resources:
      - ingresses/status
      - ingresses
    verbs: [get, list, watch]
  - nonResourceURLs: [/metrics]
    verbs: [get]
---
# Binds the prometheus-server ClusterRole to the prometheus-server
# ServiceAccount in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-server
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-server
subjects:
  - kind: ServiceAccount
    name: prometheus-server
    namespace: monitoring
---
# ClusterIP Service for the Prometheus server: port 80 forwards to container
# port 9090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9090
---
# Single-replica Prometheus server, running as uid/gid 65534. Two containers
# share the prometheus-server ConfigMap at /etc/config: the configmap-reload
# sidecar is given that directory and the local /-/reload URL, which the
# server accepts because --web.enable-lifecycle is set. TSDB data is kept on
# the prometheus-server PVC mounted at /data, with 15d retention.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: server
      app.kubernetes.io/name: prometheus
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
        app.kubernetes.io/name: prometheus
    spec:
      serviceAccountName: prometheus-server
      terminationGracePeriodSeconds: 300
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        runAsGroup: 65534
        fsGroup: 65534
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-server
        - name: storage-volume
          persistentVolumeClaim:
            claimName: prometheus-server
      containers:
        - name: prometheus-server-configmap-reload
          image: 'jimmidyson/configmap-reload:v0.5.0'
          imagePullPolicy: IfNotPresent
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://127.0.0.1:9090/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
        - name: prometheus-server
          image: 'prom/prometheus:v2.27.1'
          imagePullPolicy: IfNotPresent
          args:
            - --storage.tsdb.retention.time=15d
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.console.templates=/etc/prometheus/consoles
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 30
            successThreshold: 1
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 30
            successThreshold: 1
            failureThreshold: 3
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: storage-volume
              mountPath: /data
              subPath: ''
---

View File

@ -0,0 +1,449 @@
# Service account used by the promtail pods; it is the subject of the
# promtail RBAC bindings defined in this file.
kind: ServiceAccount
apiVersion: v1
metadata:
  labels:
    app.kubernetes.io/name: promtail
  name: promtail
  namespace: monitoring
---
# Pod security policy for promtail: no privileged mode, no privilege
# escalation, no host namespaces, read-only root filesystem and all
# capabilities dropped. User, group and SELinux rules are unrestricted.
# NOTE(review): PodSecurityPolicy is a cluster-scoped kind, so
# metadata.namespace (kept from the original) likely has no effect - confirm.
kind: PodSecurityPolicy
apiVersion: policy/v1beta1
metadata:
  labels:
    app.kubernetes.io/name: promtail
  name: promtail
  namespace: monitoring
spec:
  privileged: false
  allowPrivilegeEscalation: false
  hostNetwork: false
  hostPID: false
  hostIPC: false
  readOnlyRootFilesystem: true
  requiredDropCapabilities: [ALL]
  runAsUser:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  seLinux:
    rule: RunAsAny
  # Volume types a pod admitted under this policy may use.
  volumes:
    - secret
    - configMap
    - hostPath
    - projected
    - downwardAPI
    - emptyDir
---
# Promtail configuration, stored under the key `promtail.yaml`.
#
# The config defines five scrape jobs, all using pod service discovery and the
# `docker` pipeline stage. Each job derives a temporary `__service__` label
# from a different source and drops targets for which it is empty:
#   - kubernetes-pods-name: from the pod's `name` label.
#   - kubernetes-pods-app: pods without a `name` label; from the `app` label.
#   - kubernetes-pods-direct-controllers: pods with neither `name` nor `app`
#     label whose controller name does NOT match
#     `[0-9a-z-.]+-[0-9a-f]{8,10}`; from the controller name.
#   - kubernetes-pods-indirect-controller: pods with neither label whose
#     controller name DOES match that pattern; from the controller name with
#     the trailing `-<suffix>` removed.
#   - kubernetes-pods-static: pods carrying the `kubernetes.io/config.mirror`
#     annotation; from the `component` label.
#
# Every job then copies all pod labels (labelmap), sets `job` to
# `<namespace>/<service>`, sets `namespace`, `pod` and `container`, and builds
# `__path__` as `/var/log/pods/*<id>/<container>/*.log`, where <id> is the pod
# UID (or the config.mirror annotation value for the static job).
# Relabel rules run in order, so their sequence must be preserved.
apiVersion: v1
kind: ConfigMap
metadata:
  name: promtail
  namespace: monitoring
  labels:
    app.kubernetes.io/name: promtail
data:
  # No client URL is set in this config; the promtail DaemonSet in this file
  # passes it with the `-client.url` argument.
  promtail.yaml: |
    client:
      backoff_config:
        max_period: 5m
        max_retries: 10
        min_period: 500ms
      batchsize: 1048576
      batchwait: 1s
      external_labels: {}
      timeout: 10s
    positions:
      filename: /run/promtail/positions.yaml
    server:
      http_listen_port: 3101
    target_config:
      sync_period: 10s
    scrape_configs:
      - job_name: kubernetes-pods-name
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels:
              - __meta_kubernetes_pod_label_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      - job_name: kubernetes-pods-app
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            source_labels:
              - __meta_kubernetes_pod_label_name
          - source_labels:
              - __meta_kubernetes_pod_label_app
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      - job_name: kubernetes-pods-direct-controllers
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            separator: ''
            source_labels:
              - __meta_kubernetes_pod_label_name
              - __meta_kubernetes_pod_label_app
          - action: drop
            regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
          - source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      - job_name: kubernetes-pods-indirect-controller
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            separator: ''
            source_labels:
              - __meta_kubernetes_pod_label_name
              - __meta_kubernetes_pod_label_app
          - action: keep
            regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
          - action: replace
            regex: '([0-9a-z-.]+)-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      - job_name: kubernetes-pods-static
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: ''
            source_labels:
              - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_label_component
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
              - __meta_kubernetes_pod_container_name
            target_label: __path__
---
# Cluster-wide read-only access (get/watch/list) to the core resources listed
# below; bound to the promtail ServiceAccount by promtail-clusterrolebinding.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: promtail
  name: promtail-clusterrole
rules:
  - apiGroups:
      - ""  # "" indicates the core API group
    resources: [nodes, nodes/proxy, services, endpoints, pods]
    verbs:
      - get
      - watch
      - list
---
# Grants the promtail-clusterrole ClusterRole to the promtail ServiceAccount
# in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/name: promtail
  name: promtail-clusterrolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: promtail-clusterrole
subjects:
  - kind: ServiceAccount
    name: promtail
    namespace: monitoring
---
# Allows its subjects to `use` the `promtail` PodSecurityPolicy.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: promtail
  namespace: monitoring
  labels:
    app.kubernetes.io/name: promtail
rules:
  # The promtail PodSecurityPolicy in this file is declared as
  # policy/v1beta1, so the grant targets the `policy` API group rather than
  # the legacy `extensions` group.
  - apiGroups: ['policy']
    resources: ['podsecuritypolicies']
    verbs: ['use']
    resourceNames: [promtail]
---
# Binds the `promtail` Role to the `promtail` ServiceAccount within the
# monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: promtail
  namespace: monitoring
  labels:
    app.kubernetes.io/name: promtail
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: promtail
subjects:
  - kind: ServiceAccount
    name: promtail
    # Stated explicitly (same namespace as this binding) instead of relying
    # on an implicit default, matching the ClusterRoleBinding subject.
    namespace: monitoring
---
# Promtail log shipper, one pod per node. Reads pod logs from host paths and
# pushes them to the URL given by `-client.url`.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: monitoring
  labels:
    app.kubernetes.io/name: promtail
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail
  template:
    metadata:
      labels:
        app.kubernetes.io/name: promtail
      annotations:
        # Numeric value of the `http-metrics` container port declared below.
        # NOTE(review): annotation-driven scrape configs usually rewrite the
        # target address from this value and expect digits, not a port name;
        # the Prometheus scrape config is outside this section - confirm.
        prometheus.io/port: "3101"
        prometheus.io/scrape: "true"
    spec:
      serviceAccountName: promtail
      containers:
        - name: promtail
          image: "grafana/promtail:2.2.1"
          imagePullPolicy: IfNotPresent
          args:
            - "-config.file=/etc/promtail/promtail.yaml"
            - "-client.url=http://loki:3100/loki/api/v1/push"
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            # Writable host directory; the promtail config keeps its
            # positions file under /run/promtail.
            - name: run
              mountPath: /run/promtail
            - mountPath: /var/lib/docker/containers
              name: docker
              readOnly: true
            - mountPath: /var/log/pods
              name: pods
              readOnly: true
          env:
            # Expose the node name to the container as HOSTNAME.
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          ports:
            - containerPort: 3101
              name: http-metrics
          # Runs as root (UID/GID 0) with a read-only root filesystem.
          securityContext:
            readOnlyRootFilesystem: true
            runAsGroup: 0
            runAsUser: 0
          readinessProbe:
            failureThreshold: 5
            httpGet:
              path: /ready
              port: http-metrics
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
      # Also schedule onto nodes tainted as master.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
          operator: Exists
      volumes:
        - name: config
          configMap:
            name: promtail
        - name: run
          hostPath:
            path: /run/promtail
        - hostPath:
            path: /var/lib/docker/containers
          name: docker
        - hostPath:
            path: /var/log/pods
          name: pods
---