forked from mrckndt/CS-Repro-Mattermost
switching out promtail for alloy (#22)
* rework grafana log ingestion using alloy * fix docker-compose.yml formatting
This commit is contained in:
2
Makefile
2
Makefile
@@ -36,7 +36,7 @@ run:
|
||||
|
||||
run-core:
|
||||
@echo "Starting the core services... hang in there."
|
||||
@docker-compose up -d postgres openldap prometheus grafana elasticsearch mattermost keycloak loki promtail
|
||||
@docker-compose up -d postgres openldap prometheus grafana elasticsearch mattermost keycloak loki alloy
|
||||
|
||||
run-db-replicas:
|
||||
@echo "Starting with replicas. Hang in there..."
|
||||
|
||||
@@ -54,20 +54,30 @@ services:
|
||||
- 3100:3100
|
||||
volumes:
|
||||
- ./files/loki:/etc/loki
|
||||
- ./volumes/logs-node-1:/logs/node-1
|
||||
- ./volumes/logs-node-2:/logs/node-2
|
||||
- ./volumes/loki-data:/tmp/loki
|
||||
command: -config.file=/etc/loki/loki-config.yaml
|
||||
promtail:
|
||||
container_name: cs-repro-promtail
|
||||
image: grafana/promtail:3.2.2
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"wget --no-verbose --tries=1 --spider http://localhost:3100/loki/api/v1/labels || exit 1",
|
||||
]
|
||||
interval: 20s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
start_period: 40s
|
||||
alloy:
|
||||
container_name: cs-repro-alloy
|
||||
image: grafana/alloy:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 9080:9080
|
||||
- 9999:9999
|
||||
volumes:
|
||||
- ./files/promtail:/etc/promtail
|
||||
- ./volumes/logs-node-1:/logs-node-1
|
||||
- ./volumes/logs-node-2:/logs-node-2
|
||||
command: -config.file=/etc/promtail/promtail-config.yaml
|
||||
- ./files/alloy:/etc/alloy:ro
|
||||
- ./volumes/alloy/data:/var/lib/alloy/data:rw
|
||||
- ./volumes/logs-node-1:/mattermost/logs:ro
|
||||
command: run --server.http.listen-addr=0.0.0.0:9080 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy
|
||||
depends_on:
|
||||
- loki
|
||||
grafana:
|
||||
|
||||
45
files/alloy/README.md
Normal file
45
files/alloy/README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Alloy Log Agent Configuration
|
||||
|
||||
This directory contains the configuration for Grafana Alloy, which has replaced Promtail as the log agent in this environment.
|
||||
|
||||
## Key Differences
|
||||
|
||||
- Alloy uses a component-based configuration format with `.alloy` extension
|
||||
- The web UI is available at http://localhost:9080
|
||||
- Alloy can handle logs, metrics, and traces in one agent
|
||||
- Configuration is more flexible with the Alloy configuration syntax (formerly known as River)
|
||||
|
||||
## Configuration Explanation
|
||||
|
||||
The `config.alloy` file follows the component-based model where:
|
||||
|
||||
1. `loki.source.file` components directly collect logs from Mattermost log files
|
||||
2. `loki.process` component parses and labels the JSON logs
|
||||
3. `loki.write` component sends the logs to Loki
|
||||
|
||||
## Current Setup
|
||||
|
||||
Our configuration:
|
||||
- Monitors Mattermost logs directly from mounted volumes
|
||||
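- Labels all file-collected logs with `job="mattermost"` for Grafana dashboard compatibility (logs pushed through the `loki.source.api` endpoint on port 9999 are forwarded as-is and must carry their own labels)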
|
||||
- Extracts log level, message, and other metadata from JSON logs
|
||||
- Sends logs to Loki service
|
||||
|
||||
## Converting Promtail Config to Alloy
|
||||
|
||||
If you need to update the configuration, you can convert Promtail configs to Alloy format with:
|
||||
|
||||
```bash
|
||||
# Example using the alloy CLI (if installed locally)
|
||||
alloy convert --source-format=promtail --output=config.alloy promtail-config.yaml
|
||||
```
|
||||
|
||||
## Important Syntax Notes
|
||||
|
||||
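- The Alloy configuration syntax (formerly River) requires commas between key-value pairs inside object literals (for example `expressions = { ts = "timestamp", ... }`); attributes inside a block (for example `listen_port = 9999`) are written one per line without commas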
|
||||
- Trailing commas in lists and objects are supported and recommended
|
||||
|
||||
## More Information
|
||||
|
||||
- [Grafana Alloy Documentation](https://grafana.com/docs/alloy/latest/)
|
||||
- [Migrating from Promtail to Alloy](https://grafana.com/docs/loki/latest/send-data/alloy/migrate-from-promtail/)
|
||||
59
files/alloy/config.alloy
Normal file
59
files/alloy/config.alloy
Normal file
@@ -0,0 +1,59 @@
|
||||
// File-based log sources
|
||||
loki.source.file "mattermost_logs" {
|
||||
targets = [
|
||||
{ "__path__" = "/mattermost/logs/mattermost.log", "filename" = "/logs-node-1/mattermost.log", "job" = "mattermost" },
|
||||
]
|
||||
forward_to = [loki.process.mm_logs_processor.receiver]
|
||||
}
|
||||
|
||||
loki.source.file "mattermost_advanced_logs" {
|
||||
targets = [
|
||||
{ "__path__" = "/mattermost/logs/advancedLogs.log", "filename" = "/logs-node-1/advancedLogs.log", "job" = "mattermost" },
|
||||
]
|
||||
forward_to = [loki.process.mm_logs_processor.receiver]
|
||||
}
|
||||
|
||||
loki.source.file "mattermost_ldap_logs" {
|
||||
targets = [
|
||||
{ "__path__" = "/mattermost/logs/advancedLdapLogs.log", "filename" = "/logs-node-1/advancedLdapLogs.log", "job" = "mattermost" },
|
||||
]
|
||||
forward_to = [loki.process.mm_logs_processor.receiver]
|
||||
}
|
||||
|
||||
// Process Mattermost logs
|
||||
loki.process "mm_logs_processor" {
|
||||
// First, try to parse as JSON
|
||||
stage.json {
|
||||
expressions = {
|
||||
ts = "timestamp",
|
||||
log_level = "level",
|
||||
log_msg = "msg",
|
||||
log_caller = "caller",
|
||||
}
|
||||
}
|
||||
|
||||
// Set labels based on extracted fields
|
||||
stage.labels {
|
||||
values = {
|
||||
level = "log_level",
|
||||
}
|
||||
}
|
||||
|
||||
forward_to = [loki.write.loki.receiver]
|
||||
}
|
||||
|
||||
// API endpoint for direct log pushing
|
||||
loki.source.api "push_api" {
|
||||
http {
|
||||
listen_address = "0.0.0.0"
|
||||
listen_port = 9999
|
||||
}
|
||||
forward_to = [loki.write.loki.receiver]
|
||||
}
|
||||
|
||||
// Send all logs to Loki
|
||||
loki.write "loki" {
|
||||
endpoint {
|
||||
url = "http://loki:3100/loki/api/v1/push"
|
||||
}
|
||||
}
|
||||
63
files/grafana/README.md
Normal file
63
files/grafana/README.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Grafana Configuration
|
||||
|
||||
This directory contains the configuration for Grafana, which serves as the visualization and dashboarding platform for the CS Repro environment.
|
||||
|
||||
## Overview
|
||||
|
||||
Grafana is an open-source platform for monitoring and observability that allows you to query, visualize, alert on, and understand your metrics, logs, and traces.
|
||||
|
||||
## Current Setup
|
||||
|
||||
- Running as a single instance (version 10.0.4)
|
||||
- Available at http://localhost:3000
|
||||
- Default credentials: admin/admin
|
||||
|
||||
## Directory Structure
|
||||
|
||||
- **dashboards/**: Contains JSON dashboard definitions
|
||||
- `enhanced_logs.json`: Dashboard for viewing and filtering Mattermost logs
|
||||
- `kpiMetrics.json`: Dashboard for key performance indicators
|
||||
- `metricsv2.json`: Dashboard for detailed application metrics
|
||||
- `bonusMetrics.json`: Dashboard for additional metrics
|
||||
|
||||
- **provisioning/**: Contains automatic provisioning configurations
|
||||
- `datasources/`: Configures data source connections
|
||||
- `dashboards/`: Sets up dashboard loading
|
||||
|
||||
## Data Sources
|
||||
|
||||
The environment is configured with the following data sources:
|
||||
|
||||
1. **Prometheus** (Default): For metrics collection
|
||||
- URL: http://prometheus:9090
|
||||
- Used by most of the metric dashboards
|
||||
|
||||
2. **Loki**: For log aggregation
|
||||
- URL: http://loki:3100
|
||||
- Used by the enhanced_logs dashboard
|
||||
- Queries using the LogQL language
|
||||
|
||||
## Log Dashboard
|
||||
|
||||
The `enhanced_logs.json` dashboard is designed to work with logs collected via Alloy and stored in Loki. It provides:
|
||||
|
||||
- Log filtering by level (error, warn, info, debug)
|
||||
- Visual metrics about log levels and counts
|
||||
- Error tracking and analysis
|
||||
- Time-series views of log patterns
|
||||
|
||||
The dashboard queries use the label `job="mattermost"` to filter logs from the Mattermost application.
|
||||
|
||||
## Best Practices
|
||||
|
||||
When modifying dashboards:
|
||||
- Export/backup existing dashboards before making major changes
|
||||
- Test queries in the Explore interface before adding to dashboards
|
||||
- Use variables for consistent filtering across panels
|
||||
- Maintain consistent styling
|
||||
|
||||
## More Information
|
||||
|
||||
- [Grafana Documentation](https://grafana.com/docs/grafana/latest/)
|
||||
- [Dashboard JSON Model](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/dashboard-json-model/)
|
||||
- [LogQL in Grafana](https://grafana.com/docs/grafana/latest/datasources/loki/query-editor/)
|
||||
@@ -50,7 +50,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "{filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"${log_level}\" | line_format \"{{.level}} [{{.caller}}] {{.msg}}\"",
|
||||
"expr": "{job=\"mattermost\"} | json | level=~\"${log_level}\" | line_format \"{{.level}} [{{.caller}}] {{.msg}}\"",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -140,7 +140,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"(debug|info|warn|error)\" [$__interval])) by (level)",
|
||||
"expr": "sum(count_over_time({job=\"mattermost\"} | json | level=~\"(debug|info|warn|error)\" [$__interval])) by (level)",
|
||||
"legendFormat": "{{level}}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
@@ -209,7 +209,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"error\" [$__range]))",
|
||||
"expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"error\" [$__range]))",
|
||||
"queryType": "instant",
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -277,7 +277,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"warn\" [$__range]))",
|
||||
"expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"warn\" [$__range]))",
|
||||
"queryType": "instant",
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -337,7 +337,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"info\" [$__range]))",
|
||||
"expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"info\" [$__range]))",
|
||||
"queryType": "instant",
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -397,7 +397,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"debug\" [$__range]))",
|
||||
"expr": "sum(count_over_time({job=\"mattermost\"} | json | level=\"debug\" [$__range]))",
|
||||
"queryType": "instant",
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -466,7 +466,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by(level) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json [30m]))",
|
||||
"expr": "sum by(level) (count_over_time({job=\"mattermost\"} | json [30m]))",
|
||||
"legendFormat": "{{level}}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
@@ -561,7 +561,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, sum by(caller) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=~\"warn|error\" [30m])))",
|
||||
"expr": "topk(10, sum by(caller) (count_over_time({job=\"mattermost\"} | json | level=~\"warn|error\" [30m])))",
|
||||
"legendFormat": "{{caller}}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
@@ -699,7 +699,7 @@
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, sum by(error) (count_over_time({filename=~\"/logs-node-.*/(mattermost|advancedLogs).log\"} | json | level=\"error\" [30m])))",
|
||||
"expr": "topk(10, sum by(error) (count_over_time({job=\"mattermost\"} | json | level=\"error\" [30m])))",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
|
||||
51
files/loki/README.md
Normal file
51
files/loki/README.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# Loki Configuration
|
||||
|
||||
This directory contains the configuration for Grafana Loki, which serves as the log storage and aggregation system in this environment.
|
||||
|
||||
## Overview
|
||||
|
||||
Loki is a horizontally-scalable, highly-available, multi-tenant log aggregation system designed by Grafana Labs. It is optimized for efficiently storing and querying logs from Kubernetes and microservices deployments.
|
||||
|
||||
## Current Setup
|
||||
|
||||
- Running as a single instance in this CS Repro environment
|
||||
- Available at http://localhost:3100
|
||||
- Receives logs from Grafana Alloy (which replaced Promtail)
|
||||
|
||||
## Configuration Details
|
||||
|
||||
The `loki-config.yaml` file contains the core configuration for the Loki service:
|
||||
|
||||
- **Storage**: Configured to use the local filesystem for simplicity
|
||||
- **Schema**: Uses the v11 schema with appropriate index/chunk configurations
|
||||
- **Compaction**: Enabled to optimize storage over time
|
||||
- **Limits**: Configured with reasonable defaults for this environment
|
||||
- **Frontend**: Configured for basic query handling
|
||||
|
||||
## Querying Logs
|
||||
|
||||
Logs can be queried in several ways:
|
||||
|
||||
1. **LogQL via Grafana**: The primary and most user-friendly method
|
||||
2. **Direct Loki API**: Using the API endpoints at http://localhost:3100/loki/api/v1/
|
||||
3. **LogCLI**: If installed, you can use Grafana's logcli tool
|
||||
|
||||
Example LogQL queries:
|
||||
```
|
||||
{job="mattermost"} | json | level=~"error|warn"
|
||||
{job="mattermost"} | json | level="error" | line_format "{{.msg}}"
|
||||
```
|
||||
|
||||
## Labels
|
||||
|
||||
The current configuration uses the following key labels:
|
||||
|
||||
- `job`: The source application ("mattermost")
|
||||
- `level`: The log level (error, warn, info, debug, etc.)
|
||||
- `service_name`: Identifies the service
|
||||
- `filename`: The source log file
|
||||
|
||||
## More Information
|
||||
|
||||
- [Grafana Loki Documentation](https://grafana.com/docs/loki/latest/)
|
||||
- [LogQL Query Language](https://grafana.com/docs/loki/latest/logql/)
|
||||
@@ -10,7 +10,7 @@ logins () {
|
||||
echo " - For more info https://github.com/coltoneshaw/CS-Repro-Mattermost#use-grafana"
|
||||
echo "- Prometheus: http://localhost:9090"
|
||||
echo "- Loki: http://localhost:3100/ready"
|
||||
echo "- Promtail: http://localhost:9080"
|
||||
echo "- Alloy: http://localhost:9080"
|
||||
echo "- PostgreSQL" "localhost:5432" with 'mmuser' / 'mmuser_password'
|
||||
echo
|
||||
echo ===========================================================
|
||||
|
||||
Reference in New Issue
Block a user