OpenClaw高可用生产环境部署完全指南
开发环境跑起来只需要Docker Compose,但生产环境需要高可用架构。本文给出OpenClaw从单机到高可用的完整部署方案,覆盖Redis集群、MySQL主从复制、Nginx负载均衡三个核心组件。
一、生产环境架构全景
# 高可用架构示意 (最小化生产配置: 3台API服务器, MySQL/Redis可用云托管服务代替自建节点)
#
# 公网 ──► Nginx(主备) ──► OpenClaw-API x3
# │ │
# MySQL主库 │ Redis集群(3主3从)
# MySQL从库 │
# │
# Kafka集群(3节点)
#
# 服务器规格建议:
# API节点: 4核8G x3 (无状态,可水平扩展)
# DB节点: 8核32G x2 (主从各一台)
# Redis节点: 4核16G x3 (集群模式)
# Kafka节点: 4核16G x3 (Broker)
#
# 最低成本方案(云服务器):
# 阿里云 4核8G ECS x3 ≈ 1200元/月
# RDS MySQL 8核32G ≈ 800元/月
# Redis企业版 ≈ 300元/月
二、Redis Cluster 部署配置
# 6-node Redis Cluster (3 masters + 3 replicas), two instances on each of
# three hosts, so that losing any single host still leaves a majority of
# masters available to elect a failover.
# /etc/redis/redis-7001.conf
# (one file per instance: change the port, the nodes-*.conf name and the
#  private address in `bind` to match the host the instance runs on)
port 7001
# Listen on loopback plus this host's private address.
bind 127.0.0.1 192.168.1.1
# With no password configured, protected mode rejects every non-loopback
# client, which blocks `redis-cli --cluster create` and the API nodes.
# Disable it only on a trusted, firewalled network; otherwise keep it on and
# configure requirepass/masterauth instead.
protected-mode no
cluster-enabled yes
cluster-config-file nodes-7001.conf
cluster-node-timeout 15000
appendonly yes
appendfsync everysec
# On 192.168.1.1: start its two instances
redis-server /etc/redis/redis-7001.conf --daemonize yes
redis-server /etc/redis/redis-7002.conf --daemonize yes
# On 192.168.1.2 start 7003 and 7004, on 192.168.1.3 start 7005 and 7006,
# each from its own redis-<port>.conf in the same way.
# Create the cluster (--cluster-replicas 1 = one replica per master).
# redis-cli tries to put each replica on a different host than its master;
# confirm the placement with `cluster nodes` below.
redis-cli --cluster create \
192.168.1.1:7001 192.168.1.1:7002 \
192.168.1.2:7003 192.168.1.2:7004 \
192.168.1.3:7005 192.168.1.3:7006 \
--cluster-replicas 1 --cluster-yes
# Verify the cluster state
redis-cli -p 7001 cluster info | grep cluster_state
# cluster_state:ok
redis-cli -p 7001 cluster nodes
# lists 6 nodes: 3 masters and 3 replicas
三、MySQL主从复制配置
# Primary settings: /etc/mysql/conf.d/primary.cnf
[mysqld]
server-id = 1
log-bin = mysql-bin
binlog-format = ROW
# No binlog-do-db filter on purpose: DDL is always logged as a statement and
# is filtered by the *default* database, so e.g. `CREATE TABLE openclaw.t ...`
# issued without `USE openclaw` would silently be left out of the binlog and
# break both replication and point-in-time recovery. If only one schema must
# be replicated, filter on the replica side instead
# (replicate-wild-do-table = openclaw.%).
sync_binlog = 1 # sync the binlog to disk at every commit so committed transactions are not lost
innodb_flush_log_at_trx_commit = 1 # flush the InnoDB redo log at every commit (strongest durability)
# Run on the primary: create the account that replicas use to connect.
# 'repl'@'192.168.1.%' only matches clients connecting from 192.168.1.x.
# The password below is an example value; replace it before real use.
CREATE USER 'repl'@'192.168.1.%' IDENTIFIED BY 'Repl@passwd123';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'192.168.1.%';
FLUSH PRIVILEGES;
SHOW MASTER STATUS;
# Example output: the current binlog file and position, which the replica
# needs as its starting coordinates.
# +------------------+----------+
# | File | Position |
# +------------------+----------+
# | mysql-bin.000001 | 154 |
# Replica settings: /etc/mysql/conf.d/replica.cnf
[mysqld]
server_id       = 2
relay_log       = relay-bin
# Keep the replica read-only for ordinary clients ...
read_only       = ON
# ... and block writes from super users as well.
super_read_only = ON
# Run on the replica: point it at the primary and start replication.
# MASTER_LOG_FILE / MASTER_LOG_POS must be the File / Position values that
# SHOW MASTER STATUS reported on the primary.
CHANGE MASTER TO
MASTER_HOST='192.168.1.1',
MASTER_USER='repl',
MASTER_PASSWORD='Repl@passwd123',
MASTER_LOG_FILE='mysql-bin.000001',
MASTER_LOG_POS=154;
START SLAVE;
SHOW SLAVE STATUS\G
# Fields to check in the output:
# Slave_IO_Running: Yes
# Slave_SQL_Running: Yes
# Seconds_Behind_Master: 0 <- zero lag, replication is healthy
四、Nginx负载均衡配置
# /etc/nginx/conf.d/openclaw.conf
upstream openclaw_api {
    least_conn;                          # send each request to the peer with the fewest active connections
    server 192.168.1.10:8080 weight=3;   # API node 1
    server 192.168.1.11:8080 weight=3;   # API node 2
    server 192.168.1.12:8080 weight=3;   # API node 3
    keepalive 32;                        # idle upstream connections kept open per worker to avoid re-handshaking
}

server {
    listen 443 ssl http2;
    server_name api.openclaw.yourdomain.com;
    ssl_certificate /etc/letsencrypt/live/yourdomain.com/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/yourdomain.com/privkey.pem;

    # Upstream timeouts for ad requests. These apply per attempt, and the
    # send/read values bound the gap between two successive I/O operations,
    # not the whole exchange: one attempt can take ~130ms and two attempts
    # ~260ms, so these settings alone do not guarantee a 100ms total budget.
    proxy_connect_timeout 50ms;
    proxy_send_timeout 80ms;
    proxy_read_timeout 80ms;

    # Both directives are required for the upstream keepalive pool to be used.
    proxy_http_version 1.1;
    proxy_set_header Connection '';

    location /api/v1/ad {
        proxy_pass http://openclaw_api;

        # Retry once on another node for connection errors, timeouts and
        # upstream 502/503, but never start a retry after 100ms have passed.
        proxy_next_upstream error timeout http_502 http_503;
        proxy_next_upstream_tries 2;
        proxy_next_upstream_timeout 100ms;

        # When the attempts are exhausted, answer with the fallback ad instead
        # of an error page. proxy_intercept_errors is needed so that a 502/503
        # produced by the upstream itself is handled too.
        proxy_intercept_errors on;
        error_page 502 503 504 = @ad_fallback;
    }

    # Fallback ad response. The JSON body is a placeholder: replace it with
    # the payload your ad clients expect.
    location @ad_fallback {
        default_type application/json;
        return 200 '{"ads":[],"fallback":true}';
    }

    # Monitoring endpoint, reachable from loopback and internal networks only.
    location /nginx_status {
        stub_status;
        allow 127.0.0.1;
        allow 192.168.1.0/24;   # the subnet used by the hosts in this guide
        allow 10.0.0.0/8;
        deny all;
    }
}
五、健康检查与自动化运维
#!/bin/bash
# healthcheck.sh - intended to run once per minute (e.g. from cron).
#
# Checks, in order: the /health endpoint of every API node, replication lag
# on the MySQL replica, and the state of the local Redis Cluster node. Every
# problem found is reported as one [ALERT]/[WARN] line on stdout.
#
# Environment:
#   MYSQL_MONITOR_PASSWORD  password of the MySQL "monitor" account
#                           (falls back to the example value monitor123).

set -u

readonly -a API_HOSTS=(192.168.1.10 192.168.1.11 192.168.1.12)
readonly MYSQL_REPLICA_HOST=192.168.1.2
readonly MAX_LAG_SECONDS=5
readonly PROBE_TIMEOUT_SECONDS=3

# Probe each API node's /health endpoint and report the ones that do not
# answer with "status":"ok" within the probe timeout.
check_api_nodes() {
  local host
  for host in "${API_HOSTS[@]}"; do
    if ! curl -sf --max-time "$PROBE_TIMEOUT_SECONDS" "http://${host}:8080/health" \
      | grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"'; then
      echo "[ALERT] API节点 $host 异常!"
      # No `nginx -s reload` here: reloading an unchanged configuration does
      # not remove a server from the upstream, it only restarts the workers.
      # Nginx already skips failing peers through its passive checks
      # (max_fails / fail_timeout); to take a node out explicitly, mark it
      # `down` in the upstream block first and reload afterwards.
    fi
  done
}

# Read Seconds_Behind_Master from the replica and report excessive lag.
# A missing or non-numeric value (connection failure, or NULL when the
# replication threads are stopped) is reported as an alert instead of being
# fed to an integer comparison.
check_mysql_lag() {
  local password=${MYSQL_MONITOR_PASSWORD:-monitor123}
  local lag

  # The password is passed through an option file on a file descriptor so it
  # never appears in the process list. --defaults-extra-file has to be the
  # first option. Connection errors are discarded on purpose: they surface
  # below as an empty lag value.
  lag=$(mysql --defaults-extra-file=<(printf '[client]\npassword="%s"\n' "$password") \
    --connect-timeout="$PROBE_TIMEOUT_SECONDS" \
    -h "$MYSQL_REPLICA_HOST" -u monitor \
    -e 'SHOW SLAVE STATUS\G' 2>/dev/null \
    | awk '$1 == "Seconds_Behind_Master:" { print $2 }')

  if [[ ! "$lag" =~ ^[0-9]+$ ]]; then
    echo "[ALERT] MySQL复制异常: Seconds_Behind_Master=${lag:-未知}"
  elif (( 10#$lag > MAX_LAG_SECONDS )); then
    echo "[WARN] MySQL主从延迟: ${lag}秒"
  fi
}

# Report when the local cluster node does not see the cluster as healthy.
check_redis_cluster() {
  if ! redis-cli -p 7001 cluster info | grep -q 'cluster_state:ok'; then
    echo '[ALERT] Redis集群状态异常'
  fi
}

check_api_nodes
check_mysql_lag
check_redis_cluster
总结:OpenClaw高可用的三个支柱——Redis Cluster保障高并发频控不挂,MySQL主从保障数据持久性,Nginx多实例保障接入层不停服。生产环境务必配置健康检查和告警,故障要在用户感知之前就被发现。
