通过 SSH 连接 Pi 400 调试时,有时会断线,但此时 Pi 400 与 WiFi 可能仍然保持连接。请帮忙写一个分析脚本,检查它与 WiFi 的连接稳定性,以及与业务相关的 YouTube/Cloudflare/GitHub 的连接稳定性。
传到 Pi 后直接用:
# 传到 Pi
scp netwatch.sh [email protected]:~/
# 加执行权限(登录 Pi 后执行;下面用 bash 直接运行时此步可选)
chmod +x ~/netwatch.sh
# 单次完整报告(排查当前状态)
bash ~/netwatch.sh
# 持续监控(每60秒检测一次,同时写日志)
bash ~/netwatch.sh --watch --log
脚本检测四个层面:
WiFi 层:信号强度(dBm)并给出优秀/良好/一般/差评级、频率、连接速率、RX 丢弃包数和 TX 重传次数。信号低于 -80dBm 基本可以确定是断线根源。
网关层:ping 本地路由器,RTT 和丢包率。网关丢包说明 WiFi 本身不稳,跟外网无关。
业务节点:分别 ping YouTube、Cloudflare R2、Cloudflare Pages、GitHub、Gemini API,以及公共 DNS(8.8.8.8、1.1.1.1),每个节点显示丢包率和 RTT,颜色区分正常/警告/故障。
SSH 断线原因:检查 sshd 的 ClientAliveInterval 配置,为 0 时直接给出修复命令;同时抓取过去 24 小时的 SSH 断线日志;顺带检测 CPU 温度,Pi 过热会触发降频影响 WiFi 驱动稳定性。
SSH 断线最常见的两个原因:一是 sshd 没开 keepalive,长时间无操作后连接被路由器 NAT 超时踢掉;二是 WiFi 信号弱导致间歇性断联。脚本的输出能直接定位是哪种。
#!/bin/bash
# ─────────────────────────────────────────────────────────────
# netwatch.sh — Pi 400 网络稳定性监控
# 监控:WiFi 信号 / 本地网关 / 业务节点连通性
# 用法:
# 单次报告: bash netwatch.sh
# 持续监控: bash netwatch.sh --watch
# 写入日志: bash netwatch.sh --watch --log
# ─────────────────────────────────────────────────────────────
# ── 配置区 ────────────────────────────────────────────────────
# Seconds to sleep between rounds in --watch mode.
INTERVAL=60
# Number of echo requests sent per ping run (ping -c).
PING_COUNT=5
# Ping timeout in seconds (ping -W).
PING_TIMEOUT=5
# File that receives a copy of every log line when --log is given.
LOG_FILE="${HOME}/netwatch.log"
# The log file is trimmed automatically once it exceeds this many lines.
LOG_MAX_LINES=5000
# Business endpoints to probe: display name -> host name or IP address.
declare -A TARGETS=()
TARGETS["YouTube"]="www.youtube.com"
TARGETS["Cloudflare-R2"]="pub-1b62f0f9bae84486a796cbf53a3a27ee.r2.dev"
TARGETS["Cloudflare-Pages"]="pabloye.pages.dev"
TARGETS["GitHub"]="github.com"
TARGETS["Gemini-API"]="generativelanguage.googleapis.com"
TARGETS["DNS-Primary"]="8.8.8.8"
TARGETS["DNS-Secondary"]="1.1.1.1"
# ── 颜色 ──────────────────────────────────────────────────────
# Colour escape sequences (expanded later by `echo -e`). They default to
# empty strings and are only filled in when stdout is a terminal.
RED=''
YELLOW=''
GREEN=''
BLUE=''
CYAN=''
BOLD=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  YELLOW='\033[0;33m'
  GREEN='\033[0;32m'
  BLUE='\033[0;34m'
  CYAN='\033[0;36m'
  BOLD='\033[1m'
  NC='\033[0m'
fi
# ── 工具函数 ──────────────────────────────────────────────────
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
ts() {
  date '+%F %T'
}
#######################################
# Print a timestamped message and, when DO_LOG=1, append it to LOG_FILE.
# Globals:   DO_LOG, LOG_FILE, LOG_MAX_LINES (read)
# Arguments: message words; may contain the colour escape sequences
# Outputs:   the message on stdout (escapes interpreted by `echo -e`);
#            a colour-free copy appended to LOG_FILE when logging is on
# Returns:   0
#######################################
log() {
  local line plain lines keep
  line="$(ts) $*"
  echo -e "$line"
  [ "$DO_LOG" = "1" ] || return 0

  # The colour variables hold the literal text "\033[...m"; strip that text
  # (and any real ESC sequence) so the log file stays readable and greppable.
  plain=$(printf '%s\n' "$line" \
    | sed -e 's/\\033\[[0-9;]*m//g' -e $'s/\033\\[[0-9;]*m//g')
  printf '%s\n' "$plain" >> "$LOG_FILE"

  # Trim an oversized log down to its newest half.
  lines=$(wc -l < "$LOG_FILE" 2>/dev/null) || return 0
  keep=$((LOG_MAX_LINES / 2))
  if [ "${lines:-0}" -gt "$LOG_MAX_LINES" ]; then
    # Chain the steps so a failed tail never replaces the log with junk.
    tail -n "$keep" "$LOG_FILE" > "${LOG_FILE}.tmp" \
      && mv "${LOG_FILE}.tmp" "$LOG_FILE" \
      && echo "$(ts) [INFO] 日志已自动截断至 ${keep} 行" >> "$LOG_FILE"
  fi
  return 0
}
# Emit a highlighted section heading through log().
# Arguments: $1 - heading text
section() {
  local title="$1"
  log "${BOLD}${BLUE}── ${title} ──${NC}"
}
# ── WiFi 状态 ─────────────────────────────────────────────────
#######################################
# Report the state of the first wireless interface: SSID/BSSID, signal,
# frequency, TX bitrate, a signal-quality rating, IP address, default
# gateway and driver drop/retry counters.
# Globals:   GATEWAY (written), colour variables (read)
# Arguments: none
# Outputs:   report lines via log()
#######################################
check_wifi() {
  section "WiFi 状态"

  local iface
  iface=$(iw dev 2>/dev/null | awk '/Interface/{print $2}' | head -1)
  if [ -z "$iface" ]; then
    log "${RED}[WARN] 未找到无线网卡${NC}"
    return
  fi

  # Query the link once and parse the snapshot, so all fields are consistent
  # and iw is not spawned once per field.
  local link
  link=$(iw dev "$iface" link 2>/dev/null)

  # SSID and BSSID. Take everything after "SSID: " so that network names
  # containing spaces are not cut at the first word.
  local ssid bssid
  ssid=$(printf '%s\n' "$link" | sed -n 's/^[[:space:]]*SSID: //p' | head -1)
  bssid=$(printf '%s\n' "$link" | awk '/Connected to/{print $3; exit}')
  [ -z "$ssid" ] && ssid="(未连接)"
  log " 接口: ${CYAN}${iface}${NC} SSID: ${CYAN}${ssid}${NC} BSSID: ${bssid}"

  # Signal strength, frequency and TX bitrate.
  local signal freq bitrate
  signal=$(printf '%s\n' "$link" | awk '/signal:/{print $2, $3; exit}')
  freq=$(printf '%s\n' "$link" | awk '/freq:/{print $2; exit}')
  bitrate=$(printf '%s\n' "$link" | awk '/tx bitrate:/{print $3, $4; exit}')
  log " 信号: ${CYAN}${signal:-N/A}${NC} 频率: ${freq:-N/A} MHz 速率: ${bitrate:-N/A}"

  # Signal quality rating; skipped when the value is not a plain integer so
  # an unparsable reading is not reported as "poor".
  local dbm="${signal%% *}"
  if [[ "$dbm" =~ ^-?[0-9]+$ ]]; then
    if [ "$dbm" -ge -60 ]; then
      log " 质量: ${GREEN}优秀 (≥-60 dBm)${NC}"
    elif [ "$dbm" -ge -70 ]; then
      log " 质量: ${GREEN}良好 (-70~-60 dBm)${NC}"
    elif [ "$dbm" -ge -80 ]; then
      log " 质量: ${YELLOW}一般 (-80~-70 dBm),可能出现丢包${NC}"
    else
      log " 质量: ${RED}差 (<-80 dBm),建议靠近路由器${NC}"
    fi
  fi

  # IP address of the wireless interface.
  local ip
  ip=$(ip addr show "$iface" 2>/dev/null | awk '/inet /{print $2}' | head -1)
  log " IP地址: ${CYAN}${ip:-未获取}${NC}"

  # Default gateway; stored globally so check_gateway can reuse it.
  GATEWAY=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
  log " 网关: ${CYAN}${GATEWAY:-未知}${NC}"

  # Driver statistics (dropped RX packets / TX retries).
  local rx_drop tx_retry
  rx_drop=$(cat /sys/class/net/"$iface"/statistics/rx_dropped 2>/dev/null)
  tx_retry=$(iw dev "$iface" station dump 2>/dev/null | awk '/tx retries/{print $3; exit}')
  if [ -n "$rx_drop" ]; then
    log " RX丢弃: ${rx_drop}包 TX重传: ${tx_retry:-N/A}次"
  fi
}
# ── 网关连通性 ────────────────────────────────────────────────
#######################################
# Ping the default gateway and report loss/RTT.
# Globals:   GATEWAY (read; filled from the routing table when empty)
# Arguments: none
# Outputs:   report lines via log() / ping_target()
#######################################
check_gateway() {
  section "本地网关"

  # Look the gateway up only when no earlier step has recorded one.
  [ -n "$GATEWAY" ] \
    || GATEWAY=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)

  if [ -n "$GATEWAY" ]; then
    ping_target "网关" "$GATEWAY"
    return
  fi
  log " ${RED}[FAIL] 无法获取网关地址${NC}"
}
# ── 通用 Ping 检测 ────────────────────────────────────────────
#######################################
# Ping one host and log a one-line verdict with packet loss and average RTT.
# Globals:   PING_COUNT, PING_TIMEOUT, colour variables (read)
# Arguments: $1 - display name, $2 - host name or IP address
# Outputs:   one report line via log()
# Returns:   1 when the host is unreachable, DNS fails or no statistics
#            line is found; otherwise the status of the final log call
#######################################
ping_target() {
  local name="$1" host="$2"
  local result loss loss_int avg_rtt

  # Force the C locale so the text matched below is never translated.
  result=$(LC_ALL=C ping -c "$PING_COUNT" -W "$PING_TIMEOUT" "$host" 2>&1)

  if printf '%s\n' "$result" \
    | grep -qE 'Network is unreachable|Name or service not known|Temporary failure in name resolution|[Uu]nknown host'; then
    log " ${RED}[FAIL]${NC} ${BOLD}${name}${NC} (${host}) — 网络不可达或 DNS 失败"
    return 1
  fi

  # Loss may be fractional (e.g. "33.3333% packet loss" with 3 probes);
  # capture the whole number and compare on its integer part only.
  loss=$(printf '%s\n' "$result" \
    | sed -nE 's/.*[ ,]([0-9]+(\.[0-9]+)?)% packet loss.*/\1/p' | head -1)
  # Average RTT is the second field of "rtt min/avg/max/mdev = a/b/c/d ms"
  # (or "round-trip min/avg/max = ..." on some ping implementations).
  avg_rtt=$(printf '%s\n' "$result" \
    | sed -nE 's#^(rtt|round-trip).*= [0-9.]+/([0-9.]+)/.*#\2#p' | head -1)

  if [ -z "$loss" ]; then
    log " ${RED}[FAIL]${NC} ${BOLD}${name}${NC} (${host}) — 无响应"
    return 1
  fi
  loss_int=${loss%%.*}

  local status_color="$GREEN" status="OK"
  if [ "$loss_int" -ge 50 ]; then
    status_color="$RED"; status="严重丢包"
  elif [ "$loss_int" -ge 20 ]; then
    status_color="$RED"; status="丢包"
  elif [ "$loss_int" -ge 5 ]; then
    status_color="$YELLOW"; status="轻微丢包"
  fi

  local rtt_color="$GREEN"
  if [ -n "$avg_rtt" ]; then
    local rtt_int=${avg_rtt%.*}
    if [ "$rtt_int" -ge 300 ] 2>/dev/null; then
      rtt_color="$RED"
    elif [ "$rtt_int" -ge 150 ] 2>/dev/null; then
      rtt_color="$YELLOW"
    fi
  fi

  log " [${status_color}${status}${NC}] ${BOLD}${name}${NC} (${host}) — 丢包: ${status_color}${loss_int}%${NC} RTT: ${rtt_color}${avg_rtt:-N/A} ms${NC}"
}
# ── 业务节点检测 ──────────────────────────────────────────────
#######################################
# Ping every endpoint configured in TARGETS.
# The well-known endpoints are probed first in a fixed order; any further
# entries found in TARGETS follow, sorted by name. Names from the fixed
# list that have no entry in TARGETS are skipped.
# Globals:   TARGETS (read)
# Arguments: none
# Outputs:   one report line per endpoint via ping_target()
#######################################
check_targets() {
  section "业务节点连通性"

  local -a preferred=(
    YouTube Cloudflare-R2 Cloudflare-Pages GitHub Gemini-API
    DNS-Primary DNS-Secondary
  )
  local -A seen=()
  local name

  for name in "${preferred[@]}"; do
    seen["$name"]=1
    # Skip names removed from the configuration instead of pinging "".
    [ -n "${TARGETS[$name]:-}" ] || continue
    ping_target "$name" "${TARGETS[$name]}"
  done

  # Pick up endpoints that were added to TARGETS but are not in the fixed
  # list; sort them because associative-array key order is unspecified.
  while IFS= read -r name; do
    [ -n "$name" ] || continue
    [ -z "${seen[$name]:-}" ] || continue
    [ -n "${TARGETS[$name]:-}" ] || continue
    ping_target "$name" "${TARGETS[$name]}"
  done < <(printf '%s\n' "${!TARGETS[@]}" | LC_ALL=C sort)
}
# ── SSH 断线原因分析 ──────────────────────────────────────────
#######################################
# Look for likely causes of dropped SSH sessions: the sshd keepalive
# settings, disconnect messages in the journal for the last 24 hours,
# the system load and the CPU temperature.
# Globals:   colour variables (read)
# Arguments: none
# Outputs:   report lines via log()
#######################################
check_ssh_stability() {
  section "SSH 断线原因分析"

  # Keepalive settings. Read the effective configuration once; if the plain
  # call fails, retry through non-interactive sudo (never prompts).
  local sshd_cfg ka_interval ka_count
  sshd_cfg=$(sshd -T 2>/dev/null) \
    || sshd_cfg=$(sudo -n /usr/sbin/sshd -T 2>/dev/null) \
    || sshd_cfg=""
  ka_interval=$(printf '%s\n' "$sshd_cfg" | awk '$1 == "clientaliveinterval" {print $2; exit}')
  ka_count=$(printf '%s\n' "$sshd_cfg" | awk '$1 == "clientalivecountmax" {print $2; exit}')
  if [ -n "$ka_interval" ]; then
    log " sshd ClientAliveInterval: ${ka_interval}s CountMax: ${ka_count}"
    if [ "$ka_interval" = "0" ]; then
      log " ${YELLOW}[建议] 服务端未开启 keepalive,长时间无操作会断线${NC}"
      log " ${CYAN}修复:sudo sh -c 'echo \"ClientAliveInterval 60\" >> /etc/ssh/sshd_config'${NC}"
      log " ${CYAN} sudo sh -c 'echo \"ClientAliveCountMax 3\" >> /etc/ssh/sshd_config'${NC}"
      log " ${CYAN} sudo systemctl restart sshd${NC}"
    else
      log " ${GREEN}服务端 keepalive 已配置${NC}"
    fi
  else
    # Say so explicitly instead of silently dropping the keepalive check.
    log " ${YELLOW}[提示] 无法读取 sshd 配置(需要 root 权限,可用 sudo 运行本脚本)${NC}"
  fi

  # Recent SSH disconnect messages (both common unit names are queried).
  log " 最近 SSH 断线记录(最近10条):"
  local disconnects line
  disconnects=$(journalctl -u ssh -u sshd --since "24 hours ago" --no-pager 2>/dev/null \
    | grep -i "disconnect\|broken pipe\|timeout\|reset by peer" \
    | tail -10)
  if [ -n "$disconnects" ]; then
    while IFS= read -r line; do
      log " ${YELLOW}${line}${NC}"
    done <<< "$disconnects"
  else
    log " ${GREEN}最近 24 小时内无异常断线记录${NC}"
  fi

  # System load and CPU temperature.
  local load temp
  load=$(uptime | awk -F'load average:' '{print $2}')
  temp=$(vcgencmd measure_temp 2>/dev/null \
    | sed -nE 's/[^0-9]*([0-9]+(\.[0-9]+)?).*/\1/p' | head -1)
  log " 系统负载:${load}"
  if [ -n "$temp" ]; then
    # Pick exactly one colour, hottest threshold first. awk does the decimal
    # comparison, so no extra tool such as bc is required.
    local temp_color="$GREEN"
    case "$(awk -v t="$temp" 'BEGIN { if (t > 70) print "hot"; else if (t > 60) print "warm"; else print "ok" }')" in
      hot) temp_color="$RED" ;;
      warm) temp_color="$YELLOW" ;;
    esac
    log " CPU温度: ${temp_color}${temp}°C${NC}(>70°C 会触发降频,影响 WiFi 驱动稳定性)"
  fi
}
# ── 单次完整报告 ──────────────────────────────────────────────
#######################################
# Produce a single full report: banner, then the WiFi, gateway, endpoint
# and SSH checks in that order, then a closing rule.
# Arguments: none
# Outputs:   report lines via log() and the check functions
#######################################
run_once() {
  local rule="${BOLD}${BLUE}════════════════════════════════════════${NC}"
  local step

  log "$rule"
  log "${BOLD} Pi 400 网络稳定性报告${NC}"
  log "$rule"
  for step in check_wifi check_gateway check_targets check_ssh_stability; do
    "$step"
  done
  log "$rule"
}
# ── 持续监控模式 ──────────────────────────────────────────────
#######################################
# Continuous monitoring: repeat the WiFi, gateway and endpoint checks
# every INTERVAL seconds until the process is interrupted.
# Globals:   INTERVAL, DO_LOG, LOG_FILE (read)
# Arguments: none
# Outputs:   report lines via log() and the check functions
#######################################
run_watch() {
  log "持续监控模式启动(间隔 ${INTERVAL}s,Ctrl+C 停止)"
  if [ "$DO_LOG" = "1" ]; then
    log "日志写入: $LOG_FILE"
  fi

  local round
  for ((round = 1; ; round++)); do
    log "${BOLD}${BLUE}══ 第 ${round} 轮检测 ══${NC}"
    check_wifi
    check_gateway
    check_targets
    log "下次检测: ${INTERVAL}s 后"
    sleep "$INTERVAL"
  done
}
# ── 入口 ──────────────────────────────────────────────────────
# Parse the command line and dispatch.
#   --watch      continuous monitoring instead of a single report
#   --log        also append every log line to LOG_FILE
#   -h, --help   print usage and exit
# Unknown options are reported on stderr (so a mistyped flag does not go
# unnoticed) and otherwise ignored.
DO_WATCH=0
DO_LOG=0
for arg in "$@"; do
  case "$arg" in
    --watch)
      DO_WATCH=1
      ;;
    --log)
      DO_LOG=1
      ;;
    -h | --help)
      echo "用法: bash netwatch.sh [--watch] [--log]"
      echo " (无参数) 单次完整报告"
      echo " --watch 持续监控,每 ${INTERVAL}s 检测一次"
      echo " --log 同时写入 $LOG_FILE"
      exit 0
      ;;
    *)
      printf '[WARN] 未知参数: %s(使用 --help 查看用法)\n' "$arg" >&2
      ;;
  esac
done

if [ "$DO_WATCH" = "1" ]; then
  run_watch
else
  run_once
fi
运行测试效果:
bash netwatch.sh
2026-04-06 11:13:53 ════════════════════════════════════════
2026-04-06 11:13:53 Pi 400 网络稳定性报告
2026-04-06 11:13:53 ════════════════════════════════════════
2026-04-06 11:13:53 ── WiFi 状态 ──
2026-04-06 11:13:53 接口: wlan0 SSID: MOVISTAR-WIFI7-5C90 BSSID: dc:08:da:84:5c:9e
2026-04-06 11:13:53 信号: -58 dBm 频率: 5180 MHz 速率: 325.0 MBit/s
2026-04-06 11:13:54 质量: 优秀 (≥-60 dBm)
2026-04-06 11:13:54 IP地址: 192.168.1.38/24
2026-04-06 11:13:55 网关: 192.168.1.1
2026-04-06 11:13:55 RX丢弃: 0包 TX重传: N/A次
2026-04-06 11:13:55 ── 本地网关 ──
2026-04-06 11:13:59 [OK] 网关 (192.168.1.1) — 丢包: 0% RTT: 2.560 ms
2026-04-06 11:13:59 ── 业务节点连通性 ──
2026-04-06 11:14:03 [OK] YouTube (www.youtube.com) — 丢包: 0% RTT: 22.292 ms
2026-04-06 11:14:07 [OK] Cloudflare-R2 (pub-1b62f0f9bae84486a796cbf53a3a27ee.r2.dev) — 丢包: 0% RTT: 18.568 ms
2026-04-06 11:14:11 [OK] Cloudflare-Pages (pabloye.pages.dev) — 丢包: 0% RTT: 9.701 ms
2026-04-06 11:14:16 [OK] GitHub (github.com) — 丢包: 0% RTT: 53.299 ms
2026-04-06 11:14:20 [OK] Gemini-API (generativelanguage.googleapis.com) — 丢包: 0% RTT: 11.181 ms
2026-04-06 11:14:24 [OK] DNS-Primary (8.8.8.8) — 丢包: 0% RTT: 12.489 ms
2026-04-06 11:14:28 [OK] DNS-Secondary (1.1.1.1) — 丢包: 0% RTT: 9.895 ms
2026-04-06 11:14:28 ── SSH 断线原因分析 ──
2026-04-06 11:14:28 最近 SSH 断线记录(最近10条):
2026-04-06 11:14:28 最近 24 小时内无异常断线记录
2026-04-06 11:14:28 系统负载: 0.00, 0.00, 0.00
2026-04-06 11:14:28 CPU温度: 32.1°C(>70°C 会触发降频,影响 WiFi 驱动稳定性)
2026-04-06 11:14:28 ════════════════════════════════════════