pi400网络连接分析脚本

ssh 连接 pi400 调试时有时会断线,但此时 pi400 可能仍与 WiFi 保持连接。请帮写个分析脚本,检查它与 WiFi 的连接稳定性,以及与业务相关的 YouTube/Cloudflare/GitHub 的连接稳定性。

传到 Pi 后直接用:

# 传到 Pi
scp netwatch.sh <用户名>@<Pi的IP地址>:~/

# 加执行权限(登录 Pi 后执行;始终用 bash 运行时可省略)
chmod +x ~/netwatch.sh

# 单次完整报告(排查当前状态)
bash ~/netwatch.sh

# 持续监控(每60秒检测一次,同时写日志)
bash ~/netwatch.sh --watch --log

脚本检测四个层面:

WiFi 层:信号强度(dBm)并给出优/良/差评级、频率、连接速率、RX丢弃包数。信号低于 -80dBm 基本可以确定是断线根源。

网关层:ping 本地路由器,RTT 和丢包率。网关丢包说明 WiFi 本身不稳,跟外网无关。

业务节点:分别 ping YouTube、Cloudflare R2、Cloudflare Pages、GitHub、Gemini API,以及 8.8.8.8、1.1.1.1 两个公共 DNS 地址,每个节点显示丢包率和 RTT,颜色区分正常/警告/故障。

SSH 断线原因:检查 sshd 的 ClientAliveInterval 配置,为 0 时直接给出修复命令;同时抓取过去 24 小时的 SSH 断线日志;顺带检测 CPU 温度,Pi 过热会触发降频影响 WiFi 驱动稳定性。

SSH 断线最常见的两个原因:一是 sshd 没开 keepalive,长时间无操作后连接被路由器 NAT 超时踢掉;二是 WiFi 信号弱导致间歇性断联。脚本的输出能直接定位是哪种。

#!/bin/bash
# ─────────────────────────────────────────────────────────────
# netwatch.sh — Pi 400 网络稳定性监控
# 监控:WiFi 信号 / 本地网关 / 业务节点连通性
# 用法:
#   单次报告:  bash netwatch.sh
#   持续监控:  bash netwatch.sh --watch
#   写入日志:  bash netwatch.sh --watch --log
# ─────────────────────────────────────────────────────────────

# ── Configuration ─────────────────────────────────────────────
# Seconds to sleep between rounds in watch mode.
INTERVAL=60
# Number of echo requests sent per ping run (ping -c).
PING_COUNT=5
# Timeout in seconds handed to ping -W.
PING_TIMEOUT=5
# File that receives log lines when logging is switched on.
LOG_FILE="${HOME}/netwatch.log"
# The log file is trimmed once it grows past this many lines.
LOG_MAX_LINES=5000

# Probed endpoints: display name -> host name or IP address.
declare -A TARGETS=()
TARGETS["YouTube"]="www.youtube.com"
TARGETS["Cloudflare-R2"]="pub-1b62f0f9bae84486a796cbf53a3a27ee.r2.dev"
TARGETS["Cloudflare-Pages"]="pabloye.pages.dev"
TARGETS["GitHub"]="github.com"
TARGETS["Gemini-API"]="generativelanguage.googleapis.com"
TARGETS["DNS-Primary"]="8.8.8.8"
TARGETS["DNS-Secondary"]="1.1.1.1"

# ── Colors ────────────────────────────────────────────────────
# Every color variable starts out empty; the escape sequences (kept as literal
# backslash text) are filled in only when stdout is a terminal.
RED=''
YELLOW=''
GREEN=''
BLUE=''
CYAN=''
BOLD=''
NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    YELLOW='\033[0;33m'
    GREEN='\033[0;32m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    NC='\033[0m'
fi

# ── Helper functions ──────────────────────────────────────────

# Print the current local time as "YYYY-MM-DD HH:MM:SS".
ts() {
    date +'%F %T'
}

# Print a timestamped message on stdout and, when DO_LOG is "1", append it to
# LOG_FILE as well.
# Globals:   DO_LOG, LOG_FILE, LOG_MAX_LINES (read)
# Arguments: the message words (joined with spaces); may embed color variables
# Outputs:   the message on stdout, with backslash escapes rendered by echo -e
log() {
    local line
    line="$(ts) $*"
    echo -e "$line"

    [ "$DO_LOG" = "1" ] || return 0

    # The color variables hold literal "\033[...m" text, so writing the raw
    # line would fill the file with escape codes. Strip both that literal form
    # and any real ESC sequences before appending.
    local plain
    plain=$(printf '%s\n' "$line" \
        | sed -e 's/\\033\[[0-9;]*m//g' -e $'s/\x1b\\[[0-9;]*m//g')
    printf '%s\n' "$plain" >> "$LOG_FILE"

    # Trim an overgrown log down to the newest half of the allowed size.
    local count
    count=$(wc -l < "$LOG_FILE")
    if [ "$count" -gt "$LOG_MAX_LINES" ]; then
        local keep=$((LOG_MAX_LINES / 2))
        tail -n "$keep" "$LOG_FILE" > "${LOG_FILE}.tmp"
        mv "${LOG_FILE}.tmp" "$LOG_FILE"
        echo "$(ts) [INFO] 日志已自动截断至 ${keep} 行" >> "$LOG_FILE"
    fi
}

# Emit a section heading for the given title through log(), in bold blue.
section() {
    local title="$1"
    log "${BOLD}${BLUE}── ${title} ──${NC}"
}

# ── WiFi status ───────────────────────────────────────────────

# Report the state of the first wireless interface listed by `iw dev`:
# SSID/BSSID, signal, frequency, TX bitrate, a signal rating, IPv4 address,
# default gateway and drop/retry counters.
# Globals:   GATEWAY (written) - default gateway taken from the routing table
#            RED, YELLOW, GREEN, CYAN, NC (read)
# Outputs:   report lines via log(); a warning and early return when no
#            wireless interface is found
check_wifi() {
    section "WiFi 状态"

    local iface
    iface=$(iw dev 2>/dev/null | awk '/Interface/{print $2}' | head -1)
    if [ -z "$iface" ]; then
        log "${RED}[WARN] 未找到无线网卡${NC}"
        return
    fi

    # Query the link once so every field below comes from the same snapshot.
    local link
    link=$(iw dev "$iface" link 2>/dev/null)

    # SSID and BSSID. Everything after "SSID:" is kept so that network names
    # containing spaces are not cut at the first word.
    local ssid bssid
    ssid=$(awk '/SSID:/{sub(/^.*SSID:[ \t]*/, ""); print; exit}' <<<"$link")
    bssid=$(awk '/Connected to/{print $3}' <<<"$link")
    [ -z "$ssid" ] && ssid="(未连接)"
    log "  接口: ${CYAN}${iface}${NC}  SSID: ${CYAN}${ssid}${NC}  BSSID: ${bssid}"

    # Signal strength, frequency and TX bitrate.
    local signal freq bitrate
    signal=$(awk '/signal/{print $2, $3}' <<<"$link")
    freq=$(awk '/freq/{print $2}' <<<"$link")
    bitrate=$(awk '/tx bitrate/{print $3, $4}' <<<"$link")
    log "  信号: ${CYAN}${signal:-N/A}${NC}  频率: ${freq:-N/A} MHz  速率: ${bitrate:-N/A}"

    # Signal rating. Only rate a value that parsed as an integer; an
    # unparsable reading is left unrated instead of being reported as poor.
    local dbm=${signal%% *}
    if [[ "$dbm" =~ ^-?[0-9]+$ ]]; then
        if [ "$dbm" -ge -60 ]; then
            log "  质量: ${GREEN}优秀 (≥-60 dBm)${NC}"
        elif [ "$dbm" -ge -70 ]; then
            log "  质量: ${GREEN}良好 (-70~-60 dBm)${NC}"
        elif [ "$dbm" -ge -80 ]; then
            log "  质量: ${YELLOW}一般 (-80~-70 dBm),可能出现丢包${NC}"
        else
            log "  质量: ${RED}差 (<-80 dBm),建议靠近路由器${NC}"
        fi
    fi

    # IPv4 address of the interface.
    local ip_addr
    ip_addr=$(ip addr show "$iface" 2>/dev/null | awk '/inet /{print $2}' | head -1)
    log "  IP地址: ${CYAN}${ip_addr:-未获取}${NC}"

    # Default gateway (kept in the global GATEWAY).
    GATEWAY=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)
    log "  网关: ${CYAN}${GATEWAY:-未知}${NC}"

    # Interface drop counter and station TX retries.
    local rx_drop tx_retry
    rx_drop=$(cat /sys/class/net/"$iface"/statistics/rx_dropped 2>/dev/null)
    tx_retry=$(iw dev "$iface" station dump 2>/dev/null | awk '/tx retries/{print $3}')
    if [ -n "$rx_drop" ]; then
        log "  RX丢弃: ${rx_drop}包  TX重传: ${tx_retry:-N/A}次"
    fi
}

# ── Gateway reachability ──────────────────────────────────────

# Ping the default gateway. GATEWAY is used when already set; otherwise it is
# looked up from the default route. Logs a failure when no gateway is known.
check_gateway() {
    section "本地网关"

    # Fill GATEWAY from the routing table only when it is still empty.
    : "${GATEWAY:=$(ip route show default 2>/dev/null | awk '{print $3}' | head -1)}"

    if [ -n "$GATEWAY" ]; then
        ping_target "网关" "$GATEWAY"
        return
    fi

    log "  ${RED}[FAIL] 无法获取网关地址${NC}"
}

# ── Generic ping probe ────────────────────────────────────────

# Ping one host and log its packet loss and average RTT with a status label.
# Globals:   PING_COUNT, PING_TIMEOUT, RED, YELLOW, GREEN, BOLD, NC (read)
# Arguments: $1 - display name
#            $2 - host name or IP address
# Outputs:   one result line via log()
# Returns:   1 when the host is unreachable / unresolvable or ping printed no
#            loss statistics; otherwise the status of the final log call
ping_target() {
    local name="$1" host="$2"
    local result loss avg_rtt

    # LC_ALL=C pins ping's messages to English so the patterns below keep
    # matching on systems with a localized ping.
    result=$(LC_ALL=C ping -c "$PING_COUNT" -W "$PING_TIMEOUT" "$host" 2>&1)

    case "$result" in
        *"Network is unreachable"* | *"Name or service not known"* | *"Temporary failure in name resolution"*)
            log "  ${RED}[FAIL]${NC} ${BOLD}${name}${NC} (${host}) — 网络不可达或 DNS 失败"
            return 1
            ;;
    esac

    # The loss figure may be fractional (e.g. "33.3333% packet loss"); capture
    # the whole number so its trailing digits are not mistaken for the value.
    local loss_re='([0-9]+(\.[0-9]+)?)% packet loss'
    local rtt_re='(rtt|round-trip)[^=]*= *[0-9.]+/([0-9.]+)'
    if [[ "$result" =~ $loss_re ]]; then
        loss=${BASH_REMATCH[1]}
    fi
    if [[ "$result" =~ $rtt_re ]]; then
        avg_rtt=${BASH_REMATCH[2]}
    fi

    if [ -z "$loss" ]; then
        log "  ${RED}[FAIL]${NC} ${BOLD}${name}${NC} (${host}) — 无响应"
        return 1
    fi

    # Thresholds are compared on the integer part of the loss percentage.
    local loss_int=${loss%.*}
    local status_color="$GREEN" status="OK"
    if [ "$loss_int" -ge 50 ]; then
        status_color="$RED"; status="严重丢包"
    elif [ "$loss_int" -ge 20 ]; then
        status_color="$RED"; status="丢包"
    elif [ "$loss_int" -ge 5 ]; then
        status_color="$YELLOW"; status="轻微丢包"
    fi

    # RTT color is chosen from the integer part of the average; avg_rtt is
    # empty when ping printed no rtt summary line.
    local rtt_color="$GREEN"
    if [ -n "$avg_rtt" ]; then
        local rtt_int=${avg_rtt%.*}
        if [ "$rtt_int" -ge 300 ] 2>/dev/null; then
            rtt_color="$RED"
        elif [ "$rtt_int" -ge 150 ] 2>/dev/null; then
            rtt_color="$YELLOW"
        fi
    fi

    log "  [${status_color}${status}${NC}] ${BOLD}${name}${NC} (${host}) — 丢包: ${status_color}${loss}%${NC}  RTT: ${rtt_color}${avg_rtt:-N/A} ms${NC}"
}

# ── Business endpoint probes ──────────────────────────────────

# Ping every endpoint configured in TARGETS.
# The well-known names are probed first in a fixed order (an associative array
# has no stable order of its own); any other key present in TARGETS follows in
# sorted order, so adding an entry to TARGETS is enough to have it probed.
# Names from the fixed list that are missing from TARGETS are skipped.
# Globals:   TARGETS (read)
# Returns:   the status of the last ping_target call (0 when none ran)
check_targets() {
    section "业务节点连通性"

    local -a ordered=(YouTube Cloudflare-R2 Cloudflare-Pages GitHub Gemini-API DNS-Primary DNS-Secondary)
    local -A seen=()
    local -a extras=()
    local name rc=0

    for name in "${ordered[@]}"; do
        seen[$name]=1
        # Skip names that have no address configured instead of pinging "".
        [ -n "${TARGETS[$name]:-}" ] || continue
        ping_target "$name" "${TARGETS[$name]}"
        rc=$?
    done

    # Collect keys that are configured but not part of the fixed order.
    for name in "${!TARGETS[@]}"; do
        [ -n "${seen[$name]:-}" ] || extras+=("$name")
    done

    if [ "${#extras[@]}" -gt 0 ]; then
        mapfile -t extras < <(printf '%s\n' "${extras[@]}" | LC_ALL=C sort)
        for name in "${extras[@]}"; do
            ping_target "$name" "${TARGETS[$name]}"
            rc=$?
        done
    fi

    return "$rc"
}

# ── SSH disconnect analysis ───────────────────────────────────

# Report likely causes of dropped SSH sessions: the server keepalive settings,
# disconnect-related journal entries from the last 24 hours, the system load
# and the CPU temperature.
# Globals:   RED, YELLOW, GREEN, CYAN, NC (read)
# Outputs:   report lines via log()
check_ssh_stability() {
    section "SSH 断线原因分析"

    # Keepalive settings. sshd is run once; the sbin directories are added for
    # this lookup because they are often missing from a normal user's PATH.
    local sshd_cfg ka_interval ka_count
    sshd_cfg=$(PATH="${PATH}:/usr/sbin:/sbin"; sshd -T 2>/dev/null)
    ka_interval=$(awk '$1 == "clientaliveinterval"{print $2; exit}' <<<"$sshd_cfg")
    ka_count=$(awk '$1 == "clientalivecountmax"{print $2; exit}' <<<"$sshd_cfg")
    if [ -n "$ka_interval" ]; then
        log "  sshd ClientAliveInterval: ${ka_interval}s  CountMax: ${ka_count}"
        if [ "${ka_interval:-0}" -eq 0 ]; then
            log "  ${YELLOW}[建议] 服务端未开启 keepalive,长时间无操作会断线${NC}"
            log "  ${CYAN}修复:sudo sh -c 'echo \"ClientAliveInterval 60\" >> /etc/ssh/sshd_config'${NC}"
            log "  ${CYAN}      sudo sh -c 'echo \"ClientAliveCountMax 3\" >> /etc/ssh/sshd_config'${NC}"
            log "  ${CYAN}      sudo systemctl restart sshd${NC}"
        else
            log "  ${GREEN}服务端 keepalive 已配置${NC}"
        fi
    else
        # Say so explicitly instead of silently dropping the keepalive check.
        log "  ${YELLOW}[提示] 无法读取 sshd 生效配置(sshd -T 通常需要 root 权限),可用 sudo 运行本脚本后重试${NC}"
    fi

    # Recent disconnect-related journal entries. Both unit names are queried
    # because the service is called "ssh" on some systems and "sshd" on others.
    log "  最近 SSH 断线记录(最近10条):"
    local disconnects entry
    disconnects=$(journalctl -u ssh -u sshd --since "24 hours ago" 2>/dev/null \
        | grep -i "disconnect\|broken pipe\|timeout\|reset by peer" \
        | tail -10)
    if [ -n "$disconnects" ]; then
        while IFS= read -r entry; do
            log "  ${YELLOW}${entry}${NC}"
        done <<<"$disconnects"
    else
        log "  ${GREEN}最近 24 小时内无异常断线记录${NC}"
    fi

    # System load and CPU temperature.
    local load temp
    load=$(uptime | awk -F'load average:' '{print $2}')
    temp=$(vcgencmd measure_temp 2>/dev/null | grep -oE '[0-9]+(\.[0-9]+)?' | head -1)
    log "  系统负载:${load}"
    if [ -n "$temp" ]; then
        # Classify with awk (already required by this script) so the check
        # does not depend on bc, and test the higher threshold first so a hot
        # reading stays red instead of being downgraded to yellow.
        local temp_color="$GREEN"
        case "$(awk -v t="$temp" 'BEGIN { if (t > 70) print "hot"; else if (t > 60) print "warm"; else print "ok" }')" in
            hot)  temp_color="$RED" ;;
            warm) temp_color="$YELLOW" ;;
        esac
        log "  CPU温度: ${temp_color}${temp}°C${NC}(>70°C 会触发降频,影响 WiFi 驱动稳定性)"
    fi
}

# ── One-shot full report ──────────────────────────────────────

# Print a framed report that runs every check once, in order: WiFi, gateway,
# business endpoints, SSH analysis.
run_once() {
    local rule="${BOLD}${BLUE}════════════════════════════════════════${NC}"
    local step

    log "$rule"
    log "${BOLD}  Pi 400 网络稳定性报告${NC}"
    log "$rule"

    for step in check_wifi check_gateway check_targets check_ssh_stability; do
        "$step"
    done

    log "$rule"
}

# ── Continuous monitoring ─────────────────────────────────────

# Run the WiFi, gateway and endpoint checks in an endless loop, sleeping
# INTERVAL seconds after each round. The SSH analysis is not part of the loop.
# Globals:   INTERVAL, DO_LOG, LOG_FILE, BOLD, BLUE, NC (read)
run_watch() {
    log "持续监控模式启动(间隔 ${INTERVAL}s,Ctrl+C 停止)"
    if [ "$DO_LOG" = "1" ]; then
        log "日志写入: $LOG_FILE"
    fi

    local round
    for (( round = 1; ; round++ )); do
        log "${BOLD}${BLUE}══ 第 ${round} 轮检测 ══${NC}"
        check_wifi
        check_gateway
        check_targets
        log "下次检测: ${INTERVAL}s 后"
        sleep "$INTERVAL"
    done
}

# ── Entry point ───────────────────────────────────────────────

DO_WATCH=0
DO_LOG=0

# Print the usage summary on stdout.
usage() {
    echo "用法: bash netwatch.sh [--watch] [--log]"
    echo "  (无参数)  单次完整报告"
    echo "  --watch     持续监控,每 ${INTERVAL}s 检测一次"
    echo "  --log       同时写入 $LOG_FILE"
}

# Parse the command line and start the selected mode.
# Globals:   DO_WATCH, DO_LOG (written)
# Arguments: any of --watch, --log, --help / -h
# Returns:   exits 0 after --help; exits 2 on an unknown argument so that a
#            typo such as "--wacth" is not silently treated as a one-shot run
main() {
    local arg
    for arg in "$@"; do
        case "$arg" in
            --watch) DO_WATCH=1 ;;
            --log)   DO_LOG=1   ;;
            -h | --help)
                usage
                exit 0 ;;
            *)
                echo "未知参数: ${arg}" >&2
                usage >&2
                exit 2 ;;
        esac
    done

    if [ "$DO_WATCH" = "1" ]; then
        run_watch
    else
        run_once
    fi
}

main "$@"

运行测试效果:

bash netwatch.sh 

2026-04-06 11:13:53 ════════════════════════════════════════

2026-04-06 11:13:53   Pi 400 网络稳定性报告

2026-04-06 11:13:53 ════════════════════════════════════════

2026-04-06 11:13:53 ── WiFi 状态 ──

2026-04-06 11:13:53   接口: wlan0  SSID: MOVISTAR-WIFI7-5C90  BSSID: dc:08:da:84:5c:9e

2026-04-06 11:13:53   信号: -58 dBm  频率: 5180 MHz  速率: 325.0 MBit/s

2026-04-06 11:13:54   质量: 优秀 (≥-60 dBm)

2026-04-06 11:13:54   IP地址: 192.168.1.38/24

2026-04-06 11:13:55   网关: 192.168.1.1

2026-04-06 11:13:55   RX丢弃: 0包  TX重传: N/A次

2026-04-06 11:13:55 ── 本地网关 ──

2026-04-06 11:13:59   [OK] 网关 (192.168.1.1) — 丢包: 0%  RTT: 2.560 ms

2026-04-06 11:13:59 ── 业务节点连通性 ──

2026-04-06 11:14:03   [OK] YouTube (www.youtube.com) — 丢包: 0%  RTT: 22.292 ms

2026-04-06 11:14:07   [OK] Cloudflare-R2 (pub-1b62f0f9bae84486a796cbf53a3a27ee.r2.dev) — 丢包: 0%  RTT: 18.568 ms

2026-04-06 11:14:11   [OK] Cloudflare-Pages (pabloye.pages.dev) — 丢包: 0%  RTT: 9.701 ms

2026-04-06 11:14:16   [OK] GitHub (github.com) — 丢包: 0%  RTT: 53.299 ms

2026-04-06 11:14:20   [OK] Gemini-API (generativelanguage.googleapis.com) — 丢包: 0%  RTT: 11.181 ms

2026-04-06 11:14:24   [OK] DNS-Primary (8.8.8.8) — 丢包: 0%  RTT: 12.489 ms

2026-04-06 11:14:28   [OK] DNS-Secondary (1.1.1.1) — 丢包: 0%  RTT: 9.895 ms

2026-04-06 11:14:28 ── SSH 断线原因分析 ──

2026-04-06 11:14:28   最近 SSH 断线记录(最近10条):

2026-04-06 11:14:28   最近 24 小时内无异常断线记录

2026-04-06 11:14:28   系统负载: 0.00, 0.00, 0.00

2026-04-06 11:14:28   CPU温度: 32.1°C(>70°C 会触发降频,影响 WiFi 驱动稳定性)

2026-04-06 11:14:28 ════════════════════════════════════════