21 综合实践课-智能体工具链全流程开发与 AI 协同优化

综合实践课:智能体工具链全流程开发与 AI 协同优化

关联:索引

术语小抄(初学者版)


课程思政融入点(口径统一):

推荐的最小可落地蓝图(复用 19/20 的核心思想):

用户请求(text + user_ctx + scene)
  ↓
路由(选择候选工具:按场景/分类/可靠性)
  ↓
权限过滤(RBAC + 参数级门禁)
  ↓
执行(按链路:通用→查询→控制→知识)
  ↓
汇总(统一输出结构 + 证据链)
  ↓
审计(通过/拒绝/失败都记录 trace_id)

关键点解释与自检要点:

目的:不同组/不同工具常见输出形态不一致(dict/字符串、ok/success、error 字段漂移)。全流程入口先把输出统一,减少联调噪声。

把下面代码保存为 tool_output_normalize.py(可直接运行,不依赖第三方库):

import json
from dataclasses import dataclass
from typing import Any, Dict, Tuple

@dataclass(frozen=True)
class NormalizedResult:
    """Tool execution result after normalization to one uniform shape (immutable)."""
    # Normalized tool execution result:
    # - ok / trace_id: must be present on both success and failure so every result is traceable
    # - data: carries the business payload on success; may be an empty dict on failure
    # - error_code / error_message: structured failure semantics, for statistics and diagnosis
    ok: bool
    trace_id: str
    data: Dict[str, Any]
    error_code: str
    error_message: str

def _to_obj(x: Any) -> Dict[str, Any]:
    # 工具输出常见两种形态:dict 或 JSON 字符串;先统一成 dict 再做契约断言
    if isinstance(x, dict):
        return x
    if isinstance(x, str):
        # 若字符串不是合法 JSON,这里会抛异常:属于“工具输出契约不满足”
        return json.loads(x)
    raise TypeError(f"unsupported tool output type: {type(x)}")

def normalize_tool_output(out: Any) -> NormalizedResult:
    """Normalize a raw tool output into a NormalizedResult.

    Args:
        out: A dict or a JSON string produced by a tool.

    Returns:
        A NormalizedResult. On success the error fields are empty strings; on
        failure ``error_code`` falls back to ``"UNKNOWN_ERROR"`` and
        ``error_message`` to ``"tool execution failed"`` when the tool did not
        supply them.

    Raises:
        ValueError: the output has no usable ``trace_id`` (absent, ``None`` or
            empty), so no evidence chain can be built.
        TypeError / json.JSONDecodeError: propagated from ``_to_obj`` when the
            output cannot be turned into a dict.
    """
    o = _to_obj(out)

    # Legacy field compatibility: success -> ok (transition strategy; new
    # tools should expose only ``ok``).
    raw_ok = o.get("ok", o.get("success", False))
    if isinstance(raw_ok, str):
        # bool("false") is True; interpret string flags explicitly so a
        # textual "false"/"0" is never mistaken for success.
        ok = raw_ok.strip().lower() in {"true", "1"}
    else:
        ok = bool(raw_ok)

    # A JSON null must count as missing: str(None) would yield the non-empty
    # string "None" and slip past the check below.
    raw_trace_id = o.get("trace_id")
    trace_id = "" if raw_trace_id is None else str(raw_trace_id)
    if not trace_id:
        raise ValueError(f"missing trace_id in tool output: {o}")

    # Only a dict is accepted as data, so downstream code never has to guard
    # against list/str payloads.
    raw_data = o.get("data")
    data = raw_data if isinstance(raw_data, dict) else {}

    if ok:
        return NormalizedResult(ok=True, trace_id=trace_id, data=data, error_code="", error_message="")

    raw_error = o.get("error")
    if isinstance(raw_error, dict):
        raw_code = raw_error.get("code")
        raw_message = raw_error.get("message")
    else:
        # Field drift: some tools report the error as a bare string; keep it
        # as the message instead of discarding it.
        raw_code = None
        raw_message = raw_error if isinstance(raw_error, str) else None

    error_code = "UNKNOWN_ERROR" if raw_code is None else str(raw_code)
    error_message = "" if raw_message is None else str(raw_message)
    if not error_message:
        error_message = "tool execution failed"
    return NormalizedResult(ok=False, trace_id=trace_id, data=data, error_code=error_code, error_message=error_message)

def demo() -> Tuple[NormalizedResult, NormalizedResult]:
    """Normalize one successful and one failed sample output.

    Returns:
        ``(success_result, failure_result)`` in that order.
    """
    samples = (
        {"ok": True, "trace_id": "t1", "data": {"x": 1}},
        {"ok": False, "trace_id": "t2", "error": {"code": "INPUT_INVALID", "message": "bad payload"}},
    )
    success, failure = (normalize_tool_output(sample) for sample in samples)
    return success, failure

if __name__ == "__main__":
    # Print the normalized success sample first, then the failure sample.
    for result in demo():
        print(result)

逐段解释与自检要点:

运行方式(Windows PowerShell):

py -3 .\tool_output_normalize.py

解释与自检要点:

说明:以下示例只用标准库,且用 mock 工具保证“脚本可运行”。上机时,你们需要把 invoke_* 三个函数替换成你们组真实工具的 .invoke(payload) 或等价调用方式。

把下面代码保存为 toolchain_full_entry_demo.py

import json
import time
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Literal, Tuple

from tool_output_normalize import NormalizedResult, normalize_tool_output

# Role/Scene/Category are the shared vocabulary types used throughout this
# lesson: they drive RBAC, scene filtering, and tool classification.
Role = Literal["admin", "operator", "viewer"]
Scene = Literal["sorting_ops", "maintenance", "audit"]
Category = Literal["common", "data_query", "device_control", "knowledge_base"]

@dataclass(frozen=True)
class UserContext:
    """Identity of the caller making a request (immutable)."""
    # user_id: used to correlate audit records; role: used for permission decisions
    user_id: str
    role: Role

@dataclass(frozen=True)
class ToolMeta:
    """Metadata describing one registered tool (immutable)."""
    # Tool metadata: keeps "which tool it is, where it may be used, who may use
    # it, how reliable it is, and how risky it is" together for easy management.
    tool_name: str
    category: Category
    scenes: List[Scene]
    required_roles: List[Role]
    reliability: float
    risk_level: Literal["low", "medium", "high"]

def authorize(user: UserContext, scene: Scene, tool: ToolMeta, action: str, payload: Dict[str, Any]) -> Tuple[bool, str, str]:
    """Gate a tool call before execution.

    Scene and role are hard filters. For ``device_control`` tools the payload
    is checked as well, and anything that cannot be verified is rejected
    (deny rather than risk a wrong device command).

    Args:
        user: Caller identity; only ``role`` is consulted.
        scene: Scene the request runs in.
        tool: Metadata of the tool about to be invoked.
        action: Logical action label supplied by the caller. It is not
            consulted here; the device action is read from ``payload["action"]``.
        payload: Tool input. For device control, ``payload["action"]`` and
            ``payload["params"]["speed"]`` are inspected.

    Returns:
        ``(allowed, code, message)``; ``code`` and ``message`` are empty
        strings when the call is allowed.
    """
    if scene not in tool.scenes:
        return False, "SCENE_NOT_ALLOWED", f"tool not allowed in scene: {scene}"
    if user.role not in tool.required_roles:
        return False, "ROLE_NOT_ALLOWED", f"role not allowed: {user.role}"
    if tool.category != "device_control":
        return True, "", ""

    # Action whitelist: operators may only run the low-risk actions.
    device_action = str(payload.get("action", ""))
    if user.role == "operator" and device_action not in {"pick_place", "stop"}:
        return False, "ACTION_NOT_ALLOWED", f"operator cannot run action: {device_action}"

    # Parameter guard. A missing/empty ``params`` is fine, but a non-dict one
    # cannot be inspected and is therefore rejected instead of crashing.
    params = payload.get("params") or {}
    if not isinstance(params, dict):
        return False, "PARAM_GUARD", "params must be an object"

    speed_limit = 0.6  # classroom safety threshold
    speed = params.get("speed")
    if speed is not None:
        # Fail closed: a speed that is not a real number (e.g. the string
        # "0.9", or NaN where speed != speed) cannot be compared against the
        # limit and previously bypassed the guard.
        if not isinstance(speed, (int, float)) or speed != speed:
            return False, "PARAM_GUARD", "speed must be a number"
        if speed > speed_limit:
            return False, "PARAM_GUARD", "speed too high for classroom safety"
    return True, "", ""

def audit_log(event: Dict[str, Any]) -> None:
    """Emit one audit event as a single JSON line on stdout.

    Classroom demo only; a real deployment would write to a JSONL file or a
    logging system instead. Non-ASCII text is written as-is, not escaped.
    """
    serialized = json.dumps(event, ensure_ascii=False)
    print(serialized)

def select_tools(tools: List[ToolMeta], user: UserContext, scene: Scene) -> List[ToolMeta]:
    """Build the candidate tool list for a user in a scene.

    Tools are first filtered by scene and role, then ordered by descending
    reliability, then ascending risk (low < medium < high), then tool name.

    Returns:
        A new list; ``tools`` itself is not modified.
    """
    risk_order = {"low": 0, "medium": 1, "high": 2}

    def ranking(tool: ToolMeta) -> Tuple[float, int, str]:
        return (-tool.reliability, risk_order[tool.risk_level], tool.tool_name)

    eligible = (
        tool
        for tool in tools
        if scene in tool.scenes and user.role in tool.required_roles
    )
    return sorted(eligible, key=ranking)

def invoke_data_query(payload: Dict[str, Any]) -> Any:
    """Mock data-query tool; replace with ``data_query_tool.invoke(payload)`` in the lab.

    Returns a successful output with a fresh 8-character trace id, a fixed
    fruit/quality record, and the ``line_id`` echoed from ``payload``
    (empty string when absent).
    """
    record = {
        "fruit_type": "apple",
        "quality": "A",
        "line_id": payload.get("line_id", ""),
    }
    return {"ok": True, "trace_id": uuid.uuid4().hex[:8], "data": record}

def invoke_device_control(payload: Dict[str, Any]) -> Any:
    """Mock device-control tool; replace with ``device_control_tool.invoke(payload)`` in the lab.

    Returns a successful output with a fresh 8-character trace id and a
    receipt that echoes the request's ``cmd_id`` (as a string) in both
    ``cmd_id`` and ``last_cmd_id``.
    """
    echoed_cmd_id = str(payload.get("cmd_id", ""))
    receipt = {
        "cmd_id": echoed_cmd_id,
        "last_cmd_id": echoed_cmd_id,
        "code": "OK",
        "message": "accepted",
    }
    return {"ok": True, "trace_id": uuid.uuid4().hex[:8], "data": {"receipt": receipt}}

def invoke_kb(payload: Dict[str, Any]) -> Any:
    """Mock knowledge-base tool; replace with ``kb_tool.invoke(payload)`` in the lab.

    Returns a successful output with a fresh 8-character trace id, a fixed
    answer, and a single fixed hit. ``payload`` is not inspected by the mock.
    """
    answer = "示例:苹果 A 级进入优选通道;B/C 级进入普通通道并记录批次。"
    hits = [{"source": "kb", "score": 0.71}]
    return {"ok": True, "trace_id": uuid.uuid4().hex[:8], "data": {"answer": answer, "hits": hits}}

def _fail(run_id: str, stage: str, tool_name: str, code: str, message: str, trace_id: str = "") -> NormalizedResult:
    """Audit a failed stage and return the same failure as a NormalizedResult."""
    event: Dict[str, Any] = {"ok": False, "run_id": run_id, "stage": stage, "tool": tool_name, "code": code, "message": message}
    if trace_id:
        event["trace_id"] = trace_id
    audit_log(event)
    return NormalizedResult(ok=False, trace_id=trace_id, data={}, error_code=code, error_message=message)


def _receipt_of(out: NormalizedResult) -> Dict[str, Any]:
    """Return the control receipt carried in ``out.data``, or ``{}`` if absent or not a dict."""
    receipt = out.data.get("receipt")
    return receipt if isinstance(receipt, dict) else {}


def _run_stage(
    run_id: str,
    user: UserContext,
    scene: Scene,
    allowed: List[ToolMeta],
    category: Category,
    action: str,
    payload: Dict[str, Any],
    invoke: Any,
    *,
    required_fields: Tuple[str, ...] = (),
    evidence: Any = None,
) -> NormalizedResult:
    """Run one stage: route -> authorize -> execute -> audit.

    Every outcome (pass, rejection, failure) writes exactly one audit event.

    Args:
        run_id: Identifier tying all events of this end-to-end run together.
        user: Caller identity.
        scene: Scene the request runs in.
        allowed: Candidate tools already filtered and ranked by ``select_tools``.
        category: Tool category this stage needs; the first match in ``allowed`` is used.
        action: Logical action label forwarded to ``authorize``.
        payload: Tool input.
        invoke: Callable taking ``payload`` and returning the raw tool output.
        required_fields: Keys that must be present and non-empty in the
            normalized ``data`` of a successful output.
        evidence: Optional callable mapping the normalized result to an
            ``evidence`` dict that is attached to the execute audit event.

    Returns:
        The normalized tool result, or a synthetic ``ok=False`` result when
        the stage was rejected or the tool output was unusable.
    """
    # Routing: a default keeps next() from raising StopIteration when the
    # user's role/scene leaves no tool of this category.
    tool = next((t for t in allowed if t.category == category), None)
    if tool is None:
        return _fail(run_id, "route", "", "TOOL_NOT_AVAILABLE", f"no {category} tool available for role {user.role} in scene {scene}")

    ok, code, msg = authorize(user=user, scene=scene, tool=tool, action=action, payload=payload)
    if not ok:
        return _fail(run_id, "authorize", tool.tool_name, code, msg)

    try:
        out = normalize_tool_output(invoke(payload))
    except (TypeError, ValueError) as exc:
        # Raised when the tool output violates the unified contract
        # (json.JSONDecodeError is a ValueError subclass).
        return _fail(run_id, "execute", tool.tool_name, "OUTPUT_CONTRACT_VIOLATION", str(exc))

    if out.ok:
        missing = [name for name in required_fields if not out.data.get(name)]
        if missing:
            return _fail(run_id, "execute", tool.tool_name, "DATA_INCOMPLETE", f"missing fields in tool data: {', '.join(missing)}", trace_id=out.trace_id)

    event: Dict[str, Any] = {"ok": out.ok, "run_id": run_id, "stage": "execute", "tool": tool.tool_name, "trace_id": out.trace_id}
    if not out.ok:
        event["code"] = out.error_code
        event["message"] = out.error_message
    if evidence is not None:
        event["evidence"] = evidence(out)
    audit_log(event)
    return out


def main() -> None:
    """Run the end-to-end demo chain: data query -> device control -> knowledge base.

    Each stage is routed, authorized, executed and audited. The chain stops at
    the first stage that is rejected or fails (the reason is in the audit
    log); otherwise the final evidence-chain object is printed as JSON.
    """
    # run_id ties together the evidence and logs of one end-to-end run, which
    # makes development logs and regression comparisons easier.
    run_id = uuid.uuid4().hex[:8]
    user = UserContext(user_id="u_op_01", role="operator")
    scene: Scene = "sorting_ops"

    tools = [
        ToolMeta("data_query_line_status", "data_query", ["sorting_ops", "maintenance", "audit"], ["admin", "operator", "viewer"], 0.95, "low"),
        ToolMeta("device_control_arm", "device_control", ["sorting_ops", "maintenance"], ["admin", "operator"], 0.90, "high"),
        ToolMeta("kb_sorting_rules", "knowledge_base", ["sorting_ops", "maintenance", "audit"], ["admin", "operator", "viewer"], 0.85, "low"),
    ]

    allowed = select_tools(tools=tools, user=user, scene=scene)

    # Stage 1: data query. fruit_type/quality feed the control command, so an
    # incomplete record must stop the chain before any device is driven.
    q_payload = {"query_type": "latest_line_status", "line_id": "LINE-01"}
    q_out = _run_stage(
        run_id, user, scene, allowed, "data_query", "query", q_payload, invoke_data_query,
        required_fields=("fruit_type", "quality"),
    )
    if not q_out.ok:
        return

    fruit_type = str(q_out.data["fruit_type"])
    quality = str(q_out.data["quality"])
    # cmd_id is the minimal tracking field of the control path; the receipt's
    # last_cmd_id is recorded next to it so the two can be compared.
    cmd_id = f"C-{int(time.time())}-{run_id}"

    # Stage 2: device control.
    c_payload = {
        "cmd_id": cmd_id,
        "scene": scene,
        "device_type": "arm",
        "device_id": "arm_01",
        "action": "pick_place",
        "params": {"from": "bin_in_1", "to": f"bin_out_{quality}", "speed": 0.5},
        "ts_ms": int(time.time() * 1000),
    }
    c_out = _run_stage(
        run_id, user, scene, allowed, "device_control", "control", c_payload, invoke_device_control,
        evidence=lambda out: {"cmd_id": cmd_id, "last_cmd_id": _receipt_of(out).get("last_cmd_id", "")},
    )
    if not c_out.ok:
        return
    # Real tools may name receipt fields differently; map them to last_cmd_id
    # in the tool adapter if needed.
    receipt = _receipt_of(c_out)

    # Stage 3: knowledge base. Its outcome is audited and checked like the
    # other stages, so the final "ok": True is only printed after it succeeded.
    k_payload = {"query": f"{fruit_type} 质量{quality} 的分拣规则与注意事项", "top_k": 3}
    k_out = _run_stage(run_id, user, scene, allowed, "knowledge_base", "search", k_payload, invoke_kb)
    if not k_out.ok:
        return
    raw_hits = k_out.data.get("hits")
    hits = raw_hits if isinstance(raw_hits, list) else []

    result = {
        "ok": True,
        "run_id": run_id,
        "scene": scene,
        "user": {"user_id": user.user_id, "role": user.role},
        "summary": {"fruit_type": fruit_type, "quality": quality, "decision": f"route_to_bin_out_{quality}"},
        "evidence": {
            "data_trace_id": q_out.trace_id,
            "control_trace_id": c_out.trace_id,
            "kb_trace_id": k_out.trace_id,
            "cmd_id": cmd_id,
            "last_cmd_id": receipt.get("last_cmd_id", ""),
            "kb_hits": hits[:1],
        },
    }
    # Final output: a re-verifiable evidence-chain object for test reports,
    # logs, and regression comparison.
    print(json.dumps(result, ensure_ascii=False, indent=2))

if __name__ == "__main__":
    # Run the end-to-end demo only when this file is executed as a script.
    main()

逐段解释与自检要点:

运行方式(Windows PowerShell):

py -3 .\toolchain_full_entry_demo.py

解释与自检要点:

1)全流程替换练习:把 invoke_data_query / invoke_device_control / invoke_kb 依次替换为你们组真实工具调用,要求不改“统一输出契约”和“证据链字段口径”,跑通一次端到端输出。

提示:

2)拒绝可解释练习:构造 2 条越权/越界用例(例如 viewer 调控制工具、或 speed>0.6),要求输出结构化拒绝信息(ok=false + code/message + trace_id 或 run_id),并在日志里写清 “谁/何时/为什么被拒绝”。


课程思政融入点(口径统一):

建议用 JSONL(每行一条样本),文件名示例:routing_labels.jsonl

{"id":"c001","scene":"sorting_ops","text":"查询 LINE-01 最近一条分拣状态","expected_tool":"data_query_line_status","expected_action":"query"}
{"id":"c002","scene":"sorting_ops","text":"把 A 级苹果放到优选通道","expected_tool":"device_control_arm","expected_action":"control"}
{"id":"c003","scene":"audit","text":"给我苹果 A 级分拣规则的依据来源","expected_tool":"kb_sorting_rules","expected_action":"search"}

解释与自检要点:

2)最小评估脚本(可运行,学生替换路由函数即可)

把下面代码保存为 eval_routing_demo.py

import json
from collections import Counter
from typing import Dict, Iterable, List, Tuple

def load_jsonl(path: str) -> List[Dict]:
    """Load labelled samples from a JSONL file (one JSON object per line).

    Blank lines are skipped. The file is decoded as UTF-8; a leading BOM, as
    written by some Windows editors and tools, is tolerated.

    Args:
        path: Path of the JSONL file.

    Returns:
        The decoded objects in file order.

    Raises:
        OSError: the file cannot be opened.
        json.JSONDecodeError: a line is not valid JSON; the message is
            prefixed with ``path:line_number``.
        ValueError: a line is valid JSON but not an object.
    """
    items: List[Dict] = []
    # "utf-8-sig" reads plain UTF-8 as well and drops a leading BOM; with
    # plain "utf-8" the BOM stays in line 1 and json.loads rejects it.
    with open(path, "r", encoding="utf-8-sig") as f:
        for lineno, line in enumerate(f, start=1):
            s = line.strip()
            if not s:
                continue
            try:
                obj = json.loads(s)
            except json.JSONDecodeError as exc:
                # Same exception type, but now pointing at the offending line.
                raise json.JSONDecodeError(f"{path}:{lineno}: {exc.msg}", exc.doc, exc.pos) from exc
            if not isinstance(obj, dict):
                # Downstream code calls .get() on every sample.
                raise ValueError(f"{path}:{lineno}: expected a JSON object, got {type(obj).__name__}")
            items.append(obj)
    return items

def predict_tool(scene: str, text: str) -> str:
    """Example router: keyword rules that are just enough to close the evaluation loop.

    Rules are tried in order and the first one with a keyword occurring in
    ``text`` wins. When nothing matches, the ``audit`` scene falls back to the
    knowledge-base tool and every other scene to the data-query tool.

    Replace this with your group's real routing (prompt + LLM, a classifier,
    or a more complete rule set) during the lab.
    """
    keyword_rules = (
        (("查询", "状态", "最近"), "data_query_line_status"),
        (("放到", "控制", "机械臂"), "device_control_arm"),
        (("依据", "来源", "规则"), "kb_sorting_rules"),
    )
    for keywords, tool_name in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return tool_name
    return "kb_sorting_rules" if scene == "audit" else "data_query_line_status"

def evaluate(samples: Iterable[Dict]) -> Tuple[float, Dict[str, float], List[Dict]]:
    """Score ``predict_tool`` against labelled samples.

    Args:
        samples: Dicts read via ``.get`` for ``id``, ``scene``, ``text`` and
            ``expected_tool``; missing keys count as empty strings.

    Returns:
        ``(accuracy, per_tool_accuracy, errors)``: overall accuracy (0.0 when
        there are no samples), accuracy keyed by expected tool, and one record
        per misrouted sample, for error-driven tuning and regression runs.
    """
    seen_per_tool: Counter = Counter()
    hits_per_tool: Counter = Counter()
    mistakes: List[Dict] = []
    sample_count = 0

    for sample in samples:
        sample_count += 1
        # expected: the label; predicted: what the router chose
        expected = str(sample.get("expected_tool", ""))
        predicted = predict_tool(scene=str(sample.get("scene", "")), text=str(sample.get("text", "")))
        seen_per_tool[expected] += 1
        if predicted != expected:
            # Keep the miss so the most frequent / most harmful kinds can be fixed first.
            mistakes.append({"id": sample.get("id", ""), "scene": sample.get("scene", ""), "text": sample.get("text", ""), "expected": expected, "pred": predicted})
            continue
        hits_per_tool[expected] += 1

    correct_count = sum(hits_per_tool.values())
    accuracy = (correct_count / sample_count) if sample_count else 0.0
    per_tool_accuracy: Dict[str, float] = {
        tool: (hits_per_tool[tool] / seen) if seen else 0.0
        for tool, seen in seen_per_tool.items()
    }
    return accuracy, per_tool_accuracy, mistakes

def main() -> None:
    """Evaluate the router on ``routing_labels.jsonl`` and print a short report.

    The report lists the sample count, overall accuracy, accuracy per expected
    tool (sorted by tool name), and up to ten misrouted samples as JSON lines.
    The label file is looked up relative to the current working directory.
    """
    samples = load_jsonl("routing_labels.jsonl")
    overall, by_tool, mistakes = evaluate(samples)

    print("total:", len(samples))
    print("accuracy:", round(overall, 4))

    print("per_tool_accuracy:")
    for tool_name, tool_accuracy in sorted(by_tool.items()):
        print(" ", tool_name, round(tool_accuracy, 4))

    print("top_errors:")
    shown = mistakes[:10]
    for mistake in shown:
        print(json.dumps(mistake, ensure_ascii=False))

if __name__ == "__main__":
    # Run the evaluation only when this file is executed as a script.
    main()

逐段解释与自检要点:

运行方式(Windows PowerShell):

py -3 .\eval_routing_demo.py

解释与自检要点:

  1. 路由优化(选对工具)
  2. 入参优化(把参数填对)
  3. 兼容优化(减少字段漂移导致的联调失败)
  4. 安全与拒绝语义优化(宁可拒绝也不误控)

任务 A(建议):让 AI 生成“兼容适配器”并回归验证

你们要做的事:

给 AI 的指令模板(可直接复制):

你是 Python 工程师。请为我的工具链写一个“兼容适配器”,把上游输出映射为下游输入,要求:
1)输入是 dict 或 JSON 字符串,输出是 dict;
2)兼容字段漂移:success→ok,msg_id→cmd_id,lineId→line_id,ts→ts_ms(秒/毫秒需自动识别并统一为毫秒);
3)遇到缺字段必须返回结构化错误:ok=false + error.code/error.message + trace_id;
4)只用 Python 标准库;
5)给至少 6 条自测样例(含 3 条错例),并说明每条预期结果。

我的现有字段与问题描述:{粘贴你们组的真实字段与错例}

人工审计与自检清单(学生必须逐条勾选):

任务 B(建议):让 AI 辅助联调排障并输出“证据链 + 修复点 + 回归用例”

给 AI 的指令模板(可直接复制):

你是联调排障专家。以下是我的端到端证据链输出与失败日志(包含 trace_id/cmd_id/last_cmd_id/错误码),请你:
1)判断故障属于:路由未选中/权限拒绝/参数不兼容/协议冲突/知识库无命中策略/其他;
2)给出最小复现输入(可复制的 payload);
3)给出修复点(具体到字段/函数/规则),并说明为什么;
4)给出回归用例(至少 3 条),说明预期从 Fail→Pass 的证据变化;
5)最后输出一份“排障步骤清单”,按先易后难排序。

日志与证据:{粘贴你们组的失败输出与关键日志}

1)每日开发日志模板(建议 JSON 或表格均可)

2)AI 交互记录模板(每项任务一条)

作业:不布置(过程性考核)