inputSchema
{
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"type": "string",
"description": "待脱敏的正文(已从 Word/PDF/TXT 等提取的纯文本或结构化文本)"
},
"options": {
"type": "object",
"properties": {
"identity": {
"type": "boolean",
"default": true,
"description": "身份证、手机号、邮箱等主体身份信息"
},
"fileMetadata": {
"type": "boolean",
"default": true,
"description": "文件名中的敏感段、常见元数据键值对(若出现在文本中)"
},
"contractParty": {
"type": "boolean",
"default": true,
"description": "合同主体:单位/项目名称行、住所与各类地址行、电话/传真/邮箱行、法定代表人/代理人/签署人、甲乙方与见证方等角色行、户名与收付款单位、中英文组织名称(启发式,可能误伤)"
},
"techCredentials": {
"type": "boolean",
"default": true,
"description": "JWT、Bearer、API Key、私钥 PEM、带口令 URL、内网 IP 等"
},
"businessAndInternal": {
"type": "boolean",
"default": true,
"description": "统一社会信用代码、银行卡号、金额标记、疑似订单/合同编号等"
},
"hiddenStructuredContent": {
"type": "boolean",
"default": true,
"description": "HTML 注释、零宽字符、过长 Base64 块等隐蔽载体"
},
"aggressiveBinaryLikeStrings": {
"type": "boolean",
"default": false,
"description": "更激进:掩码长 Base64 串(易误伤正文,默认关)"
}
},
"description": "各类别开关;未传字段按各自的 default 取值(aggressiveBinaryLikeStrings 默认 false,其余默认 true)"
},
"fileName": {
"type": "string",
"description": "可选原始文件名;在启用 fileMetadata 时生成脱敏后的建议文件名(在数据层替换,而非前端 CSS 隐藏)"
},
"strategy": {
"enum": [
"pseudonymize",
"mask_same_length",
"erase"
],
"type": "string",
"default": "pseudonymize",
"description": "pseudonymize=替换为类型化占位符(如 [PHONE_1]);mask_same_length=用等长的 * 掩码;erase=直接删除匹配片段"
},
"customPatterns": {
"type": "array",
"items": {
"type": "object",
"required": [
"pattern",
"replacement"
],
"properties": {
"flags": {
"type": "string",
"description": "如 gimsu,默认 g"
},
"label": {
"type": "string",
"description": "统计与摘要用标签"
},
"pattern": {
"type": "string",
"description": "正则表达式源码(不含两侧斜杠)"
},
"replacement": {
"type": "string",
"description": "替换字符串;strategy 为 pseudonymize 且 replacement 为字面 [AUTO] 时自动生成 CUSTOM_n 占位"
}
}
},
"description": "自定义正则规则(在内置规则之后应用);replacement 可为固定字符串或含 $1 的分组替换"
},
"additionalCategoryNotes": {
"type": "string",
"description": "宿主或用户对「其他需脱敏字段」的自然语言说明;基因会原样写入输出的 manualFollowUp 供人工或 LLM 二次处理(本基因不自动解析语义)"
}
}
}
outputSchema
{
"type": "object",
"required": [
"desensitizedContent",
"statistics",
"appliedBuiltinCategories",
"dataLayerNote",
"disclaimer"
],
"properties": {
"disclaimer": {
"type": "string",
"description": "正则脱敏局限性说明"
},
"statistics": {
"type": "object",
"properties": {
"total": {
"type": "integer"
},
"custom": {
"type": "integer"
},
"identity": {
"type": "integer"
},
"fileMetadata": {
"type": "integer"
},
"contractParty": {
"type": "integer"
},
"techCredentials": {
"type": "integer"
},
"hiddenStructured": {
"type": "integer"
},
"businessAndInternal": {
"type": "integer"
}
},
"description": "各类替换次数(不含具体敏感值,避免二次泄露)"
},
"dataLayerNote": {
"type": "string",
"description": "说明脱敏在数据层完成,非前端隐藏"
},
"manualFollowUp": {
"type": "array",
"items": {
"type": "string"
},
"description": "需人工或下游模型处理的提醒,含 additionalCategoryNotes"
},
"desensitizedContent": {
"type": "string",
"description": "已改写后的正文副本;敏感片段已从字符串层面移除或替换,可安全复制/落盘/外传(仍需结合合规审查)"
},
"desensitizedFileName": {
"type": "string",
"description": "当输入含 fileName 且启用 fileMetadata 时返回建议文件名"
},
"replacementShapeSummary": {
"type": "array",
"items": {
"type": "object",
"properties": {
"count": {
"type": "integer"
},
"ruleId": {
"type": "string"
},
"category": {
"type": "string"
},
"samplePlaceholder": {
"type": "string"
}
}
},
"description": "仅规则 id、次数与占位示例,不包含原文"
},
"appliedBuiltinCategories": {
"type": "array",
"items": {
"type": "string"
},
"description": "本次实际启用的内置类别 id 列表"
}
}
}