-
Notifications
You must be signed in to change notification settings - Fork 2.9k
feat: add ai-lakera-guard plugin #13570
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
nic-6443
merged 7 commits into
apache:master
from
janiussyafiq:feat/ai-lakera-guard-pr1
Jun 24, 2026
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
af23bef
feat: add ai-lakera-guard plugin
janiussyafiq 432fa36
fix: register ai-lakera-guard subdir in Makefile and plugins list test
janiussyafiq 54dfcfd
feat(ai-lakera-guard): add fail_mode for unsupported requests
janiussyafiq 1bf9097
fix(ai-lakera-guard): reject empty api_key in schema
janiussyafiq e164ebb
docs(ai-lakera-guard): add Chinese translation
janiussyafiq ae987da
fix(ai-lakera-guard): preserve roles, guard nil body, clarify alert docs
janiussyafiq 84d950f
fix(ai-lakera-guard): harden Lakera response decode and tighten tests
janiussyafiq File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| -- | ||
| -- Licensed to the Apache Software Foundation (ASF) under one or more | ||
| -- contributor license agreements. See the NOTICE file distributed with | ||
| -- this work for additional information regarding copyright ownership. | ||
| -- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| -- (the "License"); you may not use this file except in compliance with | ||
| -- the License. You may obtain a copy of the License at | ||
| -- | ||
| -- http://www.apache.org/licenses/LICENSE-2.0 | ||
| -- | ||
| -- Unless required by applicable law or agreed to in writing, software | ||
| -- distributed under the License is distributed on an "AS IS" BASIS, | ||
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| -- See the License for the specific language governing permissions and | ||
| -- limitations under the License. | ||
| -- | ||
| local core = require("apisix.core") | ||
| local schema_mod = require("apisix.plugins.ai-lakera-guard.schema") | ||
| local client = require("apisix.plugins.ai-lakera-guard.client") | ||
| local protocols = require("apisix.plugins.ai-protocols") | ||
| local binding = require("apisix.plugins.ai-protocols.binding") | ||
|
|
||
| local ipairs = ipairs | ||
| local type = type | ||
| local concat = table.concat | ||
|
|
||
|
|
||
-- Plugin descriptor returned by this module: its identity (name, version),
-- its priority value, and the configuration schema owned by the schema
-- submodule.
local _M = {
    name = "ai-lakera-guard",
    version = 0.1,
    priority = 1028,
    schema = schema_mod.schema,
}
|
|
||
|
|
||
-- Validate a plugin configuration.
--
-- The check is delegated entirely to the schema submodule; whatever it returns
-- is handed back to the caller unchanged.
function _M.check_schema(conf)
    return schema_mod.check_schema(conf)
end
|
|
||
|
|
||
-- Build the client-facing list of detectors that actually fired.
--
-- `breakdown` is the per-detector array taken from the Lakera response (or
-- nil). The raw breakdown may also carry non-detected entries; those belong in
-- the log but not in the deny message, so only entries whose `detected` field
-- is exactly `true` are kept.
--
-- The breakdown comes from an upstream HTTP response, so every field is
-- type-checked before use: a malformed entry is skipped instead of raising a
-- Lua error during string concatenation.
--
-- Returns an array of strings, each "<detector_type>" or
-- "<detector_type> (<result>)" when a non-empty result is present. The array
-- is empty when nothing qualifies or when `breakdown` is not a table.
local function format_breakdown(breakdown)
    local parts = {}
    if type(breakdown) ~= "table" then
        return parts
    end

    for _, entry in ipairs(breakdown) do
        -- Strict `== true`: a truthy non-boolean (for example a JSON-null
        -- sentinel) must not be reported as a fired detector.
        if type(entry) == "table"
           and entry.detected == true
           and type(entry.detector_type) == "string" then
            local part = entry.detector_type
            local result = entry.result
            local result_type = type(result)
            -- Only strings and numbers can be concatenated safely.
            if (result_type == "string" and result ~= "")
               or result_type == "number" then
                part = part .. " (" .. result .. ")"
            end
            parts[#parts + 1] = part
        end
    end
    return parts
end
|
|
||
|
|
||
-- Build the body returned to the client when a request is denied.
--
-- ctx       request context; supplies the client protocol name, any recorded
--           usage (`llm_raw_usage`) and the `var` lookups used below
-- conf      plugin configuration; `reveal_failure_categories` controls whether
--           the fired detectors are appended to the text
-- message   base deny text
-- breakdown per-detector results (optional)
--
-- When the client protocol cannot be resolved the bare `message` is returned;
-- otherwise the result of the protocol's `build_deny_response` is returned.
local function deny_message(ctx, conf, message, breakdown)
    local proto = protocols.get(ctx.ai_client_protocol)
    if not proto then
        core.log.error("ai-lakera-guard: unsupported protocol: ",
                       ctx.ai_client_protocol or "unknown")
        return message
    end

    local text = message
    if conf.reveal_failure_categories then
        local fired = format_breakdown(breakdown)
        if #fired > 0 then
            text = text .. ". Flagged categories: " .. concat(fired, ", ")
        end
    end

    -- Usage preference order: what the context already recorded, then the
    -- protocol's own empty-usage value, then an all-zero fallback.
    local usage = ctx.llm_raw_usage
    if not usage and proto.empty_usage then
        usage = proto.empty_usage()
    end
    if not usage then
        usage = { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 }
    end

    local deny_opts = {
        text = text,
        model = ctx.var.request_llm_model,
        usage = usage,
        stream = ctx.var.request_type == "ai_stream",
    }
    return proto.build_deny_response(deny_opts)
end
|
|
||
|
|
||
-- Collapse one message's `content` into plain text.
--
-- A string is returned as-is. For a table, the `text` of every part shaped
-- like { type = "text", text = <string> } is joined with single spaces (which
-- yields "" when no such part exists). Any other content type yields nil.
local function flatten_content(content)
    local kind = type(content)
    if kind == "string" then
        return content
    end
    if kind ~= "table" then
        return nil
    end

    local texts = {}
    for _, part in ipairs(content) do
        if type(part) == "table" and part.type == "text"
           and type(part.text) == "string" then
            core.table.insert(texts, part.text)
        end
    end
    return concat(texts, " ")
end


-- Normalize a protocol's canonical {role, content} messages into the shape
-- sent to Lakera /v2/guard: role preserved, content coerced to a plain string.
-- Some adapters (e.g. openai-chat) return body.messages verbatim, so a
-- message's content can be a multimodal array or nil (tool-call turns).
-- Entries that are not tables, lack a string role, or carry no text are
-- dropped.
local function normalize_messages(messages)
    local normalized = {}
    for _, message in ipairs(messages or {}) do
        if type(message) == "table" and type(message.role) == "string" then
            local text = flatten_content(message.content)
            if text and text ~= "" then
                core.table.insert(normalized, { role = message.role, content = text })
            end
        end
    end
    return normalized
end
|
|
||
|
|
||
-- Scan the normalized messages with Lakera Guard and decide the outcome.
--
-- Returns nothing when the request may proceed: no messages, scan failure with
-- `conf.fail_open` set, verdict not flagged, or `conf.action == "alert"`.
-- Otherwise returns `conf.deny_code` followed by the deny body built by
-- deny_message.
local function request_content_moderation(ctx, conf, messages)
    local has_messages = messages and #messages > 0
    if not has_messages then
        return
    end

    local verdict, scan_err = client.scan(conf, messages)
    if scan_err then
        if not conf.fail_open then
            core.log.error("ai-lakera-guard: ", scan_err, "; fail_open=false, blocking request")
            return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message)
        end
        core.log.warn("ai-lakera-guard: ", scan_err, "; fail_open=true, allowing request")
        return
    end

    if verdict.flagged then
        -- Log Lakera's full per-detector verdict (every entry, detected or
        -- not) so both alert mode and blocked requests are auditable.
        local breakdown_json = core.json.encode(verdict.breakdown)
        core.log.warn("ai-lakera-guard: request flagged by Lakera Guard",
                      ", breakdown: ", breakdown_json,
                      ", request_uuid: ", verdict.request_uuid or "")

        if conf.action ~= "alert" then
            return conf.deny_code,
                   deny_message(ctx, conf, conf.request_failure_message, verdict.breakdown)
        end
    end
end
|
|
||
|
|
||
-- Hand a request this plugin cannot inspect to binding.on_unsupported, passing
-- the configured fail_mode, the plugin name, `reason`, status 500 and
-- `body_msg`. Returns the (code, body) pair when the binding reports the case
-- as handled, and nothing otherwise.
local function report_unsupported(conf, ctx, reason, body_msg)
    local handled, code, body = binding.on_unsupported(
        conf.fail_mode, _M.name, ctx, reason, 500, body_msg)
    if handled then
        return code, body
    end
end


-- Access-phase handler: extract the conversation from the request body, have
-- Lakera Guard scan it, and return (code, body) when the request is denied.
-- Returns nothing when the request is allowed to continue.
function _M.access(conf, ctx)
    if not ctx.picked_ai_instance then
        return report_unsupported(conf, ctx,
            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
            "no ai instance picked, ai-lakera-guard plugin must be used with "
                .. "ai-proxy or ai-proxy-multi plugin")
    end

    local request_tab, err = core.request.get_json_request_body_table()
    if not request_tab then
        local reason = "failed to read request body: " .. (err or "unknown error")
        return report_unsupported(conf, ctx, reason, reason)
    end

    local proto = protocols.get(ctx.ai_client_protocol)
    if not proto or not proto.get_messages then
        local reason = "unsupported protocol: " .. (ctx.ai_client_protocol or "unknown")
        return report_unsupported(conf, ctx, reason, reason)
    end

    local raw_messages = proto.get_messages(request_tab)
    local messages = normalize_messages(raw_messages)
    if #messages == 0 and proto.extract_request_content then
        -- The protocol has no role-preserving representation for this body;
        -- fall back to a single user message built from the flat extraction.
        local text = concat(proto.extract_request_content(request_tab), " ")
        if text ~= "" then
            messages = { { role = "user", content = text } }
        end
    end

    local code, message = request_content_moderation(ctx, conf, messages)
    if not code then
        return
    end

    -- The Content-Type of the deny response follows the request type.
    local content_type = "application/json"
    if ctx.var.request_type == "ai_stream" then
        content_type = "text/event-stream"
    end
    core.response.set_header("Content-Type", content_type)
    return code, message
end
|
|
||
|
|
||
| return _M | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| -- | ||
| -- Licensed to the Apache Software Foundation (ASF) under one or more | ||
| -- contributor license agreements. See the NOTICE file distributed with | ||
| -- this work for additional information regarding copyright ownership. | ||
| -- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| -- (the "License"); you may not use this file except in compliance with | ||
| -- the License. You may obtain a copy of the License at | ||
| -- | ||
| -- http://www.apache.org/licenses/LICENSE-2.0 | ||
| -- | ||
| -- Unless required by applicable law or agreed to in writing, software | ||
| -- distributed under the License is distributed on an "AS IS" BASIS, | ||
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| -- See the License for the specific language governing permissions and | ||
| -- limitations under the License. | ||
| -- | ||
| local core = require("apisix.core") | ||
| local http = require("resty.http") | ||
|
|
||
| local type = type | ||
|
|
||
| local _M = {} | ||
|
|
||
|
|
||
-- Call Lakera Guard /v2/guard with the given messages.
--
-- `messages` is the role-tagged conversation in Lakera's {role, content} shape;
-- it is forwarded verbatim so the system / user / assistant turns Lakera's
-- message-based policy acts on are preserved, rather than being flattened into
-- a single user message.
--
-- `conf` fields read here: project_id (optional), api_key, timeout,
-- lakera_endpoint, ssl_verify.
--
-- On success returns a result table. On failure (request body cannot be
-- encoded, timeout / connection error, any status other than 200, response
-- decode failure, non-object response) returns nil + an error string.
--
-- result fields:
--   flagged      (boolean)    — Lakera's primary enforcement signal; true only
--                               when the response carries a literal `true`
--   breakdown    (array|nil)  — Lakera's per-detector results, passed through
--                               verbatim and unfiltered (both detected and
--                               non-detected entries) so the full verdict can
--                               be logged exactly as Lakera returned it;
--                               selecting which detectors to surface is left
--                               to the caller
--   request_uuid (string|nil) — Lakera trace id; dropped when it is absent or
--                               not a string
function _M.scan(conf, messages)
    local payload = {
        messages = messages,
        -- Always request the per-detector breakdown so flagged verdicts can be
        -- logged in full (with confidence results); the client-facing reveal
        -- is gated separately by reveal_failure_categories.
        breakdown = true,
    }
    if conf.project_id then
        payload.project_id = conf.project_id
    end
    -- A future PII-redaction phase should set `payload.payload = true` to have
    -- Lakera return the matched PII / profanity / regex spans. We don't request
    -- it here: this phase doesn't consume those spans, and they can contain
    -- sensitive text we shouldn't pull into the gateway unnecessarily.

    -- Surface an encode failure explicitly rather than sending a request
    -- without a body.
    local encoded, encode_err = core.json.encode(payload)
    if not encoded then
        return nil, "failed to encode Lakera Guard request: "
                    .. (encode_err or "unknown error")
    end

    local headers = {
        ["Content-Type"] = "application/json",
    }
    if conf.api_key and conf.api_key ~= "" then
        headers["Authorization"] = "Bearer " .. conf.api_key
    end

    local httpc = http.new()
    httpc:set_timeout(conf.timeout)

    local res, err = httpc:request_uri(conf.lakera_endpoint, {
        method = "POST",
        body = encoded,
        headers = headers,
        ssl_verify = conf.ssl_verify,
    })
    if not res then
        return nil, "failed to request Lakera Guard: " .. (err or "unknown error")
    end
    if res.status ~= 200 then
        return nil, "Lakera Guard returned status " .. res.status
    end

    local data, decode_err = core.json.decode(res.body, { null_as_nil = true })
    if not data then
        return nil, "failed to decode Lakera Guard response: "
                    .. (decode_err or "unknown error")
    end
    if type(data) ~= "table" then
        return nil, "unexpected Lakera Guard response: expected a JSON object"
    end

    local breakdown
    if type(data.breakdown) == "table" then
        breakdown = data.breakdown
    end

    -- Enforce the documented (string|nil) contract: callers pass this value
    -- straight to the logger, so a non-string value from upstream must not
    -- leak through.
    local request_uuid
    local metadata = data.metadata
    if type(metadata) == "table" and type(metadata.request_uuid) == "string" then
        request_uuid = metadata.request_uuid
    end

    return {
        flagged = data.flagged == true,
        breakdown = breakdown,
        request_uuid = request_uuid,
    }
end
|
|
||
|
|
||
| return _M |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.