apache · nic-6443 · Jun 24, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/Makefile b/Makefile
@@ -401,6 +401,9 @@ install: runtime
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
 	$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
 
+	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
+	$(ENV_INSTALL) apisix/plugins/ai-lakera-guard/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
+
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
 	$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp

diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
@@ -246,6 +246,7 @@ local _M = {
     "ai-proxy",
     "ai-aws-content-moderation",
     "ai-aliyun-content-moderation",
+    "ai-lakera-guard",
     "proxy-mirror",
     "graphql-proxy-cache",
     "proxy-rewrite",

diff --git a/apisix/plugins/ai-lakera-guard.lua b/apisix/plugins/ai-lakera-guard.lua
@@ -0,0 +1,209 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local core       = require("apisix.core")
+local schema_mod = require("apisix.plugins.ai-lakera-guard.schema")
+local client     = require("apisix.plugins.ai-lakera-guard.client")
+local protocols  = require("apisix.plugins.ai-protocols")
+local binding    = require("apisix.plugins.ai-protocols.binding")
+
+local ipairs = ipairs
+local type   = type
+local concat = table.concat
+
+
+local _M = {
+    version  = 0.1,
+    priority = 1028,
+    name     = "ai-lakera-guard",  -- matches the entry added in apisix/cli/config.lua
+    schema   = schema_mod.schema,  -- taken from the ai-lakera-guard/schema submodule
+}
+
+
+function _M.check_schema(conf)
+    return schema_mod.check_schema(conf)  -- validation is delegated to the schema submodule
+end
+
+
+-- Render the detectors that fired (detected == true) for the client-facing reveal.
+-- Entries are decoded third-party JSON, so each field is type-checked before use.
+local function format_breakdown(breakdown)
+    local parts = {}
+    for _, entry in ipairs(breakdown or {}) do
+        local name = type(entry) == "table" and entry.detected == true and entry.detector_type
+        if type(name) == "string" then
+            local result = entry.result
+            if type(result) == "number" or (type(result) == "string" and result ~= "") then
+                name = name .. " (" .. result .. ")"
+            end
+            core.table.insert(parts, name)
+        end
+    end
+    return parts
+end
+
+
+-- Build the deny body: `message`, plus the fired detector categories when
+-- conf.reveal_failure_categories is set, wrapped by the client protocol adapter.
+local function deny_message(ctx, conf, message, breakdown)
+    local adapter = protocols.get(ctx.ai_client_protocol)
+    if not adapter then
+        core.log.error("ai-lakera-guard: unsupported protocol: ",
+                       ctx.ai_client_protocol or "unknown")
+        return message
+    end
+    local fired = conf.reveal_failure_categories and format_breakdown(breakdown) or {}
+    if #fired > 0 then
+        message = message .. ". Flagged categories: " .. concat(fired, ", ")
+    end
+    local usage = ctx.llm_raw_usage
+    if not usage and adapter.empty_usage then
+        usage = adapter.empty_usage()
+    end
+    return adapter.build_deny_response({
+        text = message,
+        model = ctx.var.request_llm_model,
+        usage = usage or { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 },
+        stream = ctx.var.request_type == "ai_stream",
+    })
+end
+
+
+-- Coerce canonical {role, content} messages into what Lakera /v2/guard accepts:
+-- the role is kept and the content becomes a plain string. Adapters such as
+-- openai-chat may hand back body.messages untouched, so content can be a
+-- multimodal array or nil (tool-call turns). Text parts are joined with a space;
+-- messages without a string role or without any text are dropped.
+local function normalize_messages(messages)
+    local normalized = {}
+    for _, msg in ipairs(messages or {}) do
+        -- `body` ends up false/nil for entries that cannot contribute any text
+        local body = type(msg) == "table" and type(msg.role) == "string" and msg.content
+        local flat
+        if type(body) == "string" then
+            flat = body
+        elseif type(body) == "table" then
+            -- multimodal content: keep only the well-formed text parts
+            local pieces = {}
+            for _, item in ipairs(body) do
+                local is_text = type(item) == "table" and item.type == "text"
+                if is_text and type(item.text) == "string" then
+                    core.table.insert(pieces, item.text)
+                end
+            end
+            flat = concat(pieces, " ")
+        end
+        if flat and flat ~= "" then
+            core.table.insert(normalized, { role = msg.role, content = flat })
+        end
+    end
+    return normalized
+end
+
+
+-- Scan `messages` with Lakera Guard. Returns nothing when the request may go on,
+-- or conf.deny_code plus a deny body when it has to be rejected.
+local function request_content_moderation(ctx, conf, messages)
+    if not messages or #messages == 0 then
+        return
+    end
+
+    local verdict, scan_err = client.scan(conf, messages)
+    if scan_err and conf.fail_open then
+        core.log.warn("ai-lakera-guard: ", scan_err, "; fail_open=true, allowing request")
+        return
+    end
+    if scan_err then
+        core.log.error("ai-lakera-guard: ", scan_err, "; fail_open=false, blocking request")
+        return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message)
+    end
+    if not verdict.flagged then
+        return
+    end
+
+    -- The complete per-detector verdict (fired or not) goes to the log, which
+    -- keeps alert-mode hits and blocked requests equally auditable.
+    core.log.warn("ai-lakera-guard: request flagged by Lakera Guard",
+                  ", breakdown: ", core.json.encode(verdict.breakdown),
+                  ", request_uuid: ", verdict.request_uuid or "")
+
+    if conf.action ~= "alert" then
+        return conf.deny_code,
+               deny_message(ctx, conf, conf.request_failure_message, verdict.breakdown)
+    end
+end
+
+
+-- Forward an "unable to inspect this request" condition, together with
+-- conf.fail_mode, to binding.on_unsupported. Returns its (code, body) when it
+-- reports the request as handled; returns nothing otherwise.
+local function unsupported(conf, ctx, reason, body)
+    local handled, code, resp = binding.on_unsupported(conf.fail_mode, _M.name, ctx,
+                                                       reason, 500, body)
+    if handled then
+        return code, resp
+    end
+end
+
+
+-- Access phase handler: extracts the conversation from the JSON request body via
+-- the client's AI protocol adapter and has Lakera Guard scan it. Returns the
+-- deny code and body (after setting a matching Content-Type) when the request
+-- is rejected, and nothing when it is allowed through.
+function _M.access(conf, ctx)
+    if not ctx.picked_ai_instance then
+        return unsupported(conf, ctx,
+            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
+            "no ai instance picked, ai-lakera-guard plugin must be used with "
+            .. "ai-proxy or ai-proxy-multi plugin")
+    end
+
+    local request_tab, err = core.request.get_json_request_body_table()
+    if not request_tab then
+        local reason = "failed to read request body: " .. (err or "unknown error")
+        return unsupported(conf, ctx, reason, reason)
+    end
+
+    local proto = protocols.get(ctx.ai_client_protocol)
+    if not proto or not proto.get_messages then
+        local reason = "unsupported protocol: " .. (ctx.ai_client_protocol or "unknown")
+        return unsupported(conf, ctx, reason, reason)
+    end
+
+    local messages = normalize_messages(proto.get_messages(request_tab))
+    if #messages == 0 and proto.extract_request_content then
+        -- No role-tagged messages could be recovered from this body, so scan the
+        -- protocol's flat text extraction as one synthetic user message.
+        local flat = concat(proto.extract_request_content(request_tab), " ")
+        if flat ~= "" then
+            messages = { { role = "user", content = flat } }
+        end
+    end
+
+    local code, body = request_content_moderation(ctx, conf, messages)
+    if not code then
+        return
+    end
+
+    -- Streamed requests get an SSE content type, all others JSON.
+    local is_stream = ctx.var.request_type == "ai_stream"
+    core.response.set_header("Content-Type",
+                             is_stream and "text/event-stream" or "application/json")
+    return code, body
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-lakera-guard/client.lua b/apisix/plugins/ai-lakera-guard/client.lua
@@ -0,0 +1,99 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local core = require("apisix.core")
+local http = require("resty.http")
+
+local type = type
+
+local _M = {}
+
+
+-- Ask Lakera Guard (/v2/guard) for a verdict on a conversation.
+--
+-- `messages` must already be in Lakera's {role, content} shape. It is sent
+-- unchanged, so the system / user / assistant turns that Lakera's message-based
+-- policy acts on are preserved rather than collapsed into one user message.
+--
+-- Returns a result table on success. When no verdict can be obtained (transport
+-- error or timeout, any status other than 200, undecodable or non-object body)
+-- returns nil plus an error string.
+--
+-- result fields:
+--   flagged      (boolean)     - true only when the response has `flagged: true`
+--   breakdown    (array|nil)   - per-detector results exactly as Lakera sent
+--                                them, detected and non-detected alike, so the
+--                                whole verdict can be logged; choosing what to
+--                                show the client is the caller's job
+--   request_uuid (string|nil)  - Lakera trace id (metadata.request_uuid), if any
+function _M.scan(conf, messages)
+    -- The per-detector breakdown is always requested so a flagged verdict can be
+    -- logged in full; whether clients get to see it is decided separately through
+    -- reveal_failure_categories.
+    --
+    -- `payload = true` is deliberately not sent: it would make Lakera return the
+    -- matched PII / profanity / regex spans, which this phase does not consume
+    -- and which may carry sensitive text. A future PII-redaction phase should
+    -- enable it.
+    local req_body = { messages = messages, breakdown = true }
+    if conf.project_id then
+        req_body.project_id = conf.project_id
+    end
+
+    local headers = { ["Content-Type"] = "application/json" }
+    local api_key = conf.api_key
+    if api_key and api_key ~= "" then
+        headers["Authorization"] = "Bearer " .. api_key
+    end
+
+    local httpc = http.new()
+    httpc:set_timeout(conf.timeout)
+
+    local res, req_err = httpc:request_uri(conf.lakera_endpoint, {
+        method = "POST",
+        body = core.json.encode(req_body),
+        headers = headers,
+        ssl_verify = conf.ssl_verify,
+    })
+    if not res then
+        return nil, "failed to request Lakera Guard: " .. (req_err or "unknown error")
+    end
+    -- Only a plain 200 is accepted as carrying a verdict.
+    if res.status ~= 200 then
+        return nil, "Lakera Guard returned status " .. res.status
+    end
+
+    local data, decode_err = core.json.decode(res.body, { null_as_nil = true })
+    if not data then
+        return nil, "failed to decode Lakera Guard response: "
+                        .. (decode_err or "unknown error")
+    end
+    if type(data) ~= "table" then
+        return nil, "unexpected Lakera Guard response: expected a JSON object"
+    end
+
+    local result = { flagged = data.flagged == true }
+    if type(data.breakdown) == "table" then
+        result.breakdown = data.breakdown
+    end
+    if type(data.metadata) == "table" then
+        result.request_uuid = data.metadata.request_uuid or nil
+    end
+    return result
+end
+
+
+return _M