Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,9 @@ install: runtime
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
$(ENV_INSTALL) apisix/plugins/ai-lakera-guard/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp
Expand Down
1 change: 1 addition & 0 deletions apisix/cli/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ local _M = {
"ai-proxy",
"ai-aws-content-moderation",
"ai-aliyun-content-moderation",
"ai-lakera-guard",
"proxy-mirror",
"graphql-proxy-cache",
"proxy-rewrite",
Expand Down
209 changes: 209 additions & 0 deletions apisix/plugins/ai-lakera-guard.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
local core = require("apisix.core")
local schema_mod = require("apisix.plugins.ai-lakera-guard.schema")
local client = require("apisix.plugins.ai-lakera-guard.client")
local protocols = require("apisix.plugins.ai-protocols")
local binding = require("apisix.plugins.ai-protocols.binding")

local ipairs = ipairs
local type = type
local concat = table.concat


-- Plugin descriptor returned by this module: the plugin's name, version,
-- priority, and the configuration schema taken from the schema module.
local _M = {
    name = "ai-lakera-guard",
    version = 0.1,
    priority = 1028,
    schema = schema_mod.schema,
}


-- Validate a plugin configuration by delegating to the check_schema function
-- of the ai-lakera-guard schema module. Whatever that function returns is
-- passed through to the caller unchanged.
function _M.check_schema(conf)
    return schema_mod.check_schema(conf)
end


-- Build the list of detector labels used in the client-facing reveal.
--
-- Only the detectors that actually fired (detected == true) are included; the
-- raw breakdown may also carry non-detected entries, which belong in the log
-- but not in the deny message.
--
-- The breakdown comes from an upstream JSON response, so every field is
-- type-checked before use: a malformed entry is skipped (or its result is
-- ignored) instead of raising during string concatenation here or in the
-- caller's concat().
--
-- @param breakdown  array of detector entries, or nil
-- @return array of strings shaped "<detector_type>" or
--         "<detector_type> (<result>)"; empty when nothing fired
local function format_breakdown(breakdown)
    local parts = {}
    if type(breakdown) ~= "table" then
        return parts
    end

    for _, entry in ipairs(breakdown) do
        if type(entry) == "table" and entry.detected == true
                and type(entry.detector_type) == "string" then
            local part = entry.detector_type
            local result = entry.result
            local result_type = type(result)
            -- Only values that concatenate safely are appended; anything else
            -- (table, boolean, userdata) is dropped from the label.
            if (result_type == "string" and result ~= "")
                    or result_type == "number" then
                part = part .. " (" .. result .. ")"
            end
            parts[#parts + 1] = part
        end
    end
    return parts
end


-- Build the body returned to the client when a request is denied.
--
-- The protocol adapter for ctx.ai_client_protocol renders the response. If no
-- adapter is registered for that protocol the plain `message` is returned
-- as-is and the condition is logged. When conf.reveal_failure_categories is
-- set, the detectors that fired are appended to the message text.
--
-- @param ctx        request context
-- @param conf       plugin configuration
-- @param message    base deny text
-- @param breakdown  optional per-detector results used for the reveal
local function deny_message(ctx, conf, message, breakdown)
    local protocol_name = ctx.ai_client_protocol
    local adapter = protocols.get(protocol_name)
    if not adapter then
        core.log.error("ai-lakera-guard: unsupported protocol: ",
                       protocol_name or "unknown")
        return message
    end

    local deny_text = message
    if conf.reveal_failure_categories then
        local categories = format_breakdown(breakdown)
        if #categories > 0 then
            deny_text = deny_text .. ". Flagged categories: " .. concat(categories, ", ")
        end
    end

    -- Usage reported with the deny response: what the context already carries,
    -- else the adapter's own empty usage, else an all-zero fallback.
    local token_usage = ctx.llm_raw_usage
    if not token_usage and adapter.empty_usage then
        token_usage = adapter.empty_usage()
    end
    if not token_usage then
        token_usage = { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 }
    end

    return adapter.build_deny_response({
        text = deny_text,
        model = ctx.var.request_llm_model,
        usage = token_usage,
        stream = ctx.var.request_type == "ai_stream",
    })
end


-- Reduce one message's `content` to plain text.
-- A string is returned unchanged; an array is treated as multimodal parts and
-- the string `text` of every part with type == "text" is joined with a single
-- space; any other value (nil, number, ...) yields nil.
local function flatten_content(content)
    if type(content) == "string" then
        return content
    end
    if type(content) ~= "table" then
        return nil
    end

    local texts = {}
    for _, part in ipairs(content) do
        if type(part) == "table" and part.type == "text"
                and type(part.text) == "string" then
            texts[#texts + 1] = part.text
        end
    end
    return concat(texts, " ")
end


-- Normalize a protocol's canonical {role, content} messages into the shape
-- Lakera /v2/guard accepts: role preserved, content coerced to a plain string.
-- Some adapters (e.g. openai-chat) return body.messages verbatim, so a message's
-- content can be a multimodal array or nil (tool-call turns); the text parts
-- are flattened and messages that carry no text are dropped.
--
-- Because the input may be client-supplied data passed through verbatim,
-- anything that is not a table (nil, string, number, ...) yields an empty
-- result instead of raising inside ipairs().
--
-- @param messages  array of message tables, or any other value
-- @return array of { role = <string>, content = <non-empty string> }
local function normalize_messages(messages)
    local out = {}
    if type(messages) ~= "table" then
        return out
    end

    for _, message in ipairs(messages) do
        if type(message) == "table" and type(message.role) == "string" then
            local text = flatten_content(message.content)
            if text and text ~= "" then
                out[#out + 1] = { role = message.role, content = text }
            end
        end
    end
    return out
end


-- Scan the request messages with Lakera Guard and decide whether to block.
--
-- Returns nothing when the request may proceed: there is nothing to scan, the
-- scan failed with fail_open enabled, the content was not flagged, or the
-- plugin runs in "alert" mode. Otherwise returns conf.deny_code followed by
-- the deny body built by deny_message().
--
-- @param ctx       request context
-- @param conf      plugin configuration
-- @param messages  normalized {role, content} messages to scan
local function request_content_moderation(ctx, conf, messages)
    local nothing_to_scan = not messages or #messages == 0
    if nothing_to_scan then
        return
    end

    local verdict, scan_err = client.scan(conf, messages)
    if scan_err then
        if not conf.fail_open then
            core.log.error("ai-lakera-guard: ", scan_err,
                           "; fail_open=false, blocking request")
            return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message)
        end
        core.log.warn("ai-lakera-guard: ", scan_err,
                      "; fail_open=true, allowing request")
        return
    end

    if verdict.flagged then
        -- The complete per-detector verdict (every entry, detected or not) is
        -- logged so that alert mode and blocked requests are both auditable.
        core.log.warn("ai-lakera-guard: request flagged by Lakera Guard",
                      ", breakdown: ", core.json.encode(verdict.breakdown),
                      ", request_uuid: ", verdict.request_uuid or "")

        if conf.action ~= "alert" then
            return conf.deny_code,
                   deny_message(ctx, conf, conf.request_failure_message, verdict.breakdown)
        end
    end
end


-- Report a request this plugin cannot inspect through binding.on_unsupported
-- and turn its answer into access-phase return values: the (code, body) pair
-- when on_unsupported reports the condition as handled, nothing otherwise.
-- How conf.fail_mode influences that decision is defined by the binding
-- module, not here.
local function reject_unsupported(conf, ctx, reason, client_message)
    local handled, code, body = binding.on_unsupported(
        conf.fail_mode, _M.name, ctx, reason, 500, client_message)
    if handled then
        return code, body
    end
end


-- Access-phase handler: extract the conversation from the request body, have
-- Lakera Guard scan it, and deny the request when the scan says so.
--
-- Returns nothing when the request may continue, or (status code, body) when
-- it is rejected. The request cannot be inspected when no AI instance was
-- picked, the JSON body cannot be read, or the client protocol has no message
-- extractor; those cases are delegated to reject_unsupported().
--
-- @param conf  plugin configuration
-- @param ctx   request context
function _M.access(conf, ctx)
    if not ctx.picked_ai_instance then
        return reject_unsupported(conf, ctx,
            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
            "no ai instance picked, ai-lakera-guard plugin must be used with "
                .. "ai-proxy or ai-proxy-multi plugin")
    end

    local request_tab, err = core.request.get_json_request_body_table()
    if not request_tab then
        local reason = "failed to read request body: " .. (err or "unknown error")
        return reject_unsupported(conf, ctx, reason, reason)
    end

    local proto = protocols.get(ctx.ai_client_protocol)
    if not proto or not proto.get_messages then
        local reason = "unsupported protocol: " .. (ctx.ai_client_protocol or "unknown")
        return reject_unsupported(conf, ctx, reason, reason)
    end

    local messages = normalize_messages(proto.get_messages(request_tab))
    if #messages == 0 and proto.extract_request_content then
        -- The protocol has no role-preserving representation for this body;
        -- fall back to a single user message built from the flat extraction.
        -- The extraction is only joined when it is a table, so an adapter that
        -- returns nothing leaves `messages` empty instead of raising in concat.
        local extracted = proto.extract_request_content(request_tab)
        if type(extracted) == "table" then
            local text = concat(extracted, " ")
            if text ~= "" then
                messages = { { role = "user", content = text } }
            end
        end
    end

    local code, message = request_content_moderation(ctx, conf, messages)
    if code then
        -- Match the Content-Type of the deny body to the request type.
        if ctx.var.request_type == "ai_stream" then
            core.response.set_header("Content-Type", "text/event-stream")
        else
            core.response.set_header("Content-Type", "application/json")
        end
        return code, message
    end
end


return _M
99 changes: 99 additions & 0 deletions apisix/plugins/ai-lakera-guard/client.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
local core = require("apisix.core")
local http = require("resty.http")

local type = type

local _M = {}


-- Call Lakera Guard /v2/guard with the given messages.
--
-- `messages` is the role-tagged conversation in Lakera's {role, content} shape;
-- it is forwarded verbatim so the system / user / assistant turns Lakera's
-- message-based policy acts on are preserved, rather than being flattened into a
-- single user message.
--
-- On success returns a result table. On failure returns nil + an error string:
-- the request body could not be encoded, the HTTP request failed (timeout,
-- connection error), the response status was anything other than 200, or the
-- response body was not a JSON object.
--
-- result fields:
-- flagged (boolean) — Lakera's primary enforcement signal; true only when the
--                     response carries a literal boolean true
-- breakdown (array|nil) — Lakera's per-detector results, passed through
--                     verbatim and unfiltered (both detected and
--                     non-detected entries) so the full verdict can be
--                     logged exactly as Lakera returned it; selecting
--                     which detectors to surface is left to the caller
-- request_uuid (string|nil) — Lakera trace id, when present as a string
--
-- @param conf      plugin configuration; reads project_id, api_key, timeout,
--                  lakera_endpoint and ssl_verify
-- @param messages  array of { role, content } messages to scan
function _M.scan(conf, messages)
    local body = {
        messages = messages,
        -- Always request the per-detector breakdown so flagged verdicts can be
        -- logged in full (with confidence results); the client-facing reveal is
        -- gated separately by reveal_failure_categories.
        breakdown = true,
    }
    if conf.project_id then
        body.project_id = conf.project_id
    end
    -- A future PII-redaction phase should set `body.payload = true` to have Lakera
    -- return the matched PII / profanity / regex spans. We don't request it here:
    -- this phase doesn't consume those spans, and they can contain sensitive text
    -- we shouldn't pull into the gateway unnecessarily.

    -- Encode before sending so an encoding failure is reported through the
    -- same nil + err path as every other failure, instead of issuing a POST
    -- without a body.
    local payload, encode_err = core.json.encode(body)
    if not payload then
        return nil, "failed to encode Lakera Guard request: "
                    .. (encode_err or "unknown error")
    end

    local headers = {
        ["Content-Type"] = "application/json",
    }
    if conf.api_key and conf.api_key ~= "" then
        headers["Authorization"] = "Bearer " .. conf.api_key
    end

    local httpc = http.new()
    httpc:set_timeout(conf.timeout)

    local res, err = httpc:request_uri(conf.lakera_endpoint, {
        method = "POST",
        body = payload,
        headers = headers,
        ssl_verify = conf.ssl_verify,
    })
    if not res then
        return nil, "failed to request Lakera Guard: " .. (err or "unknown error")
    end
    if res.status ~= 200 then
        return nil, "Lakera Guard returned status " .. res.status
    end

    local data, decode_err = core.json.decode(res.body, { null_as_nil = true })
    if not data then
        return nil, "failed to decode Lakera Guard response: "
                    .. (decode_err or "unknown error")
    end
    if type(data) ~= "table" then
        return nil, "unexpected Lakera Guard response: expected a JSON object"
    end

    -- Keep the documented (string|nil) contract for request_uuid: callers
    -- hand it to log calls, so a non-string value is dropped.
    local request_uuid
    local metadata = data.metadata
    if type(metadata) == "table" and type(metadata.request_uuid) == "string" then
        request_uuid = metadata.request_uuid
    end

    local breakdown
    if type(data.breakdown) == "table" then
        breakdown = data.breakdown
    end

    return {
        flagged = data.flagged == true,
        breakdown = breakdown,
        request_uuid = request_uuid,
    }
end


return _M
Loading
Loading