Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,9 @@ install: runtime
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-cache
$(ENV_INSTALL) apisix/plugins/ai-cache/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-cache

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp
Expand Down
1 change: 1 addition & 0 deletions apisix/cli/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ local _M = {
"ai-rate-limiting",
"ai-proxy-multi",
"ai-proxy",
"ai-cache",
"ai-aws-content-moderation",
"ai-aliyun-content-moderation",
"proxy-mirror",
Expand Down
199 changes: 199 additions & 0 deletions apisix/plugins/ai-cache.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local schema = require("apisix.plugins.ai-cache.schema")
local key_mod = require("apisix.plugins.ai-cache.key")
local redis_util = require("apisix.utils.redis")

local ngx = ngx
local ngx_null = ngx.null
local ipairs = ipairs
local str_sub = string.sub

-- Response header carrying the cache outcome recorded in ctx.ai_cache_status
-- ("HIT", "MISS" or "BYPASS").
local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
-- Response header set on a HIT: ngx.time() minus the entry's created_at,
-- clamped to be non-negative.
local CACHE_AGE_HEADER = "X-AI-Cache-Age"
-- Fallback expiry passed to Redis `SET ... EX` when conf.exact.ttl is absent.
local DEFAULT_TTL = 3600

-- Plugin descriptor. Priority 1035 places ai-cache between ai-proxy (1040)
-- and ai-rate-limiting (1030) in the plugin list.
local _M = {
version = 0.1,
priority = 1035,
name = "ai-cache",
schema = schema,
}


-- Validate a plugin configuration table against the ai-cache schema.
-- Returns whatever core.schema.check returns for (schema, conf).
function _M.check_schema(conf)
    local check = core.schema.check
    return check(schema, conf)
end


-- Hand a Redis connection back to the keepalive pool.
-- The idle timeout and pool size come from conf, defaulting to 10000 and 100.
-- A failure is only logged; nothing is returned to the caller.
local function release(conf, red)
    local idle_timeout = conf.redis_keepalive_timeout or 10000
    local pool_size = conf.redis_keepalive_pool or 100

    local ok, err = red:set_keepalive(idle_timeout, pool_size)
    if ok then
        return
    end

    core.log.warn("ai-cache: failed to set redis keepalive: ", err)
end


-- Answer the request straight from a cached envelope.
-- Marks the request as a HIT, optionally emits the status/age headers
-- (unless conf.cache_headers is explicitly false) and terminates the
-- request with a 200 JSON response carrying cached.body.
local function serve_hit(conf, ctx, cached)
    ctx.ai_cache_status = "HIT"

    local expose_headers = conf.cache_headers ~= false
    if expose_headers then
        core.response.set_header(CACHE_STATUS_HEADER, "HIT")

        -- an entry without created_at is reported with age 0
        local now = ngx.time()
        local age = now - (cached.created_at or now)
        if age < 0 then
            age = 0
        end
        core.response.set_header(CACHE_AGE_HEADER, age)
    end

    core.response.set_header("Content-Type", "application/json")
    return core.response.exit(200, cached.body)
end
Comment thread
janiussyafiq marked this conversation as resolved.


local type = type


-- Access phase: decide whether the request is cacheable, compute its cache
-- key and try to answer it from Redis.
--
-- Outcome is recorded in ctx.ai_cache_status:
--   "BYPASS" - streaming request, explicit cache_bypass match, or the
--              request body could not be read as JSON
--   "HIT"    - a usable entry was found; the request is answered here
--   "MISS"   - nothing usable was found (or Redis failed); the request
--              continues upstream and ctx.ai_cache_key is left set for the
--              later write-back
function _M.access(conf, ctx)
    -- Streaming responses are not cached in PR-1 (SSE replay is a later
    -- increment). ai-proxy (higher priority) has already classified the
    -- request, so bypass before doing any work.
    if ctx.var.request_type == "ai_stream" then
        ctx.ai_cache_status = "BYPASS"
        return
    end

    -- explicit opt-out: any cache_bypass reference resolving to a value
    -- that is non-empty and not "0" skips the cache (proxy-cache
    -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading
    -- "$" marks a variable to resolve; anything else is a literal.
    if conf.cache_bypass then
        for _, ref in ipairs(conf.cache_bypass) do
            local val = ref
            if str_sub(ref, 1, 1) == "$" then
                val = ctx.var[str_sub(ref, 2)]
            end
            if val ~= nil and val ~= "" and val ~= "0" then
                ctx.ai_cache_status = "BYPASS"
                return
            end
        end
    end

    local body, err = core.request.get_json_request_body_table()
    if not body then
        core.log.warn("ai-cache: cannot read request body, bypassing: ", err)
        ctx.ai_cache_status = "BYPASS"
        return
    end

    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
                       .. ":" .. key_mod.fingerprint(ctx, body)

    local red
    red, err = redis_util.new(conf)
    if not red then
        -- fail-open: never let a cache-backend outage break the request.
        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end

    local res
    res, err = red:get(ctx.ai_cache_key)
    release(conf, red)
    if err then
        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end

    if res ~= nil and res ~= ngx_null then
        local cached = core.json.decode(res)
        -- The stored value may decode to a non-table (e.g. a bare number or
        -- string); indexing that would raise, so treat it as malformed.
        if type(cached) == "table" and cached.body then
            return serve_hit(conf, ctx, cached)
        end
        core.log.warn("ai-cache: discarding malformed cache entry for ", ctx.ai_cache_key)
    end

    ctx.ai_cache_status = "MISS"
end


-- Header-filter phase: expose the cache outcome recorded during access as a
-- response header, unless no outcome was recorded or headers are disabled.
function _M.header_filter(conf, ctx)
    local status = ctx.ai_cache_status
    if not status or conf.cache_headers == false then
        return
    end

    core.response.set_header(CACHE_STATUS_HEADER, status)
end


-- Body-filter phase: accumulate the upstream response body in
-- ctx.ai_cache_buf so the log phase can write it back to the cache.
--
-- Buffering is bounded: once the buffer has grown past
-- conf.max_cache_body_size no further chunks are appended. The buffer is
-- deliberately left *larger* than the cap in that case, so a size check
-- against the same cap still sees it as oversized and never stores a
-- truncated body.
function _M.body_filter(conf, ctx)
    -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
    if ctx.ai_cache_status ~= "MISS" then
        return
    end

    -- error responses are never cached, so don't hold their bodies in memory.
    local status = ngx.status
    if status < 200 or status >= 300 then
        return
    end

    local chunk = ngx.arg[1]
    if not chunk or #chunk == 0 then
        return
    end

    local buf = ctx.ai_cache_buf
    if not buf then
        ctx.ai_cache_buf = chunk
        return
    end

    -- already past the cap: the response will not be cached, stop growing.
    if #buf > (conf.max_cache_body_size or 1048576) then
        return
    end

    ctx.ai_cache_buf = buf .. chunk
end
Comment thread
janiussyafiq marked this conversation as resolved.
Comment thread
janiussyafiq marked this conversation as resolved.


-- The response-capturing phases (body_filter / log) run in contexts where
-- cosockets are disabled, so the Redis write is deferred to a 0-delay timer
-- (timers run in a light thread where cosockets are allowed).
--
-- Timer callback storing one response under cache_key.
--   premature     - set by ngx.timer when the worker is shutting down; the
--                   write is skipped in that case
--   conf          - plugin configuration (Redis options, exact.ttl)
--   cache_key     - Redis key computed in the access phase
--   response_body - full upstream response body to store
-- The stored value is a JSON envelope { body, created_at } with an expiry of
-- conf.exact.ttl, falling back to DEFAULT_TTL. All failures are logged and
-- swallowed: a cache write must never affect request handling.
local function write_to_cache(premature, conf, cache_key, response_body)
    if premature then
        return
    end

    -- encode before connecting so a failure here costs no connection.
    -- NOTE(review): assumes core.json.encode returns nil, err on failure.
    local envelope, err = core.json.encode({
        body = response_body,
        created_at = ngx.time(),
    })
    if not envelope then
        core.log.warn("ai-cache: failed to encode cache entry: ", err)
        return
    end

    local red
    red, err = redis_util.new(conf)
    if not red then
        core.log.warn("ai-cache: redis unavailable on write: ", err)
        return
    end

    local ttl = (conf.exact and conf.exact.ttl) or DEFAULT_TTL
    local ok
    ok, err = red:set(cache_key, envelope, "EX", ttl)
    -- Release on both outcomes, matching the read path: a failed SET must
    -- not leave the connection outside the keepalive pool.
    release(conf, red)
    if not ok then
        core.log.warn("ai-cache: redis set failed: ", err)
    end
end


-- Log phase: schedule the write-back of a successful MISS response.
-- Nothing is scheduled unless the request was a MISS with a cache key, the
-- status is 2xx, and a non-empty body no larger than
-- conf.max_cache_body_size (default 1048576) was buffered.
function _M.log(conf, ctx)
    local cache_key = ctx.ai_cache_key
    if ctx.ai_cache_status ~= "MISS" or not cache_key then
        return
    end

    -- write-on-success only: never cache an error response.
    local status = ngx.status
    if status < 200 or status >= 300 then
        return
    end

    local payload = ctx.ai_cache_buf
    if not payload or payload == "" then
        return
    end

    -- don't cache responses larger than the configured cap.
    local limit = conf.max_cache_body_size or 1048576
    if #payload > limit then
        return
    end

    local scheduled, err = ngx.timer.at(0, write_to_cache, conf, cache_key, payload)
    if not scheduled then
        core.log.warn("ai-cache: failed to schedule cache write: ", err)
    end
end


return _M
72 changes: 72 additions & 0 deletions apisix/plugins/ai-cache/key.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local protocols = require("apisix.plugins.ai-protocols")
local sha256 = require("resty.sha256")
local to_hex = require("resty.string").to_hex

local ipairs = ipairs
local concat = table.concat

local _M = {}


-- Return the SHA-256 digest of string `s` as a hex string.
local function hex_digest(s)
    local hasher = sha256:new()
    hasher:update(s)
    local raw = hasher:final()
    return to_hex(raw)
end


-- Compute the content fingerprint of an LLM request.
-- The fingerprint is the hex SHA-256 of a stable JSON encoding of:
--   protocol - ctx.ai_client_protocol (or "")
--   model    - ctx.var.request_llm_model, else body.model (or "")
--   messages - protocols.get_messages(body, ctx) (or an empty table)
--   params   - a copy of the body without `messages`, `model` and `stream`
function _M.fingerprint(ctx, body)
    -- work on a copy so the caller's request body is left untouched
    local extra = core.table.deepcopy(body)
    extra.messages, extra.model, extra.stream = nil, nil, nil

    local model = ctx.var.request_llm_model or body.model or ""
    local messages = protocols.get_messages(body, ctx) or {}

    local canonical = core.json.stably_encode({
        protocol = ctx.ai_client_protocol or "",
        model = model,
        messages = messages,
        params = extra,
    })
    return hex_digest(canonical)
end
Comment thread
janiussyafiq marked this conversation as resolved.


local escape_uri = ngx.escape_uri


-- Build the scope segment of a cache key from conf.cache_key.
-- Returns "shared" when neither include_consumer nor any include_vars entry
-- is configured. Otherwise returns "name=value" pairs joined by ":", with
-- the consumer first (when enabled) followed by each variable in
-- include_vars order.
--
-- Values are URI-escaped before being joined. They can come from the
-- request (ctx.var), so a raw ":" or "=" inside a value could otherwise
-- make two different scopes serialise to the same string and share cache
-- entries. Values made only of unreserved characters are unchanged.
function _M.scope(conf, ctx)
    local ck = conf.cache_key
    local by_consumer = ck and ck.include_consumer
    local inc_vars = ck and ck.include_vars
    local has_vars = inc_vars and #inc_vars > 0
    if not by_consumer and not has_vars then
        return "shared"
    end

    local parts = {}
    if by_consumer then
        parts[#parts + 1] = "consumer=" .. escape_uri(ctx.consumer_name or "")
    end
    if has_vars then
        for _, name in ipairs(inc_vars) do
            parts[#parts + 1] = name .. "=" .. escape_uri(ctx.var[name] or "")
        end
    end
    return concat(parts, ":")
end


return _M
91 changes: 91 additions & 0 deletions apisix/plugins/ai-cache/schema.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local redis_schema = require("apisix.utils.redis-schema")

-- Private deep copy of the shared Redis option schemas, keyed by policy name.
local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)

-- JSON schema for the ai-cache plugin configuration.
local _M = {
    type = "object",
    properties = {
        -- enabled cache layers; "exact" is the only accepted value
        layers = {
            type = "array",
            items = {
                enum = { "exact" },
            },
            minItems = 1,
            uniqueItems = true,
            default = { "exact" },
        },

        -- settings of the exact-match layer
        exact = {
            type = "object",
            properties = {
                ttl = { type = "integer", minimum = 1, default = 3600 },
            },
            default = {},
        },

        -- what, besides the request content, partitions the cache
        cache_key = {
            type = "object",
            properties = {
                include_consumer = { type = "boolean", default = false },
                include_vars = {
                    type = "array",
                    items = { type = "string" },
                    default = {},
                },
            },
            default = {},
        },

        -- responses larger than this many bytes are not cached
        max_cache_body_size = {
            type = "integer", minimum = 0, default = 1048576,
        },

        -- whether to emit the cache status / age response headers
        cache_headers = {
            type = "boolean", default = true,
        },

        -- Each entry is either "$var_name" (resolved per request) or a
        -- non-empty literal not starting with "$". The literal branch uses
        -- ".*" rather than ".+" so one-character literals such as "1" or
        -- "0" pass validation.
        cache_bypass = {
            type = "array",
            minItems = 1,
            items = {
                type = "string",
                pattern = [[(^[^\$].*$|^\$[0-9a-zA-Z_]+$)]],
            },
        },

        policy = {
            type = "string",
            enum = { "redis" },
            default = "redis",
        },
    },
    -- with the redis policy, the Redis connection options apply
    ["if"] = {
        properties = {
            policy = {
                enum = { "redis" },
            },
        },
    },
    ["then"] = policy_to_additional_properties.redis,
    encrypt_fields = { "redis_password" },
}

return _M
1 change: 1 addition & 0 deletions conf/config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ plugins: # plugin list (sorted by priority)
- ai-aws-content-moderation # priority: 1050
- ai-proxy-multi # priority: 1041
- ai-proxy # priority: 1040
- ai-cache # priority: 1035
- ai-rate-limiting # priority: 1030
- ai-aliyun-content-moderation # priority: 1029
- proxy-mirror # priority: 1010
Expand Down
1 change: 1 addition & 0 deletions t/admin/plugins.t
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ ai-rag
ai-aws-content-moderation
ai-proxy-multi
ai-proxy
ai-cache
ai-rate-limiting
ai-aliyun-content-moderation
proxy-mirror
Expand Down
Loading
Loading