Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
8cd41f8
feat: add ai-cache plugin to installation and configuration
janiussyafiq Jun 19, 2026
1ea1aaa
feat: implement ai-cache plugin with Redis support and testing
janiussyafiq Jun 19, 2026
5c04222
Merge remote-tracking branch 'upstream/master' into feat/ai-cache-exact
janiussyafiq Jun 22, 2026
2d7eb3b
fix(ai-cache): canonical-encode fingerprint and switch bypass to bypa…
janiussyafiq Jun 23, 2026
d91e68a
feat(ai-cache): enhance body filter to handle oversized responses and…
janiussyafiq Jun 23, 2026
652a89f
feat(ai-cache): optimize body caching logic and enforce header valida…
janiussyafiq Jun 23, 2026
4ac2398
docs(ai-cache): add English and Chinese plugin documentation
janiussyafiq Jun 23, 2026
84c5ccf
feat(ai-cache): implement canonical JSON encoding and enhance cache k…
janiussyafiq Jun 23, 2026
9024b70
feat(ai-cache): update tests for exact.ttl validation and add cross-r…
janiussyafiq Jun 23, 2026
6f15de7
fix(json): remove redundant require statement in json.lua
janiussyafiq Jun 24, 2026
4775bfc
feat(ai-cache): enhance error handling for unsupported requests and i…
janiussyafiq Jun 24, 2026
edf5b51
Merge remote-tracking branch 'upstream/master' into feat/ai-cache-exact
janiussyafiq Jun 24, 2026
d6c1570
feat(ai-cache): add model to cache key generation and update test cases
janiussyafiq Jun 25, 2026
db70638
feat(ai-cache): enhance fingerprinting to include client request and …
janiussyafiq Jun 26, 2026
95a9243
feat(ai-cache): refine cache key generation and enhance documentation…
janiussyafiq Jun 26, 2026
feabb7f
style(ai-cache): localize tostring global in key.lua
janiussyafiq Jun 26, 2026
b4a6e9b
style(ai-cache): drop trailing whitespace in ai-cache.t
janiussyafiq Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,9 @@ install: runtime
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-cache
$(ENV_INSTALL) apisix/plugins/ai-cache/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-cache

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp
Expand Down
1 change: 1 addition & 0 deletions apisix/cli/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ local _M = {
"ai-rate-limiting",
"ai-proxy-multi",
"ai-proxy",
"ai-cache",
"ai-aws-content-moderation",
"ai-aliyun-content-moderation",
"proxy-mirror",
Expand Down
199 changes: 199 additions & 0 deletions apisix/plugins/ai-cache.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local schema = require("apisix.plugins.ai-cache.schema")
local key_mod = require("apisix.plugins.ai-cache.key")
local redis_util = require("apisix.utils.redis")

local ngx = ngx
local ngx_null = ngx.null
local ipairs = ipairs
local concat = table.concat

local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
local CACHE_AGE_HEADER = "X-AI-Cache-Age"
local DEFAULT_TTL = 3600
local DEFAULT_MAX_BODY = 1048576

local _M = {
version = 0.1,
priority = 1035,
name = "ai-cache",
schema = schema,
}


-- Validate a plugin configuration table against the ai-cache schema.
-- Returns whatever core.schema.check reports: a success flag followed by
-- an error value when validation fails.
function _M.check_schema(conf)
    local ok, err = core.schema.check(schema, conf)
    return ok, err
end


-- Hand a Redis connection back via set_keepalive once the caller is done.
-- The idle timeout and pool size come from conf, falling back to 10000 and
-- 100 when unset. A failure is only logged; it never propagates.
local function release(conf, red)
    local idle_timeout = conf.redis_keepalive_timeout or 10000
    local pool_size = conf.redis_keepalive_pool or 100

    local ok, err = red:set_keepalive(idle_timeout, pool_size)
    if ok then
        return
    end

    core.log.warn("ai-cache: failed to set redis keepalive: ", err)
end


-- Answer the request straight from a cached envelope.
-- Marks the request as a HIT, optionally exposes the status/age headers
-- (suppressed only when conf.cache_headers is explicitly false), and exits
-- with a 200 JSON response carrying cached.body.
local function serve_hit(conf, ctx, cached)
    ctx.ai_cache_status = "HIT"

    local expose_headers = conf.cache_headers ~= false
    if expose_headers then
        core.response.set_header(CACHE_STATUS_HEADER, "HIT")

        -- an envelope without created_at is reported as age 0
        local now = ngx.time()
        local elapsed = now - (cached.created_at or now)
        if elapsed < 0 then
            elapsed = 0
        end
        core.response.set_header(CACHE_AGE_HEADER, elapsed)
    end

    core.response.set_header("Content-Type", "application/json")
    return core.response.exit(200, cached.body)
end
Comment thread
janiussyafiq marked this conversation as resolved.


local type = type

-- Access-phase handler: decide whether the request is cacheable, build its
-- cache key, and answer from Redis when an entry exists.
--
-- Outcome is recorded in ctx.ai_cache_status:
--   "BYPASS" - streaming request, a bypass_on rule matched, or the request
--              body could not be read as a JSON object;
--   "HIT"    - served from cache (the function returns serve_hit's result);
--   "MISS"   - nothing usable in the cache, or Redis failed (fail-open).
-- ctx.ai_cache_key is set whenever the lookup is attempted.
function _M.access(conf, ctx)
    -- Streaming responses are not cached in PR-1 (SSE replay is a later
    -- increment). ai-proxy (higher priority) has already classified the
    -- request, so bypass before doing any work.
    if ctx.var.request_type == "ai_stream" then
        ctx.ai_cache_status = "BYPASS"
        return
    end

    if conf.bypass_on then
        for _, rule in ipairs(conf.bypass_on) do
            if core.request.header(ctx, rule.header) == rule.equals then
                ctx.ai_cache_status = "BYPASS"
                return
            end
        end
    end

    local body, err = core.request.get_json_request_body_table()
    -- A body that decodes to a scalar (e.g. `123`) is truthy but cannot be
    -- fingerprinted, so anything that is not a table is bypassed as well.
    if type(body) ~= "table" then
        -- NOTE(review): the error value appears to be able to arrive as a
        -- table carrying a `message` field rather than a string - confirm
        -- against core.request. A plain table must not reach the logger,
        -- which rejects tables lacking __tostring.
        local reason = err
        if type(reason) == "table" then
            reason = reason.message
        end
        core.log.warn("ai-cache: cannot read request body, bypassing: ",
                      reason or "request body is not a JSON object")
        ctx.ai_cache_status = "BYPASS"
        return
    end

    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
                       .. ":" .. key_mod.fingerprint(ctx, body)

    local red
    red, err = redis_util.new(conf)
    if not red then
        -- fail-open: never let a cache-backend outage break the request.
        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end

    local res
    res, err = red:get(ctx.ai_cache_key)
    release(conf, red)
    if err then
        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end

    if res ~= nil and res ~= ngx_null then
        local cached = core.json.decode(res)
        -- the envelope must be an object; indexing a decoded number or
        -- boolean would raise instead of being treated as malformed.
        if type(cached) == "table" and cached.body then
            return serve_hit(conf, ctx, cached)
        end
        core.log.warn("ai-cache: discarding malformed cache entry for ", ctx.ai_cache_key)
    end

    ctx.ai_cache_status = "MISS"
end


-- Header-filter handler: publish the cache status recorded on ctx as a
-- response header, unless no status was set or conf.cache_headers is
-- explicitly false.
function _M.header_filter(conf, ctx)
    local status = ctx.ai_cache_status
    if not status or conf.cache_headers == false then
        return
    end

    core.response.set_header(CACHE_STATUS_HEADER, status)
end


-- Body-filter handler: accumulate response chunks of a MISS in
-- ctx.ai_cache_buf so the log phase can store the full body.
-- The buffer is an array of chunks with two bookkeeping fields: `n` (chunk
-- count) and `bytes` (total length). Once the total exceeds
-- conf.max_cache_body_size (or DEFAULT_MAX_BODY) the buffer is dropped and
-- ctx.ai_cache_oversized stops any further collection.
function _M.body_filter(conf, ctx)
    -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
    if ctx.ai_cache_oversized or ctx.ai_cache_status ~= "MISS" then
        return
    end

    local piece = ngx.arg[1]
    if not piece or #piece == 0 then
        return
    end

    local acc = ctx.ai_cache_buf
    if not acc then
        acc = { n = 0, bytes = 0 }
        ctx.ai_cache_buf = acc
    end

    acc.n = acc.n + 1
    acc[acc.n] = piece
    acc.bytes = acc.bytes + #piece

    local limit = conf.max_cache_body_size or DEFAULT_MAX_BODY
    if acc.bytes > limit then
        ctx.ai_cache_buf = nil
        ctx.ai_cache_oversized = true
    end
end
Comment thread
janiussyafiq marked this conversation as resolved.
Comment thread
janiussyafiq marked this conversation as resolved.


-- The response-capturing phases (body_filter / log) run in contexts where
-- cosockets are disabled, so the Redis write is deferred to a 0-delay timer
-- (timers run in a light thread where cosockets are allowed).
-- Timer callback that stores one response in Redis.
-- premature:     timer flag; when set the callback does nothing.
-- conf:          plugin configuration (Redis settings, exact.ttl).
-- cache_key:     Redis key computed during the access phase.
-- response_body: full upstream body to cache.
-- The value written is a JSON envelope { body, created_at }, set with an
-- expiry of conf.exact.ttl or DEFAULT_TTL. Failures are logged only.
local function write_to_cache(premature, conf, cache_key, response_body)
    if premature then
        return
    end

    local red, err = redis_util.new(conf)
    if not red then
        core.log.warn("ai-cache: redis unavailable on write: ", err)
        return
    end

    local envelope = core.json.encode({ body = response_body, created_at = ngx.time() })

    local ttl = DEFAULT_TTL
    if conf.exact and conf.exact.ttl then
        ttl = conf.exact.ttl
    end

    local stored, set_err = red:set(cache_key, envelope, "EX", ttl)
    if not stored then
        core.log.warn("ai-cache: redis set failed: ", set_err)
    end

    release(conf, red)
end


-- Log-phase handler: schedule the cache write for a completed MISS.
-- Nothing is written unless the request was a MISS with a cache key, the
-- response status is 200, and the body filter collected a non-empty buffer.
-- The write itself runs in a 0-delay timer via write_to_cache.
function _M.log(conf, ctx)
    local cache_key = ctx.ai_cache_key
    if not cache_key or ctx.ai_cache_status ~= "MISS" then
        return
    end

    if ngx.status ~= 200 then
        return
    end

    local acc = ctx.ai_cache_buf
    if not (acc and acc.bytes ~= 0) then
        return
    end

    -- join exactly the acc.n collected chunks into one string
    local response_body = concat(acc, "", 1, acc.n)

    local scheduled, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body)
    if scheduled then
        return
    end

    core.log.warn("ai-cache: failed to schedule cache write: ", err)
end


return _M
108 changes: 108 additions & 0 deletions apisix/plugins/ai-cache/key.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local protocols = require("apisix.plugins.ai-protocols")
local rapidjson = require("rapidjson")
local sha256 = require("resty.sha256")
local to_hex = require("resty.string").to_hex

local ipairs = ipairs
local pairs = pairs
local type = type
local getmetatable = getmetatable
local concat = table.concat

local rapidjson_null = rapidjson.null
local ENCODE_OPTS = { sort_keys = true }

local _M = {}


-- Return the SHA-256 digest of string `s`, rendered through to_hex.
local function hex_digest(s)
    local hasher = sha256:new()
    hasher:update(s)

    local raw = hasher:final()
    return to_hex(raw)
end


-- Recursively copy a value decoded by core.json into the representation
-- handed to rapidjson:
--   * core.json.null becomes rapidjson.null;
--   * non-table values are returned unchanged;
--   * tables tagged with core.json.array_mt are rebuilt through
--     rapidjson.array;
--   * every other table is copied key by key.
local function to_rapidjson_value(data)
    -- the null sentinel must be tested before the generic non-table case
    if data == core.json.null then
        return rapidjson_null
    end
    if type(data) ~= "table" then
        return data
    end

    local converted = {}

    if getmetatable(data) ~= core.json.array_mt then
        for key, item in pairs(data) do
            converted[key] = to_rapidjson_value(item)
        end
        return converted
    end

    for idx, item in ipairs(data) do
        converted[idx] = to_rapidjson_value(item)
    end
    return rapidjson.array(converted)
end


-- Serialise `value` with rapidjson using ENCODE_OPTS (sort_keys enabled),
-- after normalising it with to_rapidjson_value.
local function canonical_encode(value)
    local normalized = to_rapidjson_value(value)
    return rapidjson.encode(normalized, ENCODE_OPTS)
end

Comment thread
janiussyafiq marked this conversation as resolved.
Outdated

-- Compute the hex digest that identifies a request for exact-match caching.
-- The digest covers four fields: the client protocol from ctx, the model
-- (ctx.var.request_llm_model, else body.model), the messages returned by
-- protocols.get_messages, and every remaining body field except
-- "messages", "model" and "stream".
function _M.fingerprint(ctx, body)
    local extras = {}
    for field, value in pairs(body) do
        local excluded = field == "messages" or field == "model" or field == "stream"
        if not excluded then
            extras[field] = value
        end
    end

    local protocol = ctx.ai_client_protocol or ""
    local model = ctx.var.request_llm_model or body.model or ""
    local messages = protocols.get_messages(body, ctx) or {}

    local canonical = canonical_encode({
        protocol = protocol,
        model = model,
        messages = messages,
        params = extras,
    })
    return hex_digest(canonical)
end
Comment thread
janiussyafiq marked this conversation as resolved.


local tostring = tostring

-- Make a value safe to embed in a scope string by percent-escaping the
-- characters that carry structure there ("%", ":" and "="). Values without
-- those characters pass through unchanged. Non-string values are coerced
-- with tostring first.
local function escape_scope_value(value)
    -- outer parentheses drop gsub's second result (the match count)
    return (tostring(value):gsub("[%%:=]", function(ch)
        return ("%%%02X"):format(ch:byte())
    end))
end


-- Build the scope segment of a cache key from conf.cache_key.
-- Returns "shared" when neither include_consumer nor a non-empty
-- include_vars list is configured. Otherwise returns "name=value" pairs
-- joined by ":", with the consumer (when enabled) first and the listed
-- variables after it in configuration order. Missing values become "".
--
-- Values are escaped so that a value containing ":" or "=" cannot be
-- mistaken for a delimiter and make two different scopes produce the same
-- string.
function _M.scope(conf, ctx)
    local ck = conf.cache_key
    local inc_vars = ck and ck.include_vars
    local by_consumer = ck and ck.include_consumer

    if not by_consumer and (not inc_vars or #inc_vars == 0) then
        return "shared"
    end

    local parts = {}
    if by_consumer then
        parts[#parts + 1] = "consumer=" .. escape_scope_value(ctx.consumer_name or "")
    end
    if inc_vars then
        for _, name in ipairs(inc_vars) do
            parts[#parts + 1] = name .. "=" .. escape_scope_value(ctx.var[name] or "")
        end
    end
    return concat(parts, ":")
end


return _M
Loading
Loading