From 8cd41f8418005acb9004e97060171fa06b919773 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Fri, 19 Jun 2026 15:23:02 +0800
Subject: [PATCH 01/10] feat: add ai-cache plugin to installation and
 configuration

---
 Makefile                 | 3 +++
 apisix/cli/config.lua    | 1 +
 conf/config.yaml.example | 1 +
 t/admin/plugins.t        | 1 +
 4 files changed, 6 insertions(+)

diff --git a/Makefile b/Makefile
index b0d7820cf73b..0e283228834c 100644
--- a/Makefile
+++ b/Makefile
@@ -401,6 +401,9 @@ install: runtime
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
 	$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
 
+	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-cache
+	$(ENV_INSTALL) apisix/plugins/ai-cache/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-cache
+
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
 	$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
 	$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index 771c21bd339b..0c1246cd1dc5 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -244,6 +244,7 @@ local _M = {
     "ai-rate-limiting",
     "ai-proxy-multi",
     "ai-proxy",
+    "ai-cache",
     "ai-aws-content-moderation",
     "ai-aliyun-content-moderation",
     "proxy-mirror",
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index 2360647e8f4a..38c6afcbd1e7 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -538,6 +538,7 @@ plugins:                           # plugin list (sorted by priority)
   - ai-aws-content-moderation      # priority: 1050
   - ai-proxy-multi                 # priority: 1041
   - ai-proxy                       # priority: 1040
+  - ai-cache                       # priority: 1035
   - ai-rate-limiting               # priority: 1030
   - ai-aliyun-content-moderation   # priority: 1029
   - proxy-mirror                   # priority: 1010
diff --git a/t/admin/plugins.t b/t/admin/plugins.t
index 6061de721daf..cbce68d7f51d 100644
--- a/t/admin/plugins.t
+++ b/t/admin/plugins.t
@@ -108,6 +108,7 @@ ai-rag
 ai-aws-content-moderation
 ai-proxy-multi
 ai-proxy
+ai-cache
 ai-rate-limiting
 ai-aliyun-content-moderation
 proxy-mirror

From 1ea1aaa77defda3ad3d02838b2c656d5061e1970 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Fri, 19 Jun 2026 16:54:36 +0800
Subject: [PATCH 02/10] feat: implement ai-cache plugin with Redis support and
 testing

---
 apisix/plugins/ai-cache.lua        | 199 +++++++++
 apisix/plugins/ai-cache/key.lua    |  72 ++++
 apisix/plugins/ai-cache/schema.lua |  91 ++++
 t/plugin/ai-cache.t                | 652 +++++++++++++++++++++++++++++
 4 files changed, 1014 insertions(+)
 create mode 100644 apisix/plugins/ai-cache.lua
 create mode 100644 apisix/plugins/ai-cache/key.lua
 create mode 100644 apisix/plugins/ai-cache/schema.lua
 create mode 100644 t/plugin/ai-cache.t

diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
new file mode 100644
index 000000000000..8fdb5ec7347b
--- /dev/null
+++ b/apisix/plugins/ai-cache.lua
@@ -0,0 +1,199 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core       = require("apisix.core")
+local schema     = require("apisix.plugins.ai-cache.schema")
+local key_mod    = require("apisix.plugins.ai-cache.key")
+local redis_util = require("apisix.utils.redis")
+
+local ngx        = ngx
+local ngx_null   = ngx.null
+local ipairs     = ipairs
+local str_sub    = string.sub
+
+local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
+local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
+local DEFAULT_TTL         = 3600
+
+local _M = {
+    version  = 0.1,
+    priority = 1035,
+    name     = "ai-cache",
+    schema   = schema,
+}
+
+
+function _M.check_schema(conf)  -- validates conf against the ai-cache schema table
+    return core.schema.check(schema, conf)
+end
+
+
+local function release(conf, red)
+    local idle_ms = conf.redis_keepalive_timeout or 10000
+    local ok, err = red:set_keepalive(idle_ms, conf.redis_keepalive_pool or 100)
+    if not ok then
+        core.log.warn("ai-cache: failed to set redis keepalive: ", err)
+    end
+end
+
+
+local function serve_hit(conf, ctx, cached)
+    ctx.ai_cache_status = "HIT"
+    if conf.cache_headers ~= false then
+        local age = cached.created_at and (ngx.time() - cached.created_at) or 0
+        core.response.set_header(CACHE_STATUS_HEADER, "HIT")
+        core.response.set_header(CACHE_AGE_HEADER, age > 0 and age or 0)
+    end
+    core.response.set_header("Content-Type", "application/json")
+    return core.response.exit(200, cached.body)
+end
+
+
+function _M.access(conf, ctx)
+    -- PR-1 does not cache streaming responses (SSE replay is left to a
+    -- later increment). ai-proxy runs at a higher priority and has already
+    -- classified the request, so streams are skipped before any other work.
+    local vars = ctx.var
+    if vars.request_type == "ai_stream" then
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    -- explicit opt-out with proxy-cache `cache_bypass` / nginx
+    -- `proxy_cache_bypass` semantics: a reference whose value is neither
+    -- empty nor "0" skips the cache. References with a leading "$" name a
+    -- variable to resolve; every other reference is a literal.
+    if conf.cache_bypass then
+        for _, ref in ipairs(conf.cache_bypass) do
+            local val = ref
+            if str_sub(ref, 1, 1) == "$" then
+                val = vars[str_sub(ref, 2)]
+            end
+            if not (val == nil or val == "" or val == "0") then
+                ctx.ai_cache_status = "BYPASS"
+                return
+            end
+        end
+    end
+
+    local body, body_err = core.request.get_json_request_body_table()
+    if not body then
+        ctx.ai_cache_status = "BYPASS"
+        core.log.warn("ai-cache: cannot read request body, bypassing: ", body_err)
+        return
+    end
+
+    local cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
+                      .. ":" .. key_mod.fingerprint(ctx, body)
+    ctx.ai_cache_key = cache_key
+
+    local red, conn_err = redis_util.new(conf)
+    if not red then
+        -- fail-open: a cache-backend outage must never break the request.
+        ctx.ai_cache_status = "MISS"
+        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", conn_err)
+        return
+    end
+
+    local raw, get_err = red:get(cache_key)
+    release(conf, red)
+    if get_err then
+        ctx.ai_cache_status = "MISS"
+        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", get_err)
+        return
+    end
+    if raw == nil or raw == ngx_null then
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+    local entry = core.json.decode(raw)
+    if entry and entry.body then
+        return serve_hit(conf, ctx, entry)
+    end
+    core.log.warn("ai-cache: discarding malformed cache entry for ", cache_key)
+    ctx.ai_cache_status = "MISS"
+end
+
+
+function _M.header_filter(conf, ctx)
+    local status = conf.cache_headers ~= false and ctx.ai_cache_status
+    if not status then return end
+    core.response.set_header(CACHE_STATUS_HEADER, status)
+end
+
+
+function _M.body_filter(conf, ctx)
+    -- buffer MISS bodies only, and stop (freeing the buffer) once over the size cap.
+    if ctx.ai_cache_status ~= "MISS" or ctx.ai_cache_oversize then
+        return
+    end
+    local buf = (ctx.ai_cache_buf or "") .. (ngx.arg[1] or "")
+    local oversize = #buf > (conf.max_cache_body_size or 1048576)
+    ctx.ai_cache_oversize = oversize
+    ctx.ai_cache_buf = (not oversize) and buf or nil
+end
+
+
+-- Timer callback that stores one response under cache_key with the exact-layer
+-- TTL. body_filter / log run where cosockets are disabled, so the Redis write
+-- is deferred to a 0-delay timer (a light thread where cosockets are allowed).
+local function write_to_cache(premature, conf, cache_key, response_body)
+    if premature then
+        return
+    end
+    local red, err = redis_util.new(conf)
+    if not red then
+        core.log.warn("ai-cache: redis unavailable on write: ", err)
+        return
+    end
+    local envelope = core.json.encode({ body = response_body, created_at = ngx.time() })
+    local ttl = (conf.exact and conf.exact.ttl) or DEFAULT_TTL
+    local ok
+    ok, err = red:set(cache_key, envelope, "EX", ttl)
+    if not ok then
+        core.log.warn("ai-cache: redis set failed: ", err)
+    end
+    -- always hand the connection back, even when SET failed, so it is not leaked.
+    release(conf, red)
+end
+
+
+function _M.log(conf, ctx)
+    local cache_key = ctx.ai_cache_key
+    local captured  = ctx.ai_cache_buf
+    local status    = ngx.status
+    -- a write-back needs a MISS that produced both a key and a non-empty body.
+    local cacheable = ctx.ai_cache_status == "MISS" and cache_key
+                      and captured and captured ~= ""
+    -- write-on-success only: an error response is never cached.
+    cacheable = cacheable and status >= 200 and status < 300
+    -- responses larger than the configured cap are not cached.
+    cacheable = cacheable and #captured <= (conf.max_cache_body_size or 1048576)
+    if not cacheable then
+        return
+    end
+
+    -- hand the write to a 0-delay timer.
+    local scheduled, err = ngx.timer.at(0, write_to_cache, conf, cache_key, captured)
+    if scheduled then
+        return
+    end
+    core.log.warn("ai-cache: failed to schedule cache write: ", err)
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
new file mode 100644
index 000000000000..283501bb3ec6
--- /dev/null
+++ b/apisix/plugins/ai-cache/key.lua
@@ -0,0 +1,72 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core      = require("apisix.core")
+local protocols = require("apisix.plugins.ai-protocols")
+local sha256    = require("resty.sha256")
+local to_hex    = require("resty.string").to_hex
+
+local ipairs = ipairs
+local concat = table.concat
+
+local _M = {}
+
+
+local function hex_digest(s)
+    local digest = sha256:new()
+    digest:update(s)
+    return to_hex(digest:final())  -- hex text of the SHA-256 of s
+end
+
+
+function _M.fingerprint(ctx, body)
+    local params = core.table.deepcopy(body)
+    params.messages, params.model, params.stream = nil, nil, nil
+    local protocol = ctx.ai_client_protocol or ""
+    local model    = ctx.var.request_llm_model or body.model or ""
+    local messages = protocols.get_messages(body, ctx) or {}
+    local canonical = core.json.stably_encode({
+        protocol = protocol,
+        model    = model,
+        messages = messages,
+        params   = params,
+    })
+    return hex_digest(canonical)
+end
+
+
+function _M.scope(conf, ctx)
+    local ck = conf.cache_key
+    local with_consumer = ck and ck.include_consumer
+    local var_names = ck and ck.include_vars
+    if not (with_consumer or (var_names and #var_names > 0)) then
+        return "shared"
+    end
+    local segments = {}
+    if with_consumer then
+        segments[1] = "consumer=" .. (ctx.consumer_name or "")
+    end
+    if var_names then
+        for _, var_name in ipairs(var_names) do
+            segments[#segments + 1] = var_name .. "=" .. (ctx.var[var_name] or "")
+        end
+    end
+    return concat(segments, ":")
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
new file mode 100644
index 000000000000..a36c67ab04f7
--- /dev/null
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -0,0 +1,91 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core         = require("apisix.core")
+local redis_schema = require("apisix.utils.redis-schema")
+
+local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
+
+local _M = {
+    type = "object",
+    properties = {
+        layers = {  -- enabled cache layers; "exact" is the only accepted value
+            type = "array",
+            items = {
+                enum = { "exact" },
+            },
+            minItems = 1,
+            uniqueItems = true,
+            default = { "exact" },
+        },
+
+        exact = {
+            type = "object",
+            properties = {
+                ttl = { type = "integer", minimum = 1, default = 3600 },  -- Redis EX value
+            },
+            default = {},
+        },
+
+        cache_key = {  -- extra scoping of the cache key (see key.lua scope())
+            type = "object",
+            properties = {
+                include_consumer = { type = "boolean", default = false },
+                include_vars = {  -- ctx.var names whose values join the key scope
+                    type = "array",
+                    items = { type = "string" },
+                    default = {},
+                },
+            },
+            default = {},
+        },
+
+        max_cache_body_size = {  -- responses longer than this are not cached
+            type = "integer", minimum = 0, default = 1048576,
+        },
+
+        cache_headers = {  -- false suppresses the X-AI-Cache-* response headers
+            type = "boolean", default = true,
+        },
+
+        cache_bypass = {  -- "$name" is resolved through ctx.var, anything else is a literal
+            type = "array",
+            minItems = 1,
+            items = {
+                type = "string",
+                pattern = [[(^[^\$].+$|^\$[0-9a-zA-Z_]+$)]],
+            },
+        },
+
+        policy = {
+            type = "string",
+            enum = { "redis" },
+            default = "redis",
+        },
+    },
+    ["if"] = {
+        properties = {
+            policy = {
+                enum = { "redis" },
+            },
+        },
+    },
+    ["then"] = policy_to_additional_properties.redis,
+    encrypt_fields = { "redis_password" },
+}
+
+return _M
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
new file mode 100644
index 000000000000..1b58af1bbeca
--- /dev/null
+++ b/t/plugin/ai-cache.t
@@ -0,0 +1,652 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BEGIN {
+    $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+
+    my $user_yaml_config = <<_EOC_;
+plugins:
+  - ai-proxy
+  - ai-cache
+_EOC_
+    if (!defined $block->extra_yaml_config) {
+        $block->set_value("extra_yaml_config", $user_yaml_config);
+    }
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: minimal valid exact-cache configuration
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({
+                redis_host = "127.0.0.1",
+                redis_port = 6379,
+            })
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 2: reject config missing required redis (policy=redis then-clause)
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({})
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body eval
+qr/then clause did not match/
+
+
+
+=== TEST 3: reject unknown layer value
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({
+                redis_host = "127.0.0.1",
+                layers = { "nonsense" },
+            })
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body eval
+qr/layers/
+
+
+
+=== TEST 4: flush redis, then set route with ai-proxy + ai-cache (mock upstream)
+--- config
+    location /t {
+        content_by_lua_block {
+            local redis = require("resty.redis")
+            local red = redis:new()
+            red:set_timeout(1000)
+            local ok, rerr = red:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.say("redis connect failed: ", rerr)
+                return
+            end
+            red:flushall()
+
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": {
+                                "header": {
+                                    "Authorization": "Bearer test-key"
+                                }
+                            },
+                            "options": {
+                                "model": "gpt-4o"
+                            },
+                            "override": {
+                                "endpoint": "http://127.0.0.1:1980"
+                            }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 5: cold request is a cache MISS and is proxied upstream
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss unique-prompt-5"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+--- wait: 0.3
+
+
+
+=== TEST 6: identical re-request is a HIT served from cache (upstream not called)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss unique-prompt-5"}]}
+--- error_code: 200
+--- response_headers_like
+X-AI-Cache-Status: HIT
+X-AI-Cache-Age: \d+
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+
+
+
+=== TEST 7: fingerprint sensitivity (key.lua unit)
+--- config
+    location /t {
+        content_by_lua_block {
+            local key = require("apisix.plugins.ai-cache.key")
+            local function ctx(model)
+                return { ai_client_protocol = "openai-chat", var = { request_llm_model = model } }
+            end
+            local function fp(body)
+                return key.fingerprint(ctx(body.model), body)
+            end
+
+            local base   = { model="gpt-4o",      messages={{role="user", content="hi"}}, temperature=0.2 }
+            local same   = { model="gpt-4o",      messages={{role="user", content="hi"}}, temperature=0.2 }
+            local msg2   = { model="gpt-4o",      messages={{role="user", content="yo"}}, temperature=0.2 }
+            local model2 = { model="gpt-4o-mini", messages={{role="user", content="hi"}}, temperature=0.2 }
+            local temp2  = { model="gpt-4o",      messages={{role="user", content="hi"}}, temperature=0.7 }
+            local tools2 = { model="gpt-4o",      messages={{role="user", content="hi"}}, temperature=0.2,
+                             tools={{ type="function", ["function"]={ name="f" } }} }
+
+            local b = fp(base)
+            assert(fp(same)   == b, "identical bodies must share a fingerprint")
+            assert(fp(msg2)   ~= b, "changed message must change the fingerprint")
+            assert(fp(model2) ~= b, "changed model must change the fingerprint")
+            assert(fp(temp2)  ~= b, "changed temperature must change the fingerprint")
+            assert(fp(tools2) ~= b, "changed tools must change the fingerprint")
+            ngx.say("passed")
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 8: non-2xx upstream (no fixture -> 401) is a MISS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]}
+--- error_code: 401
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 9: same prompt with a valid fixture is still a MISS (the 401 was not cached)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+
+
+
+=== TEST 10: set route with a cache_bypass variable rule
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_bypass": ["$http_x_ai_cache_bypass"]
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 11: a non-empty, non-"0" cache_bypass value is a BYPASS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"bypass rule test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-AI-Cache-Bypass: 1
+--- response_headers
+X-AI-Cache-Status: BYPASS
+
+
+
+=== TEST 12: a cache_bypass value of "0" does not bypass (normal MISS)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-zero-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-AI-Cache-Bypass: 0
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 13: set route with a tiny max_cache_body_size
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "max_cache_body_size": 10
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 14: cold request (response exceeds max_cache_body_size) is a MISS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 15: same prompt is still a MISS (oversized response was not cached)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 16: set route isolating the cache by a request variable
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_key": { "include_vars": ["http_x_tenant"] }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 17: tenant alpha cold request is a MISS (warms scope=alpha)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-Tenant: alpha
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 18: same prompt, tenant beta is a MISS (not shared with alpha)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-Tenant: beta
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 19: same prompt, tenant alpha is a HIT (its own scope persisted)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
+--- more_headers
+X-Tenant: alpha
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT
+
+
+
+=== TEST 20: set route with a 1-second exact ttl
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "exact": { "ttl": 1 }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 21: cold request is a MISS (cached with ttl=1), then wait past the ttl
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 2
+
+
+
+=== TEST 22: same prompt is a MISS again (entry expired)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 23: set an anthropic-messages route (cross-protocol)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/2',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": { "header": { "x-api-key": "test-key" } },
+                            "options": { "model": "claude-3-5-sonnet-20241022" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 24: anthropic cold request is a MISS
+--- request
+POST /v1/messages
+{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100}
+--- more_headers
+X-AI-Fixture: anthropic/messages-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 25: identical anthropic re-request is a HIT (upstream not called)
+--- request
+POST /v1/messages
+{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100}
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT
+
+
+
+=== TEST 26: set route whose redis is unreachable
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6390,
+                            "redis_timeout": 200
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 27: redis unreachable fails open (request still proxied as MISS, no 5xx)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"redis-down failopen"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+--- error_log
+ai-cache: redis unavailable, fail-open as MISS
+
+
+
+=== TEST 28: set route with cache_headers disabled
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_headers": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 29: cache_headers=false suppresses the X-AI-Cache-* headers
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"cache-headers-off-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status:
+X-AI-Cache-Age:
+--- response_body_like eval
+qr/1 \+ 1 = 2/

From 2d7eb3b87339d0ce2930ed5d6deaad91874432e1 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 09:59:41 +0800
Subject: [PATCH 03/10] fix(ai-cache): canonical-encode fingerprint and switch
 bypass to bypass_on

Encode the request fingerprint with rapidjson (sort_keys) plus a
to_rapidjson_value pass that maps the JSON null sentinel and array_mt
tables, mirroring ai-transport/http.lua. core.json.stably_encode (dkjson)
raised on the cjson null sentinel, so a body carrying an explicit null
(e.g. OpenAI's "stop": null) would error out of the access phase.

Replace the cache_bypass var-ref opt-out with bypass_on: an array of
{header, equals} rules (per rfcs#78). A rule matches only when the named
request header is exactly equal to the rule's `equals` string; any
matching rule skips the cache and marks the request as BYPASS.

Tests: add a fingerprint regression for a body carrying an explicit null
field, migrate the bypass tests to bypass_on, and cover multiple rules
where any match bypasses.
---
 apisix/plugins/ai-cache.lua        |  15 +----
 apisix/plugins/ai-cache/key.lua    |  40 ++++++++++-
 apisix/plugins/ai-cache/schema.lua |  10 ++-
 t/plugin/ai-cache.t                | 102 ++++++++++++++++++++++-------
 4 files changed, 127 insertions(+), 40 deletions(-)

diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
index 8fdb5ec7347b..5495759cb9a2 100644
--- a/apisix/plugins/ai-cache.lua
+++ b/apisix/plugins/ai-cache.lua
@@ -23,7 +23,6 @@ local redis_util = require("apisix.utils.redis")
 local ngx        = ngx
 local ngx_null   = ngx.null
 local ipairs     = ipairs
-local str_sub    = string.sub
 
 local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
 local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
@@ -72,17 +71,9 @@ function _M.access(conf, ctx)
         return
     end
 
-    -- explicit opt-out: any cache_bypass reference resolving to a value
-    -- that is non-empty and not "0" skips the cache (proxy-cache
-    -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading
-    -- "$" marks a variable to resolve; anything else is a literal.
-    if conf.cache_bypass then
-        for _, ref in ipairs(conf.cache_bypass) do
-            local val = ref
-            if str_sub(ref, 1, 1) == "$" then
-                val = ctx.var[str_sub(ref, 2)]
-            end
-            if val ~= nil and val ~= "" and val ~= "0" then
+    if conf.bypass_on then
+        for _, rule in ipairs(conf.bypass_on) do
+            if core.request.header(ctx, rule.header) == rule.equals then
                 ctx.ai_cache_status = "BYPASS"
                 return
             end
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
index 283501bb3ec6..228c46e5830e 100644
--- a/apisix/plugins/ai-cache/key.lua
+++ b/apisix/plugins/ai-cache/key.lua
@@ -17,11 +17,18 @@
 
 local core      = require("apisix.core")
 local protocols = require("apisix.plugins.ai-protocols")
+local rapidjson = require("rapidjson")
 local sha256    = require("resty.sha256")
 local to_hex    = require("resty.string").to_hex
 
-local ipairs = ipairs
-local concat = table.concat
+local ipairs       = ipairs
+local pairs        = pairs
+local type         = type
+local getmetatable = getmetatable
+local concat       = table.concat
+
+local rapidjson_null = rapidjson.null
+local ENCODE_OPTS    = { sort_keys = true }
 
 local _M = {}
 
@@ -33,13 +40,40 @@ local function hex_digest(s)
 end
 
 
+local function to_rapidjson_value(data)
+    if data == core.json.null then
+        return rapidjson_null
+    end
+    if type(data) ~= "table" then
+        return data
+    end
+    if getmetatable(data) == core.json.array_mt then
+        local arr = {}
+        for i, v in ipairs(data) do
+            arr[i] = to_rapidjson_value(v)
+        end
+        return rapidjson.array(arr)
+    end
+    local obj = {}
+    for k, v in pairs(data) do
+        obj[k] = to_rapidjson_value(v)
+    end
+    return obj
+end
+
+
+local function canonical_encode(value)
+    return rapidjson.encode(to_rapidjson_value(value), ENCODE_OPTS)
+end
+
+
 function _M.fingerprint(ctx, body)
     local params = core.table.deepcopy(body)
     params.messages = nil
     params.model    = nil
     params.stream   = nil
 
-    local repr = core.json.stably_encode({
+    local repr = canonical_encode({
         protocol = ctx.ai_client_protocol or "",
         model    = ctx.var.request_llm_model or body.model or "",
         messages = protocols.get_messages(body, ctx) or {},
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
index a36c67ab04f7..72d02f81f855 100644
--- a/apisix/plugins/ai-cache/schema.lua
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -62,12 +62,16 @@ local _M = {
             type = "boolean", default = true,
         },
 
-        cache_bypass = {
+        bypass_on = {
             type = "array",
             minItems = 1,
             items = {
-                type = "string",
-                pattern = [[(^[^\$].+$|^\$[0-9a-zA-Z_]+$)]],
+                type = "object",
+                properties = {
+                    header = { type = "string" },
+                    equals = { type = "string" },
+                },
+                required = { "header", "equals" },
             },
         },
 
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
index 1b58af1bbeca..a35305e3eb9e 100644
--- a/t/plugin/ai-cache.t
+++ b/t/plugin/ai-cache.t
@@ -194,6 +194,7 @@ qr/1 \+ 1 = 2/
 --- config
     location /t {
         content_by_lua_block {
+            local core = require("apisix.core")
             local key = require("apisix.plugins.ai-cache.key")
             local function ctx(model)
                 return { ai_client_protocol = "openai-chat", var = { request_llm_model = model } }
@@ -216,6 +217,13 @@ qr/1 \+ 1 = 2/
             assert(fp(model2) ~= b, "changed model must change the fingerprint")
             assert(fp(temp2)  ~= b, "changed temperature must change the fingerprint")
             assert(fp(tools2) ~= b, "changed tools must change the fingerprint")
+
+            local nullb = core.json.decode(
+                '{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}],"stop":null}')
+            local ok_null, fp_null = pcall(fp, nullb)
+            assert(ok_null, "explicit null must not raise: " .. tostring(fp_null))
+            assert(fp(nullb) == fp_null, "null-bearing fingerprint must be stable")
+            assert(fp_null ~= b, "stop:null must change the fingerprint")
             ngx.say("passed")
         }
     }
@@ -249,7 +257,7 @@ qr/1 \+ 1 = 2/
 
 
 
-=== TEST 10: set route with a cache_bypass variable rule
+=== TEST 10: set route with a bypass_on header rule
 --- config
     location /t {
         content_by_lua_block {
@@ -268,7 +276,7 @@ qr/1 \+ 1 = 2/
                         "ai-cache": {
                             "redis_host": "127.0.0.1",
                             "redis_port": 6379,
-                            "cache_bypass": ["$http_x_ai_cache_bypass"]
+                            "bypass_on": [{"header": "X-AI-Cache-Bypass", "equals": "1"}]
                         }
                     }
                 }]]
@@ -284,7 +292,7 @@ passed
 
 
 
-=== TEST 11: a non-empty, non-"0" cache_bypass value is a BYPASS
+=== TEST 11: a matching bypass_on header value is a BYPASS
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"bypass rule test"}]}
@@ -296,10 +304,10 @@ X-AI-Cache-Status: BYPASS
 
 
 
-=== TEST 12: a cache_bypass value of "0" does not bypass (normal MISS)
+=== TEST 12: a non-matching bypass_on header value does not bypass (normal MISS)
 --- request
 POST /anything
-{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-zero-test"}]}
+{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-nonmatch-test"}]}
 --- more_headers
 X-AI-Fixture: openai/chat-basic.json
 X-AI-Cache-Bypass: 0
@@ -308,7 +316,57 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 13: set route with a tiny max_cache_body_size
+=== TEST 13: set route with multiple bypass_on rules
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "bypass_on": [
+                                {"header": "X-AI-Cache-Bypass", "equals": "1"},
+                                {"header": "X-Debug", "equals": "on"}
+                            ]
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 14: any matching bypass_on rule triggers a BYPASS (second rule matches)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"any-rule-bypass-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-Debug: on
+--- response_headers
+X-AI-Cache-Status: BYPASS
+
+
+
+=== TEST 15: set route with a tiny max_cache_body_size
 --- config
     location /t {
         content_by_lua_block {
@@ -343,7 +401,7 @@ passed
 
 
 
-=== TEST 14: cold request (response exceeds max_cache_body_size) is a MISS
+=== TEST 16: cold request (response exceeds max_cache_body_size) is a MISS
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
@@ -355,7 +413,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 15: same prompt is still a MISS (oversized response was not cached)
+=== TEST 17: same prompt is still a MISS (oversized response was not cached)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
@@ -366,7 +424,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 16: set route isolating the cache by a request variable
+=== TEST 18: set route isolating the cache by a request variable
 --- config
     location /t {
         content_by_lua_block {
@@ -401,7 +459,7 @@ passed
 
 
 
-=== TEST 17: tenant alpha cold request is a MISS (warms scope=alpha)
+=== TEST 19: tenant alpha cold request is a MISS (warms scope=alpha)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
@@ -414,7 +472,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 18: same prompt, tenant beta is a MISS (not shared with alpha)
+=== TEST 20: same prompt, tenant beta is a MISS (not shared with alpha)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
@@ -426,7 +484,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 19: same prompt, tenant alpha is a HIT (its own scope persisted)
+=== TEST 21: same prompt, tenant alpha is a HIT (its own scope persisted)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]}
@@ -438,7 +496,7 @@ X-AI-Cache-Status: HIT
 
 
 
-=== TEST 20: set route with a 1-second exact ttl
+=== TEST 22: set route with a 1-second exact ttl
 --- config
     location /t {
         content_by_lua_block {
@@ -473,7 +531,7 @@ passed
 
 
 
-=== TEST 21: cold request is a MISS (cached with ttl=1), then wait past the ttl
+=== TEST 23: cold request is a MISS (cached with ttl=1), then wait past the ttl
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
@@ -485,7 +543,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 22: same prompt is a MISS again (entry expired)
+=== TEST 24: same prompt is a MISS again (entry expired)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
@@ -496,7 +554,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 23: set an anthropic-messages route (cross-protocol)
+=== TEST 25: set an anthropic-messages route (cross-protocol)
 --- config
     location /t {
         content_by_lua_block {
@@ -530,7 +588,7 @@ passed
 
 
 
-=== TEST 24: anthropic cold request is a MISS
+=== TEST 26: anthropic cold request is a MISS
 --- request
 POST /v1/messages
 {"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100}
@@ -542,7 +600,7 @@ X-AI-Cache-Status: MISS
 
 
 
-=== TEST 25: identical anthropic re-request is a HIT (upstream not called)
+=== TEST 27: identical anthropic re-request is a HIT (upstream not called)
 --- request
 POST /v1/messages
 {"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100}
@@ -552,7 +610,7 @@ X-AI-Cache-Status: HIT
 
 
 
-=== TEST 26: set route whose redis is unreachable
+=== TEST 28: set route whose redis is unreachable
 --- config
     location /t {
         content_by_lua_block {
@@ -587,7 +645,7 @@ passed
 
 
 
-=== TEST 27: redis unreachable fails open (request still proxied as MISS, no 5xx)
+=== TEST 29: redis unreachable fails open (request still proxied as MISS, no 5xx)
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"redis-down failopen"}]}
@@ -603,7 +661,7 @@ ai-cache: redis unavailable, fail-open as MISS
 
 
 
-=== TEST 28: set route with cache_headers disabled
+=== TEST 30: set route with cache_headers disabled
 --- config
     location /t {
         content_by_lua_block {
@@ -638,7 +696,7 @@ passed
 
 
 
-=== TEST 29: cache_headers=false suppresses the X-AI-Cache-* headers
+=== TEST 31: cache_headers=false suppresses the X-AI-Cache-* headers
 --- request
 POST /anything
 {"model":"gpt-4o","messages":[{"role":"user","content":"cache-headers-off-test"}]}

From d91e68ab84983d2e9d40e12c84042328d443448f Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 10:38:36 +0800
Subject: [PATCH 04/10] feat(ai-cache): enhance body filter to handle oversized
 responses and update fingerprinting logic

---
 apisix/plugins/ai-cache.lua     | 15 ++++----
 apisix/plugins/ai-cache/key.lua | 10 +++--
 t/plugin/ai-cache.t             | 66 ++++++++++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
index 5495759cb9a2..84965c5f8d5e 100644
--- a/apisix/plugins/ai-cache.lua
+++ b/apisix/plugins/ai-cache.lua
@@ -27,6 +27,7 @@ local ipairs     = ipairs
 local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
 local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
 local DEFAULT_TTL         = 3600
+local DEFAULT_MAX_BODY    = 1048576
 
 local _M = {
     version  = 0.1,
@@ -129,12 +130,16 @@ end
 
 function _M.body_filter(conf, ctx)
     -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
-    if ctx.ai_cache_status ~= "MISS" then
+    if ctx.ai_cache_status ~= "MISS" or ctx.ai_cache_oversized then
         return
     end
     local chunk = ngx.arg[1]
     if chunk and #chunk > 0 then
         ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk
+        if #ctx.ai_cache_buf > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then
+            ctx.ai_cache_buf = nil
+            ctx.ai_cache_oversized = true
+        end
     end
 end
 
@@ -157,7 +162,6 @@ local function write_to_cache(premature, conf, cache_key, response_body)
     ok, err = red:set(cache_key, envelope, "EX", ttl)
     if not ok then
         core.log.warn("ai-cache: redis set failed: ", err)
-        return
     end
     release(conf, red)
 end
@@ -167,18 +171,13 @@ function _M.log(conf, ctx)
     if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then
         return
     end
-    -- write-on-success only: never cache an error response.
-    if ngx.status < 200 or ngx.status >= 300 then
+    if ngx.status ~= 200 then
         return
     end
     local response_body = ctx.ai_cache_buf
     if not response_body or response_body == "" then
         return
     end
-    -- don't cache responses larger than the configured cap.
-    if #response_body > (conf.max_cache_body_size or 1048576) then
-        return
-    end
 
     local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body)
     if not ok then
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
index 228c46e5830e..3f012892c0b9 100644
--- a/apisix/plugins/ai-cache/key.lua
+++ b/apisix/plugins/ai-cache/key.lua
@@ -68,10 +68,12 @@ end
 
 
 function _M.fingerprint(ctx, body)
-    local params = core.table.deepcopy(body)
-    params.messages = nil
-    params.model    = nil
-    params.stream   = nil
+    local params = {}
+    for k, v in pairs(body) do
+        if k ~= "messages" and k ~= "model" and k ~= "stream" then
+            params[k] = v
+        end
+    end
 
     local repr = canonical_encode({
         protocol = ctx.ai_client_protocol or "",
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
index a35305e3eb9e..351bdf5031a1 100644
--- a/t/plugin/ai-cache.t
+++ b/t/plugin/ai-cache.t
@@ -704,7 +704,69 @@ POST /anything
 X-AI-Fixture: openai/chat-basic.json
 --- error_code: 200
 --- response_headers
-X-AI-Cache-Status:
-X-AI-Cache-Age:
+! X-AI-Cache-Status
+! X-AI-Cache-Age
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+
+
+
+=== TEST 32: set a default ai-proxy + ai-cache route (for status-code tests)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 33: a 2xx that is not 200 (201) is a MISS and is proxied through
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"status-201-test-prompt"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-AI-Fixture-Status: 201
+--- error_code: 201
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 34: same prompt with a 200 fixture is still a MISS (the 201 was not cached)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"status-201-test-prompt"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: MISS
 --- response_body_like eval
 qr/1 \+ 1 = 2/

From 652a89fe129c5c462890817683652b0aff488526 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 11:18:10 +0800
Subject: [PATCH 05/10] feat(ai-cache): optimize body caching logic and enforce
 header validation

---
 apisix/plugins/ai-cache.lua        | 18 ++++++++++++++----
 apisix/plugins/ai-cache/schema.lua |  2 +-
 t/plugin/ai-cache.t                |  6 +++++-
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
index 84965c5f8d5e..0c7053c176d1 100644
--- a/apisix/plugins/ai-cache.lua
+++ b/apisix/plugins/ai-cache.lua
@@ -23,6 +23,7 @@ local redis_util = require("apisix.utils.redis")
 local ngx        = ngx
 local ngx_null   = ngx.null
 local ipairs     = ipairs
+local concat     = table.concat
 
 local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
 local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
@@ -135,8 +136,16 @@ function _M.body_filter(conf, ctx)
     end
     local chunk = ngx.arg[1]
     if chunk and #chunk > 0 then
-        ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk
-        if #ctx.ai_cache_buf > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then
+        local buf = ctx.ai_cache_buf
+        if not buf then
+            buf = { n = 0, bytes = 0 }
+            ctx.ai_cache_buf = buf
+        end
+        local n = buf.n + 1
+        buf.n = n
+        buf[n] = chunk
+        buf.bytes = buf.bytes + #chunk
+        if buf.bytes > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then
             ctx.ai_cache_buf = nil
             ctx.ai_cache_oversized = true
         end
@@ -174,10 +183,11 @@ function _M.log(conf, ctx)
     if ngx.status ~= 200 then
         return
     end
-    local response_body = ctx.ai_cache_buf
-    if not response_body or response_body == "" then
+    local buf = ctx.ai_cache_buf
+    if not buf or buf.bytes == 0 then
         return
     end
+    local response_body = concat(buf, "", 1, buf.n)
 
     local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body)
     if not ok then
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
index 72d02f81f855..d434d44f45a7 100644
--- a/apisix/plugins/ai-cache/schema.lua
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -68,7 +68,7 @@ local _M = {
             items = {
                 type = "object",
                 properties = {
-                    header = { type = "string" },
+                    header = { type = "string", minLength = 1 },
                     equals = { type = "string" },
                 },
                 required = { "header", "equals" },
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
index 351bdf5031a1..c59f58f9181d 100644
--- a/t/plugin/ai-cache.t
+++ b/t/plugin/ai-cache.t
@@ -122,7 +122,11 @@ qr/layers/
                 ngx.say("redis connect failed: ", rerr)
                 return
             end
-            red:flushall()
+            local fok, ferr = red:flushall()
+            if not fok then
+                ngx.say("redis flushall failed: ", ferr)
+                return
+            end
 
             local t = require("lib.test_admin").test
             local code, body = t('/apisix/admin/routes/1',

From 4ac239875d0e092fb5830b70deacc3a3795e0812 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 12:17:04 +0800
Subject: [PATCH 06/10] docs(ai-cache): add English and Chinese plugin
 documentation

Document the ai-cache plugin: description, full attribute table (incl. all
Redis policy fields), and Admin API / ADC / Ingress Controller examples
covering cache MISS/HIT and bypass_on. Add the page to the en and zh plugin
sidebars.
---
 docs/en/latest/config.json         |   1 +
 docs/en/latest/plugins/ai-cache.md | 332 +++++++++++++++++++++++++++++
 docs/zh/latest/config.json         |   1 +
 docs/zh/latest/plugins/ai-cache.md | 332 +++++++++++++++++++++++++++++
 4 files changed, 666 insertions(+)
 create mode 100644 docs/en/latest/plugins/ai-cache.md
 create mode 100644 docs/zh/latest/plugins/ai-cache.md

diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json
index 7691e45802e9..71b8ba828e9e 100644
--- a/docs/en/latest/config.json
+++ b/docs/en/latest/config.json
@@ -73,6 +73,7 @@
           "items": [
             "plugins/ai-proxy",
             "plugins/ai-proxy-multi",
+            "plugins/ai-cache",
             "plugins/ai-rate-limiting",
             "plugins/ai-prompt-guard",
             "plugins/ai-aws-content-moderation",
diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md
new file mode 100644
index 000000000000..dc3a559d9adc
--- /dev/null
+++ b/docs/en/latest/plugins/ai-cache.md
@@ -0,0 +1,332 @@
+---
+title: ai-cache
+keywords:
+  - Apache APISIX
+  - API Gateway
+  - Plugin
+  - ai-cache
+  - AI
+  - LLM
+description: The ai-cache Plugin caches LLM responses in Redis and replays them for later requests that resolve to the same prompt, cutting upstream token cost and latency.
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+<head>
+  <link rel="canonical" href="https://docs.api7.ai/hub/ai-cache" />
+</head>
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Description
+
+The `ai-cache` Plugin caches LLM responses and replays them for later requests that resolve to the same prompt, cutting upstream token cost and latency for repetitive workloads (FAQ bots, document Q&A, translation).
+
+This release implements the **exact** cache layer (L1); a semantic cache layer (L2) is planned for a future release.
+
+The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-proxy-multi`](./ai-proxy-multi.md) Plugin.
+
+:::note
+
+The cache key uses the **requested** model. If routes rewrite the model server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection) and share one Redis and cache scope, isolate them with separate Redis instances or with `cache_key.include_vars` (for example `["route_id"]`).
+
+:::
+
+## Attributes
+
+| Name | Type | Required | Default | Valid values | Description |
+|------|------|----------|---------|--------------|-------------|
+| layers | array[string] | False | ["exact"] | ["exact"] | Cache layers to enable. Only the exact layer is available in this release. |
+| exact.ttl | integer | False | 3600 | >= 1 | Time-to-live, in seconds, of an exact-cache entry. |
+| cache_key.include_consumer | boolean | False | false | | If true, scope the cache per consumer so entries are not shared across consumers. |
+| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["route_id"]`), isolating entries by their values. |
+| max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. |
+| cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). |
+| bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. |
+| bypass_on[].header | string | True | | | Request header name to match. |
+| bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. |
+| policy | string | False | redis | redis | Storage backend. Only single-node `redis` is available in this release. |
+| redis_host | string | True | | | Address of the Redis node. |
+| redis_port | integer | False | 6379 | >= 1 | Port of the Redis node. |
+| redis_username | string | False | | | Username for Redis if Redis ACL is used. For the legacy `requirepass` method, configure only `redis_password`. |
+| redis_password | string | False | | | Password of the Redis node. Encrypted with AES before being stored in etcd. |
+| redis_database | integer | False | 0 | >= 0 | Database number in Redis. |
+| redis_timeout | integer | False | 1000 | >= 1 | Redis timeout value in milliseconds. |
+| redis_ssl | boolean | False | false | | If true, use SSL to connect to Redis. |
+| redis_ssl_verify | boolean | False | false | | If true, verify the Redis server SSL certificate. |
+| redis_keepalive_timeout | integer | False | 10000 | >= 1000 | Keepalive timeout, in milliseconds, for the Redis connection pool. |
+| redis_keepalive_pool | integer | False | 100 | >= 1 | Maximum number of connections in the Redis keepalive pool. |
+
+## Example
+
+The example below uses OpenAI as the Upstream LLM provider. Obtain an [OpenAI API key](https://openai.com/blog/openai-api) and save it, along with your Admin API key, to environment variables:
+
+```shell
+export OPENAI_API_KEY=your-openai-api-key
+export admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"//g')
+```
+
+A Redis instance must be reachable at the configured `redis_host`.
+
+### Cache LLM Responses
+
+Create a Route to the LLM chat completion endpoint with the [`ai-proxy`](./ai-proxy.md) and `ai-cache` Plugins.
+
+<Tabs
+groupId="api"
+defaultValue="admin-api"
+values={[
+{label: 'Admin API', value: 'admin-api'},
+{label: 'ADC', value: 'adc'},
+{label: 'Ingress Controller', value: 'aic'}
+]}>
+
+<TabItem value="admin-api">
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \
+  -H "X-API-KEY: ${admin_key}" \
+  -d '{
+    "id": "ai-cache-route",
+    "uri": "/anything",
+    "plugins": {
+      "ai-proxy": {
+        "provider": "openai",
+        "auth": { "header": { "Authorization": "Bearer '"$OPENAI_API_KEY"'" } },
+        "options": { "model": "gpt-4o" }
+      },
+      "ai-cache": {
+        "redis_host": "127.0.0.1"
+      }
+    }
+  }'
+```
+
+</TabItem>
+
+<TabItem value="adc">
+
+```yaml title="adc.yaml"
+services:
+  - name: ai-cache-service
+    routes:
+      - name: ai-cache-route
+        uris:
+          - /anything
+        methods:
+          - POST
+        plugins:
+          ai-proxy:
+            provider: openai
+            auth:
+              header:
+                Authorization: "Bearer ${OPENAI_API_KEY}"
+            options:
+              model: gpt-4o
+          ai-cache:
+            redis_host: 127.0.0.1
+```
+
+Synchronize the configuration to the gateway:
+
+```shell
+adc sync -f adc.yaml
+```
+
+</TabItem>
+
+<TabItem value="aic">
+
+<Tabs
+groupId="k8s-api"
+defaultValue="gateway-api"
+values={[
+{label: 'Gateway API', value: 'gateway-api'},
+{label: 'APISIX CRD', value: 'apisix-crd'}
+]}>
+
+<TabItem value="gateway-api">
+
+```yaml title="ai-cache-ic.yaml"
+apiVersion: apisix.apache.org/v1alpha1
+kind: PluginConfig
+metadata:
+  namespace: aic
+  name: ai-cache-plugin-config
+spec:
+  plugins:
+    - name: ai-cache
+      config:
+        redis_host: 127.0.0.1
+    - name: ai-proxy
+      config:
+        provider: openai
+        auth:
+          header:
+            Authorization: "Bearer your-openai-api-key"
+        options:
+          model: gpt-4o
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  namespace: aic
+  name: ai-cache-route
+spec:
+  parentRefs:
+    - name: apisix
+  rules:
+    - matches:
+        - path:
+            type: Exact
+            value: /anything
+          method: POST
+      filters:
+        - type: ExtensionRef
+          extensionRef:
+            group: apisix.apache.org
+            kind: PluginConfig
+            name: ai-cache-plugin-config
+```
+
+Apply the configuration to your cluster:
+
+```shell
+kubectl apply -f ai-cache-ic.yaml
+```
+
+</TabItem>
+
+<TabItem value="apisix-crd">
+
+```yaml title="ai-cache-ic.yaml"
+apiVersion: apisix.apache.org/v2
+kind: ApisixRoute
+metadata:
+  namespace: aic
+  name: ai-cache-route
+spec:
+  ingressClassName: apisix
+  http:
+    - name: ai-cache-route
+      match:
+        paths:
+          - /anything
+        methods:
+          - POST
+      plugins:
+        - name: ai-cache
+          enable: true
+          config:
+            redis_host: 127.0.0.1
+        - name: ai-proxy
+          enable: true
+          config:
+            provider: openai
+            auth:
+              header:
+                Authorization: "Bearer your-openai-api-key"
+            options:
+              model: gpt-4o
+```
+
+Apply the configuration to your cluster:
+
+```shell
+kubectl apply -f ai-cache-ic.yaml
+```
+
+</TabItem>
+
+</Tabs>
+
+</TabItem>
+
+</Tabs>
+
+Send a request to the Route:
+
+```shell
+curl -i "http://127.0.0.1:9080/anything" -X POST \
+  -H "Content-Type: application/json" \
+  -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }'
+```
+
+The first request is a cache miss and is proxied to the LLM. The response carries the `X-AI-Cache-Status: MISS` header and a body similar to the following:
+
+```json
+{
+  "id": "chatcmpl-DtmdUDZeSZ0t62y6BvLkSk5qfH3zA",
+  "object": "chat.completion",
+  "created": 1782187368,
+  "model": "gpt-4o-2024-08-06",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Apache APISIX is a dynamic, cloud-native API gateway that provides high performance, scalability, and security for API management."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 19,
+    "completion_tokens": 25,
+    "total_tokens": 44
+  }
+}
+```
+
+Send the same request again. It is served from the cache without calling the LLM and returns the identical body, along with the following headers:
+
+```text
+X-AI-Cache-Status: HIT
+X-AI-Cache-Age: 8
+```
+
+### Bypass the Cache
+
+To skip the cache for selected requests, add a `bypass_on` rule and update the Route:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/ai-cache-route" -X PATCH \
+  -H "X-API-KEY: ${admin_key}" \
+  -d '{
+    "plugins": {
+      "ai-cache": {
+        "redis_host": "127.0.0.1",
+        "bypass_on": [{ "header": "X-Cache-Bypass", "equals": "1" }]
+      }
+    }
+  }'
+```
+
+Send a request with the matching header:
+
+```shell
+curl -i "http://127.0.0.1:9080/anything" -X POST \
+  -H "Content-Type: application/json" \
+  -H "X-Cache-Bypass: 1" \
+  -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }'
+```
+
+The cache is skipped entirely (no lookup and no write-back), and the response carries the `X-AI-Cache-Status: BYPASS` header.
diff --git a/docs/zh/latest/config.json b/docs/zh/latest/config.json
index 78ab8ad88718..039eb130db30 100644
--- a/docs/zh/latest/config.json
+++ b/docs/zh/latest/config.json
@@ -64,6 +64,7 @@
           "items": [
             "plugins/ai-proxy",
             "plugins/ai-proxy-multi",
+            "plugins/ai-cache",
             "plugins/ai-rate-limiting",
             "plugins/ai-prompt-guard",
             "plugins/ai-aws-content-moderation",
diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md
new file mode 100644
index 000000000000..fcb3e3e9483c
--- /dev/null
+++ b/docs/zh/latest/plugins/ai-cache.md
@@ -0,0 +1,332 @@
+---
+title: ai-cache
+keywords:
+  - Apache APISIX
+  - API 网关
+  - 插件
+  - ai-cache
+  - AI
+  - LLM
+description: ai-cache 插件将 LLM 响应缓存在 Redis 中，并在后续解析到相同提示词的请求中重放这些响应，从而降低上游的 Token 消耗与延迟。
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+<head>
+  <link rel="canonical" href="https://docs.api7.ai/hub/ai-cache" />
+</head>
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## 描述
+
+`ai-cache` 插件缓存 LLM 响应，并在后续解析到相同提示词的请求中重放这些响应，从而为重复性工作负载（FAQ 机器人、文档问答、翻译等）降低上游的 Token 消耗与延迟。
+
+本次发布实现了**精确**缓存层（L1）；语义缓存层（L2）计划在未来的版本中提供。
+
+`ai-cache` 插件必须与 [`ai-proxy`](./ai-proxy.md) 或 [`ai-proxy-multi`](./ai-proxy-multi.md) 插件一起使用。
+
+:::note
+
+缓存键使用**请求中**的模型。如果路由在服务端改写模型（`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择），并且共享同一个 Redis 与缓存作用域，请使用独立的 Redis 实例，或通过 `cache_key.include_vars`（例如 `["route_id"]`）将它们隔离。
+
+:::
+
+## 属性
+
+| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 |
+|------|------|--------|--------|--------|------|
+| layers | array[string] | 否 | ["exact"] | ["exact"] | 要启用的缓存层。本次发布仅支持精确（exact）缓存层。 |
+| exact.ttl | integer | 否 | 3600 | >= 1 | 精确缓存条目的存活时间（TTL），单位为秒。 |
+| cache_key.include_consumer | boolean | 否 | false | | 如果为 true，则按消费者隔离缓存，使缓存条目不会在不同消费者之间共享。 |
+| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量（例如 `["route_id"]`），按其取值隔离缓存条目。 |
+| max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小，单位为字节。超过该大小的响应不会被缓存。 |
+| cache_headers | boolean | 否 | true | | 如果为 true，则添加 `X-AI-Cache-Status` 响应头（命中时还会添加 `X-AI-Cache-Age`，表示缓存条目的存在时长，单位为秒）。 |
+| bypass_on | array[object] | 否 | | | 当任一规则匹配时，完全跳过缓存（不查询、不回写）的规则列表。 |
+| bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 |
+| bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时，绕过缓存。 |
+| policy | string | 否 | redis | redis | 存储后端。本次发布仅支持单节点 `redis`。 |
+| redis_host | string | 是 | | | Redis 节点的地址。 |
+| redis_port | integer | 否 | 6379 | >= 1 | Redis 节点的端口。 |
+| redis_username | string | 否 | | | 使用 Redis ACL 时的用户名。如果使用传统的 `requirepass` 认证方式，则仅配置 `redis_password`。 |
+| redis_password | string | 否 | | | Redis 节点的密码。在存入 etcd 之前使用 AES 加密。 |
+| redis_database | integer | 否 | 0 | >= 0 | Redis 中使用的数据库编号。 |
+| redis_timeout | integer | 否 | 1000 | >= 1 | Redis 超时时间，单位为毫秒。 |
+| redis_ssl | boolean | 否 | false | | 如果为 true，则使用 SSL 连接 Redis。 |
+| redis_ssl_verify | boolean | 否 | false | | 如果为 true，则校验 Redis 服务器的 SSL 证书。 |
+| redis_keepalive_timeout | integer | 否 | 10000 | >= 1000 | Redis 连接池的保活超时时间，单位为毫秒。 |
+| redis_keepalive_pool | integer | 否 | 100 | >= 1 | Redis 保活连接池中的最大连接数。 |
+
+## 示例
+
+以下示例使用 OpenAI 作为上游 LLM 服务提供商。请获取 [OpenAI API key](https://openai.com/blog/openai-api)，并将其与 Admin API key 一起保存到环境变量中：
+
+```shell
+export OPENAI_API_KEY=your-openai-api-key
+export admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"//g')
+```
+
+在配置的 `redis_host` 上必须有一个可访问的 Redis 实例。
+
+### 缓存 LLM 响应
+
+使用 [`ai-proxy`](./ai-proxy.md) 和 `ai-cache` 插件创建一个指向 LLM 聊天补全端点的路由。
+
+<Tabs
+groupId="api"
+defaultValue="admin-api"
+values={[
+{label: 'Admin API', value: 'admin-api'},
+{label: 'ADC', value: 'adc'},
+{label: 'Ingress Controller', value: 'aic'}
+]}>
+
+<TabItem value="admin-api">
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \
+  -H "X-API-KEY: ${admin_key}" \
+  -d '{
+    "id": "ai-cache-route",
+    "uri": "/anything",
+    "plugins": {
+      "ai-proxy": {
+        "provider": "openai",
+        "auth": { "header": { "Authorization": "Bearer '"$OPENAI_API_KEY"'" } },
+        "options": { "model": "gpt-4o" }
+      },
+      "ai-cache": {
+        "redis_host": "127.0.0.1"
+      }
+    }
+  }'
+```
+
+</TabItem>
+
+<TabItem value="adc">
+
+```yaml title="adc.yaml"
+services:
+  - name: ai-cache-service
+    routes:
+      - name: ai-cache-route
+        uris:
+          - /anything
+        methods:
+          - POST
+        plugins:
+          ai-proxy:
+            provider: openai
+            auth:
+              header:
+                Authorization: "Bearer ${OPENAI_API_KEY}"
+            options:
+              model: gpt-4o
+          ai-cache:
+            redis_host: 127.0.0.1
+```
+
+将配置同步到网关：
+
+```shell
+adc sync -f adc.yaml
+```
+
+</TabItem>
+
+<TabItem value="aic">
+
+<Tabs
+groupId="k8s-api"
+defaultValue="gateway-api"
+values={[
+{label: 'Gateway API', value: 'gateway-api'},
+{label: 'APISIX CRD', value: 'apisix-crd'}
+]}>
+
+<TabItem value="gateway-api">
+
+```yaml title="ai-cache-ic.yaml"
+apiVersion: apisix.apache.org/v1alpha1
+kind: PluginConfig
+metadata:
+  namespace: aic
+  name: ai-cache-plugin-config
+spec:
+  plugins:
+    - name: ai-cache
+      config:
+        redis_host: 127.0.0.1
+    - name: ai-proxy
+      config:
+        provider: openai
+        auth:
+          header:
+            Authorization: "Bearer your-openai-api-key"
+        options:
+          model: gpt-4o
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  namespace: aic
+  name: ai-cache-route
+spec:
+  parentRefs:
+    - name: apisix
+  rules:
+    - matches:
+        - path:
+            type: Exact
+            value: /anything
+          method: POST
+      filters:
+        - type: ExtensionRef
+          extensionRef:
+            group: apisix.apache.org
+            kind: PluginConfig
+            name: ai-cache-plugin-config
+```
+
+将配置应用到您的集群：
+
+```shell
+kubectl apply -f ai-cache-ic.yaml
+```
+
+</TabItem>
+
+<TabItem value="apisix-crd">
+
+```yaml title="ai-cache-ic.yaml"
+apiVersion: apisix.apache.org/v2
+kind: ApisixRoute
+metadata:
+  namespace: aic
+  name: ai-cache-route
+spec:
+  ingressClassName: apisix
+  http:
+    - name: ai-cache-route
+      match:
+        paths:
+          - /anything
+        methods:
+          - POST
+      plugins:
+        - name: ai-cache
+          enable: true
+          config:
+            redis_host: 127.0.0.1
+        - name: ai-proxy
+          enable: true
+          config:
+            provider: openai
+            auth:
+              header:
+                Authorization: "Bearer your-openai-api-key"
+            options:
+              model: gpt-4o
+```
+
+将配置应用到您的集群：
+
+```shell
+kubectl apply -f ai-cache-ic.yaml
+```
+
+</TabItem>
+
+</Tabs>
+
+</TabItem>
+
+</Tabs>
+
+向该路由发送请求：
+
+```shell
+curl -i "http://127.0.0.1:9080/anything" -X POST \
+  -H "Content-Type: application/json" \
+  -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }'
+```
+
+第一次请求是缓存未命中（MISS），会被代理到 LLM。响应中携带 `X-AI-Cache-Status: MISS` 响应头，响应体类似如下：
+
+```json
+{
+  "id": "chatcmpl-DtmdUDZeSZ0t62y6BvLkSk5qfH3zA",
+  "object": "chat.completion",
+  "created": 1782187368,
+  "model": "gpt-4o-2024-08-06",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Apache APISIX is a dynamic, cloud-native API gateway that provides high performance, scalability, and security for API management."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 19,
+    "completion_tokens": 25,
+    "total_tokens": 44
+  }
+}
+```
+
+再次发送相同的请求。该请求将直接由缓存返回，而不会调用 LLM，返回完全相同的响应体，并携带以下响应头：
+
+```text
+X-AI-Cache-Status: HIT
+X-AI-Cache-Age: 8
+```
+
+### 绕过缓存
+
+如需为特定请求跳过缓存，可添加 `bypass_on` 规则并更新路由：
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/ai-cache-route" -X PATCH \
+  -H "X-API-KEY: ${admin_key}" \
+  -d '{
+    "plugins": {
+      "ai-cache": {
+        "redis_host": "127.0.0.1",
+        "bypass_on": [{ "header": "X-Cache-Bypass", "equals": "1" }]
+      }
+    }
+  }'
+```
+
+发送带有匹配请求头的请求：
+
+```shell
+curl -i "http://127.0.0.1:9080/anything" -X POST \
+  -H "Content-Type: application/json" \
+  -H "X-Cache-Bypass: 1" \
+  -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }'
+```
+
+缓存被完全跳过（不查询、不回写），响应中携带 `X-AI-Cache-Status: BYPASS` 响应头。

From 84c5ccf0fac6c289385e99409f4fc5bce987d159 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 15:57:13 +0800
Subject: [PATCH 07/10] feat(ai-cache): implement canonical JSON encoding and
 enhance cache key configuration

---
 apisix/core/json.lua                 | 47 ++++++++++++++++++++++
 apisix/plugins/ai-cache.lua          |  5 ++-
 apisix/plugins/ai-cache/key.lua      | 58 +++++++---------------------
 apisix/plugins/ai-cache/schema.lua   | 11 +-----
 apisix/plugins/ai-transport/http.lua | 36 +----------------
 docs/en/latest/plugins/ai-cache.md   |  8 ++--
 docs/zh/latest/plugins/ai-cache.md   |  8 ++--
 7 files changed, 77 insertions(+), 96 deletions(-)

diff --git a/apisix/core/json.lua b/apisix/core/json.lua
index 397b80191e13..538e384fe985 100644
--- a/apisix/core/json.lua
+++ b/apisix/core/json.lua
@@ -28,8 +28,14 @@ local ngx = ngx
 local tostring = tostring
 local type = type
 local pairs = pairs
+local ipairs = ipairs
+local getmetatable = getmetatable
 local cached_tab = {}
 
+local rapidjson
+local rapidjson_null
+local rapidjson_encode_opts = { sort_keys = true }
+
 
 cjson.encode_escape_forward_slash(false)
 cjson.decode_array_with_array_mt(true)
@@ -122,6 +128,47 @@ local function encode(data, force)
 end
 _M.encode = encode
 
+
+local function to_rapidjson_value(data)  -- recursively copy data for rapidjson encoding
+    if data == cjson_null then  -- swap cjson's null sentinel for rapidjson's
+        return rapidjson_null  -- set by canonical_encode when it loads rapidjson
+    end
+
+    if type(data) ~= "table" then  -- non-table values are returned unchanged
+        return data
+    end
+
+    if getmetatable(data) == cjson.array_mt then  -- table carries cjson's array marker
+        local arr = {}
+        for i, v in ipairs(data) do
+            arr[i] = to_rapidjson_value(v)
+        end
+        return rapidjson.array(arr)  -- keep the array marker on the converted copy
+    end
+
+    local obj = {}  -- any other table: copy every key, converting each value
+    for k, v in pairs(data) do
+        obj[k] = to_rapidjson_value(v)
+    end
+    return obj
+end
+
+
+--- Encode a Lua value to a canonical JSON string with sorted object keys.
+-- Unlike core.json.encode, object keys are emitted in a stable (sorted) order,
+-- so the same logical value always produces the same string -- suitable for
+-- hashing, cache keys and signatures. cjson null / array_mt markers are
+-- preserved. Backed by rapidjson, which is loaded on first use.
+-- @param data The value to encode; non-table values are handed to rapidjson as-is.
+-- @treturn string The canonically-encoded JSON string.
+function _M.canonical_encode(data)
+    if not rapidjson then  -- first call: load rapidjson and cache its null sentinel
+        rapidjson = require("rapidjson")
+        rapidjson_null = rapidjson.null
+    end
+    return rapidjson.encode(to_rapidjson_value(data), rapidjson_encode_opts)
+end
+
 local max_delay_encode_items = 16
 local delay_tab_idx = 0
 local delay_tab_arr = {}
diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
index 0c7053c176d1..43e4f956e3ac 100644
--- a/apisix/plugins/ai-cache.lua
+++ b/apisix/plugins/ai-cache.lua
@@ -103,12 +103,13 @@ function _M.access(conf, ctx)
 
     local res
     res, err = red:get(ctx.ai_cache_key)
-    release(conf, red)
     if err then
+        red:close()
         core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
         ctx.ai_cache_status = "MISS"
         return
     end
+    release(conf, red)
 
     if res ~= nil and res ~= ngx_null then
         local cached = core.json.decode(res)
@@ -170,7 +171,9 @@ local function write_to_cache(premature, conf, cache_key, response_body)
     local ok
     ok, err = red:set(cache_key, envelope, "EX", ttl)
     if not ok then
+        red:close()
         core.log.warn("ai-cache: redis set failed: ", err)
+        return
     end
     release(conf, red)
 end
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
index 3f012892c0b9..6deb7cae4ca0 100644
--- a/apisix/plugins/ai-cache/key.lua
+++ b/apisix/plugins/ai-cache/key.lua
@@ -17,18 +17,12 @@
 
 local core      = require("apisix.core")
 local protocols = require("apisix.plugins.ai-protocols")
-local rapidjson = require("rapidjson")
 local sha256    = require("resty.sha256")
 local to_hex    = require("resty.string").to_hex
 
-local ipairs       = ipairs
-local pairs        = pairs
-local type         = type
-local getmetatable = getmetatable
-local concat       = table.concat
-
-local rapidjson_null = rapidjson.null
-local ENCODE_OPTS    = { sort_keys = true }
+local ipairs = ipairs
+local pairs  = pairs
+local concat = table.concat
 
 local _M = {}
 
@@ -40,33 +34,6 @@ local function hex_digest(s)
 end
 
 
-local function to_rapidjson_value(data)
-    if data == core.json.null then
-        return rapidjson_null
-    end
-    if type(data) ~= "table" then
-        return data
-    end
-    if getmetatable(data) == core.json.array_mt then
-        local arr = {}
-        for i, v in ipairs(data) do
-            arr[i] = to_rapidjson_value(v)
-        end
-        return rapidjson.array(arr)
-    end
-    local obj = {}
-    for k, v in pairs(data) do
-        obj[k] = to_rapidjson_value(v)
-    end
-    return obj
-end
-
-
-local function canonical_encode(value)
-    return rapidjson.encode(to_rapidjson_value(value), ENCODE_OPTS)
-end
-
-
 function _M.fingerprint(ctx, body)
     local params = {}
     for k, v in pairs(body) do
@@ -75,7 +42,7 @@ function _M.fingerprint(ctx, body)
         end
     end
 
-    local repr = canonical_encode({
+    local repr = core.json.canonical_encode({
         protocol = ctx.ai_client_protocol or "",
         model    = ctx.var.request_llm_model or body.model or "",
         messages = protocols.get_messages(body, ctx) or {},
@@ -86,21 +53,24 @@ end
 
 
 function _M.scope(conf, ctx)
-    local ck = conf.cache_key
-    local inc_vars = ck and ck.include_vars
-    if not (ck and ck.include_consumer) and (not inc_vars or #inc_vars == 0) then
-        return "shared"
-    end
+    local ck = conf.cache_key or {}
 
     local parts = {}
+    if not ck.share_across_routes then
+        parts[#parts + 1] = "route=" .. (ctx.var.route_id or "")
+    end
     if ck.include_consumer then
         parts[#parts + 1] = "consumer=" .. (ctx.consumer_name or "")
     end
-    if inc_vars then
-        for _, name in ipairs(inc_vars) do
+    if ck.include_vars then
+        for _, name in ipairs(ck.include_vars) do
             parts[#parts + 1] = name .. "=" .. (ctx.var[name] or "")
         end
     end
+
+    if #parts == 0 then
+        return "shared"
+    end
     return concat(parts, ":")
 end
 
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
index d434d44f45a7..9494dfeec2f4 100644
--- a/apisix/plugins/ai-cache/schema.lua
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -23,16 +23,6 @@ local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
 local _M = {
     type = "object",
     properties = {
-        layers = {
-            type = "array",
-            items = {
-                enum = { "exact" },
-            },
-            minItems = 1,
-            uniqueItems = true,
-            default = { "exact" },
-        },
-
         exact = {
             type = "object",
             properties = {
@@ -44,6 +34,7 @@ local _M = {
         cache_key = {
             type = "object",
             properties = {
+                share_across_routes = { type = "boolean", default = false },
                 include_consumer = { type = "boolean", default = false },
                 include_vars = {
                     type = "array",
diff --git a/apisix/plugins/ai-transport/http.lua b/apisix/plugins/ai-transport/http.lua
index eb7efc34b57b..5ea9d2194545 100644
--- a/apisix/plugins/ai-transport/http.lua
+++ b/apisix/plugins/ai-transport/http.lua
@@ -20,8 +20,6 @@
 
 local core = require("apisix.core")
 local http = require("resty.http")
-local rapidjson = require("rapidjson")
-local getmetatable = getmetatable
 local ngx_now = ngx.now
 local pairs = pairs
 local ipairs = ipairs
@@ -31,8 +29,6 @@ local str_lower = string.lower
 local tostring = tostring
 
 local _M = {}
-local rapidjson_encode_opts = {sort_keys = true}
-local rapidjson_null = rapidjson.null
 
 
 --- Map network errors to HTTP status codes.
@@ -73,38 +69,8 @@ function _M.construct_forward_headers(ext_opts_headers, ctx)
 end
 
 
-local function to_rapidjson_value(data)
-    if data == core.json.null then
-        return rapidjson_null
-    end
-
-    if type(data) ~= "table" then
-        return data
-    end
-
-    if getmetatable(data) == core.json.array_mt then
-        local arr = {}
-        for i, v in ipairs(data) do
-            arr[i] = to_rapidjson_value(v)
-        end
-        return rapidjson.array(arr)
-    end
-
-    local obj = {}
-    for k, v in pairs(data) do
-        obj[k] = to_rapidjson_value(v)
-    end
-    return obj
-end
-
-
-local function rapidjson_encode(body)
-    return rapidjson.encode(to_rapidjson_value(body), rapidjson_encode_opts)
-end
-
-
 local function encode_body(body)
-    local ok, encoded = pcall(rapidjson_encode, body)
+    local ok, encoded = pcall(core.json.canonical_encode, body)
     if ok and encoded then
         return encoded
     end
diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md
index dc3a559d9adc..4f4568c6ef33 100644
--- a/docs/en/latest/plugins/ai-cache.md
+++ b/docs/en/latest/plugins/ai-cache.md
@@ -46,7 +46,9 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-
 
 :::note
 
-The cache key uses the **requested** model. If routes rewrite the model server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection) and share one Redis and cache scope, isolate them with separate Redis instances or with `cache_key.include_vars` (for example `["route_id"]`).
+By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes.
+
+The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`.
 
 :::
 
@@ -54,10 +56,10 @@ The cache key uses the **requested** model. If routes rewrite the model server-s
 
 | Name | Type | Required | Default | Valid values | Description |
 |------|------|----------|---------|--------------|-------------|
-| layers | array[string] | False | ["exact"] | ["exact"] | Cache layers to enable. Only the exact layer is available in this release. |
 | exact.ttl | integer | False | 3600 | >= 1 | Time-to-live, in seconds, of an exact-cache entry. |
+| cache_key.share_across_routes | boolean | False | false | | By default the cache is isolated per route. If true, entries are shared across every route that computes the same key. |
 | cache_key.include_consumer | boolean | False | false | | If true, scope the cache per consumer so entries are not shared across consumers. |
-| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["route_id"]`), isolating entries by their values. |
+| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. |
 | max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. |
 | cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). |
 | bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. |
diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md
index fcb3e3e9483c..38e6c163da60 100644
--- a/docs/zh/latest/plugins/ai-cache.md
+++ b/docs/zh/latest/plugins/ai-cache.md
@@ -46,7 +46,9 @@ import TabItem from '@theme/TabItem';
 
 :::note
 
-缓存键使用**请求中**的模型。如果路由在服务端改写模型（`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择），并且共享同一个 Redis 与缓存作用域，请使用独立的 Redis 实例，或通过 `cache_key.include_vars`（例如 `["route_id"]`）将它们隔离。
+默认情况下缓存按路由隔离，因此即使两个路由看到相同的协议、模型与消息，也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。
+
+缓存键使用**请求中**的模型，而非路由在服务端改写后的模型（`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择）。在跨路由共享时，如果不同路由改写到不同的上游模型，请使用独立的 Redis 实例，或通过 `cache_key.include_vars` 将它们隔离。
 
 :::
 
@@ -54,10 +56,10 @@ import TabItem from '@theme/TabItem';
 
 | 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 |
 |------|------|--------|--------|--------|------|
-| layers | array[string] | 否 | ["exact"] | ["exact"] | 要启用的缓存层。本次发布仅支持精确（exact）缓存层。 |
 | exact.ttl | integer | 否 | 3600 | >= 1 | 精确缓存条目的存活时间（TTL），单位为秒。 |
+| cache_key.share_across_routes | boolean | 否 | false | | 默认情况下缓存按路由隔离。如果为 true，则计算出相同缓存键的所有路由之间共享缓存条目。 |
 | cache_key.include_consumer | boolean | 否 | false | | 如果为 true，则按消费者隔离缓存，使缓存条目不会在不同消费者之间共享。 |
-| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量（例如 `["route_id"]`），按其取值隔离缓存条目。 |
+| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量（例如 `["http_x_tenant"]`），按其取值隔离缓存条目。 |
 | max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小，单位为字节。超过该大小的响应不会被缓存。 |
 | cache_headers | boolean | 否 | true | | 如果为 true，则添加 `X-AI-Cache-Status` 响应头（命中时还会添加 `X-AI-Cache-Age`，表示缓存条目的存在时长，单位为秒）。 |
 | bypass_on | array[object] | 否 | | | 当任一规则匹配时，完全跳过缓存（不查询、不回写）的规则列表。 |

From 9024b70afc6fa90ef95eb177762d9826cb92984b Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Tue, 23 Jun 2026 16:09:18 +0800
Subject: [PATCH 08/10] feat(ai-cache): update tests for exact.ttl validation
 and add cross-route cache sharing scenarios

---
 t/plugin/ai-cache.t | 181 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 178 insertions(+), 3 deletions(-)

diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
index c59f58f9181d..9ff45f7bfdbe 100644
--- a/t/plugin/ai-cache.t
+++ b/t/plugin/ai-cache.t
@@ -88,14 +88,14 @@ qr/then clause did not match/
 
 
 
-=== TEST 3: reject unknown layer value
+=== TEST 3: reject an out-of-range exact.ttl
 --- config
     location /t {
         content_by_lua_block {
             local plugin = require("apisix.plugins.ai-cache")
             local ok, err = plugin.check_schema({
                 redis_host = "127.0.0.1",
-                layers = { "nonsense" },
+                exact = { ttl = 0 },
             })
 
             if not ok then
@@ -106,7 +106,7 @@ qr/then clause did not match/
         }
     }
 --- response_body eval
-qr/layers/
+qr/ttl/
 
 
 
@@ -774,3 +774,178 @@ X-AI-Fixture: openai/chat-basic.json
 X-AI-Cache-Status: MISS
 --- response_body_like eval
 qr/1 \+ 1 = 2/
+
+
+
+=== TEST 35: set two openai routes (same model, default scope) sharing one Redis
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+
+            code, body = t('/apisix/admin/routes/2',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/cache-route-b",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+passed
+
+
+
+=== TEST 36: route 1 cold request is a MISS (warms the route=1 scope)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 37: same prompt on route 2 is a MISS (not shared with route 1 by default)
+--- request
+POST /cache-route-b
+{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 38: same prompt on route 1 is a HIT (its own per-route scope persisted)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]}
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT
+
+
+
+=== TEST 39: set both routes with share_across_routes enabled
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_key": { "share_across_routes": true }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+
+            code, body = t('/apisix/admin/routes/2',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/cache-route-b",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980" }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_key": { "share_across_routes": true }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+passed
+
+
+
+=== TEST 40: route 1 cold request is a MISS (warms the shared scope)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route share test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 41: same prompt on route 2 is a HIT (cache shared across routes)
+--- request
+POST /cache-route-b
+{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route share test"}]}
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT

From 6f15de7841a24057804d6515b0e5b89ffde9be1b Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Wed, 24 Jun 2026 09:11:56 +0800
Subject: [PATCH 09/10] fix(json): localize require in json.lua

---
 apisix/core/json.lua | 1 +
 1 file changed, 1 insertion(+)

diff --git a/apisix/core/json.lua b/apisix/core/json.lua
index 538e384fe985..9418917b355e 100644
--- a/apisix/core/json.lua
+++ b/apisix/core/json.lua
@@ -24,6 +24,7 @@ local json_encode = cjson.encode
 local json_decode = cjson.decode
 local cjson_null = cjson.null
 local clear_tab = require("table.clear")
+local require = require
 local ngx = ngx
 local tostring = tostring
 local type = type

From 4775bfc1fd822869d705b45bceb1856cefbd2df8 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Wed, 24 Jun 2026 15:53:11 +0800
Subject: [PATCH 10/10] feat(ai-cache): enhance error handling for unsupported
 requests and improve cache key generation

---
 apisix/plugins/ai-cache.lua        |  22 +++-
 apisix/plugins/ai-cache/key.lua    |  12 +-
 apisix/plugins/ai-cache/schema.lua |   3 +
 docs/en/latest/plugins/ai-cache.md |   3 +-
 docs/zh/latest/plugins/ai-cache.md |   3 +-
 t/plugin/ai-cache.t                | 179 +++++++++++++++++++++++++++++
 6 files changed, 215 insertions(+), 7 deletions(-)

diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
index 43e4f956e3ac..3b7dd5145b0a 100644
--- a/apisix/plugins/ai-cache.lua
+++ b/apisix/plugins/ai-cache.lua
@@ -18,6 +18,7 @@
 local core       = require("apisix.core")
 local schema     = require("apisix.plugins.ai-cache.schema")
 local key_mod    = require("apisix.plugins.ai-cache.key")
+local binding    = require("apisix.plugins.ai-protocols.binding")
 local redis_util = require("apisix.utils.redis")
 
 local ngx        = ngx
@@ -65,6 +66,18 @@ end
 
 
 function _M.access(conf, ctx)
+    if not ctx.picked_ai_instance then
+        local handled, code, body = binding.on_unsupported(
+            conf.fail_mode, _M.name, ctx,
+            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
+            500, "ai-cache must be used with the ai-proxy or ai-proxy-multi plugin")
+        if handled then
+            return code, body
+        end
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
     -- Streaming responses are not cached in PR-1 (SSE replay is a later
     -- increment). ai-proxy (higher priority) has already classified the
     -- request, so bypass before doing any work.
@@ -89,8 +102,8 @@ function _M.access(conf, ctx)
         return
     end
 
-    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
-                       .. ":" .. key_mod.fingerprint(ctx, body)
+    ctx.ai_cache_fingerprint = key_mod.fingerprint(ctx, body)
+    ctx.ai_cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
 
     local red
     red, err = redis_util.new(conf)
@@ -180,7 +193,7 @@ end
 
 
 function _M.log(conf, ctx)
-    if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then
+    if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_fingerprint then
         return
     end
     if ngx.status ~= 200 then
@@ -192,7 +205,8 @@ function _M.log(conf, ctx)
     end
     local response_body = concat(buf, "", 1, buf.n)
 
-    local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body)
+    local cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
+    local ok, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body)
     if not ok then
         core.log.warn("ai-cache: failed to schedule cache write: ", err)
     end
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
index 6deb7cae4ca0..e08f0586daec 100644
--- a/apisix/plugins/ai-cache/key.lua
+++ b/apisix/plugins/ai-cache/key.lua
@@ -24,6 +24,8 @@ local ipairs = ipairs
 local pairs  = pairs
 local concat = table.concat
 
+local KEY_PREFIX = "ai-cache:l1:"
+
 local _M = {}
 
 
@@ -52,10 +54,13 @@ function _M.fingerprint(ctx, body)
 end
 
 
-function _M.scope(conf, ctx)
+local function scope(conf, ctx)
     local ck = conf.cache_key or {}
 
     local parts = {}
+    if ctx.picked_ai_instance_name then
+        parts[#parts + 1] = "instance=" .. ctx.picked_ai_instance_name
+    end
     if not ck.share_across_routes then
         parts[#parts + 1] = "route=" .. (ctx.var.route_id or "")
     end
@@ -75,4 +80,9 @@ function _M.scope(conf, ctx)
 end
 
 
+function _M.build(conf, ctx, fingerprint)
+    return KEY_PREFIX .. scope(conf, ctx) .. ":" .. fingerprint
+end
+
+
 return _M
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
index 9494dfeec2f4..5eb62661c73d 100644
--- a/apisix/plugins/ai-cache/schema.lua
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -17,6 +17,7 @@
 
 local core         = require("apisix.core")
 local redis_schema = require("apisix.utils.redis-schema")
+local binding      = require("apisix.plugins.ai-protocols.binding")
 
 local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
 
@@ -53,6 +54,8 @@ local _M = {
             type = "boolean", default = true,
         },
 
+        fail_mode = binding.schema_property("skip"),
+
         bypass_on = {
             type = "array",
             minItems = 1,
diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md
index 4f4568c6ef33..360f79b1b3bf 100644
--- a/docs/en/latest/plugins/ai-cache.md
+++ b/docs/en/latest/plugins/ai-cache.md
@@ -48,7 +48,7 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-
 
 By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes.
 
-The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`.
+Even with `cache_key.share_across_routes` enabled, responses from different upstream models or providers are kept in separate cache entries, so one model's response is never served for another.
 
 :::
 
@@ -62,6 +62,7 @@ The cache key uses the **requested** model, not the model a route may rewrite to
 | cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. |
 | max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. |
 | cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). |
+| fail_mode | string | False | `"skip"` | `skip`, `warn`, `error` | Behavior when the request is not a recognized AI request that this Plugin can cache (for example, a request that did not pass through `ai-proxy` or `ai-proxy-multi`). `skip`: let the request pass through uncached; `warn`: pass through uncached and log a warning; `error`: reject the request. |
 | bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. |
 | bypass_on[].header | string | True | | | Request header name to match. |
 | bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. |
diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md
index 38e6c163da60..3793317bdd15 100644
--- a/docs/zh/latest/plugins/ai-cache.md
+++ b/docs/zh/latest/plugins/ai-cache.md
@@ -48,7 +48,7 @@ import TabItem from '@theme/TabItem';
 
 默认情况下缓存按路由隔离，因此即使两个路由看到相同的协议、模型与消息，也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。
 
-缓存键使用**请求中**的模型，而非路由在服务端改写后的模型（`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择）。在跨路由共享时，如果不同路由改写到不同的上游模型，请使用独立的 Redis 实例，或通过 `cache_key.include_vars` 将它们隔离。
+即使开启 `cache_key.share_across_routes`，来自不同上游模型或 provider 的响应也会分别存储在各自的缓存条目中，因此某个模型的响应绝不会被用来响应针对另一个模型的请求。
 
 :::
 
@@ -62,6 +62,7 @@ import TabItem from '@theme/TabItem';
 | cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量（例如 `["http_x_tenant"]`），按其取值隔离缓存条目。 |
 | max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小，单位为字节。超过该大小的响应不会被缓存。 |
 | cache_headers | boolean | 否 | true | | 如果为 true，则添加 `X-AI-Cache-Status` 响应头（命中时还会添加 `X-AI-Cache-Age`，表示缓存条目的存在时长，单位为秒）。 |
+| fail_mode | string | 否 | `"skip"` | `skip`、`warn`、`error` | 当请求不是该插件可缓存的 AI 请求时的处理行为（例如未经过 `ai-proxy` 或 `ai-proxy-multi` 的请求）。`skip`：放行请求且不缓存；`warn`：放行不缓存并记录 warning 日志；`error`：拒绝请求。 |
 | bypass_on | array[object] | 否 | | | 当任一规则匹配时，完全跳过缓存（不查询、不回写）的规则列表。 |
 | bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 |
 | bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时，绕过缓存。 |
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
index 9ff45f7bfdbe..e9fdf5292ac9 100644
--- a/t/plugin/ai-cache.t
+++ b/t/plugin/ai-cache.t
@@ -949,3 +949,182 @@ POST /cache-route-b
 --- error_code: 200
 --- response_headers
 X-AI-Cache-Status: HIT
+
+
+
+=== TEST 42: route with ai-cache but NO ai-proxy in front
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/chat/completions",
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": { "127.0.0.1:1980": 1 }
+                    },
+                    "plugins": {
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 43: a request that never passed through ai-proxy is bypassed, not cached
+--- request
+POST /v1/chat/completions
+{"model":"gpt-4o","messages":[{"role":"user","content":"no ai-proxy guard test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: BYPASS
+
+
+
+=== TEST 44: route with ai-cache fail_mode=error and NO ai-proxy
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/chat/completions",
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": { "127.0.0.1:1980": 1 }
+                    },
+                    "plugins": {
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "fail_mode": "error"
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 45: fail_mode=error rejects a request that bypassed the AI proxy
+--- request
+POST /v1/chat/completions
+{"model":"gpt-4o","messages":[{"role":"user","content":"fail_mode error guard test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 500
+--- response_body_like eval
+qr/must be used with the ai-proxy/
+
+
+
+=== TEST 46: flush redis, then set one ai-proxy-multi route with two instances
+--- extra_yaml_config
+plugins:
+  - ai-proxy-multi
+  - ai-cache
+--- config
+    location /t {
+        content_by_lua_block {
+            local redis = require("resty.redis")
+            local red = redis:new()
+            red:set_timeout(1000)
+            local ok, rerr = red:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.say("redis connect failed: ", rerr)
+                return
+            end
+            local fok, ferr = red:flushall()
+            if not fok then
+                ngx.say("redis flushall failed: ", ferr)
+                return
+            end
+
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/multi",
+                    "plugins": {
+                        "ai-proxy-multi": {
+                            "instances": [
+                                {
+                                    "name": "instance-gpt4o",
+                                    "provider": "openai",
+                                    "weight": 1,
+                                    "auth": { "header": { "Authorization": "Bearer test-key" } },
+                                    "options": { "model": "gpt-4o" },
+                                    "override": { "endpoint": "http://127.0.0.1:1980" }
+                                },
+                                {
+                                    "name": "instance-gpt4o-mini",
+                                    "provider": "openai",
+                                    "weight": 1,
+                                    "auth": { "header": { "Authorization": "Bearer test-key" } },
+                                    "options": { "model": "gpt-4o-mini" },
+                                    "override": { "endpoint": "http://127.0.0.1:1980" }
+                                }
+                            ]
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 47: round-robin alternates instances, so each one caches independently
+--- extra_yaml_config
+plugins:
+  - ai-proxy-multi
+  - ai-cache
+--- pipelined_requests eval
+[
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+]
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers eval
+[
+    "X-AI-Cache-Status: MISS",
+    "X-AI-Cache-Status: MISS",
+    "X-AI-Cache-Status: HIT",
+    "X-AI-Cache-Status: HIT",
+]