From 8cd41f8418005acb9004e97060171fa06b919773 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 19 Jun 2026 15:23:02 +0800 Subject: [PATCH 01/10] feat: add ai-cache plugin to installation and configuration --- Makefile | 3 +++ apisix/cli/config.lua | 1 + conf/config.yaml.example | 1 + t/admin/plugins.t | 1 + 4 files changed, 6 insertions(+) diff --git a/Makefile b/Makefile index b0d7820cf73b..0e283228834c 100644 --- a/Makefile +++ b/Makefile @@ -401,6 +401,9 @@ install: runtime $(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search $(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search + $(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-cache + $(ENV_INSTALL) apisix/plugins/ai-cache/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-cache + $(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker $(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport $(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua index 771c21bd339b..0c1246cd1dc5 100644 --- a/apisix/cli/config.lua +++ b/apisix/cli/config.lua @@ -244,6 +244,7 @@ local _M = { "ai-rate-limiting", "ai-proxy-multi", "ai-proxy", + "ai-cache", "ai-aws-content-moderation", "ai-aliyun-content-moderation", "proxy-mirror", diff --git a/conf/config.yaml.example b/conf/config.yaml.example index 2360647e8f4a..38c6afcbd1e7 100644 --- a/conf/config.yaml.example +++ b/conf/config.yaml.example @@ -538,6 +538,7 @@ plugins: # plugin list (sorted by priority) - ai-aws-content-moderation # priority: 1050 - ai-proxy-multi # priority: 1041 - ai-proxy # priority: 1040 + - ai-cache # priority: 1035 - ai-rate-limiting # priority: 1030 - ai-aliyun-content-moderation # priority: 1029 - proxy-mirror # priority: 1010 diff --git a/t/admin/plugins.t b/t/admin/plugins.t index 6061de721daf..cbce68d7f51d 100644 --- a/t/admin/plugins.t +++ b/t/admin/plugins.t @@ -108,6 
+108,7 @@ ai-rag ai-aws-content-moderation ai-proxy-multi ai-proxy +ai-cache ai-rate-limiting ai-aliyun-content-moderation proxy-mirror From 1ea1aaa77defda3ad3d02838b2c656d5061e1970 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 19 Jun 2026 16:54:36 +0800 Subject: [PATCH 02/10] feat: implement ai-cache plugin with Redis support and testing --- apisix/plugins/ai-cache.lua | 199 +++++++++ apisix/plugins/ai-cache/key.lua | 72 ++++ apisix/plugins/ai-cache/schema.lua | 91 ++++ t/plugin/ai-cache.t | 652 +++++++++++++++++++++++++++++ 4 files changed, 1014 insertions(+) create mode 100644 apisix/plugins/ai-cache.lua create mode 100644 apisix/plugins/ai-cache/key.lua create mode 100644 apisix/plugins/ai-cache/schema.lua create mode 100644 t/plugin/ai-cache.t diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua new file mode 100644 index 000000000000..8fdb5ec7347b --- /dev/null +++ b/apisix/plugins/ai-cache.lua @@ -0,0 +1,199 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+--
+
+local core = require("apisix.core")
+local schema = require("apisix.plugins.ai-cache.schema")
+local key_mod = require("apisix.plugins.ai-cache.key")
+local redis_util = require("apisix.utils.redis")
+
+local ngx = ngx
+local ngx_null = ngx.null
+local ipairs = ipairs
+local str_sub = string.sub
+
+local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
+local CACHE_AGE_HEADER = "X-AI-Cache-Age"
+local DEFAULT_TTL = 3600
+
+local _M = {
+    version = 0.1,
+    priority = 1035,
+    name = "ai-cache",
+    schema = schema,
+}
+
+
+function _M.check_schema(conf)
+    return core.schema.check(schema, conf)
+end
+
+
+local function release(conf, red)
+    local ok, err = red:set_keepalive(conf.redis_keepalive_timeout or 10000,
+                                      conf.redis_keepalive_pool or 100)
+    if not ok then
+        core.log.warn("ai-cache: failed to set redis keepalive: ", err)
+    end
+end
+
+
+local function serve_hit(conf, ctx, cached)
+    ctx.ai_cache_status = "HIT"
+    if conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, "HIT")
+        local age = ngx.time() - (cached.created_at or ngx.time())
+        core.response.set_header(CACHE_AGE_HEADER, age < 0 and 0 or age)
+    end
+    core.response.set_header("Content-Type", "application/json")
+    return core.response.exit(200, cached.body)
+end
+
+
+function _M.access(conf, ctx)
+    -- Streaming responses are not cached in PR-1 (SSE replay is a later
+    -- increment). ai-proxy (higher priority) has already classified the
+    -- request, so bypass before doing any work.
+    if ctx.var.request_type == "ai_stream" then
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    -- explicit opt-out: any cache_bypass reference resolving to a value
+    -- that is non-empty and not "0" skips the cache (proxy-cache
+    -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading
+    -- "$" marks a variable to resolve; anything else is a literal.
+    if conf.cache_bypass then
+        for _, ref in ipairs(conf.cache_bypass) do
+            local val = ref
+            if str_sub(ref, 1, 1) == "$" then
+                val = ctx.var[str_sub(ref, 2)]
+            end
+            if val ~= nil and val ~= "" and val ~= "0" then
+                ctx.ai_cache_status = "BYPASS"
+                return
+            end
+        end
+    end
+
+    local body, err = core.request.get_json_request_body_table()
+    if not body then
+        core.log.warn("ai-cache: cannot read request body, bypassing: ", err)
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
+                       .. ":" .. key_mod.fingerprint(ctx, body)
+
+    local red
+    red, err = redis_util.new(conf)
+    if not red then
+        -- fail-open: never let a cache-backend outage break the request.
+        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    local res
+    res, err = red:get(ctx.ai_cache_key)
+    release(conf, red)
+    if err then
+        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    if res ~= nil and res ~= ngx_null then
+        local cached = core.json.decode(res)
+        if cached and cached.body then
+            return serve_hit(conf, ctx, cached)
+        end
+        core.log.warn("ai-cache: discarding malformed cache entry for ", ctx.ai_cache_key)
+    end
+
+    ctx.ai_cache_status = "MISS"
+end
+
+
+function _M.header_filter(conf, ctx)
+    if ctx.ai_cache_status and conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, ctx.ai_cache_status)
+    end
+end
+
+
+function _M.body_filter(conf, ctx)
+    -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
+    if ctx.ai_cache_status ~= "MISS" or ctx.ai_cache_skip then
+        return
+    end
+    local chunk = ngx.arg[1]
+    if not chunk or #chunk == 0 then
+        return
+    end
+    -- Past the size cap the response will never be cached: drop the buffer and
+    -- stop collecting. Chunks go in a table (joined once in log), not `..`.
+    local size = (ctx.ai_cache_len or 0) + #chunk
+    if size > (conf.max_cache_body_size or 1048576) then
+        ctx.ai_cache_skip, ctx.ai_cache_buf = true, nil
+        return
+    end
+    local buf = ctx.ai_cache_buf or {}
+    buf[#buf + 1] = chunk
+    ctx.ai_cache_buf, ctx.ai_cache_len = buf, size
+end
+
+
+-- The response-capturing phases (body_filter / log) run in contexts where
+-- cosockets are disabled, so the Redis write is deferred to a 0-delay timer
+-- (timers run in a light thread where cosockets are allowed).
+local function write_to_cache(premature, conf, cache_key, response_body)
+    if premature then
+        return
+    end
+    local red, err = redis_util.new(conf)
+    if not red then
+        core.log.warn("ai-cache: redis unavailable on write: ", err)
+        return
+    end
+    local envelope = core.json.encode({ body = response_body, created_at = ngx.time() })
+    local ttl = (conf.exact and conf.exact.ttl) or DEFAULT_TTL
+    local ok, set_err = red:set(cache_key, envelope, "EX", ttl)
+    if not ok then
+        core.log.warn("ai-cache: redis set failed: ", set_err)
+        red:close()  -- a failed connection must not be pooled; close it explicitly
+        return
+    end
+    release(conf, red)
+end
+
+
+function _M.log(conf, ctx)
+    -- write-on-success only: never cache an error response. An empty or
+    -- over-cap body leaves no buffer behind (see body_filter), so it is skipped.
+    if ctx.ai_cache_status ~= "MISS" or not (ctx.ai_cache_key and ctx.ai_cache_buf)
+       or ngx.status < 200 or ngx.status >= 300 then
+        return
+    end
+    local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key,
+                                 core.table.concat(ctx.ai_cache_buf))
+    if not ok then
+        core.log.warn("ai-cache: failed to schedule cache write: ", err)
+    end
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
new file mode 100644
index 000000000000..283501bb3ec6
--- /dev/null
+++ b/apisix/plugins/ai-cache/key.lua
@@ -0,0 +1,72 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core = require("apisix.core")
+local protocols = require("apisix.plugins.ai-protocols")
+local sha256 = require("resty.sha256")
+local to_hex = require("resty.string").to_hex
+
+local ipairs = ipairs
+local concat = table.concat
+
+local _M = {}
+
+
+local function hex_digest(s)
+    local hash = sha256:new()
+    hash:update(s)
+    return to_hex(hash:final())
+end
+
+
+function _M.fingerprint(ctx, body)
+    local params = core.table.deepcopy(body)
+    params.messages = nil
+    params.model = nil
+    params.stream = nil
+
+    local repr = core.json.stably_encode({
+        protocol = ctx.ai_client_protocol or "",
+        model = ctx.var.request_llm_model or body.model or "",
+        messages = protocols.get_messages(body, ctx) or {},
+        params = params,
+    })
+    return hex_digest(repr)
+end
+
+
+-- Scope segment of the cache key ("shared" when nothing isolates it). Values are
+-- length-prefixed so a crafted ":" or "=" in a value cannot collide two scopes.
+function _M.scope(conf, ctx)
+    local ck = conf.cache_key or {}
+    local parts, names = {}, ck.include_vars or {}
+    if not ck.include_consumer and #names == 0 then
+        return "shared"
+    end
+    if ck.include_consumer then
+        local who = ctx.consumer_name or ""
+        parts[1] = "consumer=" .. #who .. ":" .. who
+    end
+    for _, name in ipairs(names) do
+        local value = (ctx.var[name] or "") .. ""   -- coerce a numeric var to a string
+        parts[#parts + 1] = name .. "=" .. #value .. ":" .. value
+    end
+    return concat(parts, ":")
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
new file mode 100644
index 000000000000..a36c67ab04f7
--- /dev/null
+++ b/apisix/plugins/ai-cache/schema.lua
@@ -0,0 +1,91 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core = require("apisix.core")
+local redis_schema = require("apisix.utils.redis-schema")
+
+local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
+-- JSON schema for the ai-cache plugin configuration.
+local _M = {
+    type = "object",
+    properties = {
+        layers = {
+            type = "array",
+            items = {
+                enum = { "exact" },  -- the only layer value this schema accepts
+            },
+            minItems = 1,
+            uniqueItems = true,
+            default = { "exact" },
+        },
+
+        exact = {
+            type = "object",
+            properties = {
+                ttl = { type = "integer", minimum = 1, default = 3600 },  -- Redis EX expiry
+            },
+            default = {},
+        },
+
+        cache_key = {
+            type = "object",
+            properties = {
+                include_consumer = { type = "boolean", default = false },  -- scope by consumer
+                include_vars = {  -- ctx.var names whose values scope the key
+                    type = "array",
+                    items = { type = "string" },
+                    default = {},
+                },
+            },
+            default = {},
+        },
+
+        max_cache_body_size = {  -- responses longer than this are not cached
+            type = "integer", minimum = 0, default = 1048576,
+        },
+
+        cache_headers = {
+            type = "boolean", default = true,
+        },
+
+        cache_bypass = {
+            type = "array",
+            minItems = 1,
+            items = {
+                type = "string",
+                pattern = [[(^[^\$].+$|^\$[0-9a-zA-Z_]+$)]],
+            },
+        },
+
+        policy = {
+            type = "string",
+            enum = { "redis" },
+            default = "redis",
+        },
+    },
+    ["if"] = {  -- when policy is "redis" ...
+        properties = {
+            policy = {
+                enum = { "redis" },
+            },
+        },
+    },
+    ["then"] = policy_to_additional_properties.redis,  -- ... apply the shared redis schema
+    encrypt_fields = { "redis_password" },
+}
+
+return _M
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
new file mode 100644
index 000000000000..1b58af1bbeca
--- /dev/null
+++ b/t/plugin/ai-cache.t
@@ -0,0 +1,652 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +BEGIN { + $ENV{TEST_ENABLE_CONTROL_API_V1} = "0"; +} + +use t::APISIX 'no_plan'; + +log_level("info"); +repeat_each(1); +no_long_string(); +no_root_location(); + +add_block_preprocessor(sub { + my ($block) = @_; + + if (!defined $block->request) { + $block->set_value("request", "GET /t"); + } + + my $user_yaml_config = <<_EOC_; +plugins: + - ai-proxy + - ai-cache +_EOC_ + if (!defined $block->extra_yaml_config) { + $block->set_value("extra_yaml_config", $user_yaml_config); + } +}); + +run_tests(); + +__DATA__ + +=== TEST 1: minimal valid exact-cache configuration +--- config + location /t { + content_by_lua_block { + local plugin = require("apisix.plugins.ai-cache") + local ok, err = plugin.check_schema({ + redis_host = "127.0.0.1", + redis_port = 6379, + }) + + if not ok then + ngx.say(err) + else + ngx.say("passed") + end + } + } +--- response_body +passed + + + +=== TEST 2: reject config missing required redis (policy=redis then-clause) +--- config + location /t { + content_by_lua_block { + local plugin = require("apisix.plugins.ai-cache") + local ok, err = plugin.check_schema({}) + + if not ok then + ngx.say(err) + else + ngx.say("passed") + end + } + } +--- response_body eval +qr/then clause did not match/ + + + +=== TEST 3: reject unknown layer value +--- config + location /t { + content_by_lua_block { + local plugin = require("apisix.plugins.ai-cache") + local ok, err = plugin.check_schema({ + redis_host = "127.0.0.1", + layers = { "nonsense" }, + }) + + if not ok then + ngx.say(err) + else + ngx.say("passed") + end + } + } +--- response_body eval 
+qr/layers/ + + + +=== TEST 4: flush redis, then set route with ai-proxy + ai-cache (mock upstream) +--- config + location /t { + content_by_lua_block { + local redis = require("resty.redis") + local red = redis:new() + red:set_timeout(1000) + local ok, rerr = red:connect("127.0.0.1", 6379) + if not ok then + ngx.say("redis connect failed: ", rerr) + return + end + red:flushall() + + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { + "header": { + "Authorization": "Bearer test-key" + } + }, + "options": { + "model": "gpt-4o" + }, + "override": { + "endpoint": "http://127.0.0.1:1980" + } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 5: cold request is a cache MISS and is proxied upstream +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss unique-prompt-5"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- response_body_like eval +qr/1 \+ 1 = 2/ +--- wait: 0.3 + + + +=== TEST 6: identical re-request is a HIT served from cache (upstream not called) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss unique-prompt-5"}]} +--- error_code: 200 +--- response_headers_like +X-AI-Cache-Status: HIT +X-AI-Cache-Age: \d+ +--- response_body_like eval +qr/1 \+ 1 = 2/ + + + +=== TEST 7: fingerprint sensitivity (key.lua unit) +--- config + location /t { + content_by_lua_block { + local key = require("apisix.plugins.ai-cache.key") + local function ctx(model) + return { ai_client_protocol = "openai-chat", var = { request_llm_model = model } } + end + local function fp(body) + return key.fingerprint(ctx(body.model), body) + 
end + + local base = { model="gpt-4o", messages={{role="user", content="hi"}}, temperature=0.2 } + local same = { model="gpt-4o", messages={{role="user", content="hi"}}, temperature=0.2 } + local msg2 = { model="gpt-4o", messages={{role="user", content="yo"}}, temperature=0.2 } + local model2 = { model="gpt-4o-mini", messages={{role="user", content="hi"}}, temperature=0.2 } + local temp2 = { model="gpt-4o", messages={{role="user", content="hi"}}, temperature=0.7 } + local tools2 = { model="gpt-4o", messages={{role="user", content="hi"}}, temperature=0.2, + tools={{ type="function", ["function"]={ name="f" } }} } + + local b = fp(base) + assert(fp(same) == b, "identical bodies must share a fingerprint") + assert(fp(msg2) ~= b, "changed message must change the fingerprint") + assert(fp(model2) ~= b, "changed model must change the fingerprint") + assert(fp(temp2) ~= b, "changed temperature must change the fingerprint") + assert(fp(tools2) ~= b, "changed tools must change the fingerprint") + ngx.say("passed") + } + } +--- response_body +passed + + + +=== TEST 8: non-2xx upstream (no fixture -> 401) is a MISS +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]} +--- error_code: 401 +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 9: same prompt with a valid fixture is still a MISS (the 401 was not cached) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: MISS +--- response_body_like eval +qr/1 \+ 1 = 2/ + + + +=== TEST 10: set route with a cache_bypass variable rule +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + 
"auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "cache_bypass": ["$http_x_ai_cache_bypass"] + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 11: a non-empty, non-"0" cache_bypass value is a BYPASS +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"bypass rule test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-AI-Cache-Bypass: 1 +--- response_headers +X-AI-Cache-Status: BYPASS + + + +=== TEST 12: a cache_bypass value of "0" does not bypass (normal MISS) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-zero-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-AI-Cache-Bypass: 0 +--- response_headers +X-AI-Cache-Status: MISS + + + +=== TEST 13: set route with a tiny max_cache_body_size +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "max_cache_body_size": 10 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 14: cold request (response exceeds max_cache_body_size) is a MISS +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== 
TEST 15: same prompt is still a MISS (oversized response was not cached) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS + + + +=== TEST 16: set route isolating the cache by a request variable +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "cache_key": { "include_vars": ["http_x_tenant"] } + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 17: tenant alpha cold request is a MISS (warms scope=alpha) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-Tenant: alpha +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 18: same prompt, tenant beta is a MISS (not shared with alpha) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-Tenant: beta +--- response_headers +X-AI-Cache-Status: MISS + + + +=== TEST 19: same prompt, tenant alpha is a HIT (its own scope persisted) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} +--- more_headers +X-Tenant: alpha +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: HIT + + + +=== TEST 20: set route with a 1-second exact ttl +--- config + location /t 
{ + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "exact": { "ttl": 1 } + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 21: cold request is a MISS (cached with ttl=1), then wait past the ttl +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 2 + + + +=== TEST 22: same prompt is a MISS again (entry expired) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS + + + +=== TEST 23: set an anthropic-messages route (cross-protocol) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "uri": "/v1/messages", + "plugins": { + "ai-proxy": { + "provider": "anthropic", + "auth": { "header": { "x-api-key": "test-key" } }, + "options": { "model": "claude-3-5-sonnet-20241022" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 24: anthropic cold request is a MISS +--- request +POST /v1/messages 
+{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100} +--- more_headers +X-AI-Fixture: anthropic/messages-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 25: identical anthropic re-request is a HIT (upstream not called) +--- request +POST /v1/messages +{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100} +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: HIT + + + +=== TEST 26: set route whose redis is unreachable +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6390, + "redis_timeout": 200 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 27: redis unreachable fails open (request still proxied as MISS, no 5xx) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"redis-down failopen"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: MISS +--- response_body_like eval +qr/1 \+ 1 = 2/ +--- error_log +ai-cache: redis unavailable, fail-open as MISS + + + +=== TEST 28: set route with cache_headers disabled +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer 
test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "cache_headers": false + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 29: cache_headers=false suppresses the X-AI-Cache-* headers +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"cache-headers-off-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: +X-AI-Cache-Age: +--- response_body_like eval +qr/1 \+ 1 = 2/ From 2d7eb3b87339d0ce2930ed5d6deaad91874432e1 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 09:59:41 +0800 Subject: [PATCH 03/10] fix(ai-cache): canonical-encode fingerprint and switch bypass to bypass_on Encode the request fingerprint with rapidjson (sort_keys) plus a to_rapidjson_value pass that maps the JSON null sentinel and array_mt tables, mirroring ai-transport/http.lua. core.json.stably_encode (dkjson) raised on the cjson null sentinel, so a body carrying an explicit null (e.g. OpenAI's "stop": null) would error out of the access phase. Replace the cache_bypass var-ref opt-out with bypass_on: an array of {header, equals} rules that skip the cache when a request header exactly equals its value (per rfcs#78). Exact header == value only; any matching rule triggers BYPASS. Tests: add a null-body fingerprint regression, migrate the bypass tests to bypass_on, and cover multiple rules where any match bypasses. 
--- apisix/plugins/ai-cache.lua | 15 +---- apisix/plugins/ai-cache/key.lua | 40 ++++++++++- apisix/plugins/ai-cache/schema.lua | 10 ++- t/plugin/ai-cache.t | 102 ++++++++++++++++++++++------- 4 files changed, 127 insertions(+), 40 deletions(-) diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua index 8fdb5ec7347b..5495759cb9a2 100644 --- a/apisix/plugins/ai-cache.lua +++ b/apisix/plugins/ai-cache.lua @@ -23,7 +23,6 @@ local redis_util = require("apisix.utils.redis") local ngx = ngx local ngx_null = ngx.null local ipairs = ipairs -local str_sub = string.sub local CACHE_STATUS_HEADER = "X-AI-Cache-Status" local CACHE_AGE_HEADER = "X-AI-Cache-Age" @@ -72,17 +71,9 @@ function _M.access(conf, ctx) return end - -- explicit opt-out: any cache_bypass reference resolving to a value - -- that is non-empty and not "0" skips the cache (proxy-cache - -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading - -- "$" marks a variable to resolve; anything else is a literal. 
- if conf.cache_bypass then - for _, ref in ipairs(conf.cache_bypass) do - local val = ref - if str_sub(ref, 1, 1) == "$" then - val = ctx.var[str_sub(ref, 2)] - end - if val ~= nil and val ~= "" and val ~= "0" then + if conf.bypass_on then + for _, rule in ipairs(conf.bypass_on) do + if core.request.header(ctx, rule.header) == rule.equals then ctx.ai_cache_status = "BYPASS" return end diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua index 283501bb3ec6..228c46e5830e 100644 --- a/apisix/plugins/ai-cache/key.lua +++ b/apisix/plugins/ai-cache/key.lua @@ -17,11 +17,18 @@ local core = require("apisix.core") local protocols = require("apisix.plugins.ai-protocols") +local rapidjson = require("rapidjson") local sha256 = require("resty.sha256") local to_hex = require("resty.string").to_hex -local ipairs = ipairs -local concat = table.concat +local ipairs = ipairs +local pairs = pairs +local type = type +local getmetatable = getmetatable +local concat = table.concat + +local rapidjson_null = rapidjson.null +local ENCODE_OPTS = { sort_keys = true } local _M = {} @@ -33,13 +40,40 @@ local function hex_digest(s) end +local function to_rapidjson_value(data) + if data == core.json.null then + return rapidjson_null + end + if type(data) ~= "table" then + return data + end + if getmetatable(data) == core.json.array_mt then + local arr = {} + for i, v in ipairs(data) do + arr[i] = to_rapidjson_value(v) + end + return rapidjson.array(arr) + end + local obj = {} + for k, v in pairs(data) do + obj[k] = to_rapidjson_value(v) + end + return obj +end + + +local function canonical_encode(value) + return rapidjson.encode(to_rapidjson_value(value), ENCODE_OPTS) +end + + function _M.fingerprint(ctx, body) local params = core.table.deepcopy(body) params.messages = nil params.model = nil params.stream = nil - local repr = core.json.stably_encode({ + local repr = canonical_encode({ protocol = ctx.ai_client_protocol or "", model = ctx.var.request_llm_model or 
body.model or "", messages = protocols.get_messages(body, ctx) or {}, diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua index a36c67ab04f7..72d02f81f855 100644 --- a/apisix/plugins/ai-cache/schema.lua +++ b/apisix/plugins/ai-cache/schema.lua @@ -62,12 +62,16 @@ local _M = { type = "boolean", default = true, }, - cache_bypass = { + bypass_on = { type = "array", minItems = 1, items = { - type = "string", - pattern = [[(^[^\$].+$|^\$[0-9a-zA-Z_]+$)]], + type = "object", + properties = { + header = { type = "string" }, + equals = { type = "string" }, + }, + required = { "header", "equals" }, }, }, diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t index 1b58af1bbeca..a35305e3eb9e 100644 --- a/t/plugin/ai-cache.t +++ b/t/plugin/ai-cache.t @@ -194,6 +194,7 @@ qr/1 \+ 1 = 2/ --- config location /t { content_by_lua_block { + local core = require("apisix.core") local key = require("apisix.plugins.ai-cache.key") local function ctx(model) return { ai_client_protocol = "openai-chat", var = { request_llm_model = model } } @@ -216,6 +217,13 @@ qr/1 \+ 1 = 2/ assert(fp(model2) ~= b, "changed model must change the fingerprint") assert(fp(temp2) ~= b, "changed temperature must change the fingerprint") assert(fp(tools2) ~= b, "changed tools must change the fingerprint") + + local nullb = core.json.decode( + '{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}],"stop":null}') + local ok_null, fp_null = pcall(fp, nullb) + assert(ok_null, "explicit null must not raise: " .. 
tostring(fp_null)) + assert(fp(nullb) == fp_null, "null-bearing fingerprint must be stable") + assert(fp_null ~= b, "stop:null must change the fingerprint") ngx.say("passed") } } @@ -249,7 +257,7 @@ qr/1 \+ 1 = 2/ -=== TEST 10: set route with a cache_bypass variable rule +=== TEST 10: set route with a bypass_on header rule --- config location /t { content_by_lua_block { @@ -268,7 +276,7 @@ qr/1 \+ 1 = 2/ "ai-cache": { "redis_host": "127.0.0.1", "redis_port": 6379, - "cache_bypass": ["$http_x_ai_cache_bypass"] + "bypass_on": [{"header": "X-AI-Cache-Bypass", "equals": "1"}] } } }]] @@ -284,7 +292,7 @@ passed -=== TEST 11: a non-empty, non-"0" cache_bypass value is a BYPASS +=== TEST 11: a matching bypass_on header value is a BYPASS --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"bypass rule test"}]} @@ -296,10 +304,10 @@ X-AI-Cache-Status: BYPASS -=== TEST 12: a cache_bypass value of "0" does not bypass (normal MISS) +=== TEST 12: a non-matching bypass_on header value does not bypass (normal MISS) --- request POST /anything -{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-zero-test"}]} +{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-nonmatch-test"}]} --- more_headers X-AI-Fixture: openai/chat-basic.json X-AI-Cache-Bypass: 0 @@ -308,7 +316,57 @@ X-AI-Cache-Status: MISS -=== TEST 13: set route with a tiny max_cache_body_size +=== TEST 13: set route with multiple bypass_on rules +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "bypass_on": [ + {"header": "X-AI-Cache-Bypass", "equals": "1"}, 
+ {"header": "X-Debug", "equals": "on"} + ] + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 14: any matching bypass_on rule triggers a BYPASS (second rule matches) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"any-rule-bypass-test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-Debug: on +--- response_headers +X-AI-Cache-Status: BYPASS + + + +=== TEST 15: set route with a tiny max_cache_body_size --- config location /t { content_by_lua_block { @@ -343,7 +401,7 @@ passed -=== TEST 14: cold request (response exceeds max_cache_body_size) is a MISS +=== TEST 16: cold request (response exceeds max_cache_body_size) is a MISS --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]} @@ -355,7 +413,7 @@ X-AI-Cache-Status: MISS -=== TEST 15: same prompt is still a MISS (oversized response was not cached) +=== TEST 17: same prompt is still a MISS (oversized response was not cached) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]} @@ -366,7 +424,7 @@ X-AI-Cache-Status: MISS -=== TEST 16: set route isolating the cache by a request variable +=== TEST 18: set route isolating the cache by a request variable --- config location /t { content_by_lua_block { @@ -401,7 +459,7 @@ passed -=== TEST 17: tenant alpha cold request is a MISS (warms scope=alpha) +=== TEST 19: tenant alpha cold request is a MISS (warms scope=alpha) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} @@ -414,7 +472,7 @@ X-AI-Cache-Status: MISS -=== TEST 18: same prompt, tenant beta is a MISS (not shared with alpha) +=== TEST 20: same prompt, tenant beta is a MISS (not shared with alpha) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} @@ -426,7 +484,7 @@ 
X-AI-Cache-Status: MISS -=== TEST 19: same prompt, tenant alpha is a HIT (its own scope persisted) +=== TEST 21: same prompt, tenant alpha is a HIT (its own scope persisted) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation test"}]} @@ -438,7 +496,7 @@ X-AI-Cache-Status: HIT -=== TEST 20: set route with a 1-second exact ttl +=== TEST 22: set route with a 1-second exact ttl --- config location /t { content_by_lua_block { @@ -473,7 +531,7 @@ passed -=== TEST 21: cold request is a MISS (cached with ttl=1), then wait past the ttl +=== TEST 23: cold request is a MISS (cached with ttl=1), then wait past the ttl --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]} @@ -485,7 +543,7 @@ X-AI-Cache-Status: MISS -=== TEST 22: same prompt is a MISS again (entry expired) +=== TEST 24: same prompt is a MISS again (entry expired) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]} @@ -496,7 +554,7 @@ X-AI-Cache-Status: MISS -=== TEST 23: set an anthropic-messages route (cross-protocol) +=== TEST 25: set an anthropic-messages route (cross-protocol) --- config location /t { content_by_lua_block { @@ -530,7 +588,7 @@ passed -=== TEST 24: anthropic cold request is a MISS +=== TEST 26: anthropic cold request is a MISS --- request POST /v1/messages {"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100} @@ -542,7 +600,7 @@ X-AI-Cache-Status: MISS -=== TEST 25: identical anthropic re-request is a HIT (upstream not called) +=== TEST 27: identical anthropic re-request is a HIT (upstream not called) --- request POST /v1/messages {"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol test"}],"max_tokens":100} @@ -552,7 +610,7 @@ X-AI-Cache-Status: HIT -=== TEST 26: set route whose redis is unreachable +=== TEST 28: set route whose redis is 
unreachable --- config location /t { content_by_lua_block { @@ -587,7 +645,7 @@ passed -=== TEST 27: redis unreachable fails open (request still proxied as MISS, no 5xx) +=== TEST 29: redis unreachable fails open (request still proxied as MISS, no 5xx) --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"redis-down failopen"}]} @@ -603,7 +661,7 @@ ai-cache: redis unavailable, fail-open as MISS -=== TEST 28: set route with cache_headers disabled +=== TEST 30: set route with cache_headers disabled --- config location /t { content_by_lua_block { @@ -638,7 +696,7 @@ passed -=== TEST 29: cache_headers=false suppresses the X-AI-Cache-* headers +=== TEST 31: cache_headers=false suppresses the X-AI-Cache-* headers --- request POST /anything {"model":"gpt-4o","messages":[{"role":"user","content":"cache-headers-off-test"}]} From d91e68ab84983d2e9d40e12c84042328d443448f Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 10:38:36 +0800 Subject: [PATCH 04/10] feat(ai-cache): enhance body filter to handle oversized responses and update fingerprinting logic --- apisix/plugins/ai-cache.lua | 15 ++++---- apisix/plugins/ai-cache/key.lua | 10 +++-- t/plugin/ai-cache.t | 66 ++++++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 14 deletions(-) diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua index 5495759cb9a2..84965c5f8d5e 100644 --- a/apisix/plugins/ai-cache.lua +++ b/apisix/plugins/ai-cache.lua @@ -27,6 +27,7 @@ local ipairs = ipairs local CACHE_STATUS_HEADER = "X-AI-Cache-Status" local CACHE_AGE_HEADER = "X-AI-Cache-Age" local DEFAULT_TTL = 3600 +local DEFAULT_MAX_BODY = 1048576 local _M = { version = 0.1, @@ -129,12 +130,16 @@ end function _M.body_filter(conf, ctx) -- only a MISS gets written back; HIT exited in access, BYPASS opts out. 
- if ctx.ai_cache_status ~= "MISS" then + if ctx.ai_cache_status ~= "MISS" or ctx.ai_cache_oversized then return end local chunk = ngx.arg[1] if chunk and #chunk > 0 then ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk + if #ctx.ai_cache_buf > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then + ctx.ai_cache_buf = nil + ctx.ai_cache_oversized = true + end end end @@ -157,7 +162,6 @@ local function write_to_cache(premature, conf, cache_key, response_body) ok, err = red:set(cache_key, envelope, "EX", ttl) if not ok then core.log.warn("ai-cache: redis set failed: ", err) - return end release(conf, red) end @@ -167,18 +171,13 @@ function _M.log(conf, ctx) if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then return end - -- write-on-success only: never cache an error response. - if ngx.status < 200 or ngx.status >= 300 then + if ngx.status ~= 200 then return end local response_body = ctx.ai_cache_buf if not response_body or response_body == "" then return end - -- don't cache responses larger than the configured cap. 
- if #response_body > (conf.max_cache_body_size or 1048576) then - return - end local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body) if not ok then diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua index 228c46e5830e..3f012892c0b9 100644 --- a/apisix/plugins/ai-cache/key.lua +++ b/apisix/plugins/ai-cache/key.lua @@ -68,10 +68,12 @@ end function _M.fingerprint(ctx, body) - local params = core.table.deepcopy(body) - params.messages = nil - params.model = nil - params.stream = nil + local params = {} + for k, v in pairs(body) do + if k ~= "messages" and k ~= "model" and k ~= "stream" then + params[k] = v + end + end local repr = canonical_encode({ protocol = ctx.ai_client_protocol or "", diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t index a35305e3eb9e..351bdf5031a1 100644 --- a/t/plugin/ai-cache.t +++ b/t/plugin/ai-cache.t @@ -704,7 +704,69 @@ POST /anything X-AI-Fixture: openai/chat-basic.json --- error_code: 200 --- response_headers -X-AI-Cache-Status: -X-AI-Cache-Age: +! X-AI-Cache-Status +! 
X-AI-Cache-Age +--- response_body_like eval +qr/1 \+ 1 = 2/ + + + +=== TEST 32: set a default ai-proxy + ai-cache route (for status-code tests) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 33: a 2xx that is not 200 (201) is a MISS and is proxied through +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"status-201-test-prompt"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +X-AI-Fixture-Status: 201 +--- error_code: 201 +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 34: same prompt with a 200 fixture is still a MISS (the 201 was not cached) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"status-201-test-prompt"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: MISS --- response_body_like eval qr/1 \+ 1 = 2/ From 652a89fe129c5c462890817683652b0aff488526 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 11:18:10 +0800 Subject: [PATCH 05/10] feat(ai-cache): optimize body caching logic and enforce header validation --- apisix/plugins/ai-cache.lua | 18 ++++++++++++++---- apisix/plugins/ai-cache/schema.lua | 2 +- t/plugin/ai-cache.t | 6 +++++- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua index 84965c5f8d5e..0c7053c176d1 100644 --- 
a/apisix/plugins/ai-cache.lua +++ b/apisix/plugins/ai-cache.lua @@ -23,6 +23,7 @@ local redis_util = require("apisix.utils.redis") local ngx = ngx local ngx_null = ngx.null local ipairs = ipairs +local concat = table.concat local CACHE_STATUS_HEADER = "X-AI-Cache-Status" local CACHE_AGE_HEADER = "X-AI-Cache-Age" @@ -135,8 +136,16 @@ function _M.body_filter(conf, ctx) end local chunk = ngx.arg[1] if chunk and #chunk > 0 then - ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk - if #ctx.ai_cache_buf > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then + local buf = ctx.ai_cache_buf + if not buf then + buf = { n = 0, bytes = 0 } + ctx.ai_cache_buf = buf + end + local n = buf.n + 1 + buf.n = n + buf[n] = chunk + buf.bytes = buf.bytes + #chunk + if buf.bytes > (conf.max_cache_body_size or DEFAULT_MAX_BODY) then ctx.ai_cache_buf = nil ctx.ai_cache_oversized = true end @@ -174,10 +183,11 @@ function _M.log(conf, ctx) if ngx.status ~= 200 then return end - local response_body = ctx.ai_cache_buf - if not response_body or response_body == "" then + local buf = ctx.ai_cache_buf + if not buf or buf.bytes == 0 then return end + local response_body = concat(buf, "", 1, buf.n) local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body) if not ok then diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua index 72d02f81f855..d434d44f45a7 100644 --- a/apisix/plugins/ai-cache/schema.lua +++ b/apisix/plugins/ai-cache/schema.lua @@ -68,7 +68,7 @@ local _M = { items = { type = "object", properties = { - header = { type = "string" }, + header = { type = "string", minLength = 1 }, equals = { type = "string" }, }, required = { "header", "equals" }, diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t index 351bdf5031a1..c59f58f9181d 100644 --- a/t/plugin/ai-cache.t +++ b/t/plugin/ai-cache.t @@ -122,7 +122,11 @@ qr/layers/ ngx.say("redis connect failed: ", rerr) return end - red:flushall() + local fok, ferr = red:flushall() + 
if not fok then + ngx.say("redis flushall failed: ", ferr) + return + end local t = require("lib.test_admin").test local code, body = t('/apisix/admin/routes/1', From 4ac239875d0e092fb5830b70deacc3a3795e0812 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 12:17:04 +0800 Subject: [PATCH 06/10] docs(ai-cache): add English and Chinese plugin documentation Document the ai-cache plugin: description, full attribute table (incl. all Redis policy fields), and Admin API / ADC / Ingress Controller examples covering cache MISS/HIT and bypass_on. Add the page to the en and zh plugin sidebars. --- docs/en/latest/config.json | 1 + docs/en/latest/plugins/ai-cache.md | 332 +++++++++++++++++++++++++++++ docs/zh/latest/config.json | 1 + docs/zh/latest/plugins/ai-cache.md | 332 +++++++++++++++++++++++++++++ 4 files changed, 666 insertions(+) create mode 100644 docs/en/latest/plugins/ai-cache.md create mode 100644 docs/zh/latest/plugins/ai-cache.md diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json index 7691e45802e9..71b8ba828e9e 100644 --- a/docs/en/latest/config.json +++ b/docs/en/latest/config.json @@ -73,6 +73,7 @@ "items": [ "plugins/ai-proxy", "plugins/ai-proxy-multi", + "plugins/ai-cache", "plugins/ai-rate-limiting", "plugins/ai-prompt-guard", "plugins/ai-aws-content-moderation", diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md new file mode 100644 index 000000000000..dc3a559d9adc --- /dev/null +++ b/docs/en/latest/plugins/ai-cache.md @@ -0,0 +1,332 @@ +--- +title: ai-cache +keywords: + - Apache APISIX + - API Gateway + - Plugin + - ai-cache + - AI + - LLM +description: The ai-cache Plugin caches LLM responses in Redis and replays them for later requests that resolve to the same prompt, cutting upstream token cost and latency. 
+--- + + + + + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Description + +The `ai-cache` Plugin caches LLM responses and replays them for later requests that resolve to the same prompt, cutting upstream token cost and latency for repetitive workloads (FAQ bots, document Q&A, translation). + +This release implements the **exact** cache layer (L1); a semantic cache layer (L2) is planned for a future release. + +The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-proxy-multi`](./ai-proxy-multi.md) Plugin. + +:::note + +The cache key uses the **requested** model. If routes rewrite the model server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection) and share one Redis and cache scope, isolate them with separate Redis instances or with `cache_key.include_vars` (for example `["route_id"]`). + +::: + +## Attributes + +| Name | Type | Required | Default | Valid values | Description | +|------|------|----------|---------|--------------|-------------| +| layers | array[string] | False | ["exact"] | ["exact"] | Cache layers to enable. Only the exact layer is available in this release. | +| exact.ttl | integer | False | 3600 | >= 1 | Time-to-live, in seconds, of an exact-cache entry. | +| cache_key.include_consumer | boolean | False | false | | If true, scope the cache per consumer so entries are not shared across consumers. | +| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["route_id"]`), isolating entries by their values. | +| max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. | +| cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). 
| +| bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. | +| bypass_on[].header | string | True | | | Request header name to match. | +| bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. | +| policy | string | False | redis | redis | Storage backend. Only single-node `redis` is available in this release. | +| redis_host | string | True | | | Address of the Redis node. | +| redis_port | integer | False | 6379 | >= 1 | Port of the Redis node. | +| redis_username | string | False | | | Username for Redis if Redis ACL is used. For the legacy `requirepass` method, configure only `redis_password`. | +| redis_password | string | False | | | Password of the Redis node. Encrypted with AES before being stored in etcd. | +| redis_database | integer | False | 0 | >= 0 | Database number in Redis. | +| redis_timeout | integer | False | 1000 | >= 1 | Redis timeout value in milliseconds. | +| redis_ssl | boolean | False | false | | If true, use SSL to connect to Redis. | +| redis_ssl_verify | boolean | False | false | | If true, verify the Redis server SSL certificate. | +| redis_keepalive_timeout | integer | False | 10000 | >= 1000 | Keepalive timeout, in milliseconds, for the Redis connection pool. | +| redis_keepalive_pool | integer | False | 100 | >= 1 | Maximum number of connections in the Redis keepalive pool. | + +## Example + +The example below uses OpenAI as the Upstream LLM provider. Obtain an [OpenAI API key](https://openai.com/blog/openai-api) and save it, along with your Admin API key, to environment variables: + +```shell +export OPENAI_API_KEY=your-openai-api-key +export admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"//g') +``` + +A Redis instance must be reachable at the configured `redis_host`. 
+ +### Cache LLM Responses + +Create a Route to the LLM chat completion endpoint with the [`ai-proxy`](./ai-proxy.md) and `ai-cache` Plugins. + + + + + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ai-cache-route", + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer '"$OPENAI_API_KEY"'" } }, + "options": { "model": "gpt-4o" } + }, + "ai-cache": { + "redis_host": "127.0.0.1" + } + } + }' +``` + + + + + +```yaml title="adc.yaml" +services: + - name: ai-cache-service + routes: + - name: ai-cache-route + uris: + - /anything + methods: + - POST + plugins: + ai-proxy: + provider: openai + auth: + header: + Authorization: "Bearer ${OPENAI_API_KEY}" + options: + model: gpt-4o + ai-cache: + redis_host: 127.0.0.1 +``` + +Synchronize the configuration to the gateway: + +```shell +adc sync -f adc.yaml +``` + + + + + + + + + +```yaml title="ai-cache-ic.yaml" +apiVersion: apisix.apache.org/v1alpha1 +kind: PluginConfig +metadata: + namespace: aic + name: ai-cache-plugin-config +spec: + plugins: + - name: ai-cache + config: + redis_host: 127.0.0.1 + - name: ai-proxy + config: + provider: openai + auth: + header: + Authorization: "Bearer your-openai-api-key" + options: + model: gpt-4o +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + namespace: aic + name: ai-cache-route +spec: + parentRefs: + - name: apisix + rules: + - matches: + - path: + type: Exact + value: /anything + method: POST + filters: + - type: ExtensionRef + extensionRef: + group: apisix.apache.org + kind: PluginConfig + name: ai-cache-plugin-config +``` + +Apply the configuration to your cluster: + +```shell +kubectl apply -f ai-cache-ic.yaml +``` + + + + + +```yaml title="ai-cache-ic.yaml" +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + namespace: aic + name: ai-cache-route +spec: + ingressClassName: apisix + http: + - name: 
ai-cache-route + match: + paths: + - /anything + methods: + - POST + plugins: + - name: ai-cache + enable: true + config: + redis_host: 127.0.0.1 + - name: ai-proxy + enable: true + config: + provider: openai + auth: + header: + Authorization: "Bearer your-openai-api-key" + options: + model: gpt-4o +``` + +Apply the configuration to your cluster: + +```shell +kubectl apply -f ai-cache-ic.yaml +``` + + + + + + + + + +Send a request to the Route: + +```shell +curl -i "http://127.0.0.1:9080/anything" -X POST \ + -H "Content-Type: application/json" \ + -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }' +``` + +The first request is a cache miss and is proxied to the LLM. The response carries the `X-AI-Cache-Status: MISS` header and a body similar to the following: + +```json +{ + "id": "chatcmpl-DtmdUDZeSZ0t62y6BvLkSk5qfH3zA", + "object": "chat.completion", + "created": 1782187368, + "model": "gpt-4o-2024-08-06", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Apache APISIX is a dynamic, cloud-native API gateway that provides high performance, scalability, and security for API management." + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 19, + "completion_tokens": 25, + "total_tokens": 44 + } +} +``` + +Send the same request again. 
It is served from the cache without calling the LLM, returning the identical body with the headers: + +```text +X-AI-Cache-Status: HIT +X-AI-Cache-Age: 8 +``` + +### Bypass the Cache + +To skip the cache for selected requests, add a `bypass_on` rule and update the Route: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/ai-cache-route" -X PATCH \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "plugins": { + "ai-cache": { + "redis_host": "127.0.0.1", + "bypass_on": [{ "header": "X-Cache-Bypass", "equals": "1" }] + } + } + }' +``` + +Send a request with the matching header: + +```shell +curl -i "http://127.0.0.1:9080/anything" -X POST \ + -H "Content-Type: application/json" \ + -H "X-Cache-Bypass: 1" \ + -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }' +``` + +The cache is skipped entirely (no lookup and no write-back), and the response carries the `X-AI-Cache-Status: BYPASS` header. diff --git a/docs/zh/latest/config.json b/docs/zh/latest/config.json index 78ab8ad88718..039eb130db30 100644 --- a/docs/zh/latest/config.json +++ b/docs/zh/latest/config.json @@ -64,6 +64,7 @@ "items": [ "plugins/ai-proxy", "plugins/ai-proxy-multi", + "plugins/ai-cache", "plugins/ai-rate-limiting", "plugins/ai-prompt-guard", "plugins/ai-aws-content-moderation", diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md new file mode 100644 index 000000000000..fcb3e3e9483c --- /dev/null +++ b/docs/zh/latest/plugins/ai-cache.md @@ -0,0 +1,332 @@ +--- +title: ai-cache +keywords: + - Apache APISIX + - API 网关 + - 插件 + - ai-cache + - AI + - LLM +description: ai-cache 插件将 LLM 响应缓存在 Redis 中,并在后续解析到相同提示词的请求中重放这些响应,从而降低上游的 Token 消耗与延迟。 +--- + + + + + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## 描述 + +`ai-cache` 插件缓存 LLM 响应,并在后续解析到相同提示词的请求中重放这些响应,从而为重复性工作负载(FAQ 机器人、文档问答、翻译等)降低上游的 Token 消耗与延迟。 + +本次发布实现了**精确**缓存层(L1);语义缓存层(L2)计划在未来的版本中提供。 + +`ai-cache` 插件必须与 
[`ai-proxy`](./ai-proxy.md) 或 [`ai-proxy-multi`](./ai-proxy-multi.md) 插件一起使用。 + +:::note + +缓存键使用**请求中**的模型。如果路由在服务端改写模型(`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择),并且共享同一个 Redis 与缓存作用域,请使用独立的 Redis 实例,或通过 `cache_key.include_vars`(例如 `["route_id"]`)将它们隔离。 + +::: + +## 属性 + +| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | +|------|------|--------|--------|--------|------| +| layers | array[string] | 否 | ["exact"] | ["exact"] | 要启用的缓存层。本次发布仅支持精确(exact)缓存层。 | +| exact.ttl | integer | 否 | 3600 | >= 1 | 精确缓存条目的存活时间(TTL),单位为秒。 | +| cache_key.include_consumer | boolean | 否 | false | | 如果为 true,则按消费者隔离缓存,使缓存条目不会在不同消费者之间共享。 | +| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量(例如 `["route_id"]`),按其取值隔离缓存条目。 | +| max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小,单位为字节。超过该大小的响应不会被缓存。 | +| cache_headers | boolean | 否 | true | | 如果为 true,则添加 `X-AI-Cache-Status` 响应头(命中时还会添加 `X-AI-Cache-Age`,表示缓存条目的存在时长,单位为秒)。 | +| bypass_on | array[object] | 否 | | | 当任一规则匹配时,完全跳过缓存(不查询、不回写)的规则列表。 | +| bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 | +| bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时,绕过缓存。 | +| policy | string | 否 | redis | redis | 存储后端。本次发布仅支持单节点 `redis`。 | +| redis_host | string | 是 | | | Redis 节点的地址。 | +| redis_port | integer | 否 | 6379 | >= 1 | Redis 节点的端口。 | +| redis_username | string | 否 | | | 使用 Redis ACL 时的用户名。如果使用传统的 `requirepass` 认证方式,则仅配置 `redis_password`。 | +| redis_password | string | 否 | | | Redis 节点的密码。在存入 etcd 之前使用 AES 加密。 | +| redis_database | integer | 否 | 0 | >= 0 | Redis 中使用的数据库编号。 | +| redis_timeout | integer | 否 | 1000 | >= 1 | Redis 超时时间,单位为毫秒。 | +| redis_ssl | boolean | 否 | false | | 如果为 true,则使用 SSL 连接 Redis。 | +| redis_ssl_verify | boolean | 否 | false | | 如果为 true,则校验 Redis 服务器的 SSL 证书。 | +| redis_keepalive_timeout | integer | 否 | 10000 | >= 1000 | Redis 连接池的保活超时时间,单位为毫秒。 | +| redis_keepalive_pool | integer | 否 | 100 | >= 1 | Redis 保活连接池中的最大连接数。 | + +## 示例 + +以下示例使用 OpenAI 作为上游 LLM 服务提供商。请获取 [OpenAI API 
key](https://openai.com/blog/openai-api),并将其与 Admin API key 一起保存到环境变量中: + +```shell +export OPENAI_API_KEY=your-openai-api-key +export admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"//g') +``` + +在配置的 `redis_host` 上必须有一个可访问的 Redis 实例。 + +### 缓存 LLM 响应 + +使用 [`ai-proxy`](./ai-proxy.md) 和 `ai-cache` 插件创建一个指向 LLM 聊天补全端点的路由。 + + + + + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ai-cache-route", + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer '"$OPENAI_API_KEY"'" } }, + "options": { "model": "gpt-4o" } + }, + "ai-cache": { + "redis_host": "127.0.0.1" + } + } + }' +``` + + + + + +```yaml title="adc.yaml" +services: + - name: ai-cache-service + routes: + - name: ai-cache-route + uris: + - /anything + methods: + - POST + plugins: + ai-proxy: + provider: openai + auth: + header: + Authorization: "Bearer ${OPENAI_API_KEY}" + options: + model: gpt-4o + ai-cache: + redis_host: 127.0.0.1 +``` + +将配置同步到网关: + +```shell +adc sync -f adc.yaml +``` + + + + + + + + + +```yaml title="ai-cache-ic.yaml" +apiVersion: apisix.apache.org/v1alpha1 +kind: PluginConfig +metadata: + namespace: aic + name: ai-cache-plugin-config +spec: + plugins: + - name: ai-cache + config: + redis_host: 127.0.0.1 + - name: ai-proxy + config: + provider: openai + auth: + header: + Authorization: "Bearer your-openai-api-key" + options: + model: gpt-4o +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + namespace: aic + name: ai-cache-route +spec: + parentRefs: + - name: apisix + rules: + - matches: + - path: + type: Exact + value: /anything + method: POST + filters: + - type: ExtensionRef + extensionRef: + group: apisix.apache.org + kind: PluginConfig + name: ai-cache-plugin-config +``` + +将配置应用到您的集群: + +```shell +kubectl apply -f ai-cache-ic.yaml +``` + + + + + +```yaml title="ai-cache-ic.yaml" 
+apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + namespace: aic + name: ai-cache-route +spec: + ingressClassName: apisix + http: + - name: ai-cache-route + match: + paths: + - /anything + methods: + - POST + plugins: + - name: ai-cache + enable: true + config: + redis_host: 127.0.0.1 + - name: ai-proxy + enable: true + config: + provider: openai + auth: + header: + Authorization: "Bearer your-openai-api-key" + options: + model: gpt-4o +``` + +将配置应用到您的集群: + +```shell +kubectl apply -f ai-cache-ic.yaml +``` + + + + + + + + + +向该路由发送请求: + +```shell +curl -i "http://127.0.0.1:9080/anything" -X POST \ + -H "Content-Type: application/json" \ + -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }' +``` + +第一次请求是缓存未命中(MISS),会被代理到 LLM。响应中携带 `X-AI-Cache-Status: MISS` 响应头,响应体类似如下: + +```json +{ + "id": "chatcmpl-DtmdUDZeSZ0t62y6BvLkSk5qfH3zA", + "object": "chat.completion", + "created": 1782187368, + "model": "gpt-4o-2024-08-06", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Apache APISIX is a dynamic, cloud-native API gateway that provides high performance, scalability, and security for API management." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 19, + "completion_tokens": 25, + "total_tokens": 44 + } +} +``` + +再次发送相同的请求。该请求将直接由缓存返回,而不会调用 LLM,返回完全相同的响应体,并携带以下响应头: + +```text +X-AI-Cache-Status: HIT +X-AI-Cache-Age: 8 +``` + +### 绕过缓存 + +如需为特定请求跳过缓存,可添加 `bypass_on` 规则并更新路由: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/ai-cache-route" -X PATCH \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "plugins": { + "ai-cache": { + "redis_host": "127.0.0.1", + "bypass_on": [{ "header": "X-Cache-Bypass", "equals": "1" }] + } + } + }' +``` + +发送带有匹配请求头的请求: + +```shell +curl -i "http://127.0.0.1:9080/anything" -X POST \ + -H "Content-Type: application/json" \ + -H "X-Cache-Bypass: 1" \ + -d '{ "messages": [{ "role": "user", "content": "What is Apache APISIX? Answer in one sentence." }] }' +``` + +缓存被完全跳过(不查询、不回写),响应中携带 `X-AI-Cache-Status: BYPASS` 响应头。 From 84c5ccf0fac6c289385e99409f4fc5bce987d159 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 15:57:13 +0800 Subject: [PATCH 07/10] feat(ai-cache): implement canonical JSON encoding and enhance cache key configuration --- apisix/core/json.lua | 47 ++++++++++++++++++++++ apisix/plugins/ai-cache.lua | 5 ++- apisix/plugins/ai-cache/key.lua | 58 +++++++--------------------- apisix/plugins/ai-cache/schema.lua | 11 +----- apisix/plugins/ai-transport/http.lua | 36 +---------------- docs/en/latest/plugins/ai-cache.md | 8 ++-- docs/zh/latest/plugins/ai-cache.md | 8 ++-- 7 files changed, 77 insertions(+), 96 deletions(-) diff --git a/apisix/core/json.lua b/apisix/core/json.lua index 397b80191e13..538e384fe985 100644 --- a/apisix/core/json.lua +++ b/apisix/core/json.lua @@ -28,8 +28,14 @@ local ngx = ngx local tostring = tostring local type = type local pairs = pairs +local ipairs = ipairs +local getmetatable = getmetatable local cached_tab = {} +local rapidjson +local rapidjson_null +local rapidjson_encode_opts = { sort_keys = true } + cjson.encode_escape_forward_slash(false) 
cjson.decode_array_with_array_mt(true) @@ -122,6 +128,47 @@ local function encode(data, force) end _M.encode = encode + +local function to_rapidjson_value(data) + if data == cjson_null then + return rapidjson_null + end + + if type(data) ~= "table" then + return data + end + + if getmetatable(data) == cjson.array_mt then + local arr = {} + for i, v in ipairs(data) do + arr[i] = to_rapidjson_value(v) + end + return rapidjson.array(arr) + end + + local obj = {} + for k, v in pairs(data) do + obj[k] = to_rapidjson_value(v) + end + return obj +end + + +--- Encode a Lua value to a canonical JSON string with sorted object keys. +-- Unlike core.json.encode, object keys are emitted in a stable (sorted) order, +-- so the same logical value always produces the same string -- suitable for +-- hashing, cache keys and signatures. cjson null / array_mt markers are +-- preserved. Backed by rapidjson, which is loaded on first use. +-- @tparam table data The value to encode. +-- @treturn string The canonically-encoded JSON string. 
+function _M.canonical_encode(data) + if not rapidjson then + rapidjson = require("rapidjson") + rapidjson_null = rapidjson.null + end + return rapidjson.encode(to_rapidjson_value(data), rapidjson_encode_opts) +end + local max_delay_encode_items = 16 local delay_tab_idx = 0 local delay_tab_arr = {} diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua index 0c7053c176d1..43e4f956e3ac 100644 --- a/apisix/plugins/ai-cache.lua +++ b/apisix/plugins/ai-cache.lua @@ -103,12 +103,13 @@ function _M.access(conf, ctx) local res res, err = red:get(ctx.ai_cache_key) - release(conf, red) if err then + red:close() core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err) ctx.ai_cache_status = "MISS" return end + release(conf, red) if res ~= nil and res ~= ngx_null then local cached = core.json.decode(res) @@ -170,7 +171,9 @@ local function write_to_cache(premature, conf, cache_key, response_body) local ok ok, err = red:set(cache_key, envelope, "EX", ttl) if not ok then + red:close() core.log.warn("ai-cache: redis set failed: ", err) + return end release(conf, red) end diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua index 3f012892c0b9..6deb7cae4ca0 100644 --- a/apisix/plugins/ai-cache/key.lua +++ b/apisix/plugins/ai-cache/key.lua @@ -17,18 +17,12 @@ local core = require("apisix.core") local protocols = require("apisix.plugins.ai-protocols") -local rapidjson = require("rapidjson") local sha256 = require("resty.sha256") local to_hex = require("resty.string").to_hex -local ipairs = ipairs -local pairs = pairs -local type = type -local getmetatable = getmetatable -local concat = table.concat - -local rapidjson_null = rapidjson.null -local ENCODE_OPTS = { sort_keys = true } +local ipairs = ipairs +local pairs = pairs +local concat = table.concat local _M = {} @@ -40,33 +34,6 @@ local function hex_digest(s) end -local function to_rapidjson_value(data) - if data == core.json.null then - return rapidjson_null - end - if 
type(data) ~= "table" then - return data - end - if getmetatable(data) == core.json.array_mt then - local arr = {} - for i, v in ipairs(data) do - arr[i] = to_rapidjson_value(v) - end - return rapidjson.array(arr) - end - local obj = {} - for k, v in pairs(data) do - obj[k] = to_rapidjson_value(v) - end - return obj -end - - -local function canonical_encode(value) - return rapidjson.encode(to_rapidjson_value(value), ENCODE_OPTS) -end - - function _M.fingerprint(ctx, body) local params = {} for k, v in pairs(body) do @@ -75,7 +42,7 @@ function _M.fingerprint(ctx, body) end end - local repr = canonical_encode({ + local repr = core.json.canonical_encode({ protocol = ctx.ai_client_protocol or "", model = ctx.var.request_llm_model or body.model or "", messages = protocols.get_messages(body, ctx) or {}, @@ -86,21 +53,24 @@ end function _M.scope(conf, ctx) - local ck = conf.cache_key - local inc_vars = ck and ck.include_vars - if not (ck and ck.include_consumer) and (not inc_vars or #inc_vars == 0) then - return "shared" - end + local ck = conf.cache_key or {} local parts = {} + if not ck.share_across_routes then + parts[#parts + 1] = "route=" .. (ctx.var.route_id or "") + end if ck.include_consumer then parts[#parts + 1] = "consumer=" .. (ctx.consumer_name or "") end - if inc_vars then - for _, name in ipairs(inc_vars) do + if ck.include_vars then + for _, name in ipairs(ck.include_vars) do parts[#parts + 1] = name .. "=" .. 
(ctx.var[name] or "") end end + + if #parts == 0 then + return "shared" + end return concat(parts, ":") end diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua index d434d44f45a7..9494dfeec2f4 100644 --- a/apisix/plugins/ai-cache/schema.lua +++ b/apisix/plugins/ai-cache/schema.lua @@ -23,16 +23,6 @@ local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema) local _M = { type = "object", properties = { - layers = { - type = "array", - items = { - enum = { "exact" }, - }, - minItems = 1, - uniqueItems = true, - default = { "exact" }, - }, - exact = { type = "object", properties = { @@ -44,6 +34,7 @@ local _M = { cache_key = { type = "object", properties = { + share_across_routes = { type = "boolean", default = false }, include_consumer = { type = "boolean", default = false }, include_vars = { type = "array", diff --git a/apisix/plugins/ai-transport/http.lua b/apisix/plugins/ai-transport/http.lua index eb7efc34b57b..5ea9d2194545 100644 --- a/apisix/plugins/ai-transport/http.lua +++ b/apisix/plugins/ai-transport/http.lua @@ -20,8 +20,6 @@ local core = require("apisix.core") local http = require("resty.http") -local rapidjson = require("rapidjson") -local getmetatable = getmetatable local ngx_now = ngx.now local pairs = pairs local ipairs = ipairs @@ -31,8 +29,6 @@ local str_lower = string.lower local tostring = tostring local _M = {} -local rapidjson_encode_opts = {sort_keys = true} -local rapidjson_null = rapidjson.null --- Map network errors to HTTP status codes. 
@@ -73,38 +69,8 @@ function _M.construct_forward_headers(ext_opts_headers, ctx) end -local function to_rapidjson_value(data) - if data == core.json.null then - return rapidjson_null - end - - if type(data) ~= "table" then - return data - end - - if getmetatable(data) == core.json.array_mt then - local arr = {} - for i, v in ipairs(data) do - arr[i] = to_rapidjson_value(v) - end - return rapidjson.array(arr) - end - - local obj = {} - for k, v in pairs(data) do - obj[k] = to_rapidjson_value(v) - end - return obj -end - - -local function rapidjson_encode(body) - return rapidjson.encode(to_rapidjson_value(body), rapidjson_encode_opts) -end - - local function encode_body(body) - local ok, encoded = pcall(rapidjson_encode, body) + local ok, encoded = pcall(core.json.canonical_encode, body) if ok and encoded then return encoded end diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md index dc3a559d9adc..4f4568c6ef33 100644 --- a/docs/en/latest/plugins/ai-cache.md +++ b/docs/en/latest/plugins/ai-cache.md @@ -46,7 +46,9 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai- :::note -The cache key uses the **requested** model. If routes rewrite the model server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection) and share one Redis and cache scope, isolate them with separate Redis instances or with `cache_key.include_vars` (for example `["route_id"]`). +By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes. + +The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`. 
::: @@ -54,10 +56,10 @@ The cache key uses the **requested** model. If routes rewrite the model server-s | Name | Type | Required | Default | Valid values | Description | |------|------|----------|---------|--------------|-------------| -| layers | array[string] | False | ["exact"] | ["exact"] | Cache layers to enable. Only the exact layer is available in this release. | | exact.ttl | integer | False | 3600 | >= 1 | Time-to-live, in seconds, of an exact-cache entry. | +| cache_key.share_across_routes | boolean | False | false | | By default the cache is isolated per route. If true, entries are shared across every route that computes the same key. | | cache_key.include_consumer | boolean | False | false | | If true, scope the cache per consumer so entries are not shared across consumers. | -| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["route_id"]`), isolating entries by their values. | +| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. | | max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. | | cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). | | bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. 
| diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md index fcb3e3e9483c..38e6c163da60 100644 --- a/docs/zh/latest/plugins/ai-cache.md +++ b/docs/zh/latest/plugins/ai-cache.md @@ -46,7 +46,9 @@ import TabItem from '@theme/TabItem'; :::note -缓存键使用**请求中**的模型。如果路由在服务端改写模型(`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择),并且共享同一个 Redis 与缓存作用域,请使用独立的 Redis 实例,或通过 `cache_key.include_vars`(例如 `["route_id"]`)将它们隔离。 +默认情况下缓存按路由隔离,因此即使两个路由看到相同的协议、模型与消息,也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。 + +缓存键使用**请求中**的模型,而非路由在服务端改写后的模型(`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择)。在跨路由共享时,如果不同路由改写到不同的上游模型,请使用独立的 Redis 实例,或通过 `cache_key.include_vars` 将它们隔离。 ::: @@ -54,10 +56,10 @@ import TabItem from '@theme/TabItem'; | 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | |------|------|--------|--------|--------|------| -| layers | array[string] | 否 | ["exact"] | ["exact"] | 要启用的缓存层。本次发布仅支持精确(exact)缓存层。 | | exact.ttl | integer | 否 | 3600 | >= 1 | 精确缓存条目的存活时间(TTL),单位为秒。 | +| cache_key.share_across_routes | boolean | 否 | false | | 默认情况下缓存按路由隔离。如果为 true,则计算出相同缓存键的所有路由之间共享缓存条目。 | | cache_key.include_consumer | boolean | 否 | false | | 如果为 true,则按消费者隔离缓存,使缓存条目不会在不同消费者之间共享。 | -| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量(例如 `["route_id"]`),按其取值隔离缓存条目。 | +| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量(例如 `["http_x_tenant"]`),按其取值隔离缓存条目。 | | max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小,单位为字节。超过该大小的响应不会被缓存。 | | cache_headers | boolean | 否 | true | | 如果为 true,则添加 `X-AI-Cache-Status` 响应头(命中时还会添加 `X-AI-Cache-Age`,表示缓存条目的存在时长,单位为秒)。 | | bypass_on | array[object] | 否 | | | 当任一规则匹配时,完全跳过缓存(不查询、不回写)的规则列表。 | From 9024b70afc6fa90ef95eb177762d9826cb92984b Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 23 Jun 2026 16:09:18 +0800 Subject: [PATCH 08/10] feat(ai-cache): update tests for exact.ttl validation and add cross-route cache sharing scenarios --- 
t/plugin/ai-cache.t | 181 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 178 insertions(+), 3 deletions(-) diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t index c59f58f9181d..9ff45f7bfdbe 100644 --- a/t/plugin/ai-cache.t +++ b/t/plugin/ai-cache.t @@ -88,14 +88,14 @@ qr/then clause did not match/ -=== TEST 3: reject unknown layer value +=== TEST 3: reject an out-of-range exact.ttl --- config location /t { content_by_lua_block { local plugin = require("apisix.plugins.ai-cache") local ok, err = plugin.check_schema({ redis_host = "127.0.0.1", - layers = { "nonsense" }, + exact = { ttl = 0 }, }) if not ok then @@ -106,7 +106,7 @@ qr/then clause did not match/ } } --- response_body eval -qr/layers/ +qr/ttl/ @@ -774,3 +774,178 @@ X-AI-Fixture: openai/chat-basic.json X-AI-Cache-Status: MISS --- response_body_like eval qr/1 \+ 1 = 2/ + + + +=== TEST 35: set two openai routes (same model, default scope) sharing one Redis +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + + code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "uri": "/cache-route-b", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed +passed + 
+ + +=== TEST 36: route 1 cold request is a MISS (warms scope=route=1) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 37: same prompt on route 2 is a MISS (not shared with route 1 by default) +--- request +POST /cache-route-b +{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS + + + +=== TEST 38: same prompt on route 1 is a HIT (its own per-route scope persisted) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route isolation test"}]} +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: HIT + + + +=== TEST 39: set both routes with share_across_routes enabled +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/anything", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "cache_key": { "share_across_routes": true } + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + + code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "uri": "/cache-route-b", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "cache_key": { "share_across_routes": true } + } + } + }]] 
+ ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed +passed + + + +=== TEST 40: route 1 cold request is a MISS (warms the shared scope) +--- request +POST /anything +{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route share test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers +X-AI-Cache-Status: MISS +--- wait: 0.3 + + + +=== TEST 41: same prompt on route 2 is a HIT (cache shared across routes) +--- request +POST /cache-route-b +{"model":"gpt-4o","messages":[{"role":"user","content":"cross-route share test"}]} +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: HIT From 6f15de7841a24057804d6515b0e5b89ffde9be1b Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Wed, 24 Jun 2026 09:11:56 +0800 Subject: [PATCH 09/10] fix(json): remove redundant require statement in json.lua --- apisix/core/json.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/apisix/core/json.lua b/apisix/core/json.lua index 538e384fe985..9418917b355e 100644 --- a/apisix/core/json.lua +++ b/apisix/core/json.lua @@ -24,6 +24,7 @@ local json_encode = cjson.encode local json_decode = cjson.decode local cjson_null = cjson.null local clear_tab = require("table.clear") +local require = require local ngx = ngx local tostring = tostring local type = type From 4775bfc1fd822869d705b45bceb1856cefbd2df8 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Wed, 24 Jun 2026 15:53:11 +0800 Subject: [PATCH 10/10] feat(ai-cache): enhance error handling for unsupported requests and improve cache key generation --- apisix/plugins/ai-cache.lua | 22 +++- apisix/plugins/ai-cache/key.lua | 12 +- apisix/plugins/ai-cache/schema.lua | 3 + docs/en/latest/plugins/ai-cache.md | 3 +- docs/zh/latest/plugins/ai-cache.md | 3 +- t/plugin/ai-cache.t | 179 +++++++++++++++++++++++++++++ 6 files changed, 215 insertions(+), 7 deletions(-) diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua index 
43e4f956e3ac..3b7dd5145b0a 100644 --- a/apisix/plugins/ai-cache.lua +++ b/apisix/plugins/ai-cache.lua @@ -18,6 +18,7 @@ local core = require("apisix.core") local schema = require("apisix.plugins.ai-cache.schema") local key_mod = require("apisix.plugins.ai-cache.key") +local binding = require("apisix.plugins.ai-protocols.binding") local redis_util = require("apisix.utils.redis") local ngx = ngx @@ -65,6 +66,18 @@ end function _M.access(conf, ctx) + if not ctx.picked_ai_instance then + local handled, code, body = binding.on_unsupported( + conf.fail_mode, _M.name, ctx, + "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)", + 500, "ai-cache must be used with the ai-proxy or ai-proxy-multi plugin") + if handled then + return code, body + end + ctx.ai_cache_status = "BYPASS" + return + end + -- Streaming responses are not cached in PR-1 (SSE replay is a later -- increment). ai-proxy (higher priority) has already classified the -- request, so bypass before doing any work. @@ -89,8 +102,8 @@ function _M.access(conf, ctx) return end - ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx) - .. ":" .. 
key_mod.fingerprint(ctx, body) + ctx.ai_cache_fingerprint = key_mod.fingerprint(ctx, body) + ctx.ai_cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint) local red red, err = redis_util.new(conf) @@ -180,7 +193,7 @@ end function _M.log(conf, ctx) - if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then + if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_fingerprint then return end if ngx.status ~= 200 then @@ -192,7 +205,8 @@ function _M.log(conf, ctx) end local response_body = concat(buf, "", 1, buf.n) - local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body) + local cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint) + local ok, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body) if not ok then core.log.warn("ai-cache: failed to schedule cache write: ", err) end diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua index 6deb7cae4ca0..e08f0586daec 100644 --- a/apisix/plugins/ai-cache/key.lua +++ b/apisix/plugins/ai-cache/key.lua @@ -24,6 +24,8 @@ local ipairs = ipairs local pairs = pairs local concat = table.concat +local KEY_PREFIX = "ai-cache:l1:" + local _M = {} @@ -52,10 +54,13 @@ function _M.fingerprint(ctx, body) end -function _M.scope(conf, ctx) +local function scope(conf, ctx) local ck = conf.cache_key or {} local parts = {} + if ctx.picked_ai_instance_name then + parts[#parts + 1] = "instance=" .. ctx.picked_ai_instance_name + end if not ck.share_across_routes then parts[#parts + 1] = "route=" .. (ctx.var.route_id or "") end @@ -75,4 +80,9 @@ function _M.scope(conf, ctx) end +function _M.build(conf, ctx, fingerprint) + return KEY_PREFIX .. scope(conf, ctx) .. ":" .. 
fingerprint +end + + return _M diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua index 9494dfeec2f4..5eb62661c73d 100644 --- a/apisix/plugins/ai-cache/schema.lua +++ b/apisix/plugins/ai-cache/schema.lua @@ -17,6 +17,7 @@ local core = require("apisix.core") local redis_schema = require("apisix.utils.redis-schema") +local binding = require("apisix.plugins.ai-protocols.binding") local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema) @@ -53,6 +54,8 @@ local _M = { type = "boolean", default = true, }, + fail_mode = binding.schema_property("skip"), + bypass_on = { type = "array", minItems = 1, diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md index 4f4568c6ef33..360f79b1b3bf 100644 --- a/docs/en/latest/plugins/ai-cache.md +++ b/docs/en/latest/plugins/ai-cache.md @@ -48,7 +48,7 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai- By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes. -The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`. +Even with `cache_key.share_across_routes` enabled, responses from different upstream models or providers are kept in separate cache entries, so one model's response is never served for another. ::: @@ -62,6 +62,7 @@ The cache key uses the **requested** model, not the model a route may rewrite to | cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. 
| | max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. | | cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). | +| fail_mode | string | False | `"skip"` | `skip`, `warn`, `error` | Behavior when the request is not a recognized AI request that this Plugin can cache (for example, a request that did not pass through `ai-proxy` or `ai-proxy-multi`). `skip`: let the request pass through uncached; `warn`: pass through uncached and log a warning; `error`: reject the request. | | bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. | | bypass_on[].header | string | True | | | Request header name to match. | | bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. | diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md index 38e6c163da60..3793317bdd15 100644 --- a/docs/zh/latest/plugins/ai-cache.md +++ b/docs/zh/latest/plugins/ai-cache.md @@ -48,7 +48,7 @@ import TabItem from '@theme/TabItem'; 默认情况下缓存按路由隔离,因此即使两个路由看到相同的协议、模型与消息,也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。 -缓存键使用**请求中**的模型,而非路由在服务端改写后的模型(`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择)。在跨路由共享时,如果不同路由改写到不同的上游模型,请使用独立的 Redis 实例,或通过 `cache_key.include_vars` 将它们隔离。 +即使开启 `cache_key.share_across_routes`,来自不同上游模型或 provider 的响应也会分别存储在各自的缓存条目中,因此某个模型的响应绝不会被返回给另一个模型。 ::: @@ -62,6 +62,7 @@ import TabItem from '@theme/TabItem'; | cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量(例如 `["http_x_tenant"]`),按其取值隔离缓存条目。 | | max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小,单位为字节。超过该大小的响应不会被缓存。 | | cache_headers | boolean | 否 | true | | 如果为 true,则添加 `X-AI-Cache-Status` 响应头(命中时还会添加 
`X-AI-Cache-Age`,表示缓存条目的存在时长,单位为秒)。 | +| fail_mode | string | 否 | `"skip"` | `skip`、`warn`、`error` | 当请求不是该插件可缓存的 AI 请求时的处理行为(例如未经过 `ai-proxy` 或 `ai-proxy-multi` 的请求)。`skip`:放行请求且不缓存;`warn`:放行不缓存并记录 warning 日志;`error`:拒绝请求。 | | bypass_on | array[object] | 否 | | | 当任一规则匹配时,完全跳过缓存(不查询、不回写)的规则列表。 | | bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 | | bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时,绕过缓存。 | diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t index 9ff45f7bfdbe..e9fdf5292ac9 100644 --- a/t/plugin/ai-cache.t +++ b/t/plugin/ai-cache.t @@ -949,3 +949,182 @@ POST /cache-route-b --- error_code: 200 --- response_headers X-AI-Cache-Status: HIT + + + +=== TEST 42: route with ai-cache but NO ai-proxy in front +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/v1/chat/completions", + "upstream": { + "type": "roundrobin", + "nodes": { "127.0.0.1:1980": 1 } + }, + "plugins": { + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 43: a request that never passed through ai-proxy is bypassed, not cached +--- request +POST /v1/chat/completions +{"model":"gpt-4o","messages":[{"role":"user","content":"no ai-proxy guard test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 200 +--- response_headers +X-AI-Cache-Status: BYPASS + + + +=== TEST 44: route with ai-cache fail_mode=error and NO ai-proxy +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/v1/chat/completions", + "upstream": { + "type": "roundrobin", + "nodes": { "127.0.0.1:1980": 1 } + }, + "plugins": { + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379, + "fail_mode": 
"error" + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 45: fail_mode=error rejects a request that bypassed the AI proxy +--- request +POST /v1/chat/completions +{"model":"gpt-4o","messages":[{"role":"user","content":"fail_mode error guard test"}]} +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- error_code: 500 +--- response_body_like eval +qr/must be used with the ai-proxy/ + + + +=== TEST 46: flush redis, then set one ai-proxy-multi route with two instances +--- extra_yaml_config +plugins: + - ai-proxy-multi + - ai-cache +--- config + location /t { + content_by_lua_block { + local redis = require("resty.redis") + local red = redis:new() + red:set_timeout(1000) + local ok, rerr = red:connect("127.0.0.1", 6379) + if not ok then + ngx.say("redis connect failed: ", rerr) + return + end + local fok, ferr = red:flushall() + if not fok then + ngx.say("redis flushall failed: ", ferr) + return + end + + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/multi", + "plugins": { + "ai-proxy-multi": { + "instances": [ + { + "name": "instance-gpt4o", + "provider": "openai", + "weight": 1, + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + }, + { + "name": "instance-gpt4o-mini", + "provider": "openai", + "weight": 1, + "auth": { "header": { "Authorization": "Bearer test-key" } }, + "options": { "model": "gpt-4o-mini" }, + "override": { "endpoint": "http://127.0.0.1:1980" } + } + ] + }, + "ai-cache": { + "redis_host": "127.0.0.1", + "redis_port": 6379 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 47: round-robin alternates instances, so each one caches independently +--- extra_yaml_config +plugins: + - ai-proxy-multi + - 
ai-cache +--- pipelined_requests eval +[ + "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}', + "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}', + "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}', + "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}', +] +--- more_headers +X-AI-Fixture: openai/chat-basic.json +--- response_headers eval +[ + "X-AI-Cache-Status: MISS", + "X-AI-Cache-Status: MISS", + "X-AI-Cache-Status: HIT", + "X-AI-Cache-Status: HIT", +]