diff --git a/Directory.Packages.props b/Directory.Packages.props
index bbfd3a695fc..f4c9c70891a 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -32,7 +32,7 @@
     <PackageVersion Include="System.Numerics.Tensors" Version="10.0.5" />
     <PackageVersion Include="Microsoft.Extensions.Hosting" Version="10.0.5" />
     <PackageVersion Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.5" />
-    <PackageVersion Include="diskann-garnet" Version="2.0.4" />
+    <PackageVersion Include="diskann-garnet" Version="3.0.0" />
     <PackageVersion Include="Microsoft.VisualStudio.Threading.Analyzers" Version="17.14.15" />
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/libs/server/Resp/Vector/AttributeExtractor.cs b/libs/server/Resp/Vector/AttributeExtractor.cs
index 0fcecbb43a0..412cd68b889 100644
--- a/libs/server/Resp/Vector/AttributeExtractor.cs
+++ b/libs/server/Resp/Vector/AttributeExtractor.cs
@@ -2,6 +2,7 @@
 // Licensed under the MIT license.
 
 using System;
+using System.Buffers.Binary;
 using System.Buffers.Text;
 
 namespace Garnet.server
@@ -28,8 +29,7 @@ public static int ExtractFields(
             Span<ExprToken> results,
             ref ExprProgram program)
         {
-            for (var i = 0; i < selectorRanges.Length; i++)
-                results[i] = default;
+            results[..selectorRanges.Length].Clear();
 
             var s = TrimWhiteSpace(json);
             if (s.IsEmpty || s[0] != (byte)'{') return 0;
@@ -81,7 +81,11 @@ public static int ExtractFields(
 
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return found;
-                if (s[0] == (byte)',') { s = s[1..]; continue; }
+                if (s[0] == (byte)',')
+                {
+                    s = s[1..];
+                    continue;
+                }
                 if (s[0] == (byte)'}') return found;
                 return found;
             }
@@ -126,7 +130,11 @@ public static ExprToken ExtractField(ReadOnlySpan<byte> json, ReadOnlySpan<byte>
 
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return default;
-                if (s[0] == (byte)',') { s = s[1..]; continue; }
+                if (s[0] == (byte)',')
+                {
+                    s = s[1..];
+                    continue;
+                }
                 if (s[0] == (byte)'}') return default;
                 return default;
             }
@@ -162,7 +170,7 @@ private static ExprToken ParseValueToken(ReadOnlySpan<byte> json, ref ReadOnlySp
 
             var c = s[0];
             if (c == (byte)'"') return ParseStringToken(json, ref s);
-            if (c == (byte)'[') return ParseArrayTokenNoPool(json, ref s);
+            if (c == (byte)'[') return ParseArrayTokenNoPool(ref s);
             if (c == (byte)'{') return default; // Nested objects not supported
             if (c == (byte)'t') return ParseLiteralToken(ref s, "true"u8, ExprTokenType.Num, 1);
             if (c == (byte)'f') return ParseLiteralToken(ref s, "false"u8, ExprTokenType.Num, 0);
@@ -245,7 +253,11 @@ internal static ExprToken ParseArrayToken(ReadOnlySpan<byte> json, ref ReadOnlyS
             s = TrimWhiteSpace(s);
 
             // Empty array
-            if (!s.IsEmpty && s[0] == (byte)']') { s = s[1..]; return ExprToken.NewTuple(0, 0); }
+            if (!s.IsEmpty && s[0] == (byte)']')
+            {
+                s = s[1..];
+                return ExprToken.NewTuple(0, 0);
+            }
 
             Span<ExprToken> localBuf = stackalloc ExprToken[MaxArrayElements];
             var count = 0;
@@ -254,7 +266,11 @@ internal static ExprToken ParseArrayToken(ReadOnlySpan<byte> json, ref ReadOnlyS
             {
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return default;
-                if (count >= MaxArrayElements) { SkipBracketed(ref s, (byte)'[', (byte)']'); return ExprToken.NewNull(); }
+                if (count >= MaxArrayElements)
+                {
+                    _ = SkipBracketed(ref s, (byte)'[', (byte)']');
+                    return ExprToken.NewNull();
+                }
 
                 var elem = ParseValueToken(json, ref s);
                 if (elem.IsNone) return default;
@@ -262,7 +278,11 @@ internal static ExprToken ParseArrayToken(ReadOnlySpan<byte> json, ref ReadOnlyS
 
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return default;
-                if (s[0] == (byte)']') { s = s[1..]; break; }
+                if (s[0] == (byte)']')
+                {
+                    s = s[1..];
+                    break;
+                }
                 if (s[0] != (byte)',') return default;
                 s = s[1..];
             }
@@ -283,7 +303,7 @@ internal static ExprToken ParseArrayToken(ReadOnlySpan<byte> json, ref ReadOnlyS
             return ExprToken.NewNull();
         }
 
-        private static ExprToken ParseArrayTokenNoPool(ReadOnlySpan<byte> json, ref ReadOnlySpan<byte> s)
+        private static ExprToken ParseArrayTokenNoPool(ref ReadOnlySpan<byte> s)
         {
             // Standalone extraction without a program — just skip the array
             if (!SkipValue(ref s)) return default;
@@ -315,8 +335,16 @@ private static bool SkipString(ref ReadOnlySpan<byte> s)
             s = s[1..];
             while (!s.IsEmpty)
             {
-                if (s[0] == (byte)'\\') { s = s[2..]; continue; }
-                if (s[0] == (byte)'"') { s = s[1..]; return true; }
+                if (s[0] == (byte)'\\')
+                {
+                    s = s[2..];
+                    continue;
+                }
+                if (s[0] == (byte)'"')
+                {
+                    s = s[1..];
+                    return true;
+                }
                 s = s[1..];
             }
             return false;
@@ -333,8 +361,15 @@ private static bool SkipBracketed(ref ReadOnlySpan<byte> s, byte opener, byte cl
                     if (!SkipString(ref s)) return false;
                     continue;
                 }
-                if (s[0] == opener) depth++;
-                else if (s[0] == closer) depth--;
+
+                if (s[0] == opener)
+                {
+                    depth++;
+                }
+                else if (s[0] == closer)
+                {
+                    depth--;
+                }
                 s = s[1..];
             }
             return depth == 0;
@@ -375,5 +410,303 @@ internal static ReadOnlySpan<byte> TrimWhiteSpace(ReadOnlySpan<byte> s)
         private static bool IsNumberChar(byte b) =>
             IsDigit(b) || b == (byte)'-' || b == (byte)'+' ||
             b == (byte)'.' || b == (byte)'e' || b == (byte)'E';
+
+        // ======================== Binary attribute format ========================
+        //
+        // Pre-extracted binary format for fast filter evaluation:
+        //   [0xFF marker]
+        //   [num_fields: u8]
+        //   For each field:
+        //     [field_name_len: u8]
+        //     [field_name: N bytes]         ← raw UTF-8
+        //     [value_type: u8]              ← 0=string, 1=number, 2=bool_true, 3=bool_false, 4=null
+        //     [value_len: u16 LE]
+        //     [value_bytes: N bytes]        ← UTF-8 string, 8-byte f64 LE, or empty (bool/null)
+
+        internal const byte BinaryMarker = 0xFF;
+
+        private const byte BinTypeString = 0;
+        private const byte BinTypeNumber = 1;
+        private const byte BinTypeBoolTrue = 2;
+        private const byte BinTypeBoolFalse = 3;
+        private const byte BinTypeNull = 4;
+
+        /// <summary>
+        /// Convert a top-level JSON object to pre-extracted binary format.
+        /// </summary>
+        /// <param name="json">UTF-8 JSON text; must be a single object whose values are scalars.</param>
+        /// <param name="output">Destination buffer for the binary encoding.</param>
+        /// <returns>
+        /// Total bytes written, or -1 if <paramref name="output"/> is too small or the input
+        /// cannot be represented: malformed JSON, escaped or over-long keys, nested values,
+        /// more than 255 fields, or a string value longer than 65535 bytes.
+        /// </returns>
+        public static int ConvertJsonToBinary(ReadOnlySpan<byte> json, Span<byte> output)
+        {
+            var s = TrimWhiteSpace(json);
+            if (s.IsEmpty || s[0] != (byte)'{') return -1;
+            s = s[1..];
+
+            if (output.Length < 2) return -1;
+            output[0] = BinaryMarker;
+            // output[1] = num_fields — written at the end
+            var pos = 2;
+            byte fieldCount = 0;
+
+            while (true)
+            {
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+                if (s[0] == (byte)'}') break;
+
+                if (s[0] != (byte)'"') return -1;
+                if (fieldCount == byte.MaxValue) return -1; // num_fields is one byte; never let it wrap
+
+                // Parse key: find the closing quote. Keys containing escape sequences are not
+                // supported, so a backslash before the closing quote rejects the whole input.
+                var keyBody = s[1..];
+                var keyEnd = keyBody.IndexOfAny((byte)'"', (byte)'\\');
+                if (keyEnd < 0 || keyBody[keyEnd] == (byte)'\\') return -1;
+                var keyContent = keyBody[..keyEnd];
+                s = keyBody[(keyEnd + 1)..];
+
+                // Write field_name_len + field_name (also reserves room for value_type + value_len)
+                if (keyContent.Length > 255) return -1;
+                if (pos + 1 + keyContent.Length + 1 + 2 > output.Length) return -1;
+                output[pos++] = (byte)keyContent.Length;
+                keyContent.CopyTo(output[pos..]);
+                pos += keyContent.Length;
+
+                // Skip colon
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty || s[0] != (byte)':') return -1;
+                s = s[1..];
+
+                // Parse value
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+
+                var c = s[0];
+                if (c == (byte)'"')
+                {
+                    // String value — need to unescape
+                    s = s[1..]; // skip opening quote
+                    var body = s;
+                    var hasEscape = false;
+                    while (!s.IsEmpty)
+                    {
+                        if (s[0] == (byte)'\\')
+                        {
+                            if (s.Length < 2) return -1; // dangling backslash at end of input
+                            hasEscape = true;
+                            s = s[2..];
+                            continue;
+                        }
+                        if (s[0] == (byte)'"') break;
+                        s = s[1..];
+                    }
+                    if (s.IsEmpty) return -1;
+                    var strContent = body[..(body.Length - s.Length)];
+                    s = s[1..]; // skip closing quote
+
+                    output[pos++] = BinTypeString;
+
+                    if (!hasEscape)
+                    {
+                        // No escapes — direct copy; value_len is a u16, so longer strings cannot be encoded
+                        if (strContent.Length > ushort.MaxValue || pos + 2 + strContent.Length > output.Length) return -1;
+                        BinaryPrimitives.WriteUInt16LittleEndian(output[pos..], (ushort)strContent.Length);
+                        pos += 2;
+                        strContent.CopyTo(output[pos..]);
+                        pos += strContent.Length;
+                    }
+                    else
+                    {
+                        // Unescape into output
+                        // NOTE(review): only \n, \r and \t are translated; every other escape, including
+                        // \b, \f and \uXXXX, is emitted as the character(s) following the backslash.
+                        var valueLenPos = pos;
+                        pos += 2; // reserve for value_len
+                        var valueStart = pos;
+                        for (var si = 0; si < strContent.Length; si++)
+                        {
+                            if (pos >= output.Length) return -1;
+                            if (strContent[si] == (byte)'\\' && si + 1 < strContent.Length)
+                            {
+                                si++;
+                                output[pos++] = strContent[si] switch
+                                {
+                                    (byte)'n' => (byte)'\n',
+                                    (byte)'r' => (byte)'\r',
+                                    (byte)'t' => (byte)'\t',
+                                    _ => strContent[si], // \", \\, \/ etc.
+                                };
+                            }
+                            else
+                            {
+                                output[pos++] = strContent[si];
+                            }
+                        }
+                        var valueLen = pos - valueStart;
+                        if (valueLen > ushort.MaxValue) return -1; // would not fit the u16 value_len
+                        BinaryPrimitives.WriteUInt16LittleEndian(output[valueLenPos..], (ushort)valueLen);
+                    }
+                }
+                else if (IsDigit(c) || c == (byte)'-' || c == (byte)'+')
+                {
+                    // Number value — store as 8-byte f64 LE
+                    var numStart = s;
+                    while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..];
+                    var numSpan = numStart[..(numStart.Length - s.Length)];
+                    if (!Utf8Parser.TryParse(numSpan, out double numVal, out var consumed) || consumed != numSpan.Length)
+                        return -1;
+
+                    output[pos++] = BinTypeNumber;
+                    if (pos + 2 + 8 > output.Length) return -1;
+                    output[pos] = 8;
+                    output[pos + 1] = 0;
+                    pos += 2;
+                    BinaryPrimitives.WriteDoubleLittleEndian(output[pos..], numVal);
+                    pos += 8;
+                }
+                else if (c == (byte)'t')
+                {
+                    if (!s.StartsWith("true"u8)) return -1;
+                    s = s[4..];
+                    output[pos++] = BinTypeBoolTrue;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else if (c == (byte)'f')
+                {
+                    if (!s.StartsWith("false"u8)) return -1;
+                    s = s[5..];
+                    output[pos++] = BinTypeBoolFalse;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else if (c == (byte)'n')
+                {
+                    if (!s.StartsWith("null"u8)) return -1;
+                    s = s[4..];
+                    output[pos++] = BinTypeNull;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else
+                {
+                    // Nested objects/arrays — not supported in binary format
+                    return -1;
+                }
+
+                fieldCount++;
+
+                // Next field or end
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+                if (s[0] == (byte)',')
+                {
+                    s = s[1..];
+                    continue;
+                }
+                if (s[0] == (byte)'}') break;
+                return -1;
+            }
+
+            output[1] = fieldCount;
+            return pos;
+        }
+
+        /// <summary>
+        /// Extract fields from pre-extracted binary attribute data (no JSON parsing).
+        /// String tokens hold offsets into <paramref name="binary"/>; a truncated record ends the scan.
+        /// </summary>
+        /// <returns>Number of selectors that received a token; 0 when the marker byte is absent.</returns>
+        public static int ExtractFieldsBinary(
+            ReadOnlySpan<byte> binary,
+            ReadOnlySpan<byte> filterBytes,
+            ReadOnlySpan<(int Start, int Length)> selectorRanges,
+            Span<ExprToken> results)
+        {
+            results[..selectorRanges.Length].Clear();
+
+            if (binary.Length < 2 || binary[0] != BinaryMarker)
+                return 0;
+
+            var numFields = binary[1];
+            var pos = 2;
+            var found = 0;
+            var needed = selectorRanges.Length;
+
+            for (var f = 0; f < numFields && pos < binary.Length; f++)
+            {
+                // Read field name (the loop condition guarantees pos is in range here)
+                var nameLen = binary[pos++];
+                if (pos + nameLen > binary.Length) break;
+                var fieldName = binary.Slice(pos, nameLen);
+                pos += nameLen;
+
+                // Read value type
+                if (pos >= binary.Length) break;
+                var valueType = binary[pos++];
+
+                // Read value length
+                if (pos + 2 > binary.Length) break;
+                var valueLen = binary[pos] | (binary[pos + 1] << 8);
+                pos += 2;
+
+                // Read value bytes
+                if (pos + valueLen > binary.Length) break;
+
+                // Match against selectors
+                var matchIndex = -1;
+                for (var i = 0; i < selectorRanges.Length; i++)
+                {
+                    if (results[i].IsNone &&
+                        fieldName.SequenceEqual(filterBytes.Slice(selectorRanges[i].Start, selectorRanges[i].Length)))
+                    {
+                        matchIndex = i;
+                        break;
+                    }
+                }
+
+                if (matchIndex >= 0)
+                {
+                    switch (valueType)
+                    {
+                        case BinTypeString:
+                            // Create a Str token referencing the binary buffer offsets
+                            results[matchIndex] = ExprToken.NewStr(pos, valueLen, hasEscape: false);
+                            break;
+                        case BinTypeNumber:
+                            if (valueLen == 8)
+                            {
+                                var numVal = BinaryPrimitives.ReadDoubleLittleEndian(binary[pos..]);
+                                results[matchIndex] = ExprToken.NewNum(numVal);
+                            }
+                            break;
+                        case BinTypeBoolTrue:
+                            results[matchIndex] = ExprToken.NewNum(1);
+                            break;
+                        case BinTypeBoolFalse:
+                            results[matchIndex] = ExprToken.NewNum(0);
+                            break;
+                        case BinTypeNull:
+                            results[matchIndex] = ExprToken.NewNull();
+                            break;
+                    }
+                    // Count only selectors that actually received a token; a malformed number or an
+                    // unknown value type leaves the slot empty and must not satisfy the early exit.
+                    if (!results[matchIndex].IsNone && ++found == needed) return found;
+                }
+
+                pos += valueLen;
+            }
+
+            return found;
+        }
     }
 }
\ No newline at end of file
diff --git a/libs/server/Resp/Vector/DiskANNService.cs b/libs/server/Resp/Vector/DiskANNService.cs
index 14ecdf28fc7..56f72e90028 100644
--- a/libs/server/Resp/Vector/DiskANNService.cs
+++ b/libs/server/Resp/Vector/DiskANNService.cs
@@ -18,7 +18,7 @@ internal sealed unsafe class DiskANNService
         internal const byte Attributes = 3;
         private const byte Metadata = 4;
         internal const byte InternalIdMap = 5;
-        private const byte ExternalIdMap = 6;
+        internal const byte ExternalIdMap = 6;
 
 #if DEBUG
         /// <summary>
@@ -44,6 +44,7 @@ public nint CreateIndex(
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
+            delegate* unmanaged[Cdecl]<ulong, uint, byte> filterCallback,
             out bool quantizationRequested
         )
         {
@@ -55,7 +56,7 @@ out bool quantizationRequested
 
             unsafe
             {
-                var ret = NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, distanceMetric, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback);
+                var ret = NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, distanceMetric, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback, (nint)filterCallback);
 
                 Debug.Assert(ret != 0, "create_index failed, returning a null pointer - this shouldn't be possible");
 
@@ -75,9 +76,10 @@ public nint RecreateIndex(
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
+            delegate* unmanaged[Cdecl]<ulong, uint, byte> filterCallback,
             out bool quantizationRequested
         )
-        => CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback, out quantizationRequested);
+        => CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback, filterCallback, out quantizationRequested);
 
         public void DropIndex(ulong context, nint index)
         {
@@ -344,7 +346,8 @@ public static partial nint create_index(
             nint readCallback,
             nint writeCallback,
             nint deleteCallback,
-            nint readModifyWriteCallback
+            nint readModifyWriteCallback,
+            nint filterCallback
         );
 
         [LibraryImport(DISKANN_GARNET)]
diff --git a/libs/server/Resp/Vector/ExprRunner.cs b/libs/server/Resp/Vector/ExprRunner.cs
index 69e14008adb..53195c86b7b 100644
--- a/libs/server/Resp/Vector/ExprRunner.cs
+++ b/libs/server/Resp/Vector/ExprRunner.cs
@@ -51,7 +51,7 @@ public static bool Run(
         {
             stack.Clear();
 
-            for (var i = 0; i < program.Length; i++)
+            for (var i = 0; i < program.Instructions.Length; i++)
             {
                 var inst = program.Instructions[i];
 
diff --git a/libs/server/Resp/Vector/RespServerSessionVectors.cs b/libs/server/Resp/Vector/RespServerSessionVectors.cs
index a45384c4f4d..7aab64ea7c7 100644
--- a/libs/server/Resp/Vector/RespServerSessionVectors.cs
+++ b/libs/server/Resp/Vector/RespServerSessionVectors.cs
@@ -811,9 +811,9 @@ private bool NetworkVSIM<TGarnetApi>(ref TGarnetApi storageApi)
                             return AbortWithWrongNumberOfArguments("VSIM");
                         }
 
-                        if (!parseState.TryGetInt(curIx, out var maxFilteringEffortNonNull) || maxFilteringEffortNonNull < 0 || maxFilteringEffortNonNull > VectorManager.MaxRetrieveCount)
+                        if (!parseState.TryGetInt(curIx, out var maxFilteringEffortNonNull) || maxFilteringEffortNonNull < 4 || maxFilteringEffortNonNull > VectorManager.MaxFilteringScaleFactor)
                         {
-                            return AbortWithErrorMessage($"ERR FILTER-EF must be an integer between 0 and {VectorManager.MaxRetrieveCount}");
+                            return AbortWithErrorMessage($"ERR FILTER-EF must be an integer between 4 and {VectorManager.MaxFilteringScaleFactor}");
                         }
 
                         maxFilteringEffort = maxFilteringEffortNonNull;
@@ -860,7 +860,7 @@ private bool NetworkVSIM<TGarnetApi>(ref TGarnetApi storageApi)
                 delta ??= 2f;
                 searchExplorationFactor ??= 100;
                 filter ??= default;
-                maxFilteringEffort ??= (int)Math.Min((long)count.Value * 200, VectorManager.MaxRetrieveCount);
+                maxFilteringEffort ??= 16;
 
                 // TODO: these stackallocs are dangerous, need logic to avoid stack overflow
                 Span<byte> idSpace = stackalloc byte[(DefaultResultSetSize * DefaultIdSize) + (DefaultResultSetSize * sizeof(int))];
diff --git a/libs/server/Resp/Vector/VectorFilterExpression.cs b/libs/server/Resp/Vector/VectorFilterExpression.cs
index b5984042bd5..af0afce8ef9 100644
--- a/libs/server/Resp/Vector/VectorFilterExpression.cs
+++ b/libs/server/Resp/Vector/VectorFilterExpression.cs
@@ -354,18 +354,12 @@ internal ref struct ExprProgram
         /// <summary>The compiled postfix instruction sequence.</summary>
         public Span<ExprToken> Instructions;
 
-        /// <summary>Number of instructions in the program.</summary>
-        public int Length;
-
         /// <summary>
         /// Flat pool of tuple element tokens. Tuple tokens in <see cref="Instructions"/>
         /// store (StartIndex, Count) into this span.
         /// </summary>
         public Span<ExprToken> TuplePool;
 
-        /// <summary>Number of elements used in <see cref="TuplePool"/>.</summary>
-        public int TuplePoolLength;
-
         /// <summary>
         /// Runtime tuple pool for extracted JSON array elements.
         /// Reused across candidate evaluations. Runtime elements are appended
diff --git a/libs/server/Resp/Vector/VectorManager.Callbacks.cs b/libs/server/Resp/Vector/VectorManager.Callbacks.cs
index 301f159b504..1c7e7d3062b 100644
--- a/libs/server/Resp/Vector/VectorManager.Callbacks.cs
+++ b/libs/server/Resp/Vector/VectorManager.Callbacks.cs
@@ -18,7 +18,7 @@ public sealed partial class VectorManager
     {
         public unsafe
 #if NET9_0_OR_GREATER
-            ref 
+            ref
 #endif
             struct VectorReadBatch : IReadArgBatch<VectorElementKey, VectorInput, VectorOutput>
         {
@@ -186,6 +186,7 @@ internal readonly void CompletePending(ref VectorBasicContext objectContext)
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> WriteCallbackPtr { get; } = &WriteCallbackUnmanaged;
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> DeleteCallbackPtr { get; } = &DeleteCallbackUnmanaged;
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> ReadModifyWriteCallbackPtr { get; } = &ReadModifyWriteCallbackUnmanaged;
+        private unsafe delegate* unmanaged[Cdecl]<ulong, uint, byte> InlineFilterCallbackPtr { get; } = &FilterCallbackUnmanaged;
 
         /// <summary>
         /// Used to thread the active <see cref="StorageSession"/> across p/invoke and reverse p/invoke boundaries into DiskANN.
@@ -274,6 +275,12 @@ private static byte ReadModifyWriteCallbackUnmanaged(ulong context, nint keyData
             return status.IsCompletedSuccessfully ? (byte)1 : default;
         }
 
+        /// <summary>Unmanaged (Cdecl) entry point that asks whether one candidate passes the active filter.</summary>
+        /// <returns>The result of <see cref="EvaluateCandidateFilter"/>: 1 to keep the candidate, 0 to exclude it.</returns>
+        [UnmanagedCallersOnly(CallConvs = [typeof(CallConvCdecl)])]
+        private static byte FilterCallbackUnmanaged(ulong context, uint internalId)
+            => EvaluateCandidateFilter(context, internalId);
+
         private static unsafe bool ReadSizeUnknown(ulong context, bool forceAlignment, ReadOnlySpan<byte> key, ref SpanByteAndMemory value)
         {
 #pragma warning disable IDE0302 // [...]-style collection initialization doesn't actually _guarantee_ stackalloc (or inline arrays), which we need here
diff --git a/libs/server/Resp/Vector/VectorManager.Filter.cs b/libs/server/Resp/Vector/VectorManager.Filter.cs
index 52b25ae8f43..562b94fd58a 100644
--- a/libs/server/Resp/Vector/VectorManager.Filter.cs
+++ b/libs/server/Resp/Vector/VectorManager.Filter.cs
@@ -3,7 +3,12 @@
 
 using System;
 using System.Buffers.Binary;
+using System.Diagnostics;
+#if NET9_0_OR_GREATER
+using System.Runtime.CompilerServices;
+#endif
 using System.Runtime.InteropServices;
+using Tsavorite.core;
 
 namespace Garnet.server
 {
@@ -135,9 +140,7 @@ internal static int ApplyPostFilter(
                 var program = new ExprProgram
                 {
                     Instructions = instrBuf[..instrCount],
-                    Length = instrCount,
                     TuplePool = tuplePoolBuf[..tupleCount],
-                    TuplePoolLength = tupleCount,
                     RuntimePool = runtimePoolBuf,
                     RuntimePoolLength = 0,
                 };
@@ -146,7 +149,7 @@ internal static int ApplyPostFilter(
                 filterBitmap.Clear();
 
                 // ── Collect unique selectors ──────────────────────────────
-                var selectorCount = GetSelectorRanges(program.Instructions, program.Length, filter, selectorBuf);
+                var selectorCount = GetSelectorRanges(program.Instructions, program.Instructions.Length, filter, selectorBuf);
                 var selectorRanges = selectorBuf[..selectorCount];
 
                 // Slice extractedFields to actual selector count
@@ -215,5 +218,108 @@ internal static int GetSelectorRanges(
             }
             return count;
         }
+
+        // ── Inline filter callback infrastructure ─────
+        //
+        // These types allow the Rust DiskANN pipeline to call
+        // back into C# for per-candidate filter evaluation, avoiding the need
+        // to over-fetch candidates and filter them afterwards.
+        //
+        // The compiled filter program and scratch buffers are stored in
+        // [ThreadStatic] fields before the FFI call. The callback runs on the
+        // same thread, so it reads the pre-compiled state directly — no need
+        // to marshal pointers through the FFI boundary.
+
+        /// <summary>
+        /// Thread-static state for the inline filter callback.
+        /// Set before the FFI call into Rust, read by <see cref="FilterCallbackUnmanaged"/>.
+        /// </summary>
+        [ThreadStatic]
+#pragma warning disable CS8500 // InlineFilterState only contains unmanaged types or spans of pinned arrays, this is safe
+        internal static unsafe InlineFilterState* InlineFilterStatePtr;
+#pragma warning restore CS8500
+
+        /// <summary>
+        /// Per-query filter state maintained on the C# side.
+        /// Populated before calling into Rust; the callback reads it from thread-static storage.
+        /// All Span fields are expected to reference pinned scratch-buffer memory that remains
+        /// valid for the duration of the FFI call.
+        /// </summary>
+        internal ref struct InlineFilterState
+        {
+            // Spans over the scratch buffer; the callback rebuilds its ExprProgram and stack from these:
+            public Span<ExprToken> InstrBuf;
+            public Span<ExprToken> TuplePoolBuf;
+            public Span<ExprToken> RuntimePoolBuf;
+            public Span<ExprToken> ExtractedFields;
+            public Span<ExprToken> StackBuf;
+            public Span<(int Start, int Length)> SelectorRanges;
+
+            /// <summary>The filter expression bytes; passed with <see cref="SelectorRanges"/> to extraction and evaluation.</summary>
+            public ReadOnlySpan<byte> FilterBytes;
+        }
+
+        /// <summary>
+        /// Evaluates the thread's active inline filter for one candidate: reads its external ID and
+        /// attributes, then runs the compiled program. Returns 1 to keep the candidate, 0 to exclude it.
+        /// </summary>
+        private static unsafe byte EvaluateCandidateFilter(ulong context, uint internalId)
+        {
+            Debug.Assert(InlineFilterStatePtr != null, "Shouldn't call without pinning a filter state");
+            if (InlineFilterStatePtr == null) return 0; // release builds: fail closed rather than dereference null
+            ref var state
+#if NET9_0_OR_GREATER
+                = ref Unsafe.AsRef<InlineFilterState>(InlineFilterStatePtr);
+#else
+                = ref *InlineFilterStatePtr;
+#endif
+
+            // 1. Read external ID for this internal_id via ExtMap
+            Span<byte> iidKey = stackalloc byte[sizeof(uint)];
+            BinaryPrimitives.WriteUInt32LittleEndian(iidKey, internalId);
+
+            Span<byte> eidBuf = stackalloc byte[128];
+            var eidMem = SpanByteAndMemory.FromPinnedSpan(eidBuf);
+            try
+            {
+                if (!ReadSizeUnknown(context | DiskANNService.ExternalIdMap, true, iidKey, ref eidMem))
+                    return 0; // can't find external ID → exclude
+
+                // 2. Read attributes by external ID
+                Span<byte> attrBuf = stackalloc byte[256];
+                var attrMem = SpanByteAndMemory.FromPinnedSpan(attrBuf);
+                try
+                {
+                    if (!ReadSizeUnknown(context | DiskANNService.Attributes, true, eidMem.ReadOnlySpan, ref attrMem))
+                        return 0; // no attributes → exclude
+
+                    // 3. Rebuild ExprProgram from thread-static state pointers
+                    var program = new ExprProgram
+                    {
+                        Instructions = state.InstrBuf,
+                        TuplePool = state.TuplePoolBuf,
+                        RuntimePool = state.RuntimePoolBuf,
+                        RuntimePoolLength = 0,
+                    };
+                    program.ResetRuntimePool();
+
+                    AttributeExtractor.ExtractFields(attrMem.ReadOnlySpan, state.FilterBytes, state.SelectorRanges, state.ExtractedFields, ref program);
+
+                    var stack = new ExprStack(state.StackBuf);
+                    var pass = ExprRunner.Run(ref program, attrMem.ReadOnlySpan, state.FilterBytes, state.SelectorRanges, state.ExtractedFields, ref stack);
+
+                    return pass ? (byte)1 : (byte)0;
+                }
+                finally
+                {
+                    attrMem.Memory?.Dispose();
+                }
+            }
+            finally
+            {
+                eidMem.Memory?.Dispose();
+            }
+        }
+
     }
 }
\ No newline at end of file
diff --git a/libs/server/Resp/Vector/VectorManager.Locking.cs b/libs/server/Resp/Vector/VectorManager.Locking.cs
index 2c53ae1865a..463ffe1c195 100644
--- a/libs/server/Resp/Vector/VectorManager.Locking.cs
+++ b/libs/server/Resp/Vector/VectorManager.Locking.cs
@@ -171,7 +171,7 @@ internal VectorSetLock ReadVectorIndex(StorageSession storageSession, ReadOnlySp
                         bool requestQuantization;
                         unsafe
                         {
-                            newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, out requestQuantization);
+                            newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr, out requestQuantization);
                         }
 
                         input.header.cmd = RespCommand.VADD;
@@ -365,7 +365,7 @@ out GarnetStatus status
 
                             unsafe
                             {
-                                newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, out requestQuantization);
+                                newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr, out requestQuantization);
                             }
 
                             input.parseState.EnsureCapacity(12);
@@ -397,7 +397,7 @@ out GarnetStatus status
 
                             unsafe
                             {
-                                newlyAllocatedIndex = Service.CreateIndex(indexContext, dims, reduceDims, quantizer, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, out requestQuantization);
+                                newlyAllocatedIndex = Service.CreateIndex(indexContext, dims, reduceDims, quantizer, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr, out requestQuantization);
                             }
 
                             input.parseState.EnsureCapacity(12);
diff --git a/libs/server/Resp/Vector/VectorManager.Migration.cs b/libs/server/Resp/Vector/VectorManager.Migration.cs
index a1188054f38..cc2c19e587c 100644
--- a/libs/server/Resp/Vector/VectorManager.Migration.cs
+++ b/libs/server/Resp/Vector/VectorManager.Migration.cs
@@ -176,7 +176,7 @@ public void HandleMigratedIndexKey(
                 bool requestQuantization;
                 unsafe
                 {
-                    newlyAllocatedIndex = Service.RecreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, out requestQuantization);
+                    newlyAllocatedIndex = Service.RecreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr, out requestQuantization);
                 }
 
                 var ctxArg = PinnedSpanByte.FromPinnedSpan(MemoryMarshal.Cast<ulong, byte>(MemoryMarshal.CreateSpan(ref context, 1)));
diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs
index 7affdc610b9..8a94e707de0 100644
--- a/libs/server/Resp/Vector/VectorManager.cs
+++ b/libs/server/Resp/Vector/VectorManager.cs
@@ -71,6 +71,11 @@ public sealed partial class VectorManager : IDisposable
         /// </summary>
         internal const int MaxRetrieveCount = 100_000_000;
 
+        /// <summary>
+        /// Maximum scale factor for adaptive-L inline filtering.
+        /// </summary>
+        internal const int MaxFilteringScaleFactor = 256;
+
         /// <summary>
         /// Maximum exploration factor (EF) for build and search operations.
         /// Matches Redis's hardcoded limit of 1,000,000.
@@ -120,6 +125,24 @@ private static void EnsureIdBufferSize(ref SpanByteAndMemory buffer, int retriev
             }
         }
 
+        /// <summary>
+        /// Sizes the VSIM filter bitmap to hold one bit per result, i.e.
+        /// <paramref name="resultCount"/> bits rounded up to a whole number of bytes.
+        /// A too-small buffer is replaced by one rented from <see cref="MemoryPool{T}"/>.
+        /// </summary>
+        private static void EnsureFilterBitmapSize(ref SpanByteAndMemory buffer, int resultCount)
+        {
+            var requiredBytes = (resultCount + 7) >> 3;
+            if (buffer.Length < requiredBytes)
+            {
+                // Release any pooled memory currently owned before swapping in a larger rental
+                buffer.Memory?.Dispose();
+                buffer = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(requiredBytes), requiredBytes);
+            }
+
+            buffer.Length = requiredBytes;
+        }
+
         /// <summary>
         /// This managers instance of <see cref="DiskANNService"/>.
         /// 
@@ -612,25 +635,10 @@ ref SpanByteAndMemory filterBitmap
 
             ReadIndex(indexValue, out var context, out var dimensions, out _, out var quantType, out _, out _, out _, out var indexPtr);
 
-            // When a filter is present, over-retrieve candidates from DiskANN so that
-            // post-filtering has enough results to fill the requested count.
-            //
-            // FILTER-EF controls both the graph exploration breadth and the output
-            // buffer size when a filter is active, allowing it to be tuned independently
-            // from EF (which is used for unfiltered searches).
-            var retrieveCount = !filter.IsEmpty ? maxFilteringEffort : count;
-            var effectiveEF = !filter.IsEmpty
-                ? Math.Max(searchExplorationFactor, maxFilteringEffort)
-                : searchExplorationFactor;
-
-            // No point in asking for more data than the effort we'll put in
-            if (retrieveCount > effectiveEF)
-            {
-                retrieveCount = effectiveEF;
-            }
+            var effectiveEF = Math.Max(searchExplorationFactor, count);
 
-            EnsureDistanceBufferSize(ref outputDistances, retrieveCount);
-            EnsureIdBufferSize(ref outputIds, retrieveCount);
+            EnsureDistanceBufferSize(ref outputDistances, count);
+            EnsureIdBufferSize(ref outputIds, count);
 
             int found;
             nint continuation;
@@ -651,20 +659,109 @@ ref SpanByteAndMemory filterBitmap
                     return VectorManagerResult.BadParams;
                 }
 
-                found =
-                    Service.SearchVector(
-                        context,
-                        indexPtr,
-                        vectorData.ReadOnlySpan,
-                        vectorData.ElementCount,
-                        delta,
-                        effectiveEF,
-                        filter,
-                        maxFilteringEffort,
-                        outputIds,
-                        outputDistances,
-                        out continuation
-                    );
+                if (!filter.IsEmpty)
+                {
+                    // ── Inline filtered search path ─────────
+                    // Compile the filter, set up callback state, and let Rust
+                    // evaluate per-candidate via InlineFilterCandidateCallbackImpl.
+                    // Only passing candidates are written to the output buffer,
+                    // so we size it for the desired count, not the overfetch.
+
+                    // Borrow scratch space for the compiled filter program and selector ranges (rewound in finally)
+                    var bufferSlice = ActiveThreadSession.scratchBufferBuilder.CreateArgSlice(
+                        TotalPoolTokens * ExprToken.Size + MaxSelectors * 2 * sizeof(int));
+                    var span = MemoryMarshal.Cast<byte, ExprToken>(bufferSlice.Span);
+                    var selectorBuf = MemoryMarshal.Cast<byte, (int Start, int Length)>(
+                        bufferSlice.Span.Slice(TotalPoolTokens * ExprToken.Size));
+
+                    try
+                    {
+                        span.Clear();
+
+                        // Carve the token scratch space into the individual compiler / runtime buffers
+                        var offset = 0;
+                        var instrBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                        var tuplePoolBuf = span.Slice(offset, MaxTuplePool); offset += MaxTuplePool;
+                        var tokensBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                        var opsStackBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                        var runtimePoolBuf = span.Slice(offset, MaxRuntimePool); offset += MaxRuntimePool;
+                        var extractedFields = span.Slice(offset, MaxSelectors); offset += MaxSelectors;
+                        var stackBuf = span.Slice(offset, StackCapacity);
+
+                        var instrCount = ExprCompiler.TryCompile(filter, instrBuf, tuplePoolBuf, tokensBuf, opsStackBuf, out var tupleCount, out _);
+                        if (instrCount < 0)
+                        {
+                            // Compile failed — return zero results
+                            outputDistances.Length = 0;
+                            filterBitmap.Length = 0;
+                            outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                            errorMsg = "ERR Compiling filter failed"u8;
+                            return VectorManagerResult.BadParams;
+                        }
+
+                        var selectorCount = GetSelectorRanges(instrBuf[..instrCount], instrCount, filter, selectorBuf);
+
+                        var filterState = new InlineFilterState
+                        {
+                            InstrBuf = instrBuf[..instrCount],
+                            TuplePoolBuf = tuplePoolBuf[..tupleCount],
+                            RuntimePoolBuf = runtimePoolBuf,
+                            ExtractedFields = extractedFields[..Math.Max(selectorCount, 1)],
+                            StackBuf = stackBuf,
+                            SelectorRanges = selectorBuf[..selectorCount],
+                            FilterBytes = filter,
+                        };
+
+                        // InlineFilterState is a ref struct, so will remain on stack for the SearchVector call.
+                        // Save a pointer off so it's easy to grab InlineFilterState in callbacks.
+                        unsafe
+                        {
+#pragma warning disable CS8500 // InlineFilterState only contains unmanaged types or spans of pinned arrays, this is safe
+                            InlineFilterStatePtr = &filterState;
+#pragma warning restore CS8500
+                        }
+
+                        found = Service.SearchVector(
+                            context,
+                            indexPtr,
+                            vectorData.ReadOnlySpan,
+                            vectorData.ElementCount,
+                            delta,
+                            effectiveEF,
+                            filter,
+                            maxFilteringEffort,
+                            outputIds,
+                            outputDistances,
+                            out continuation
+                        );
+                    }
+                    finally
+                    {
+                        // Clear the callback state pointer first: it must never outlive the scratch space its spans reference
+                        unsafe
+                        {
+                            InlineFilterStatePtr = null;
+                        }
+
+                        ActiveThreadSession.scratchBufferBuilder.RewindScratchBuffer(bufferSlice);
+                    }
+                }
+                else
+                {
+                    found =
+                        Service.SearchVector(
+                            context,
+                            indexPtr,
+                            vectorData.ReadOnlySpan,
+                            vectorData.ElementCount,
+                            delta,
+                            effectiveEF,
+                            filter,
+                            maxFilteringEffort,
+                            outputIds,
+                            outputDistances,
+                            out continuation
+                        );
+                }
             }
 
             if (found < 0)
@@ -683,17 +780,7 @@ out continuation
             // Apply post-filtering if filter is specified
             if (!filter.IsEmpty)
             {
-                // Ensure bitmap is large enough for the over-retrieved result set
-                var requiredBitmapBytes = (found + 7) >> 3;
-                if (requiredBitmapBytes > filterBitmap.Length)
-                {
-                    if (!filterBitmap.IsSpanByte)
-                    {
-                        filterBitmap.Memory.Dispose();
-                    }
-
-                    filterBitmap = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(requiredBitmapBytes), requiredBitmapBytes);
-                }
+                EnsureFilterBitmapSize(ref filterBitmap, found);
 
                 _ = ApplyPostFilter(filter, found, outputAttributes.ReadOnlySpan, filterBitmap.Span, ActiveThreadSession.scratchBufferBuilder);
             }
@@ -736,23 +823,101 @@ ref SpanByteAndMemory filterBitmap
 
             ReadIndex(indexValue, out var context, out _, out _, out var quantType, out _, out _, out _, out var indexPtr);
 
-            // When a filter is present, over-retrieve candidates from DiskANN
-            var retrieveCount = !filter.IsEmpty ? maxFilteringEffort : count;
-            var effectiveEF = !filter.IsEmpty
-                ? Math.Max(searchExplorationFactor, maxFilteringEffort)
-                : searchExplorationFactor;
+            var effectiveEF = Math.Max(searchExplorationFactor, count);
 
-            // No point in asking for more data than the effort we'll put in
-            if (retrieveCount > effectiveEF)
+            EnsureDistanceBufferSize(ref outputDistances, count);
+            EnsureIdBufferSize(ref outputIds, count);
+
+            int found;
+            nint continuation;
+
+            if (!filter.IsEmpty)
             {
-                retrieveCount = effectiveEF;
-            }
+                // ── Inline-filtered search path ──────────────────────────
+                // Compile the filter and publish callback state for per-candidate evaluation.
+                // Output buffers were already sized for `count` above; only passing
+                // candidates are written to them, so they are not re-sized here.
+
+                // Borrow scratch space for the compiled filter program and selector ranges (rewound in finally)
+                var bufferSlice = ActiveThreadSession.scratchBufferBuilder.CreateArgSlice(
+                    TotalPoolTokens * ExprToken.Size + MaxSelectors * 2 * sizeof(int));
+                var span = MemoryMarshal.Cast<byte, ExprToken>(bufferSlice.Span);
+                var selectorBuf = MemoryMarshal.Cast<byte, (int Start, int Length)>(
+                    bufferSlice.Span.Slice(TotalPoolTokens * ExprToken.Size));
+
+                try
+                {
+                    span.Clear();
+
+                    // Carve the token scratch space into the individual compiler / runtime buffers
+                    var offset = 0;
+                    var instrBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var tuplePoolBuf = span.Slice(offset, MaxTuplePool); offset += MaxTuplePool;
+                    var tokensBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var opsStackBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var runtimePoolBuf = span.Slice(offset, MaxRuntimePool); offset += MaxRuntimePool;
+                    var extractedFields = span.Slice(offset, MaxSelectors); offset += MaxSelectors;
+                    var stackBuf = span.Slice(offset, StackCapacity);
+
+                    var instrCount = ExprCompiler.TryCompile(filter, instrBuf, tuplePoolBuf, tokensBuf, opsStackBuf, out var tupleCount, out _);
+                    if (instrCount < 0)
+                    {
+                        // NOTE(review): the vector path also sets errorMsg ("ERR Compiling filter failed") here — confirm whether this path should too
+                        outputDistances.Length = 0;
+                        filterBitmap.Length = 0;
+                        outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        return VectorManagerResult.BadParams;
+                    }
 
-            EnsureDistanceBufferSize(ref outputDistances, retrieveCount);
-            EnsureIdBufferSize(ref outputIds, retrieveCount);
+                    var selectorCount = GetSelectorRanges(instrBuf[..instrCount], instrCount, filter, selectorBuf);
+
+                    var filterState = new InlineFilterState
+                    {
+                        InstrBuf = instrBuf[..instrCount],
+                        TuplePoolBuf = tuplePoolBuf[..tupleCount],
+                        RuntimePoolBuf = runtimePoolBuf,
+                        ExtractedFields = extractedFields[..Math.Max(selectorCount, 1)],
+                        StackBuf = stackBuf,
+                        SelectorRanges = selectorBuf[..selectorCount],
+                        FilterBytes = filter,
+                    };
+
+                    // InlineFilterState is a ref struct, so will remain on stack for the SearchElement call.
+                    // Save a pointer off so it's easy to grab InlineFilterState in callbacks.
+                    unsafe
+                    {
+#pragma warning disable CS8500 // InlineFilterState only contains unmanaged types or spans of pinned arrays, this is safe
+                        InlineFilterStatePtr = &filterState;
+#pragma warning restore CS8500
+                    }
 
-            var found =
-                Service.SearchElement(
+                    found = Service.SearchElement(
+                        context,
+                        indexPtr,
+                        element,
+                        delta,
+                        effectiveEF,
+                        filter,
+                        maxFilteringEffort,
+                        outputIds,
+                        outputDistances,
+                        out continuation
+                    );
+                }
+                finally
+                {
+                    // Clear the callback state pointer first: it must never outlive the scratch space its spans reference
+                    unsafe
+                    {
+                        InlineFilterStatePtr = null;
+                    }
+
+                    ActiveThreadSession.scratchBufferBuilder.RewindScratchBuffer(bufferSlice);
+                }
+            }
+            else
+            {
+                found =
+                    Service.SearchElement(
                     context,
                     indexPtr,
                     element,
@@ -762,8 +927,9 @@ ref SpanByteAndMemory filterBitmap
                     maxFilteringEffort,
                     outputIds,
                     outputDistances,
-                    out var continuation
-                );
+                    out continuation
+                    );
+            }
 
             if (found < 0)
             {
@@ -780,17 +946,7 @@ out var continuation
             // Apply post-filtering if filter is specified
             if (!filter.IsEmpty)
             {
-                // Ensure bitmap is large enough for the over-retrieved result set
-                var requiredBitmapBytes = (found + 7) >> 3;
-                if (requiredBitmapBytes > filterBitmap.Length)
-                {
-                    if (!filterBitmap.IsSpanByte)
-                    {
-                        filterBitmap.Memory.Dispose();
-                    }
-
-                    filterBitmap = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(requiredBitmapBytes), requiredBitmapBytes);
-                }
+                EnsureFilterBitmapSize(ref filterBitmap, found);
 
                 _ = ApplyPostFilter(filter, found, outputAttributes.ReadOnlySpan, filterBitmap.Span, ActiveThreadSession.scratchBufferBuilder);
             }
diff --git a/test/standalone/Garnet.test.extensions/DiskANN/DiskANNServiceTests.cs b/test/standalone/Garnet.test.extensions/DiskANN/DiskANNServiceTests.cs
index 67db2b1b7a8..0e9f12c72ad 100644
--- a/test/standalone/Garnet.test.extensions/DiskANN/DiskANNServiceTests.cs
+++ b/test/standalone/Garnet.test.extensions/DiskANN/DiskANNServiceTests.cs
@@ -23,6 +23,7 @@ public class DiskANNServiceTests : TestBase
         private delegate byte WriteCallbackDelegate(ulong context, nint keyData, nuint keyLength, nint writeData, nuint writeLength);
         private delegate byte DeleteCallbackDelegate(ulong context, nint keyData, nuint keyLength);
         private delegate byte ReadModifyWriteCallbackDelegate(ulong context, nint keyData, nuint keyLength, nuint writeLength, nint dataCallback, nint dataCallbackContext);
+        private delegate byte InlineFilterCallbackDelegate(ulong context, uint internalId);
 
         private sealed class ContextAndKeyComparer : IEqualityComparer<(ulong Context, byte[] Data)>
         {
@@ -158,17 +159,24 @@ unsafe byte ReadModifyWriteCallback(ulong context, nint keyData, nuint keyLength
                 return 1;
             }
 
+            // Stub inline-filter callback handed to the native index:
+            // ignores its arguments and reports every candidate as passing (1).
+            unsafe byte InlineFilterCallback(ulong context, uint internalId)
+                => 1;
+
             ReadCallbackDelegate readDel = ReadCallback;
             WriteCallbackDelegate writeDel = WriteCallback;
             DeleteCallbackDelegate deleteDel = DeleteCallback;
             ReadModifyWriteCallbackDelegate rmwDel = ReadModifyWriteCallback;
+            InlineFilterCallbackDelegate filterDel = InlineFilterCallback;
 
             var readFuncPtr = Marshal.GetFunctionPointerForDelegate(readDel);
             var writeFuncPtr = Marshal.GetFunctionPointerForDelegate(writeDel);
             var deleteFuncPtr = Marshal.GetFunctionPointerForDelegate(deleteDel);
             var rmwFuncPtr = Marshal.GetFunctionPointerForDelegate(rmwDel);
+            var filterFuncPtr = Marshal.GetFunctionPointerForDelegate(filterDel);
 
-            var rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr);
+            var rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr, filterFuncPtr);
 
             Span<byte> id = [0, 1, 2, 3];
             Span<byte> elem = Enumerable.Range(0, 75).Select(static x => (byte)x).ToArray();
@@ -353,17 +361,25 @@ unsafe byte ReadModifyWriteCallback(ulong context, nint keyData, nuint keyLength
                 return 1;
             }
 
+            // Stub inline-filter callback handed to the native index:
+            // ignores its arguments and reports every candidate
+            // as passing (1).
+            unsafe byte InlineFilterCallback(ulong context, uint internalId)
+                => 1;
+
             ReadCallbackDelegate readDel = ReadCallback;
             WriteCallbackDelegate writeDel = WriteCallback;
             DeleteCallbackDelegate deleteDel = DeleteCallback;
             ReadModifyWriteCallbackDelegate rmwDel = ReadModifyWriteCallback;
+            InlineFilterCallbackDelegate filterDel = InlineFilterCallback;
 
             var readFuncPtr = Marshal.GetFunctionPointerForDelegate(readDel);
             var writeFuncPtr = Marshal.GetFunctionPointerForDelegate(writeDel);
             var deleteFuncPtr = Marshal.GetFunctionPointerForDelegate(deleteDel);
             var rmwFuncPtr = Marshal.GetFunctionPointerForDelegate(rmwDel);
+            var filterFuncPtr = Marshal.GetFunctionPointerForDelegate(filterDel);
 
-            var rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr);
+            var rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr, filterFuncPtr);
 
             Span<byte> id = [0, 1, 2, 3];
             Span<byte> elem = Enumerable.Range(0, 75).Select(static x => (byte)x).ToArray();
@@ -408,7 +424,7 @@ unsafe byte ReadModifyWriteCallback(ulong context, nint keyData, nuint keyLength
             {
                 NativeDiskANNMethods.drop_index(Context, rawIndex);
 
-                rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr);
+                rawIndex = NativeDiskANNMethods.create_index(Context, 75, 0, VectorQuantType.XNoQuant_U8, VectorDistanceMetricType.L2, 10, 10, readFuncPtr, writeFuncPtr, deleteFuncPtr, rmwFuncPtr, filterFuncPtr);
             }
 
             // Search value
diff --git a/test/standalone/Garnet.test.vectorset/RespVectorSetTests.cs b/test/standalone/Garnet.test.vectorset/RespVectorSetTests.cs
index cfb6b50dc4c..98f7f555f0e 100644
--- a/test/standalone/Garnet.test.vectorset/RespVectorSetTests.cs
+++ b/test/standalone/Garnet.test.vectorset/RespVectorSetTests.cs
@@ -867,6 +867,236 @@ public void VSIMWithAdvancedFilteringELEWithoutWithAttribs()
             ClassicAssert.AreEqual(2, res3.Length, "ELE + FILTER without WITHATTRIBS: arithmetic and comparison");
         }
 
+        [Test]
+        public void VSIMBadFilters()
+        {
+            const string VectorSet = "vs";
+            const string CompileErr = "ERR Compiling filter failed";
+
+            using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+            var db = redis.GetDatabase(0);
+
+            _ = db.KeyDelete(VectorSet);
+
+            // Seed five 3-dimensional NOQUANT elements with 4-byte ids 0..4:
+            //   ids 0..2  -> valid JSON attributes (year + genre)
+            //   id  3     -> malformed JSON attribute
+            //   id  4     -> no SETATTR at all
+            var add0 = db.Execute("VADD", [VectorSet, "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, "NOQUANT", "SETATTR", "{\"year\":1980,\"genre\":\"action\"}"]);
+            ClassicAssert.AreEqual(1, (int)add0);
+            var add1 = db.Execute("VADD", [VectorSet, "VALUES", "3", "1.1", "2.1", "3.1", new byte[] { 0, 0, 0, 1 }, "NOQUANT", "SETATTR", "{\"year\":1990,\"genre\":\"drama\"}"]);
+            ClassicAssert.AreEqual(1, (int)add1);
+            var add2 = db.Execute("VADD", [VectorSet, "VALUES", "3", "1.2", "2.2", "3.2", new byte[] { 0, 0, 0, 2 }, "NOQUANT", "SETATTR", "{\"year\":2000,\"genre\":\"sci-fi\"}"]);
+            ClassicAssert.AreEqual(1, (int)add2);
+            var add3 = db.Execute("VADD", [VectorSet, "VALUES", "3", "1.3", "2.3", "3.3", new byte[] { 0, 0, 0, 3 }, "NOQUANT", "SETATTR", "{not-valid-json"]);
+            ClassicAssert.AreEqual(1, (int)add3);
+            var add4 = db.Execute("VADD", [VectorSet, "VALUES", "3", "1.4", "2.4", "3.4", new byte[] { 0, 0, 0, 4 }, "NOQUANT"]);
+            ClassicAssert.AreEqual(1, (int)add4);
+
+            // ── Section A: compile-time errors ─────────────────────────────────
+            // Every entry below must make VSIM throw with exactly the CompileErr message.
+            (string Filter, string Why)[] badFilters =
+            [
+                ("   ", "whitespace-only filter (compiler sees zero tokens)"),
+                ("(.year > 1980", "unclosed opening paren"),
+                (".year > 1980)", "extra closing paren"),
+                ("()", "empty parens with no expression"),
+                (".genre == \"action", "unterminated double-quoted string"),
+                (".genre == 'action", "unterminated single-quoted string"),
+                (". > 1", "bare-dot selector with no field name"),
+                ("> 1980", "binary operator with no left operand"),
+                (".year >", "binary operator with no right operand"),
+                (".year > > 1980", "two consecutive binary operators"),
+                (".year 1980", "two consecutive operands with no operator"),
+                (".year > 1.2.3", "malformed number literal"),
+                ("foobar", "unknown identifier"),
+                ("@ > 1", "character not allowed in any token"),
+                (".x in [1, 2", "unterminated tuple literal"),
+                (".x in [1 2]", "tuple elements without a comma separator"),
+                ("not", "unary 'not' with no operand"),
+                ("in [1, 2]", "'in' operator with no left operand"),
+                (".x in", "'in' operator with no right operand"),
+                (">", "naked binary operator"),
+            ];
+
+            foreach (var (filter, why) in badFilters)
+            {
+                var exc = ClassicAssert.Throws<RedisServerException>(
+                    () => db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", filter, "COUNT", "10"]),
+                    $"Expected compile failure for filter '{filter}' ({why})");
+                ClassicAssert.AreEqual(CompileErr, exc.Message, $"Wrong error message for filter '{filter}' ({why})");
+            }
+
+            // ── Section B: documented "skip silently" behavior ─────────────────
+            // Per the filter-expressions docs: "If a field is missing or invalid,
+            // the element is skipped without error." None of the queries below
+            // should raise an exception.
+
+            // Empty FILTER string is expected to be treated as no filter at all
+            // (length-0 check before compile), so all 5 seeded elements come back.
+            var emptyFilter = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", "", "COUNT", "10"]);
+            ClassicAssert.AreEqual(5, emptyFilter.Length, "Empty FILTER string should behave as no filter");
+
+            // Filter referencing a field no element has -> 0 results, no error.
+            var missingField = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".nonexistent > 5", "COUNT", "10"]);
+            ClassicAssert.AreEqual(0, missingField.Length, "Filter on a non-existent field should return zero results, not an error");
+
+            // Type-mismatched comparisons must not raise. Exact result count
+            // depends on whether the runner skips or coerces, which the spec
+            // leaves unspecified, so we only assert "no error" and that the
+            // result count does not exceed the 5 seeded elements.
+            var numCmpString = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".genre > 5", "COUNT", "10"]);
+            ClassicAssert.IsNotNull(numCmpString, "Numeric comparison against a string field must not raise");
+            ClassicAssert.LessOrEqual(numCmpString.Length, 5);
+
+            var stringEqOnNum = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year == \"hello\"", "COUNT", "10"]);
+            ClassicAssert.IsNotNull(stringEqOnNum, "Comparing a numeric field to a string literal must not raise");
+            ClassicAssert.LessOrEqual(stringEqOnNum.Length, 5);
+
+            // A permissive valid filter should match the 3 well-formed elements
+            // (ids 0..2, years 1980/1990/2000) and silently skip the malformed-JSON
+            // (id 3) and no-attr (id 4) elements, covering both skip cases at once.
+            var validFilter = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year >= 1980", "COUNT", "10"]);
+            ClassicAssert.AreEqual(3, validFilter.Length, "Only the 3 well-formed elements should match; malformed-JSON and no-attr elements must be skipped silently");
+            var matchedIds = new HashSet<byte[]>(validFilter, ByteArrayComparer.Instance);
+            ClassicAssert.IsTrue(matchedIds.Contains([0, 0, 0, 0]), "id 0 (valid attrs) should be in results");
+            ClassicAssert.IsTrue(matchedIds.Contains([0, 0, 0, 1]), "id 1 (valid attrs) should be in results");
+            ClassicAssert.IsTrue(matchedIds.Contains([0, 0, 0, 2]), "id 2 (valid attrs) should be in results");
+            ClassicAssert.IsFalse(matchedIds.Contains([0, 0, 0, 3]), "id 3 (malformed JSON) should be silently skipped");
+            ClassicAssert.IsFalse(matchedIds.Contains([0, 0, 0, 4]), "id 4 (no SETATTR) should be silently skipped");
+        }
+
+        [Test]
+        public void VSIMComplexJsonAttributes()
+        {
+            const string VectorSet = "vs";
+
+            using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+            var db = redis.GetDatabase(0);
+
+            _ = db.KeyDelete(VectorSet);
+
+            // Seed 11 elements covering nested objects, booleans, null, arrays,
+            // non-object top-level JSON, empty objects, dash-in-field-name, and
+            // same-named top-level vs nested fields.
+            //
+            //   id 0  -> top-level year + nested meta.director
+            //   id 1  -> year only exists at nested depth
+            //   id 2  -> top-level boolean true
+            //   id 3  -> top-level boolean false
+            //   id 4  -> top-level null
+            //   id 5  -> top-level number array
+            //   id 6  -> non-object top-level JSON (whole attr is an array)
+            //   id 7  -> empty object
+            //   id 8  -> field name contains a dash
+            //   id 9  -> same-named field both top-level (1980) and nested (2020)
+            //   id 10 -> top-level string array + nested object value
+            (byte[] Id, string Attr)[] seed =
+            [
+                ([0, 0, 0, 0],  "{\"year\":1980,\"meta\":{\"director\":\"Spielberg\"}}"),
+                ([0, 0, 0, 1],  "{\"meta\":{\"year\":1980}}"),
+                ([0, 0, 0, 2],  "{\"active\":true}"),
+                ([0, 0, 0, 3],  "{\"active\":false}"),
+                ([0, 0, 0, 4],  "{\"year\":null}"),
+                ([0, 0, 0, 5],  "{\"scores\":[1,2,3]}"),
+                ([0, 0, 0, 6],  "[1,2,3]"),
+                ([0, 0, 0, 7],  "{}"),
+                ([0, 0, 0, 8],  "{\"year-old\":1980}"),
+                ([0, 0, 0, 9],  "{\"year\":1980,\"nested\":{\"year\":2020}}"),
+                ([0, 0, 0, 10], "{\"tags\":[\"classic\"],\"director\":{\"name\":\"Spielberg\"}}"),
+            ];
+
+            for (var i = 0; i < seed.Length; i++)
+            {
+                var (id, attr) = seed[i];
+                // Spread the vectors slightly so cosine/L2 doesn't collapse them; format culture-invariantly so the decimal separator is always '.'.
+                var v0 = (1.0f + i * 0.1f).ToString(System.Globalization.CultureInfo.InvariantCulture);
+                var v1 = (2.0f + i * 0.1f).ToString(System.Globalization.CultureInfo.InvariantCulture);
+                var v2 = (3.0f + i * 0.1f).ToString(System.Globalization.CultureInfo.InvariantCulture);
+                var res = db.Execute("VADD", [VectorSet, "VALUES", "3", v0, v1, v2, id, "NOQUANT", "SETATTR", attr]);
+                ClassicAssert.AreEqual(1, (int)res, $"VADD for id {i} should succeed even with unusual attribute shape");
+            }
+
+            // Sanity: all 11 elements made it into the set (VINFO replies with alternating name/value entries).
+            var info = (RedisValue[])db.Execute("VINFO", [VectorSet]);
+            var infoMap = new Dictionary<string, string>();
+            for (var i = 0; i < info.Length; i += 2)
+                infoMap[info[i]] = info[i + 1];
+            ClassicAssert.AreEqual("11", infoMap["size"], "All 11 elements must be present");
+
+            // ── Case 1 + 9: top-level .year is visible; nested .year is not ───
+            // Filter .year > 1900 should match id 0 and id 9 (both have top-level
+            // year 1980). It must NOT match id 1 (nested-only) or id 4 (null).
+            var byYear = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 1900", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 0], [0, 0, 0, 9]), byYear, "Top-level .year > 1900 should match only ids 0 and 9");
+
+            // ── Case 9 specifically: nested .year=2020 must be invisible ──────
+            var byYear2000 = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 2000", "COUNT", "20"]));
+            ClassicAssert.AreEqual(0, byYear2000.Count, ".year > 2000 must not see the nested year=2020 in id 9");
+
+            var byYearRange = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 1900 and .year < 2000", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 0], [0, 0, 0, 9]), byYearRange, "Range filter should still see only top-level .year for ids 0 and 9");
+
+            // ── Case 1 sub: top-level field whose value is an object is unusable
+            // id 0's .meta and id 1's .meta are objects. Comparing to a string
+            // must yield 0 matches without raising.
+            var metaEq = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".meta == \"Spielberg\"", "COUNT", "20"]);
+            ClassicAssert.AreEqual(0, metaEq.Length, "Equality against an object-valued top-level field must yield 0 results");
+
+            // Same idea for case 10: .director is an object on id 10.
+            var directorEq = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".director == \"Spielberg\"", "COUNT", "20"]);
+            ClassicAssert.AreEqual(0, directorEq.Length, "Equality against object-valued .director must yield 0 results");
+
+            // ── Case 3: top-level booleans coerce to 1 / 0 ────────────────────
+            var activeTrue = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".active == 1", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 2]), activeTrue, ".active == 1 should match only the element whose JSON value is true");
+
+            var activeFalse = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".active == 0", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 3]), activeFalse, ".active == 0 should match only the element whose JSON value is false");
+
+            // ── Case 4: top-level null does not match numeric > comparisons ───
+            // (.year > 5 with year=null: id 4 must NOT appear; ids 0 and 9 do.)
+            var yearGt5 = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 5", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 0], [0, 0, 0, 9]), yearGt5, ".year > 5 must skip the null-valued id 4");
+
+            // ── Case 5: top-level number array works with `in`, fails > silently
+            var inHit = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", "2 in .scores", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 5]), inHit, "2 in .scores should match only id 5");
+
+            var inMiss = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", "99 in .scores", "COUNT", "20"]);
+            ClassicAssert.AreEqual(0, inMiss.Length, "99 in .scores should match nothing");
+
+            var arrAsNum = (byte[][])db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".scores > 5", "COUNT", "20"]);
+            ClassicAssert.AreEqual(0, arrAsNum.Length, "Numeric comparison against an array-valued field must yield 0 results without raising");
+
+            // ── Case 8: selector greedily includes '-' so .year-old is one name
+            // The filter must NOT be interpreted as `.year - old > 1900`.
+            var yearOld = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year-old > 1900", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 8]), yearOld, ".year-old must be treated as a single selector and match only id 8");
+
+            // ── Case 10: top-level string array still works with `in` ─────────
+            var classicInTags = MatchedIds(db.Execute("VSIM", [VectorSet, "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", "\"classic\" in .tags", "COUNT", "20"]));
+            AssertSameIds(ExpectIds([0, 0, 0, 10]), classicInTags, "\"classic\" in .tags should match only id 10");
+
+            // ── Case 6 + 7 (implicit): the exact-set assertions above already show
+            // that ids 6 (non-object top-level JSON) and 7 (empty object) never
+            // appear in any field-based result; spot-check one result set explicitly.
+            ClassicAssert.IsFalse(yearOld.Contains([0, 0, 0, 6]), "Non-object top-level JSON (id 6) must be silently skipped, not error");
+            ClassicAssert.IsFalse(yearOld.Contains([0, 0, 0, 7]), "Empty-object JSON (id 7) must be silently skipped, not error");
+
+            // Wraps a VSIM reply (array of ids) in a set keyed with ByteArrayComparer.Instance.
+            static HashSet<byte[]> MatchedIds(RedisResult res)
+                => new((byte[][])res, ByteArrayComparer.Instance);
+
+            static HashSet<byte[]> ExpectIds(params byte[][] ids)
+                => new(ids, ByteArrayComparer.Instance);
+
+            static void AssertSameIds(HashSet<byte[]> expected, HashSet<byte[]> actual, string message)
+                => ClassicAssert.IsTrue(expected.SetEquals(actual), $"{message} (expected {Format(expected)}, got {Format(actual)})");
+
+            static string Format(HashSet<byte[]> set)
+                => "{" + string.Join(", ", set.Select(static b => "[" + string.Join(",", b) + "]")) + "}";
+        }
+
         [Test]
         public void VSIMErrors()
         {
@@ -879,9 +1109,9 @@ public void VSIMErrors()
             var res1 = db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]);
             ClassicAssert.AreEqual(1, (int)res1);
 
-            // FILTER-EF exceeding MaxRetrieveCount must be rejected
+            // FILTER-EF exceeding MaxFilteringScaleFactor must be rejected
             var exc1 = ClassicAssert.Throws<RedisServerException>(() => db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 1950", "FILTER-EF", "999999999", "COUNT", "3", "WITHATTRIBS"]));
-            ClassicAssert.AreEqual("ERR FILTER-EF must be an integer between 0 and 100000000", exc1.Message);
+            ClassicAssert.AreEqual("ERR FILTER-EF must be an integer between 4 and 256", exc1.Message);
 
             // COUNT exceeding MaxRetrieveCount must be rejected
             var exc2 = ClassicAssert.Throws<RedisServerException>(() => db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", "COUNT", "999999999"]));
@@ -896,32 +1126,6 @@ public void VSIMErrors()
             ClassicAssert.AreEqual("ERR EF must be an integer between 1 and 1000000", exc4.Message);
         }
 
-        [Test]
-        public void VSIMWithDefaultFilterEFOverflowDoesNotCrash()
-        {
-            using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
-            var db = redis.GetDatabase(0);
-
-            _ = db.KeyDelete("foo");
-
-            // Add a vector with attributes so FILTER can be used
-            var res1 = db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]);
-            ClassicAssert.AreEqual(1, (int)res1);
-
-            // Verify that a moderate COUNT with FILTER (no explicit FILTER-EF) works correctly.
-            // The default maxFilteringEffort = count*200. With count=1000, that's 200,000 which is safe.
-            // This validates the code path through the (long) cast fix without hitting resource limits.
-            var res = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 1950", "COUNT", "1000", "WITHATTRIBS"]);
-            ClassicAssert.AreEqual(2, res.Length, "Should return 1 result (1 pair of id+attribute) for year > 1950");
-
-            // Verify that COUNT values which would overflow count*200 in int32 are rejected.
-            // 10,737,419 * 200 = 2,147,483,800 > int32.MaxValue.
-            // Our (long) cast prevents the overflow, but MaxRetrieveCount caps COUNT itself.
-            // Any COUNT above MaxRetrieveCount (~178M) is rejected at parse time.
-            var ex = Assert.Throws<RedisServerException>(() => db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", "FILTER", ".year > 1950", "COUNT", "999999999", "WITHATTRIBS"]));
-            ClassicAssert.IsTrue(ex.Message.Contains("COUNT must be an integer between"), $"Expected COUNT validation error, got: {ex.Message}");
-        }
-
         private static byte[] SeedMoviesForAdvancedFiltering(IDatabase db)
         {
             _ = db.KeyDelete("movies");
diff --git a/website/docs/commands/vector-sets.md b/website/docs/commands/vector-sets.md
index c4f1583bea6..bd4000ff299 100644
--- a/website/docs/commands/vector-sets.md
+++ b/website/docs/commands/vector-sets.md
@@ -10,7 +10,7 @@ slug: vector-sets
 Vector Sets are a Garnet data type backed by the [DiskANN](https://github.com/microsoft/DiskANN) algorithm — Microsoft's
 graph-based approximate nearest-neighbor (ANN) index — coupled with the scalable storage performance of Tsavorite,
 Garnet's storage engine for holding the state. They let you insert high-dimensional vector embeddings under a Garnet
-key and perform fast similarity search over them, with optional JSON attributes for post-filtering.
+key and perform fast similarity search over them, with optional JSON attributes for filtering.
 
 The command surface is inspired by Redis' `V*` Vector Set commands but is implemented natively on top of Garnet's
 storage stack and DiskANN. Some commands are Garnet-specific extensions (prefixed `X*`).
@@ -381,8 +381,8 @@ The query's effective dimension must match the index's `input-vector-dimensions`
 | `COUNT n` | `10` | Maximum number of results to return. Must be in `[0, 100000000]`. |
 | `EPSILON delta` | `2.0` | DiskANN `L_search` epsilon — controls how aggressively the graph is explored beyond the current best. |
 | `EF n` | `100` | Search-time exploration factor (`L_search` candidate-list size). Must be in `[1, 1000000]`. |
-| `FILTER expr` | _none_ | Post-filter results by an attribute expression (see [Filter Expressions](#filter-expressions)). |
-| `FILTER-EF n` | `min(COUNT * 200, 100000000)` | Maximum number of nearest neighbors to **inspect** before filtering. Must be in `[0, 100000000]`. |
+| `FILTER expr` | _none_ | Filter results by an attribute expression (see [Filter Expressions](#filter-expressions)). |
+| `FILTER-EF n` | `16` | Scale factor for adaptive inline filter search. Must be in `[4, 256]`. This controls how high the EF will scale based on selectivity. |
 | `TRUTH` | _off_ | Accepted for compatibility; exact / brute-force search is not yet wired up. |
 | `NOTHREAD` | _off_ | Accepted for compatibility; currently ignored (search always runs on the calling thread). |
 
@@ -423,7 +423,7 @@ VSIM movies VALUES 3 0.0 0.0 0.0 \
 
 ### Filter Expressions
 
-`VSIM ... FILTER <expr>` post-filters candidates by their JSON attribute. The expression is compiled once and
+`VSIM ... FILTER <expr>` filters candidates by their JSON attribute. The expression is compiled once and
 evaluated against each candidate's attribute.
 
 #### Syntax
@@ -634,7 +634,8 @@ Or in `garnet.conf`:
 |-------|-------|--------|
 | Maximum vector dimensions | 65,536 | `VectorManager.MaxVectorDimensions` |
 | Maximum build / search EF | 1,000,000 | `VectorManager.MaxExplorationFactor` |
-| Maximum `COUNT` / `FILTER-EF` | 100,000,000 | `VectorManager.MaxRetrieveCount` |
+| Maximum `COUNT` | 100,000,000 | `VectorManager.MaxRetrieveCount` |
+| Maximum `FILTER-EF` | 256 | `VectorManager.MaxFilteringScaleFactor` |
 | Maximum elements per Vector Set | 2³² − 1 | DiskANN limit |
 | Concurrent Vector Sets per instance | ~15 | Internal context metadata limit |
 | Empty Vector Set keys | not allowed | Returns `ERR Vector Set key cannot be empty` (preview restriction) |
diff --git a/website/docs/dev/filtered-search-design.md b/website/docs/dev/filtered-search-design.md
new file mode 100644
index 00000000000..ebea1d6db43
--- /dev/null
+++ b/website/docs/dev/filtered-search-design.md
@@ -0,0 +1,326 @@
+# Filtered Vector Search — End-to-End Design Document
+
+## 1. Motivation
+
+Garnet's vector search (`VSIM` command family) supports similarity search over DiskANN graph indexes. Users frequently need to combine similarity search with metadata filtering (e.g., "find the 10 nearest images where `year > 2020 AND genre IN ['action', 'comedy']`").
+
+### Problem with post-filtering
+
+The naive approach — fetch K results, then discard non-matching ones — suffers from two issues:
+
+1. **Overfetch waste**: To return K filtered results, you must fetch K×(1/selectivity) candidates. At 1% selectivity, that's 100× overfetch.
+2. **Recall loss**: Even with overfetch, the final result set may contain fewer than K results or miss closer matches that were pruned before the filter was applied.
+
+### Solution: Inline filtering
+
+Evaluate the filter predicate *during* graph traversal so that non-matching candidates never occupy result slots. This eliminates overfetch and improves recall for selective filters. This requires changes on both the Garnet side (attribute storage design) and the DiskANN library side (search algorithm).
+
+---
+
+## 2. Garnet-Side: Attribute Storage Design for Inline Filtering
+
+### Existing Attribute Store
+
+The existing Garnet attribute store was designed for general-purpose access — attributes are stored as **raw JSON keyed by external (user-facing) ID**. This is the natural choice for a key-value store: the user inserts a vector with key `"doc:42"` and attributes `{"year": 2021, "genre": "action"}`, so the attributes are stored under that same key. This store serves RESP command operations (e.g., `VGETATTR`) and remains unchanged.
+
+However, this store creates a mismatch with how DiskANN's graph traversal operates during inline filtering. DiskANN works entirely in **internal ID space** — every candidate is a `uint32` internal ID. To evaluate a filter using only the existing store, the callback must:
+
+1. **Read `ExternalIdMap[internal_id]`** → translate the internal ID to the external key (one Garnet store read)
+2. **Read `Attributes[external_key]`** → fetch the raw JSON payload (second Garnet store read)
+3. **Parse JSON at query time** → `ExtractFields()` runs a JSON tokenizer to locate and parse the fields referenced by the filter expression
+
+With inline filtering, this callback runs on **every candidate the graph traversal considers** (potentially thousands per query). The two store reads and JSON parsing per candidate become the dominant cost on the hot path.
+
+### Solution: Store binary attributes at the end of quantized vectors (or full-precision vectors for unquantized indexes)
+
+Instead of storing raw JSON blobs, the attributes are serialized as an optimized binary blob that is stored immediately after the most frequently used vector data (in an unquantized index, this is the full-precision vector; in a quantized index, this is the quantized vector).
+
+### Why store alongside vectors?
+
+By storing the attributes after the vector data, we can access them at the same time as the vector data, avoiding any extra key accesses. A single read lets us both compute the distance and evaluate the filter. Because vector data has a fixed length, the filter attributes are easy to locate.
+
+### Why store in binary format?
+
+Raw JSON forces parsing on every candidate at query time. Extracting a numeric field like `.year` requires scanning for the key, skipping whitespace, and parsing a number string into a double. This work is repeated identically for every candidate, every query. The JSON structure does not change between queries — this is wasted work.
+
+The binary store **shifts the cost of JSON parsing from query time to ingestion time:**
+
+- **At ingestion** (vector insert/update): JSON is parsed once and converted to binary via `ConvertJsonToBinary()`. The binary format is `[0xFF marker][field count][per-field: name_len, name, type_tag, value_len, value_bytes]`, with numbers pre-converted to 8-byte LE f64. This is a one-time cost, written to the new store alongside the existing JSON store.
+- **At query time** (per-candidate): `ExtractFieldsBinary()` performs a direct scan over length-prefixed fields. No JSON tokenizer. Field names compared as raw byte spans. Numbers read directly as f64 — no string parsing. ~10× faster than JSON extraction.
+
+Since each vector is inserted once but may be evaluated as a candidate across thousands of queries, this tradeoff — pay more at write, pay less at read — is the correct one for a read-heavy similarity search workload.
+
+### Per-candidate callback comparison
+
+```
+Without binary attribute store (2 store reads + JSON parse per candidate):
+  1. Read ExternalIdMap[internal_id] → external key       ← ID translation
+  2. Read Attributes[external_key] → JSON bytes           ← existing JSON store
+  3. ExtractFields(json, selectors) → field values         ← JSON parse at query time
+  4. ExprRunner.Run(program) → bool
+
+With binary attribute store (1 store read + binary scan per candidate):
+  1. Read BinaryAttributes[internal_id] → binary bytes     ← new store, direct lookup
+  2. ExtractFieldsBinary(binary, selectors) → field values ← pre-parsed, ~10× faster
+  3. ExprRunner.Run(program) → bool
+```
+
+### Summary of inline filter per-candidate cost
+
+| Aspect | Only external ID keyed JSON attribute store | Current change (internal ID keyed binary attribute) | Further optimization (co-locate binary attribute with vector data) |
+|--------|---------------------------------------------|---------------------------------------|----------------------------------------------|
+| Store reads per candidate | 2 (ExternalIdMap + Attributes) | 1 (Attributes only) | 0 (already accessible during traversal) |
+| ID translation | Required (internal → external) | Eliminated (keyed by internal ID) | Eliminated |
+| Field extraction | JSON parse at query time | Binary scan (~10× faster) | Binary scan (~10× faster) |
+| Parse cost paid at | Query time (per candidate, per query) | Ingestion time (once per insert) | Ingestion time (once per insert) |
+| Total per-candidate overhead | 2 reads + JSON parse + eval | 1 read + binary scan + eval | Binary scan + eval |
+
+### Further optimization: Co-locate attributes with vector data
+
+The current change still requires one Garnet store read per candidate to fetch the binary attributes by internal ID. A further optimization is to **co-locate the binary attribute payload directly after the vector data** in the same Garnet record.
+
+During graph traversal, DiskANN already accesses the vector record for each candidate to compute distances. If the binary attributes are stored as trailing bytes in the same record, the callback can read them from the data DiskANN already has a reference to — no additional store read required.
+
+```
+Current change (1 store read per candidate):
+  1. Read Attributes[internal_id] → binary bytes           ← still a separate read
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(program) → bool
+
+Co-located (0 extra store reads per candidate):
+  1. Read trailing bytes from vector record[internal_id]   ← already accessible during traversal
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(program) → bool
+```
+
+This would reduce the per-candidate cost to **zero extra store reads** — the only remaining overhead is the binary field scan and expression evaluation.
+
+### Further optimization with an attribute index: Pre-built attribute index to replace per-candidate filter evaluation
+
+If an attribute index is available (e.g., inverted indexes or roaring bitmaps built over attribute values), the filter predicate can be evaluated **at query planning time** rather than per-candidate during graph traversal. The index would produce a pre-computed set of matching internal IDs (e.g., a bitmap), which can be fed directly into DiskANN as a `GarnetFilter::Bitmap`. This replaces the per-candidate FFI callback entirely — DiskANN checks the bitmap with a single bit lookup instead of reading attributes and running the expression evaluator.
+
+This would shift the filter cost from O(candidates_visited) callback invocations to a single O(matching_vectors) bitmap construction at query start, eliminating per-candidate attribute reads and expression evaluation altogether.
+
+---
+
+## 3. DiskANN-Side: Filtered Search Algorithms
+
+The DiskANN library provides multiple search algorithms for filtered queries. All receive a filter predicate and differ in how they integrate filtering into graph traversal.
+
+### 3.1 Comparison of DiskANN Filtered Search Algorithms
+
+| Aspect                                 | Inline (w adaptive L)                                                       | BetaFilter                                                     |
+|----------------------------------------|-----------------------------------------------------------------------------|----------------------------------------------------------------|
+| Filter integration                     | Evaluate filter during search, scaling Lsearch based on sampled selectivity | Scale distances by beta factor for non-matching nodes          |
+| Data structures                        | `NeighborPriorityQueue` (sorted array)                                      | Wraps any search strategy                                      |
+| Exploration breadth at low selectivity | Bounded by the adaptive Lsearch                                             | Moderate — non-matching nodes appear farther but still compete |
+| Convergence                            | Standard greedy convergence                                                 | Standard greedy convergence                                    |
+| Adaptive budget                        | Yes                                                                         | No                                                             |
+
+#### Performance Comparison (TBD)
+
+Benchmark results on the 100K YFCC dataset comparing recall and latency are pending; our choice of inline filtering with adaptive-L was based on DiskANN benchmarks with in-memory providers across
+a range of selectivities and datasets.
+
+### 3.2 Inline w/ Adaptive-L Algorithm (Current Choice)
+
+Please see the algorithm description in DiskANN.
+
+### 3.3 Filter Mode Dispatch (Rust)
+
+**Files**: `DiskANN/diskann-garnet/src/provider.rs`, `dyn_index.rs`
+
+A filter callback is provided which DiskANN will invoke to check whether vectors match the filter expression.
+
+
+## 4. Architecture Overview
+
+```
+┌──────────────────────────────────────────────────────┐
+│  Client (RESP)                                       │
+│  VSIM key 10 VALUES vec... FILTER ".year > 2020"     │
+│         FILTER-EF 32                                 │
+└──────────┬───────────────────────────────────────────┘
+           │
+           ▼
+┌──────────────────────────────────────────────────────┐
+│  Garnet Server (C#)                                  │
+│                                                      │
+│  VectorManager.ValueSimilarity()                     │
+│    ├─ ExprCompiler.TryCompile(filter) → postfix pgm  │
+│    ├─ Pin scratch buffers, set t_inlineFilterState   │
+│    └─ DiskANNService.SearchVector(                   │
+│         ..., filterData, filterLen, maxFilterEffort) │
+└──────────┬───────────────────────────────────────────┘
+           │  P/Invoke (FFI)
+           ▼
+┌──────────────────────────────────────────────────────┐
+│  DiskANN (Rust, diskann-garnet)                      │
+│                                                      │
+│  search_vector()                                     │
+│    │  For each candidate node:                       │
+│    │    ├─ Call filterCallback(ctx, internal_id)──┐  │
+│    │    │                    ┌────────────────────┘  │
+│    │    │                    ▼                       │
+│    │    │  ┌─────────────────────────────────────┐   │
+│    │    │  │ C# InlineFilterCandidateCallback    │   │
+│    │    │  │  ├─ Read BinaryAttrs[internal_id]   │   │
+│    │    │  │  ├─ ExtractFieldsBinary(selectors)  │   │
+│    │    │  │  └─ ExprRunner.Run(program)→0/1     │   │
+│    │    │  └─────────────────────────────────────┘   │
+│    │                                                 │
+│    └─ Return top-K                                   │
+└──────────────────────────────────────────────────────┘
+```
+
+---
+
+## 5. Filter Compilation (C#)
+
+**File**: `libs/server/Resp/Vector/VectorManager.Filter.cs`
+
+### Expression Language
+
+Supports boolean expressions over JSON attributes:
+
+```
+.year > 2020 AND .genre IN ["action", "comedy"] AND NOT .archived
+```
+
+Operators: `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN`, `NOT IN`, `AND`, `OR`, `NOT`
+
+### Compilation Pipeline
+
+1. **Tokenize** — extract field selectors (`.field`), operators, literals
+2. **Shunting-yard** — convert infix to postfix via `ExprCompiler.TryCompile`
+3. **Output** — array of `ExprToken` (instruction stream) + selector ranges (unique field names referenced)
+
+### Zero-Allocation Design
+
+All compilation and evaluation buffers come from a session-local `ScratchBufferBuilder` with a fixed ~9 KB layout:
+
+| Buffer | Size | Purpose |
+|--------|------|---------|
+| `instrBuf` | 2048 B | Compiled instructions |
+| `tuplePoolBuf` | 2048 B | Tuple literal storage |
+| `tokensBuf` | 1024 B | Tokenizer workspace |
+| `opsStackBuf` | 512 B | Shunting-yard operator stack |
+| `runtimePoolBuf` | 1024 B | IN-operator array expansion |
+| `extractedFields` | 1024 B | Field extraction output |
+| `stackBuf` | 1024 B | Expression evaluation stack |
+
+No heap allocations occur during filter compilation or evaluation.
+
+---
+
+## 6. FFI Callback Protocol
+
+### Registration
+
+At index creation (`CreateIndex` / `RecreateIndex`), C# passes `InlineFilterCallbackPtr` to Rust:
+
+```csharp
+delegate* unmanaged[Cdecl]<ulong, uint, byte> InlineFilterCallbackPtr
+    = &InlineFilterCandidateCallbackImpl;
+```
+
+Rust stores this in its `Callbacks` struct alongside read/write/delete callbacks.
+
+### Per-Search Setup (C# side)
+
+Before each FFI search call:
+
+1. Compile filter expression
+2. Pin all scratch buffers
+3. Populate `[ThreadStatic] t_inlineFilterState` with pointers to:
+   - Compiled instructions
+   - Tuple pool
+   - Selector ranges
+   - Filter bytes
+   - Garnet storage context
+4. Call `Service.SearchVector(...)` with `filter_data`, `filter_len`, `max_filtering_effort`
+
+### Per-Candidate Callback (Rust → C#)
+
+```
+Rust calls: filterCallback(context: u64, internal_id: u32) → u8
+                                                            └─ 1 = pass, 0 = reject
+
+C# InlineFilterCandidateCallbackImpl:
+  1. Read BinaryAttributes[internal_id] → binary bytes (via ReadSizeUnknown)
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(instructions, fields) → bool
+  4. Return 1 or 0
+```
+
+### Thread Safety
+
+- DiskANN search is single-threaded per query
+- `[ThreadStatic]` state ensures no cross-query interference
+- `ActiveThreadSession` is set before FFI and cleared on lock release
+
+---
+
+## 7. Attribute Extraction
+
+**File**: `libs/server/Resp/Vector/AttributeExtractor.cs`
+
+Two storage formats are supported:
+
+### JSON Format
+
+Default format for the existing external ID keyed store. Attributes stored as raw JSON (e.g., `{"year": 2021, "genre": "action"}`). `ExtractFields()` performs a single-pass scan, matching field names against selectors and parsing values into `ExprToken`.
+
+### Binary Format
+
+Used by the new internal ID keyed store. Pre-extracted binary layout: `[0xFF marker][field count][per-field: name_len, name, type_tag, value_len, value_bytes]`. Numbers stored as 8-byte LE f64. `ExtractFieldsBinary()` is ~10× faster than JSON extraction. Conversion via `ConvertJsonToBinary()`.
+
+Both paths are zero-allocation, operating on `ReadOnlySpan<byte>`.
+
+---
+
+## 8. End-to-End Data Flow
+
+```
+1. VSIM command parsed → filter bytes + maxFilteringEffort extracted
+
+2. VectorManager.ValueSimilarity()
+   ├─ filter non-empty → inline filtered path
+   ├─ ExprCompiler.TryCompile(filter) → postfix program
+   ├─ Pin buffers, populate t_inlineFilterState
+   └─ DiskANNService.SearchVector(query, k, ef, filterData, filterLen, maxEffort)
+
+3. P/Invoke → Rust search_vector()
+   ├─ Detect GarnetFilter::Callback
+   ├─ Create TwoQueueSearch with GarnetFilterProvider
+   └─ Run two-queue algorithm:
+       For each candidate:
+         ├─ Compute distance
+         ├─ Insert into candidates min-heap
+         ├─ FFI callback → C# evaluates filter → accept/reject
+         └─ If accepted → insert into filtered_results max-heap
+
+4. Return top-K internal IDs + distances (only matching candidates)
+
+5. Back in C# VectorManager:
+   ├─ Map internal IDs → external keys via ExternalIdMap
+   ├─ Optionally fetch attributes for results
+   └─ Serialize RESP response to client
+```
+
+---
+
+## 9. Performance Characteristics
+
+### Compared to Post-Filtering
+
+| Aspect                    | Post-Filter                  | Inline with Adaptive-L                                 |
+|---------------------------|------------------------------|--------------------------------------------------------|
+| Overfetch required        | Yes (K/selectivity)          | No                                                     |
+| Recall at low selectivity | Poor (misses nearby matches) | High (explores broadly)                                |
+| Per-candidate cost        | Distance only                | Distance + FFI callback + attribute read + filter eval |
+| Memory                    | Large result buffers         | Fixed-size heaps                                       |
+
+### Tuning
+
+- Use `FILTER-EF` to control how far Lsearch is scaled up when selectivity is low. Defaults to `16`; the valid range is `[4, 256]`.