apache · Rich-T-kid · Jun 23, 2026 · Jun 23, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml
@@ -32,7 +32,7 @@ arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 # Cast is needed to work around https://github.com/apache/arrow-rs/issues/3389
 arrow-cast = { workspace = true }
-arrow-data = { workspace = true, optional = true }
+arrow-data = { workspace = true }
 arrow-ipc = { workspace = true }
 arrow-ord = { workspace = true, optional = true }
 arrow-row = { workspace = true, optional = true }
@@ -62,7 +62,7 @@ all-features = true
 
 [features]
 default = []
-flight-sql = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"]
+flight-sql = ["dep:arrow-arith", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"]
 # TODO: Remove in the next release
 flight-sql-experimental = ["flight-sql"]
 tls-aws-lc= ["tonic/tls-aws-lc"]

diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs
@@ -18,6 +18,7 @@
 use crate::{FlightData, trailers::LazyTrailers, utils::flight_data_to_arrow_batch};
 use arrow_array::{ArrayRef, RecordBatch};
 use arrow_buffer::Buffer;
+use arrow_data::UnsafeFlag;
 use arrow_schema::{Schema, SchemaRef};
 use bytes::Bytes;
 use futures::{Stream, StreamExt, ready, stream::BoxStream};
@@ -228,6 +230,8 @@ pub struct FlightDataDecoder {
     state: Option<FlightStreamState>,
     /// Seen the end of the inner stream?
     done: bool,
+    /// Skip validation of decoded arrays (UTF-8, offset bounds, null counts).
+    skip_validation: UnsafeFlag,
 }
 
 impl Debug for FlightDataDecoder {
@@ -236,6 +240,7 @@ impl Debug for FlightDataDecoder {
             .field("response", &"<stream>")
             .field("state", &self.state)
             .field("done", &self.done)
+            .field("skip_validation", &self.skip_validation)
             .finish()
     }
 }
@@ -250,9 +255,21 @@ impl FlightDataDecoder {
             state: None,
             response: response.boxed(),
             done: false,
+            skip_validation: UnsafeFlag::new(),
         }
     }
 
+    /// Disables validation of decoded arrays (UTF-8, offset bounds, null counts).
+    ///
+    /// # Safety
+    ///
+    /// Invalid data may cause undefined behavior. Only use for trusted senders.
+    pub unsafe fn with_skip_validation(mut self) -> Self {
+        // SAFETY: the caller upholds this function's safety contract (trusted sender).
+        unsafe { self.skip_validation.set(true) };
+        self
+    }
+
     /// Returns the current schema for this stream
     pub fn schema(&self) -> Option<&SchemaRef> {
         self.state.as_ref().map(|state| &state.schema)
@@ -323,6 +336,7 @@ impl FlightDataDecoder {
                     &data,
                     Arc::clone(&state.schema),
                     &state.dictionaries_by_field,
+                    self.skip_validation.clone(),
                 )
                 .map_err(|e| {
                     FlightError::DecodeError(format!("Error decoding ipc RecordBatch: {e}"))

diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs
@@ -18,6 +18,7 @@
 //! A FlightSQL Client [`FlightSqlServiceClient`]
 
 use arrow_buffer::Buffer;
+use arrow_data::UnsafeFlag;
 use arrow_ipc::MessageHeader;
 use arrow_ipc::convert::fb_to_schema;
 use arrow_ipc::reader::read_record_batch;
@@ -651,6 +652,7 @@ pub fn arrow_data_from_flight_data(
                 &dictionaries_by_field,
                 None,
                 &ipc_message.version(),
+                UnsafeFlag::new(),
             )?;
             Ok(ArrowFlightData::RecordBatch(record_batch))
         }

diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
@@ -23,6 +23,7 @@ use std::sync::Arc;
 
 use arrow_array::{ArrayRef, RecordBatch};
 use arrow_buffer::Buffer;
+use arrow_data::UnsafeFlag;
 use arrow_ipc::convert::fb_to_schema;
 use arrow_ipc::writer::CompressionContext;
 use arrow_ipc::{reader, root_as_message, writer, writer::IpcWriteOptions};
@@ -45,7 +46,12 @@ pub fn flight_data_to_batches(flight_data: &[FlightData]) -> Result<Vec<RecordBa
     let mut batches = vec![];
     let dictionaries_by_id = HashMap::new();
     for datum in flight_data[1..].iter() {
-        let batch = flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
+        let batch = flight_data_to_arrow_batch(
+            datum,
+            schema.clone(),
+            &dictionaries_by_id,
+            UnsafeFlag::new(),
+        )?;
         batches.push(batch);
     }
     Ok(batches)
@@ -56,6 +62,7 @@ pub fn flight_data_to_arrow_batch(
     data: &FlightData,
     schema: SchemaRef,
     dictionaries_by_id: &HashMap<i64, ArrayRef>,
+    skip_validation: UnsafeFlag,
 ) -> Result<RecordBatch, ArrowError> {
     // check that the data_header is a record batch message
     let message = arrow_ipc::root_as_message(&data.data_header[..])
@@ -69,13 +76,24 @@ pub fn flight_data_to_arrow_batch(
             )
         })
         .map(|batch| {
+            // Hand the body `Bytes` to `Buffer` as-is when it starts on a 64-byte
+            // boundary; otherwise build the `Buffer` from a byte slice instead.
+            // NOTE(review): presumably the first path avoids a copy and the second
+            // copies into a fresh allocation; confirm against the `Buffer` `From`
+            // impls, and confirm that 64 matches arrow's buffer alignment.
+            let buf = if data.data_body.as_ptr() as usize % 64 == 0 {
+                Buffer::from(data.data_body.clone())
+            } else {
+                Buffer::from(data.data_body.as_ref())
+            };
             reader::read_record_batch(
-                &Buffer::from(data.data_body.as_ref()),
+                &buf,
                 batch,
                 schema,
                 dictionaries_by_id,
                 None,
                 &message.version(),
+                skip_validation,
             )
         })?
 }

diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml
@@ -35,6 +35,7 @@ logging = ["tracing-subscriber"]
 
 [dependencies]
 arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json", "ffi"] }
+arrow-data = { workspace = true }
 arrow-flight = { path = "../arrow-flight", default-features = false }
 arrow-integration-test = { path = "../arrow-integration-test", default-features = false }
 clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] }

diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -249,9 +249,13 @@ async fn consume_flight_location(
         let metadata = counter.to_string().into_bytes();
         assert_eq!(metadata, data.app_metadata);
 
-        let actual_batch =
-            flight_data_to_arrow_batch(&data, actual_schema.clone(), &dictionaries_by_id)
-                .expect("Unable to convert flight data to Arrow batch");
+        let actual_batch = flight_data_to_arrow_batch(
+            &data,
+            actual_schema.clone(),
+            &dictionaries_by_id,
+            arrow_data::UnsafeFlag::new(),
+        )
+        .expect("Unable to convert flight data to Arrow batch");
 
         assert_eq!(actual_schema, actual_batch.schema());
         assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());

diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
@@ -30,6 +30,7 @@ use arrow::{
     ipc::{self, reader, writer},
     record_batch::RecordBatch,
 };
+use arrow_data::UnsafeFlag;
 use arrow_flight::{
     Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
     HandshakeRequest, HandshakeResponse, IpcMessage, PollInfo, PutResult, SchemaAsIpc,
@@ -335,6 +336,7 @@ async fn record_batch_from_message(
         dictionaries_by_id,
         None,
         &message.version(),
+        UnsafeFlag::new(),
     );
 
     arrow_batch_result

diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
@@ -146,7 +146,7 @@ impl RecordBatchDecoder<'_> {
                 let null_buffer = self.next_buffer()?;
 
                 // read the arrays for each field
-                let mut struct_arrays = vec![];
+                let mut struct_arrays = Vec::with_capacity(struct_fields.len());
                 // TODO investigate whether just knowing the number of buffers could
                 // still work
                 for struct_field in struct_fields {
@@ -557,7 +557,7 @@ impl<'a> RecordBatchDecoder<'a> {
 
         let schema = Arc::clone(&self.schema);
         if let Some(projection) = self.projection {
-            let mut arrays = vec![];
+            let mut arrays = Vec::with_capacity(projection.len());
             // project fields
             for (idx, field) in schema.fields().iter().enumerate() {
                 // A projected field can appear more than once, so collect all matching positions.
@@ -597,7 +597,7 @@ impl<'a> RecordBatchDecoder<'a> {
                 RecordBatch::try_new_with_options(schema, columns, &options)
             }
         } else {
-            let mut children = vec![];
+            let mut children = Vec::with_capacity(schema.fields().len());
             // keep track of index as lists require more than one node
             for field in schema.fields() {
                 let child = self.create_array(field, &mut variadic_counts)?;
@@ -771,11 +771,13 @@ pub fn read_record_batch(
     dictionaries_by_id: &HashMap<i64, ArrayRef>,
     projection: Option<&[usize]>,
     metadata: &MetadataVersion,
+    skip_validation: UnsafeFlag,
 ) -> Result<RecordBatch, ArrowError> {
     RecordBatchDecoder::try_new(buf, batch, schema, dictionaries_by_id, metadata)?
         .with_projection(projection)
         .with_require_alignment(false)
+        .with_skip_validation(skip_validation)
         .read_record_batch()
 }
 
 /// Read the dictionary from the buffer and provided metadata,