diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 0ee973f2a2d6..cf8debb79e73 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.21.0) + * Autogenerated by Thrift Compiler (0.23.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -420,10 +420,10 @@ int _kEncodingValues[] = { */ Encoding::PLAIN, /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the * plain type. - * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead + * For a data page use RLE_DICTIONARY instead. + * For a Dictionary page use PLAIN instead. */ Encoding::PLAIN_DICTIONARY, /** @@ -432,8 +432,9 @@ int _kEncodingValues[] = { */ Encoding::RLE, /** - * Bit packed encoding. This can only be used if the data has a known max + * DEPRECATED: Bit packed encoding. This can only be used if the data has a known max * width. Usable for definition/repetition levels encoding. + * Superseded by RLE (which is a hybrid of RLE and bit packing); see Encodings.md. */ Encoding::BIT_PACKED, /** @@ -481,10 +482,10 @@ const char* _kEncodingNames[] = { */ "PLAIN", /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the * plain type. - * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead + * For a data page use RLE_DICTIONARY instead. + * For a Dictionary page use PLAIN instead. */ "PLAIN_DICTIONARY", /** @@ -493,8 +494,9 @@ const char* _kEncodingNames[] = { */ "RLE", /** - * Bit packed encoding. This can only be used if the data has a known max + * DEPRECATED: Bit packed encoding. This can only be used if the data has a known max * width. 
Usable for definition/repetition levels encoding. + * Superseded by RLE (which is a hybrid of RLE and bit packing); see Encodings.md. */ "BIT_PACKED", /** @@ -684,7 +686,7 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj) } -void swap(SizeStatistics &a, SizeStatistics &b) { +void swap(SizeStatistics &a, SizeStatistics &b) noexcept { using ::std::swap; swap(a.unencoded_byte_array_data_bytes, b.unencoded_byte_array_data_bytes); swap(a.repetition_level_histogram, b.repetition_level_histogram); @@ -801,7 +803,7 @@ std::ostream& operator<<(std::ostream& out, const BoundingBox& obj) } -void swap(BoundingBox &a, BoundingBox &b) { +void swap(BoundingBox &a, BoundingBox &b) noexcept { using ::std::swap; swap(a.xmin, b.xmin); swap(a.xmax, b.xmax); @@ -926,7 +928,7 @@ std::ostream& operator<<(std::ostream& out, const GeospatialStatistics& obj) } -void swap(GeospatialStatistics &a, GeospatialStatistics &b) { +void swap(GeospatialStatistics &a, GeospatialStatistics &b) noexcept { using ::std::swap; swap(a.bbox, b.bbox); swap(a.geospatial_types, b.geospatial_types); @@ -988,7 +990,8 @@ Statistics::Statistics() noexcept max_value(), min_value(), is_max_value_exact(0), - is_min_value_exact(0) { + is_min_value_exact(0), + nan_count(0) { } void Statistics::__set_max(const std::string& val) { @@ -1030,6 +1033,11 @@ void Statistics::__set_is_min_value_exact(const bool val) { this->is_min_value_exact = val; __isset.is_min_value_exact = true; } + +void Statistics::__set_nan_count(const int64_t val) { + this->nan_count = val; +__isset.nan_count = true; +} std::ostream& operator<<(std::ostream& out, const Statistics& obj) { obj.printTo(out); @@ -1037,7 +1045,7 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj) } -void swap(Statistics &a, Statistics &b) { +void swap(Statistics &a, Statistics &b) noexcept { using ::std::swap; swap(a.max, b.max); swap(a.min, b.min); @@ -1047,6 +1055,7 @@ void swap(Statistics &a, Statistics &b) { swap(a.min_value, 
b.min_value); swap(a.is_max_value_exact, b.is_max_value_exact); swap(a.is_min_value_exact, b.is_min_value_exact); + swap(a.nan_count, b.nan_count); swap(a.__isset, b.__isset); } @@ -1084,6 +1093,10 @@ bool Statistics::operator==(const Statistics & rhs) const return false; else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) return false; + if (__isset.nan_count != rhs.__isset.nan_count) + return false; + else if (__isset.nan_count && !(nan_count == rhs.nan_count)) + return false; return true; } @@ -1096,6 +1109,7 @@ Statistics::Statistics(const Statistics& other30) { min_value = other30.min_value; is_max_value_exact = other30.is_max_value_exact; is_min_value_exact = other30.is_min_value_exact; + nan_count = other30.nan_count; __isset = other30.__isset; } Statistics::Statistics(Statistics&& other31) noexcept { @@ -1107,6 +1121,7 @@ Statistics::Statistics(Statistics&& other31) noexcept { min_value = std::move(other31.min_value); is_max_value_exact = other31.is_max_value_exact; is_min_value_exact = other31.is_min_value_exact; + nan_count = other31.nan_count; __isset = other31.__isset; } Statistics& Statistics::operator=(const Statistics& other32) { @@ -1118,6 +1133,7 @@ Statistics& Statistics::operator=(const Statistics& other32) { min_value = other32.min_value; is_max_value_exact = other32.is_max_value_exact; is_min_value_exact = other32.is_min_value_exact; + nan_count = other32.nan_count; __isset = other32.__isset; return *this; } @@ -1130,6 +1146,7 @@ Statistics& Statistics::operator=(Statistics&& other33) noexcept { min_value = std::move(other33.min_value); is_max_value_exact = other33.is_max_value_exact; is_min_value_exact = other33.is_min_value_exact; + nan_count = other33.nan_count; __isset = other33.__isset; return *this; } @@ -1144,6 +1161,7 @@ void Statistics::printTo(std::ostream& out) const { out << ", " << "min_value="; (__isset.min_value ? 
(out << to_string(min_value)) : (out << "")); out << ", " << "is_max_value_exact="; (__isset.is_max_value_exact ? (out << to_string(is_max_value_exact)) : (out << "")); out << ", " << "is_min_value_exact="; (__isset.is_min_value_exact ? (out << to_string(is_min_value_exact)) : (out << "")); + out << ", " << "nan_count="; (__isset.nan_count ? (out << to_string(nan_count)) : (out << "")); out << ")"; } @@ -1160,7 +1178,7 @@ std::ostream& operator<<(std::ostream& out, const StringType& obj) } -void swap(StringType &a, StringType &b) { +void swap(StringType &a, StringType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1204,7 +1222,7 @@ std::ostream& operator<<(std::ostream& out, const UUIDType& obj) } -void swap(UUIDType &a, UUIDType &b) { +void swap(UUIDType &a, UUIDType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1248,7 +1266,7 @@ std::ostream& operator<<(std::ostream& out, const MapType& obj) } -void swap(MapType &a, MapType &b) { +void swap(MapType &a, MapType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1292,7 +1310,7 @@ std::ostream& operator<<(std::ostream& out, const ListType& obj) } -void swap(ListType &a, ListType &b) { +void swap(ListType &a, ListType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1336,7 +1354,7 @@ std::ostream& operator<<(std::ostream& out, const EnumType& obj) } -void swap(EnumType &a, EnumType &b) { +void swap(EnumType &a, EnumType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1380,7 +1398,7 @@ std::ostream& operator<<(std::ostream& out, const DateType& obj) } -void swap(DateType &a, DateType &b) { +void swap(DateType &a, DateType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1424,7 +1442,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj) } -void swap(Float16Type &a, Float16Type &b) { +void swap(Float16Type &a, Float16Type &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1468,7 +1486,7 @@ std::ostream& operator<<(std::ostream& out, 
const NullType& obj) } -void swap(NullType &a, NullType &b) { +void swap(NullType &a, NullType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1522,7 +1540,7 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj) } -void swap(DecimalType &a, DecimalType &b) { +void swap(DecimalType &a, DecimalType &b) noexcept { using ::std::swap; swap(a.scale, b.scale); swap(a.precision, b.precision); @@ -1576,7 +1594,7 @@ std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) } -void swap(MilliSeconds &a, MilliSeconds &b) { +void swap(MilliSeconds &a, MilliSeconds &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1620,7 +1638,7 @@ std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) } -void swap(MicroSeconds &a, MicroSeconds &b) { +void swap(MicroSeconds &a, MicroSeconds &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1664,7 +1682,7 @@ std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) } -void swap(NanoSeconds &a, NanoSeconds &b) { +void swap(NanoSeconds &a, NanoSeconds &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -1723,7 +1741,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) } -void swap(TimeUnit &a, TimeUnit &b) { +void swap(TimeUnit &a, TimeUnit &b) noexcept { using ::std::swap; swap(a.MILLIS, b.MILLIS); swap(a.MICROS, b.MICROS); @@ -1805,7 +1823,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj) } -void swap(TimestampType &a, TimestampType &b) { +void swap(TimestampType &a, TimestampType &b) noexcept { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); swap(a.unit, b.unit); @@ -1868,7 +1886,7 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj) } -void swap(TimeType &a, TimeType &b) { +void swap(TimeType &a, TimeType &b) noexcept { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); swap(a.unit, b.unit); @@ -1932,7 +1950,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj) } 
-void swap(IntType &a, IntType &b) { +void swap(IntType &a, IntType &b) noexcept { using ::std::swap; swap(a.bitWidth, b.bitWidth); swap(a.isSigned, b.isSigned); @@ -1986,7 +2004,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj) } -void swap(JsonType &a, JsonType &b) { +void swap(JsonType &a, JsonType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -2030,7 +2048,7 @@ std::ostream& operator<<(std::ostream& out, const BsonType& obj) } -void swap(BsonType &a, BsonType &b) { +void swap(BsonType &a, BsonType &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -2080,7 +2098,7 @@ std::ostream& operator<<(std::ostream& out, const VariantType& obj) } -void swap(VariantType &a, VariantType &b) { +void swap(VariantType &a, VariantType &b) noexcept { using ::std::swap; swap(a.specification_version, b.specification_version); swap(a.__isset, b.__isset); @@ -2139,7 +2157,7 @@ std::ostream& operator<<(std::ostream& out, const GeometryType& obj) } -void swap(GeometryType &a, GeometryType &b) { +void swap(GeometryType &a, GeometryType &b) noexcept { using ::std::swap; swap(a.crs, b.crs); swap(a.__isset, b.__isset); @@ -2204,7 +2222,7 @@ std::ostream& operator<<(std::ostream& out, const GeographyType& obj) } -void swap(GeographyType &a, GeographyType &b) { +void swap(GeographyType &a, GeographyType &b) noexcept { using ::std::swap; swap(a.crs, b.crs); swap(a.algorithm, b.algorithm); @@ -2352,7 +2370,7 @@ std::ostream& operator<<(std::ostream& out, const LogicalType& obj) } -void swap(LogicalType &a, LogicalType &b) { +void swap(LogicalType &a, LogicalType &b) noexcept { using ::std::swap; swap(a.STRING, b.STRING); swap(a.MAP, b.MAP); @@ -2623,7 +2641,7 @@ std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) } -void swap(SchemaElement &a, SchemaElement &b) { +void swap(SchemaElement &a, SchemaElement &b) noexcept { using ::std::swap; swap(a.type, b.type); swap(a.type_length, b.type_length); @@ -2789,7 +2807,7 @@ std::ostream& 
operator<<(std::ostream& out, const DataPageHeader& obj) } -void swap(DataPageHeader &a, DataPageHeader &b) { +void swap(DataPageHeader &a, DataPageHeader &b) noexcept { using ::std::swap; swap(a.num_values, b.num_values); swap(a.encoding, b.encoding); @@ -2874,7 +2892,7 @@ std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) } -void swap(IndexPageHeader &a, IndexPageHeader &b) { +void swap(IndexPageHeader &a, IndexPageHeader &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -2934,7 +2952,7 @@ std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) } -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { +void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) noexcept { using ::std::swap; swap(a.num_values, b.num_values); swap(a.encoding, b.encoding); @@ -3044,7 +3062,7 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) } -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { +void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) noexcept { using ::std::swap; swap(a.num_values, b.num_values); swap(a.num_nulls, b.num_nulls); @@ -3155,7 +3173,7 @@ std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) } -void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { +void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -3204,7 +3222,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj) } -void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { +void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) noexcept { using ::std::swap; swap(a.BLOCK, b.BLOCK); swap(a.__isset, b.__isset); @@ -3257,7 +3275,7 @@ std::ostream& operator<<(std::ostream& out, const XxHash& obj) } -void swap(XxHash &a, XxHash &b) { +void swap(XxHash &a, XxHash &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -3306,7 +3324,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj) 
} -void swap(BloomFilterHash &a, BloomFilterHash &b) { +void swap(BloomFilterHash &a, BloomFilterHash &b) noexcept { using ::std::swap; swap(a.XXHASH, b.XXHASH); swap(a.__isset, b.__isset); @@ -3359,7 +3377,7 @@ std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) } -void swap(Uncompressed &a, Uncompressed &b) { +void swap(Uncompressed &a, Uncompressed &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -3408,7 +3426,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj) } -void swap(BloomFilterCompression &a, BloomFilterCompression &b) { +void swap(BloomFilterCompression &a, BloomFilterCompression &b) noexcept { using ::std::swap; swap(a.UNCOMPRESSED, b.UNCOMPRESSED); swap(a.__isset, b.__isset); @@ -3478,7 +3496,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj) } -void swap(BloomFilterHeader &a, BloomFilterHeader &b) { +void swap(BloomFilterHeader &a, BloomFilterHeader &b) noexcept { using ::std::swap; swap(a.numBytes, b.numBytes); swap(a.algorithm, b.algorithm); @@ -3589,7 +3607,7 @@ std::ostream& operator<<(std::ostream& out, const PageHeader& obj) } -void swap(PageHeader &a, PageHeader &b) { +void swap(PageHeader &a, PageHeader &b) noexcept { using ::std::swap; swap(a.type, b.type); swap(a.uncompressed_page_size, b.uncompressed_page_size); @@ -3717,7 +3735,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj) } -void swap(KeyValue &a, KeyValue &b) { +void swap(KeyValue &a, KeyValue &b) noexcept { using ::std::swap; swap(a.key, b.key); swap(a.value, b.value); @@ -3793,7 +3811,7 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) } -void swap(SortingColumn &a, SortingColumn &b) { +void swap(SortingColumn &a, SortingColumn &b) noexcept { using ::std::swap; swap(a.column_idx, b.column_idx); swap(a.descending, b.descending); @@ -3870,7 +3888,7 @@ std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) } -void swap(PageEncodingStats 
&a, PageEncodingStats &b) { +void swap(PageEncodingStats &a, PageEncodingStats &b) noexcept { using ::std::swap; swap(a.page_type, b.page_type); swap(a.encoding, b.encoding); @@ -4019,7 +4037,7 @@ std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) } -void swap(ColumnMetaData &a, ColumnMetaData &b) { +void swap(ColumnMetaData &a, ColumnMetaData &b) noexcept { using ::std::swap; swap(a.type, b.type); swap(a.encodings, b.encodings); @@ -4216,7 +4234,7 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) } -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { +void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -4270,7 +4288,7 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) } -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { +void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) noexcept { using ::std::swap; swap(a.path_in_schema, b.path_in_schema); swap(a.key_metadata, b.key_metadata); @@ -4341,7 +4359,7 @@ std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) } -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { +void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) noexcept { using ::std::swap; swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); @@ -4455,7 +4473,7 @@ std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) } -void swap(ColumnChunk &a, ColumnChunk &b) { +void swap(ColumnChunk &a, ColumnChunk &b) noexcept { using ::std::swap; swap(a.file_path, b.file_path); swap(a.file_offset, b.file_offset); @@ -4623,7 +4641,7 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj) } -void swap(RowGroup &a, RowGroup &b) { +void swap(RowGroup &a, RowGroup &b) noexcept { using ::std::swap; swap(a.columns, b.columns); 
swap(a.total_byte_size, b.total_byte_size); @@ -4730,7 +4748,7 @@ std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) } -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { +void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) noexcept { using ::std::swap; (void) a; (void) b; @@ -4762,6 +4780,50 @@ void TypeDefinedOrder::printTo(std::ostream& out) const { } +IEEE754TotalOrder::~IEEE754TotalOrder() noexcept { +} + +IEEE754TotalOrder::IEEE754TotalOrder() noexcept { +} +std::ostream& operator<<(std::ostream& out, const IEEE754TotalOrder& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(IEEE754TotalOrder &a, IEEE754TotalOrder &b) noexcept { + using ::std::swap; + (void) a; + (void) b; +} + +bool IEEE754TotalOrder::operator==(const IEEE754TotalOrder & /* rhs */) const +{ + return true; +} + +IEEE754TotalOrder::IEEE754TotalOrder(const IEEE754TotalOrder& other271) noexcept { + (void) other271; +} +IEEE754TotalOrder::IEEE754TotalOrder(IEEE754TotalOrder&& other272) noexcept { + (void) other272; +} +IEEE754TotalOrder& IEEE754TotalOrder::operator=(const IEEE754TotalOrder& other273) noexcept { + (void) other273; + return *this; +} +IEEE754TotalOrder& IEEE754TotalOrder::operator=(IEEE754TotalOrder&& other274) noexcept { + (void) other274; + return *this; +} +void IEEE754TotalOrder::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "IEEE754TotalOrder("; + out << ")"; +} + + ColumnOrder::~ColumnOrder() noexcept { } @@ -4772,6 +4834,11 @@ void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { this->TYPE_ORDER = val; __isset.TYPE_ORDER = true; } + +void ColumnOrder::__set_IEEE_754_TOTAL_ORDER(const IEEE754TotalOrder& val) { + this->IEEE_754_TOTAL_ORDER = val; +__isset.IEEE_754_TOTAL_ORDER = true; +} std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) { obj.printTo(out); @@ -4779,9 +4846,10 @@ std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) } -void swap(ColumnOrder &a, 
ColumnOrder &b) { +void swap(ColumnOrder &a, ColumnOrder &b) noexcept { using ::std::swap; swap(a.TYPE_ORDER, b.TYPE_ORDER); + swap(a.IEEE_754_TOTAL_ORDER, b.IEEE_754_TOTAL_ORDER); swap(a.__isset, b.__isset); } @@ -4791,31 +4859,40 @@ bool ColumnOrder::operator==(const ColumnOrder & rhs) const return false; else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) return false; + if (__isset.IEEE_754_TOTAL_ORDER != rhs.__isset.IEEE_754_TOTAL_ORDER) + return false; + else if (__isset.IEEE_754_TOTAL_ORDER && !(IEEE_754_TOTAL_ORDER == rhs.IEEE_754_TOTAL_ORDER)) + return false; return true; } -ColumnOrder::ColumnOrder(const ColumnOrder& other271) noexcept { - TYPE_ORDER = other271.TYPE_ORDER; - __isset = other271.__isset; +ColumnOrder::ColumnOrder(const ColumnOrder& other275) noexcept { + TYPE_ORDER = other275.TYPE_ORDER; + IEEE_754_TOTAL_ORDER = other275.IEEE_754_TOTAL_ORDER; + __isset = other275.__isset; } -ColumnOrder::ColumnOrder(ColumnOrder&& other272) noexcept { - TYPE_ORDER = std::move(other272.TYPE_ORDER); - __isset = other272.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other276) noexcept { + TYPE_ORDER = std::move(other276.TYPE_ORDER); + IEEE_754_TOTAL_ORDER = std::move(other276.IEEE_754_TOTAL_ORDER); + __isset = other276.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other273) noexcept { - TYPE_ORDER = other273.TYPE_ORDER; - __isset = other273.__isset; +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other277) noexcept { + TYPE_ORDER = other277.TYPE_ORDER; + IEEE_754_TOTAL_ORDER = other277.IEEE_754_TOTAL_ORDER; + __isset = other277.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other274) noexcept { - TYPE_ORDER = std::move(other274.TYPE_ORDER); - __isset = other274.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other278) noexcept { + TYPE_ORDER = std::move(other278.TYPE_ORDER); + IEEE_754_TOTAL_ORDER = std::move(other278.IEEE_754_TOTAL_ORDER); + __isset = other278.__isset; return 
*this; } void ColumnOrder::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "ColumnOrder("; out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? (out << to_string(TYPE_ORDER)) : (out << "")); + out << ", " << "IEEE_754_TOTAL_ORDER="; (__isset.IEEE_754_TOTAL_ORDER ? (out << to_string(IEEE_754_TOTAL_ORDER)) : (out << "")); out << ")"; } @@ -4847,7 +4924,7 @@ std::ostream& operator<<(std::ostream& out, const PageLocation& obj) } -void swap(PageLocation &a, PageLocation &b) { +void swap(PageLocation &a, PageLocation &b) noexcept { using ::std::swap; swap(a.offset, b.offset); swap(a.compressed_page_size, b.compressed_page_size); @@ -4865,26 +4942,26 @@ bool PageLocation::operator==(const PageLocation & rhs) const return true; } -PageLocation::PageLocation(const PageLocation& other275) noexcept { - offset = other275.offset; - compressed_page_size = other275.compressed_page_size; - first_row_index = other275.first_row_index; +PageLocation::PageLocation(const PageLocation& other279) noexcept { + offset = other279.offset; + compressed_page_size = other279.compressed_page_size; + first_row_index = other279.first_row_index; } -PageLocation::PageLocation(PageLocation&& other276) noexcept { - offset = other276.offset; - compressed_page_size = other276.compressed_page_size; - first_row_index = other276.first_row_index; +PageLocation::PageLocation(PageLocation&& other280) noexcept { + offset = other280.offset; + compressed_page_size = other280.compressed_page_size; + first_row_index = other280.first_row_index; } -PageLocation& PageLocation::operator=(const PageLocation& other277) noexcept { - offset = other277.offset; - compressed_page_size = other277.compressed_page_size; - first_row_index = other277.first_row_index; +PageLocation& PageLocation::operator=(const PageLocation& other281) noexcept { + offset = other281.offset; + compressed_page_size = other281.compressed_page_size; + first_row_index = other281.first_row_index; return *this; } -PageLocation& 
PageLocation::operator=(PageLocation&& other278) noexcept { - offset = other278.offset; - compressed_page_size = other278.compressed_page_size; - first_row_index = other278.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other282) noexcept { + offset = other282.offset; + compressed_page_size = other282.compressed_page_size; + first_row_index = other282.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -4918,7 +4995,7 @@ std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) } -void swap(OffsetIndex &a, OffsetIndex &b) { +void swap(OffsetIndex &a, OffsetIndex &b) noexcept { using ::std::swap; swap(a.page_locations, b.page_locations); swap(a.unencoded_byte_array_data_bytes, b.unencoded_byte_array_data_bytes); @@ -4936,26 +5013,26 @@ bool OffsetIndex::operator==(const OffsetIndex & rhs) const return true; } -OffsetIndex::OffsetIndex(const OffsetIndex& other291) { - page_locations = other291.page_locations; - unencoded_byte_array_data_bytes = other291.unencoded_byte_array_data_bytes; - __isset = other291.__isset; +OffsetIndex::OffsetIndex(const OffsetIndex& other295) { + page_locations = other295.page_locations; + unencoded_byte_array_data_bytes = other295.unencoded_byte_array_data_bytes; + __isset = other295.__isset; } -OffsetIndex::OffsetIndex(OffsetIndex&& other292) noexcept { - page_locations = std::move(other292.page_locations); - unencoded_byte_array_data_bytes = std::move(other292.unencoded_byte_array_data_bytes); - __isset = other292.__isset; +OffsetIndex::OffsetIndex(OffsetIndex&& other296) noexcept { + page_locations = std::move(other296.page_locations); + unencoded_byte_array_data_bytes = std::move(other296.unencoded_byte_array_data_bytes); + __isset = other296.__isset; } -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other293) { - page_locations = other293.page_locations; - unencoded_byte_array_data_bytes = other293.unencoded_byte_array_data_bytes; - __isset = 
other293.__isset; +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other297) { + page_locations = other297.page_locations; + unencoded_byte_array_data_bytes = other297.unencoded_byte_array_data_bytes; + __isset = other297.__isset; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other294) noexcept { - page_locations = std::move(other294.page_locations); - unencoded_byte_array_data_bytes = std::move(other294.unencoded_byte_array_data_bytes); - __isset = other294.__isset; +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other298) noexcept { + page_locations = std::move(other298.page_locations); + unencoded_byte_array_data_bytes = std::move(other298.unencoded_byte_array_data_bytes); + __isset = other298.__isset; return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -5004,6 +5081,11 @@ void ColumnIndex::__set_definition_level_histograms(const std::vector & this->definition_level_histograms = val; __isset.definition_level_histograms = true; } + +void ColumnIndex::__set_nan_counts(const std::vector & val) { + this->nan_counts = val; +__isset.nan_counts = true; +} std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) { obj.printTo(out); @@ -5011,7 +5093,7 @@ std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) } -void swap(ColumnIndex &a, ColumnIndex &b) { +void swap(ColumnIndex &a, ColumnIndex &b) noexcept { using ::std::swap; swap(a.null_pages, b.null_pages); swap(a.min_values, b.min_values); @@ -5020,6 +5102,7 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.null_counts, b.null_counts); swap(a.repetition_level_histograms, b.repetition_level_histograms); swap(a.definition_level_histograms, b.definition_level_histograms); + swap(a.nan_counts, b.nan_counts); swap(a.__isset, b.__isset); } @@ -5045,49 +5128,57 @@ bool ColumnIndex::operator==(const ColumnIndex & rhs) const return false; else if (__isset.definition_level_histograms && !(definition_level_histograms == 
rhs.definition_level_histograms)) return false; + if (__isset.nan_counts != rhs.__isset.nan_counts) + return false; + else if (__isset.nan_counts && !(nan_counts == rhs.nan_counts)) + return false; return true; } -ColumnIndex::ColumnIndex(const ColumnIndex& other332) { - null_pages = other332.null_pages; - min_values = other332.min_values; - max_values = other332.max_values; - boundary_order = other332.boundary_order; - null_counts = other332.null_counts; - repetition_level_histograms = other332.repetition_level_histograms; - definition_level_histograms = other332.definition_level_histograms; - __isset = other332.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other333) noexcept { - null_pages = std::move(other333.null_pages); - min_values = std::move(other333.min_values); - max_values = std::move(other333.max_values); - boundary_order = other333.boundary_order; - null_counts = std::move(other333.null_counts); - repetition_level_histograms = std::move(other333.repetition_level_histograms); - definition_level_histograms = std::move(other333.definition_level_histograms); - __isset = other333.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other334) { - null_pages = other334.null_pages; - min_values = other334.min_values; - max_values = other334.max_values; - boundary_order = other334.boundary_order; - null_counts = other334.null_counts; - repetition_level_histograms = other334.repetition_level_histograms; - definition_level_histograms = other334.definition_level_histograms; - __isset = other334.__isset; +ColumnIndex::ColumnIndex(const ColumnIndex& other342) { + null_pages = other342.null_pages; + min_values = other342.min_values; + max_values = other342.max_values; + boundary_order = other342.boundary_order; + null_counts = other342.null_counts; + repetition_level_histograms = other342.repetition_level_histograms; + definition_level_histograms = other342.definition_level_histograms; + nan_counts = other342.nan_counts; + __isset = 
other342.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other343) noexcept { + null_pages = std::move(other343.null_pages); + min_values = std::move(other343.min_values); + max_values = std::move(other343.max_values); + boundary_order = other343.boundary_order; + null_counts = std::move(other343.null_counts); + repetition_level_histograms = std::move(other343.repetition_level_histograms); + definition_level_histograms = std::move(other343.definition_level_histograms); + nan_counts = std::move(other343.nan_counts); + __isset = other343.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other344) { + null_pages = other344.null_pages; + min_values = other344.min_values; + max_values = other344.max_values; + boundary_order = other344.boundary_order; + null_counts = other344.null_counts; + repetition_level_histograms = other344.repetition_level_histograms; + definition_level_histograms = other344.definition_level_histograms; + nan_counts = other344.nan_counts; + __isset = other344.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other335) noexcept { - null_pages = std::move(other335.null_pages); - min_values = std::move(other335.min_values); - max_values = std::move(other335.max_values); - boundary_order = other335.boundary_order; - null_counts = std::move(other335.null_counts); - repetition_level_histograms = std::move(other335.repetition_level_histograms); - definition_level_histograms = std::move(other335.definition_level_histograms); - __isset = other335.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other345) noexcept { + null_pages = std::move(other345.null_pages); + min_values = std::move(other345.min_values); + max_values = std::move(other345.max_values); + boundary_order = other345.boundary_order; + null_counts = std::move(other345.null_counts); + repetition_level_histograms = std::move(other345.repetition_level_histograms); + definition_level_histograms = 
std::move(other345.definition_level_histograms); + nan_counts = std::move(other345.nan_counts); + __isset = other345.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -5100,6 +5191,7 @@ void ColumnIndex::printTo(std::ostream& out) const { out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); out << ", " << "repetition_level_histograms="; (__isset.repetition_level_histograms ? (out << to_string(repetition_level_histograms)) : (out << "")); out << ", " << "definition_level_histograms="; (__isset.definition_level_histograms ? (out << to_string(definition_level_histograms)) : (out << "")); + out << ", " << "nan_counts="; (__isset.nan_counts ? (out << to_string(nan_counts)) : (out << "")); out << ")"; } @@ -5134,7 +5226,7 @@ std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) } -void swap(AesGcmV1 &a, AesGcmV1 &b) { +void swap(AesGcmV1 &a, AesGcmV1 &b) noexcept { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); swap(a.aad_file_unique, b.aad_file_unique); @@ -5159,30 +5251,30 @@ bool AesGcmV1::operator==(const AesGcmV1 & rhs) const return true; } -AesGcmV1::AesGcmV1(const AesGcmV1& other336) { - aad_prefix = other336.aad_prefix; - aad_file_unique = other336.aad_file_unique; - supply_aad_prefix = other336.supply_aad_prefix; - __isset = other336.__isset; +AesGcmV1::AesGcmV1(const AesGcmV1& other346) { + aad_prefix = other346.aad_prefix; + aad_file_unique = other346.aad_file_unique; + supply_aad_prefix = other346.supply_aad_prefix; + __isset = other346.__isset; } -AesGcmV1::AesGcmV1(AesGcmV1&& other337) noexcept { - aad_prefix = std::move(other337.aad_prefix); - aad_file_unique = std::move(other337.aad_file_unique); - supply_aad_prefix = other337.supply_aad_prefix; - __isset = other337.__isset; +AesGcmV1::AesGcmV1(AesGcmV1&& other347) noexcept { + aad_prefix = std::move(other347.aad_prefix); + aad_file_unique = std::move(other347.aad_file_unique); + supply_aad_prefix = 
other347.supply_aad_prefix; + __isset = other347.__isset; } -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other338) { - aad_prefix = other338.aad_prefix; - aad_file_unique = other338.aad_file_unique; - supply_aad_prefix = other338.supply_aad_prefix; - __isset = other338.__isset; +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other348) { + aad_prefix = other348.aad_prefix; + aad_file_unique = other348.aad_file_unique; + supply_aad_prefix = other348.supply_aad_prefix; + __isset = other348.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other339) noexcept { - aad_prefix = std::move(other339.aad_prefix); - aad_file_unique = std::move(other339.aad_file_unique); - supply_aad_prefix = other339.supply_aad_prefix; - __isset = other339.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other349) noexcept { + aad_prefix = std::move(other349.aad_prefix); + aad_file_unique = std::move(other349.aad_file_unique); + supply_aad_prefix = other349.supply_aad_prefix; + __isset = other349.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -5225,7 +5317,7 @@ std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) } -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { +void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) noexcept { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); swap(a.aad_file_unique, b.aad_file_unique); @@ -5250,30 +5342,30 @@ bool AesGcmCtrV1::operator==(const AesGcmCtrV1 & rhs) const return true; } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other340) { - aad_prefix = other340.aad_prefix; - aad_file_unique = other340.aad_file_unique; - supply_aad_prefix = other340.supply_aad_prefix; - __isset = other340.__isset; +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other350) { + aad_prefix = other350.aad_prefix; + aad_file_unique = other350.aad_file_unique; + supply_aad_prefix = other350.supply_aad_prefix; + __isset = other350.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other341) noexcept { - aad_prefix = 
std::move(other341.aad_prefix); - aad_file_unique = std::move(other341.aad_file_unique); - supply_aad_prefix = other341.supply_aad_prefix; - __isset = other341.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other351) noexcept { + aad_prefix = std::move(other351.aad_prefix); + aad_file_unique = std::move(other351.aad_file_unique); + supply_aad_prefix = other351.supply_aad_prefix; + __isset = other351.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other342) { - aad_prefix = other342.aad_prefix; - aad_file_unique = other342.aad_file_unique; - supply_aad_prefix = other342.supply_aad_prefix; - __isset = other342.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other352) { + aad_prefix = other352.aad_prefix; + aad_file_unique = other352.aad_file_unique; + supply_aad_prefix = other352.supply_aad_prefix; + __isset = other352.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other343) noexcept { - aad_prefix = std::move(other343.aad_prefix); - aad_file_unique = std::move(other343.aad_file_unique); - supply_aad_prefix = other343.supply_aad_prefix; - __isset = other343.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other353) noexcept { + aad_prefix = std::move(other353.aad_prefix); + aad_file_unique = std::move(other353.aad_file_unique); + supply_aad_prefix = other353.supply_aad_prefix; + __isset = other353.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -5308,7 +5400,7 @@ std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) } -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { +void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) noexcept { using ::std::swap; swap(a.AES_GCM_V1, b.AES_GCM_V1); swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); @@ -5328,26 +5420,26 @@ bool EncryptionAlgorithm::operator==(const EncryptionAlgorithm & rhs) const return true; } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other344) { - 
AES_GCM_V1 = other344.AES_GCM_V1; - AES_GCM_CTR_V1 = other344.AES_GCM_CTR_V1; - __isset = other344.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other354) { + AES_GCM_V1 = other354.AES_GCM_V1; + AES_GCM_CTR_V1 = other354.AES_GCM_CTR_V1; + __isset = other354.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other345) noexcept { - AES_GCM_V1 = std::move(other345.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other345.AES_GCM_CTR_V1); - __isset = other345.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other355) noexcept { + AES_GCM_V1 = std::move(other355.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other355.AES_GCM_CTR_V1); + __isset = other355.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other346) { - AES_GCM_V1 = other346.AES_GCM_V1; - AES_GCM_CTR_V1 = other346.AES_GCM_CTR_V1; - __isset = other346.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other356) { + AES_GCM_V1 = other356.AES_GCM_V1; + AES_GCM_CTR_V1 = other356.AES_GCM_CTR_V1; + __isset = other356.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other347) noexcept { - AES_GCM_V1 = std::move(other347.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other347.AES_GCM_CTR_V1); - __isset = other347.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other357) noexcept { + AES_GCM_V1 = std::move(other357.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other357.AES_GCM_CTR_V1); + __isset = other357.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -5416,7 +5508,7 @@ std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) } -void swap(FileMetaData &a, FileMetaData &b) { +void swap(FileMetaData &a, FileMetaData &b) noexcept { using ::std::swap; swap(a.version, b.version); swap(a.schema, b.schema); @@ -5463,54 +5555,54 @@ bool 
FileMetaData::operator==(const FileMetaData & rhs) const return true; } -FileMetaData::FileMetaData(const FileMetaData& other372) { - version = other372.version; - schema = other372.schema; - num_rows = other372.num_rows; - row_groups = other372.row_groups; - key_value_metadata = other372.key_value_metadata; - created_by = other372.created_by; - column_orders = other372.column_orders; - encryption_algorithm = other372.encryption_algorithm; - footer_signing_key_metadata = other372.footer_signing_key_metadata; - __isset = other372.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other373) noexcept { - version = other373.version; - schema = std::move(other373.schema); - num_rows = other373.num_rows; - row_groups = std::move(other373.row_groups); - key_value_metadata = std::move(other373.key_value_metadata); - created_by = std::move(other373.created_by); - column_orders = std::move(other373.column_orders); - encryption_algorithm = std::move(other373.encryption_algorithm); - footer_signing_key_metadata = std::move(other373.footer_signing_key_metadata); - __isset = other373.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other374) { - version = other374.version; - schema = other374.schema; - num_rows = other374.num_rows; - row_groups = other374.row_groups; - key_value_metadata = other374.key_value_metadata; - created_by = other374.created_by; - column_orders = other374.column_orders; - encryption_algorithm = other374.encryption_algorithm; - footer_signing_key_metadata = other374.footer_signing_key_metadata; - __isset = other374.__isset; +FileMetaData::FileMetaData(const FileMetaData& other382) { + version = other382.version; + schema = other382.schema; + num_rows = other382.num_rows; + row_groups = other382.row_groups; + key_value_metadata = other382.key_value_metadata; + created_by = other382.created_by; + column_orders = other382.column_orders; + encryption_algorithm = other382.encryption_algorithm; + footer_signing_key_metadata = 
other382.footer_signing_key_metadata; + __isset = other382.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other383) noexcept { + version = other383.version; + schema = std::move(other383.schema); + num_rows = other383.num_rows; + row_groups = std::move(other383.row_groups); + key_value_metadata = std::move(other383.key_value_metadata); + created_by = std::move(other383.created_by); + column_orders = std::move(other383.column_orders); + encryption_algorithm = std::move(other383.encryption_algorithm); + footer_signing_key_metadata = std::move(other383.footer_signing_key_metadata); + __isset = other383.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other384) { + version = other384.version; + schema = other384.schema; + num_rows = other384.num_rows; + row_groups = other384.row_groups; + key_value_metadata = other384.key_value_metadata; + created_by = other384.created_by; + column_orders = other384.column_orders; + encryption_algorithm = other384.encryption_algorithm; + footer_signing_key_metadata = other384.footer_signing_key_metadata; + __isset = other384.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other375) noexcept { - version = other375.version; - schema = std::move(other375.schema); - num_rows = other375.num_rows; - row_groups = std::move(other375.row_groups); - key_value_metadata = std::move(other375.key_value_metadata); - created_by = std::move(other375.created_by); - column_orders = std::move(other375.column_orders); - encryption_algorithm = std::move(other375.encryption_algorithm); - footer_signing_key_metadata = std::move(other375.footer_signing_key_metadata); - __isset = other375.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other385) noexcept { + version = other385.version; + schema = std::move(other385.schema); + num_rows = other385.num_rows; + row_groups = std::move(other385.row_groups); + key_value_metadata = std::move(other385.key_value_metadata); + created_by = 
std::move(other385.created_by); + column_orders = std::move(other385.column_orders); + encryption_algorithm = std::move(other385.encryption_algorithm); + footer_signing_key_metadata = std::move(other385.footer_signing_key_metadata); + __isset = other385.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -5551,7 +5643,7 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) } -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { +void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) noexcept { using ::std::swap; swap(a.encryption_algorithm, b.encryption_algorithm); swap(a.key_metadata, b.key_metadata); @@ -5569,26 +5661,26 @@ bool FileCryptoMetaData::operator==(const FileCryptoMetaData & rhs) const return true; } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other376) { - encryption_algorithm = other376.encryption_algorithm; - key_metadata = other376.key_metadata; - __isset = other376.__isset; +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other386) { + encryption_algorithm = other386.encryption_algorithm; + key_metadata = other386.key_metadata; + __isset = other386.__isset; } -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other377) noexcept { - encryption_algorithm = std::move(other377.encryption_algorithm); - key_metadata = std::move(other377.key_metadata); - __isset = other377.__isset; +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other387) noexcept { + encryption_algorithm = std::move(other387.encryption_algorithm); + key_metadata = std::move(other387.key_metadata); + __isset = other387.__isset; } -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other378) { - encryption_algorithm = other378.encryption_algorithm; - key_metadata = other378.key_metadata; - __isset = other378.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other388) { + encryption_algorithm = 
other388.encryption_algorithm; + key_metadata = other388.key_metadata; + __isset = other388.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other379) noexcept { - encryption_algorithm = std::move(other379.encryption_algorithm); - key_metadata = std::move(other379.key_metadata); - __isset = other379.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other389) noexcept { + encryption_algorithm = std::move(other389.encryption_algorithm); + key_metadata = std::move(other389.key_metadata); + __isset = other389.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 1f1e254f5cf2..7dc3ccc2de2c 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.21.0) + * Autogenerated by Thrift Compiler (0.23.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -48,7 +48,7 @@ std::ostream& operator<<(std::ostream& out, const Type::type& val); std::string to_string(const Type::type& val); /** - * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. + * DEPRECATED: Common types used by frameworks (e.g. Hive, Pig) using parquet. * ConvertedType is superseded by LogicalType. This enum should not be extended. * * See LogicalTypes.md for conversion between ConvertedType and LogicalType. @@ -250,10 +250,10 @@ struct Encoding { */ PLAIN = 0, /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the * plain type. - * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead + * For a data page use RLE_DICTIONARY instead. + * For a Dictionary page use PLAIN instead. 
*/ PLAIN_DICTIONARY = 2, /** @@ -262,8 +262,9 @@ struct Encoding { */ RLE = 3, /** - * Bit packed encoding. This can only be used if the data has a known max + * DEPRECATED: Bit packed encoding. This can only be used if the data has a known max * width. Usable for definition/repetition levels encoding. + * Superseded by RLE (which is a hybrid of RLE and bit packing); see Encodings.md. */ BIT_PACKED = 4, /** @@ -465,6 +466,8 @@ class RowGroup; class TypeDefinedOrder; +class IEEE754TotalOrder; + class ColumnOrder; class PageLocation; @@ -507,7 +510,7 @@ class SizeStatistics { SizeStatistics& operator=(SizeStatistics&&) noexcept; SizeStatistics() noexcept; - virtual ~SizeStatistics() noexcept; + ~SizeStatistics() noexcept; /** * The number of physical bytes stored for BYTE_ARRAY data values assuming * no encoding. This is exclusive of the bytes needed to store the length of @@ -566,10 +569,10 @@ class SizeStatistics { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(SizeStatistics &a, SizeStatistics &b); +void swap(SizeStatistics &a, SizeStatistics &b) noexcept; std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); @@ -594,7 +597,7 @@ class BoundingBox { BoundingBox& operator=(BoundingBox&&) noexcept; BoundingBox() noexcept; - virtual ~BoundingBox() noexcept; + ~BoundingBox() noexcept; double xmin; double xmax; double ymin; @@ -634,10 +637,10 @@ class BoundingBox { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BoundingBox &a, BoundingBox &b); +void swap(BoundingBox &a, BoundingBox &b) noexcept; std::ostream& operator<<(std::ostream& out, const BoundingBox& obj); @@ -659,7 +662,7 @@ class GeospatialStatistics { GeospatialStatistics& operator=(GeospatialStatistics&&) noexcept; GeospatialStatistics() noexcept; - virtual ~GeospatialStatistics() 
noexcept; + ~GeospatialStatistics() noexcept; /** * A bounding box of geospatial instances */ @@ -687,15 +690,15 @@ class GeospatialStatistics { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(GeospatialStatistics &a, GeospatialStatistics &b); +void swap(GeospatialStatistics &a, GeospatialStatistics &b) noexcept; std::ostream& operator<<(std::ostream& out, const GeospatialStatistics& obj); typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false) {} + _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false), nan_count(false) {} bool max :1; bool min :1; bool null_count :1; @@ -704,6 +707,7 @@ typedef struct _Statistics__isset { bool min_value :1; bool is_max_value_exact :1; bool is_min_value_exact :1; + bool nan_count :1; } _Statistics__isset; /** @@ -719,7 +723,7 @@ class Statistics { Statistics& operator=(Statistics&&) noexcept; Statistics() noexcept; - virtual ~Statistics() noexcept; + ~Statistics() noexcept; /** * DEPRECATED: min and max value of the column. Use min_value and max_value. * @@ -770,6 +774,13 @@ class Statistics { * If true, min_value is the actual minimum value for a column */ bool is_min_value_exact; + /** + * Count of NaN values in the column; only present if physical type is FLOAT + * or DOUBLE, or logical type is FLOAT16. + * If this field is not present, readers MUST assume NaNs may be present + * (i.e. MUST assume nan_count > 0 and MAY NOT assume nan_count == 0). 
+ */ + int64_t nan_count; _Statistics__isset __isset; @@ -789,6 +800,8 @@ class Statistics { void __set_is_min_value_exact(const bool val); + void __set_nan_count(const int64_t val); + bool operator == (const Statistics & rhs) const; bool operator != (const Statistics &rhs) const { return !(*this == rhs); @@ -801,10 +814,10 @@ class Statistics { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(Statistics &a, Statistics &b); +void swap(Statistics &a, Statistics &b) noexcept; std::ostream& operator<<(std::ostream& out, const Statistics& obj); @@ -821,7 +834,7 @@ class StringType { StringType& operator=(StringType&&) noexcept; StringType() noexcept; - virtual ~StringType() noexcept; + ~StringType() noexcept; bool operator == (const StringType & /* rhs */) const; bool operator != (const StringType &rhs) const { @@ -835,10 +848,10 @@ class StringType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(StringType &a, StringType &b); +void swap(StringType &a, StringType &b) noexcept; std::ostream& operator<<(std::ostream& out, const StringType& obj); @@ -852,7 +865,7 @@ class UUIDType { UUIDType& operator=(UUIDType&&) noexcept; UUIDType() noexcept; - virtual ~UUIDType() noexcept; + ~UUIDType() noexcept; bool operator == (const UUIDType & /* rhs */) const; bool operator != (const UUIDType &rhs) const { @@ -866,10 +879,10 @@ class UUIDType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(UUIDType &a, UUIDType &b); +void swap(UUIDType &a, UUIDType &b) noexcept; std::ostream& operator<<(std::ostream& out, const UUIDType& obj); @@ -883,7 +896,7 @@ class MapType { MapType& operator=(MapType&&) noexcept; MapType() noexcept; - virtual ~MapType() noexcept; + ~MapType() 
noexcept; bool operator == (const MapType & /* rhs */) const; bool operator != (const MapType &rhs) const { @@ -897,10 +910,10 @@ class MapType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(MapType &a, MapType &b); +void swap(MapType &a, MapType &b) noexcept; std::ostream& operator<<(std::ostream& out, const MapType& obj); @@ -914,7 +927,7 @@ class ListType { ListType& operator=(ListType&&) noexcept; ListType() noexcept; - virtual ~ListType() noexcept; + ~ListType() noexcept; bool operator == (const ListType & /* rhs */) const; bool operator != (const ListType &rhs) const { @@ -928,10 +941,10 @@ class ListType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ListType &a, ListType &b); +void swap(ListType &a, ListType &b) noexcept; std::ostream& operator<<(std::ostream& out, const ListType& obj); @@ -945,7 +958,7 @@ class EnumType { EnumType& operator=(EnumType&&) noexcept; EnumType() noexcept; - virtual ~EnumType() noexcept; + ~EnumType() noexcept; bool operator == (const EnumType & /* rhs */) const; bool operator != (const EnumType &rhs) const { @@ -959,10 +972,10 @@ class EnumType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(EnumType &a, EnumType &b); +void swap(EnumType &a, EnumType &b) noexcept; std::ostream& operator<<(std::ostream& out, const EnumType& obj); @@ -976,7 +989,7 @@ class DateType { DateType& operator=(DateType&&) noexcept; DateType() noexcept; - virtual ~DateType() noexcept; + ~DateType() noexcept; bool operator == (const DateType & /* rhs */) const; bool operator != (const DateType &rhs) const { @@ -990,10 +1003,10 @@ class DateType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) 
const; + void printTo(std::ostream& out) const; }; -void swap(DateType &a, DateType &b); +void swap(DateType &a, DateType &b) noexcept; std::ostream& operator<<(std::ostream& out, const DateType& obj); @@ -1007,7 +1020,7 @@ class Float16Type { Float16Type& operator=(Float16Type&&) noexcept; Float16Type() noexcept; - virtual ~Float16Type() noexcept; + ~Float16Type() noexcept; bool operator == (const Float16Type & /* rhs */) const; bool operator != (const Float16Type &rhs) const { @@ -1021,10 +1034,10 @@ class Float16Type { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(Float16Type &a, Float16Type &b); +void swap(Float16Type &a, Float16Type &b) noexcept; std::ostream& operator<<(std::ostream& out, const Float16Type& obj); @@ -1045,7 +1058,7 @@ class NullType { NullType& operator=(NullType&&) noexcept; NullType() noexcept; - virtual ~NullType() noexcept; + ~NullType() noexcept; bool operator == (const NullType & /* rhs */) const; bool operator != (const NullType &rhs) const { @@ -1059,10 +1072,10 @@ class NullType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(NullType &a, NullType &b); +void swap(NullType &a, NullType &b) noexcept; std::ostream& operator<<(std::ostream& out, const NullType& obj); @@ -1087,7 +1100,7 @@ class DecimalType { DecimalType& operator=(DecimalType&&) noexcept; DecimalType() noexcept; - virtual ~DecimalType() noexcept; + ~DecimalType() noexcept; int32_t scale; int32_t precision; @@ -1107,10 +1120,10 @@ class DecimalType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(DecimalType &a, DecimalType &b); +void swap(DecimalType &a, DecimalType &b) noexcept; std::ostream& operator<<(std::ostream& out, const DecimalType& obj); @@ 
-1127,7 +1140,7 @@ class MilliSeconds { MilliSeconds& operator=(MilliSeconds&&) noexcept; MilliSeconds() noexcept; - virtual ~MilliSeconds() noexcept; + ~MilliSeconds() noexcept; bool operator == (const MilliSeconds & /* rhs */) const; bool operator != (const MilliSeconds &rhs) const { @@ -1141,10 +1154,10 @@ class MilliSeconds { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(MilliSeconds &a, MilliSeconds &b); +void swap(MilliSeconds &a, MilliSeconds &b) noexcept; std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); @@ -1158,7 +1171,7 @@ class MicroSeconds { MicroSeconds& operator=(MicroSeconds&&) noexcept; MicroSeconds() noexcept; - virtual ~MicroSeconds() noexcept; + ~MicroSeconds() noexcept; bool operator == (const MicroSeconds & /* rhs */) const; bool operator != (const MicroSeconds &rhs) const { @@ -1172,10 +1185,10 @@ class MicroSeconds { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(MicroSeconds &a, MicroSeconds &b); +void swap(MicroSeconds &a, MicroSeconds &b) noexcept; std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); @@ -1189,7 +1202,7 @@ class NanoSeconds { NanoSeconds& operator=(NanoSeconds&&) noexcept; NanoSeconds() noexcept; - virtual ~NanoSeconds() noexcept; + ~NanoSeconds() noexcept; bool operator == (const NanoSeconds & /* rhs */) const; bool operator != (const NanoSeconds &rhs) const { @@ -1203,10 +1216,10 @@ class NanoSeconds { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(NanoSeconds &a, NanoSeconds &b); +void swap(NanoSeconds &a, NanoSeconds &b) noexcept; std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj); @@ -1226,7 +1239,7 @@ class TimeUnit { TimeUnit& operator=(TimeUnit&&) 
noexcept; TimeUnit() noexcept; - virtual ~TimeUnit() noexcept; + ~TimeUnit() noexcept; MilliSeconds MILLIS; MicroSeconds MICROS; NanoSeconds NANOS; @@ -1251,10 +1264,10 @@ class TimeUnit { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(TimeUnit &a, TimeUnit &b); +void swap(TimeUnit &a, TimeUnit &b) noexcept; std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); @@ -1273,7 +1286,7 @@ class TimestampType { TimestampType& operator=(TimestampType&&) noexcept; TimestampType() noexcept; - virtual ~TimestampType() noexcept; + ~TimestampType() noexcept; bool isAdjustedToUTC; TimeUnit unit; @@ -1293,10 +1306,10 @@ class TimestampType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(TimestampType &a, TimestampType &b); +void swap(TimestampType &a, TimestampType &b) noexcept; std::ostream& operator<<(std::ostream& out, const TimestampType& obj); @@ -1315,7 +1328,7 @@ class TimeType { TimeType& operator=(TimeType&&) noexcept; TimeType() noexcept; - virtual ~TimeType() noexcept; + ~TimeType() noexcept; bool isAdjustedToUTC; TimeUnit unit; @@ -1335,10 +1348,10 @@ class TimeType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(TimeType &a, TimeType &b); +void swap(TimeType &a, TimeType &b) noexcept; std::ostream& operator<<(std::ostream& out, const TimeType& obj); @@ -1359,7 +1372,7 @@ class IntType { IntType& operator=(IntType&&) noexcept; IntType() noexcept; - virtual ~IntType() noexcept; + ~IntType() noexcept; int8_t bitWidth; bool isSigned; @@ -1379,10 +1392,10 @@ class IntType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(IntType &a, IntType 
&b); +void swap(IntType &a, IntType &b) noexcept; std::ostream& operator<<(std::ostream& out, const IntType& obj); @@ -1401,7 +1414,7 @@ class JsonType { JsonType& operator=(JsonType&&) noexcept; JsonType() noexcept; - virtual ~JsonType() noexcept; + ~JsonType() noexcept; bool operator == (const JsonType & /* rhs */) const; bool operator != (const JsonType &rhs) const { @@ -1415,10 +1428,10 @@ class JsonType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(JsonType &a, JsonType &b); +void swap(JsonType &a, JsonType &b) noexcept; std::ostream& operator<<(std::ostream& out, const JsonType& obj); @@ -1437,7 +1450,7 @@ class BsonType { BsonType& operator=(BsonType&&) noexcept; BsonType() noexcept; - virtual ~BsonType() noexcept; + ~BsonType() noexcept; bool operator == (const BsonType & /* rhs */) const; bool operator != (const BsonType &rhs) const { @@ -1451,10 +1464,10 @@ class BsonType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BsonType &a, BsonType &b); +void swap(BsonType &a, BsonType &b) noexcept; std::ostream& operator<<(std::ostream& out, const BsonType& obj); @@ -1475,7 +1488,7 @@ class VariantType { VariantType& operator=(VariantType&&) noexcept; VariantType() noexcept; - virtual ~VariantType() noexcept; + ~VariantType() noexcept; int8_t specification_version; _VariantType__isset __isset; @@ -1494,10 +1507,10 @@ class VariantType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(VariantType &a, VariantType &b); +void swap(VariantType &a, VariantType &b) noexcept; std::ostream& operator<<(std::ostream& out, const VariantType& obj); @@ -1509,7 +1522,7 @@ typedef struct _GeometryType__isset { /** * Embedded Geometry logical type annotation * - * 
Geospatial features in the Well-Known Binary (WKB) format and edges interpolation + * Geospatial features in the Well-Known Binary (WKB) format and `edges` interpolation * is always linear/planar. * * A custom CRS can be set by the crs field. If unset, it defaults to "OGC:CRS84", @@ -1529,7 +1542,7 @@ class GeometryType { GeometryType& operator=(GeometryType&&) noexcept; GeometryType() noexcept; - virtual ~GeometryType() noexcept; + ~GeometryType() noexcept; std::string crs; _GeometryType__isset __isset; @@ -1548,10 +1561,10 @@ class GeometryType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(GeometryType &a, GeometryType &b); +void swap(GeometryType &a, GeometryType &b) noexcept; std::ostream& operator<<(std::ostream& out, const GeometryType& obj); @@ -1565,13 +1578,13 @@ typedef struct _GeographyType__isset { * Embedded Geography logical type annotation * * Geospatial features in the WKB format with an explicit (non-linear/non-planar) - * edges interpolation algorithm. + * `edges` interpolation algorithm. * * A custom geographic CRS can be set by the crs field, where longitudes are * bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS * defaults to "OGC:CRS84". * - * An optional algorithm can be set to correctly interpret edges interpolation + * An optional algorithm can be set to correctly interpret `edges` interpolation * of the geometries. If unset, the algorithm defaults to SPHERICAL. * * Allowed for physical type: BYTE_ARRAY. 
@@ -1587,7 +1600,7 @@ class GeographyType { GeographyType& operator=(GeographyType&&) noexcept; GeographyType() noexcept; - virtual ~GeographyType() noexcept; + ~GeographyType() noexcept; std::string crs; /** * @@ -1613,10 +1626,10 @@ class GeographyType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(GeographyType &a, GeographyType &b); +void swap(GeographyType &a, GeographyType &b) noexcept; std::ostream& operator<<(std::ostream& out, const GeographyType& obj); @@ -1657,7 +1670,7 @@ class LogicalType { LogicalType& operator=(LogicalType&&) noexcept; LogicalType() noexcept; - virtual ~LogicalType() noexcept; + ~LogicalType() noexcept; StringType STRING; MapType MAP; ListType LIST; @@ -1724,10 +1737,10 @@ class LogicalType { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(LogicalType &a, LogicalType &b); +void swap(LogicalType &a, LogicalType &b) noexcept; std::ostream& operator<<(std::ostream& out, const LogicalType& obj); @@ -1745,7 +1758,7 @@ typedef struct _SchemaElement__isset { } _SchemaElement__isset; /** - * Represents a element inside a schema definition. + * Represents an element inside a schema definition. * - if it is a group (inner node) then type is undefined and num_children is defined * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. @@ -1759,7 +1772,7 @@ class SchemaElement { SchemaElement& operator=(SchemaElement&&) noexcept; SchemaElement() noexcept; - virtual ~SchemaElement() noexcept; + ~SchemaElement() noexcept; /** * Data type for this field. 
Not set if the current element is a non-leaf node * @@ -1855,10 +1868,10 @@ class SchemaElement { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(SchemaElement &a, SchemaElement &b); +void swap(SchemaElement &a, SchemaElement &b) noexcept; std::ostream& operator<<(std::ostream& out, const SchemaElement& obj); @@ -1879,11 +1892,11 @@ class DataPageHeader { DataPageHeader& operator=(DataPageHeader&&) noexcept; DataPageHeader() noexcept; - virtual ~DataPageHeader() noexcept; + ~DataPageHeader() noexcept; /** * Number of values, including NULLs, in this data page. * - * If a OffsetIndex is present, a page must begin at a row + * If an OffsetIndex is present, a page must begin at a row * boundary (repetition_level = 0). Otherwise, pages may begin * within a row (repetition_level > 0). * @@ -1936,10 +1949,10 @@ class DataPageHeader { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(DataPageHeader &a, DataPageHeader &b); +void swap(DataPageHeader &a, DataPageHeader &b) noexcept; std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); @@ -1953,7 +1966,7 @@ class IndexPageHeader { IndexPageHeader& operator=(IndexPageHeader&&) noexcept; IndexPageHeader() noexcept; - virtual ~IndexPageHeader() noexcept; + ~IndexPageHeader() noexcept; bool operator == (const IndexPageHeader & /* rhs */) const; bool operator != (const IndexPageHeader &rhs) const { @@ -1967,10 +1980,10 @@ class IndexPageHeader { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(IndexPageHeader &a, IndexPageHeader &b); +void swap(IndexPageHeader &a, IndexPageHeader &b) noexcept; std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj); @@ -1994,7 +2007,7 @@ class 
DictionaryPageHeader { DictionaryPageHeader& operator=(DictionaryPageHeader&&) noexcept; DictionaryPageHeader() noexcept; - virtual ~DictionaryPageHeader() noexcept; + ~DictionaryPageHeader() noexcept; /** * Number of values in the dictionary * */ @@ -2030,10 +2043,10 @@ class DictionaryPageHeader { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); +void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) noexcept; std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj); @@ -2044,10 +2057,15 @@ typedef struct _DataPageHeaderV2__isset { } _DataPageHeaderV2__isset; /** - * New page format allowing reading levels without decompressing the data + * Alternate page format allowing reading levels without decompressing the data * Repetition and definition levels are uncompressed * The remaining section containing the data is compressed if is_compressed is true * + * Implementation note - this header is not necessarily a strict improvement over + * `DataPageHeader` (in particular the original header might provide better compression + * in some scenarios). Page indexes require pages to start and end at row boundaries, + * regardless of which page header is used. + * */ class DataPageHeaderV2 { public: @@ -2058,7 +2076,7 @@ class DataPageHeaderV2 { DataPageHeaderV2& operator=(DataPageHeaderV2&&) noexcept; DataPageHeaderV2() noexcept; - virtual ~DataPageHeaderV2() noexcept; + ~DataPageHeaderV2() noexcept; /** * Number of values, including NULLs, in this data page. * */ @@ -2092,7 +2110,7 @@ class DataPageHeaderV2 { /** * Whether the values are compressed. 
* Which means the section of the page between - * definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) + * definition_levels_byte_length + repetition_levels_byte_length and compressed_page_size (included) * is compressed with the compression_codec. * If missing it is considered compressed */ @@ -2132,10 +2150,10 @@ class DataPageHeaderV2 { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); +void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) noexcept; std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); @@ -2152,7 +2170,7 @@ class SplitBlockAlgorithm { SplitBlockAlgorithm& operator=(SplitBlockAlgorithm&&) noexcept; SplitBlockAlgorithm() noexcept; - virtual ~SplitBlockAlgorithm() noexcept; + ~SplitBlockAlgorithm() noexcept; bool operator == (const SplitBlockAlgorithm & /* rhs */) const; bool operator != (const SplitBlockAlgorithm &rhs) const { @@ -2166,10 +2184,10 @@ class SplitBlockAlgorithm { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b); +void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) noexcept; std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj); @@ -2190,7 +2208,7 @@ class BloomFilterAlgorithm { BloomFilterAlgorithm& operator=(BloomFilterAlgorithm&&) noexcept; BloomFilterAlgorithm() noexcept; - virtual ~BloomFilterAlgorithm() noexcept; + ~BloomFilterAlgorithm() noexcept; /** * Block-based Bloom filter. 
* */ @@ -2212,10 +2230,10 @@ class BloomFilterAlgorithm { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b); +void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) noexcept; std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); @@ -2234,7 +2252,7 @@ class XxHash { XxHash& operator=(XxHash&&) noexcept; XxHash() noexcept; - virtual ~XxHash() noexcept; + ~XxHash() noexcept; bool operator == (const XxHash & /* rhs */) const; bool operator != (const XxHash &rhs) const { @@ -2248,10 +2266,10 @@ class XxHash { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(XxHash &a, XxHash &b); +void swap(XxHash &a, XxHash &b) noexcept; std::ostream& operator<<(std::ostream& out, const XxHash& obj); @@ -2274,7 +2292,7 @@ class BloomFilterHash { BloomFilterHash& operator=(BloomFilterHash&&) noexcept; BloomFilterHash() noexcept; - virtual ~BloomFilterHash() noexcept; + ~BloomFilterHash() noexcept; /** * xxHash Strategy. 
* */ @@ -2296,10 +2314,10 @@ class BloomFilterHash { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BloomFilterHash &a, BloomFilterHash &b); +void swap(BloomFilterHash &a, BloomFilterHash &b) noexcept; std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); @@ -2317,7 +2335,7 @@ class Uncompressed { Uncompressed& operator=(Uncompressed&&) noexcept; Uncompressed() noexcept; - virtual ~Uncompressed() noexcept; + ~Uncompressed() noexcept; bool operator == (const Uncompressed & /* rhs */) const; bool operator != (const Uncompressed &rhs) const { @@ -2331,10 +2349,10 @@ class Uncompressed { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(Uncompressed &a, Uncompressed &b); +void swap(Uncompressed &a, Uncompressed &b) noexcept; std::ostream& operator<<(std::ostream& out, const Uncompressed& obj); @@ -2352,7 +2370,7 @@ class BloomFilterCompression { BloomFilterCompression& operator=(BloomFilterCompression&&) noexcept; BloomFilterCompression() noexcept; - virtual ~BloomFilterCompression() noexcept; + ~BloomFilterCompression() noexcept; Uncompressed UNCOMPRESSED; _BloomFilterCompression__isset __isset; @@ -2371,10 +2389,10 @@ class BloomFilterCompression { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BloomFilterCompression &a, BloomFilterCompression &b); +void swap(BloomFilterCompression &a, BloomFilterCompression &b) noexcept; std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); @@ -2393,7 +2411,7 @@ class BloomFilterHeader { BloomFilterHeader& operator=(BloomFilterHeader&&) noexcept; BloomFilterHeader() noexcept; - virtual ~BloomFilterHeader() noexcept; + ~BloomFilterHeader() noexcept; /** * The size of bitset in 
bytes * */ @@ -2431,10 +2449,10 @@ class BloomFilterHeader { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(BloomFilterHeader &a, BloomFilterHeader &b); +void swap(BloomFilterHeader &a, BloomFilterHeader &b) noexcept; std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj); @@ -2456,7 +2474,7 @@ class PageHeader { PageHeader& operator=(PageHeader&&) noexcept; PageHeader() noexcept; - virtual ~PageHeader() noexcept; + ~PageHeader() noexcept; /** * the type of the page: indicates which of the *_header fields is set * * @@ -2472,10 +2490,10 @@ class PageHeader { */ int32_t compressed_page_size; /** - * The 32-bit CRC checksum for the page, to be be calculated as follows: + * The 32-bit CRC checksum for the page, to be calculated as follows: * * - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, - * the same as in e.g. GZip). + * the same as in e.g. GZIP). * - All page types can have a CRC (v1 and v2 data pages, dictionary pages, * etc.). 
* - The CRC is computed on the serialization binary representation of the page @@ -2525,10 +2543,10 @@ class PageHeader { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(PageHeader &a, PageHeader &b); +void swap(PageHeader &a, PageHeader &b) noexcept; std::ostream& operator<<(std::ostream& out, const PageHeader& obj); @@ -2549,7 +2567,7 @@ class KeyValue { KeyValue& operator=(KeyValue&&) noexcept; KeyValue() noexcept; - virtual ~KeyValue() noexcept; + ~KeyValue() noexcept; std::string key; std::string value; @@ -2571,10 +2589,10 @@ class KeyValue { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(KeyValue &a, KeyValue &b); +void swap(KeyValue &a, KeyValue &b) noexcept; std::ostream& operator<<(std::ostream& out, const KeyValue& obj); @@ -2591,7 +2609,7 @@ class SortingColumn { SortingColumn& operator=(SortingColumn&&) noexcept; SortingColumn() noexcept; - virtual ~SortingColumn() noexcept; + ~SortingColumn() noexcept; /** * The ordinal position of the column (in this row group) * */ @@ -2624,10 +2642,10 @@ class SortingColumn { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(SortingColumn &a, SortingColumn &b); +void swap(SortingColumn &a, SortingColumn &b) noexcept; std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); @@ -2644,7 +2662,7 @@ class PageEncodingStats { PageEncodingStats& operator=(PageEncodingStats&&) noexcept; PageEncodingStats() noexcept; - virtual ~PageEncodingStats() noexcept; + ~PageEncodingStats() noexcept; /** * the page type (data/dic/...) 
* * @@ -2680,10 +2698,10 @@ class PageEncodingStats { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(PageEncodingStats &a, PageEncodingStats &b); +void swap(PageEncodingStats &a, PageEncodingStats &b) noexcept; std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); @@ -2712,7 +2730,7 @@ class ColumnMetaData { ColumnMetaData& operator=(ColumnMetaData&&) noexcept; ColumnMetaData() noexcept; - virtual ~ColumnMetaData() noexcept; + ~ColumnMetaData() noexcept; /** * Type of this column * * @@ -2845,10 +2863,10 @@ class ColumnMetaData { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ColumnMetaData &a, ColumnMetaData &b); +void swap(ColumnMetaData &a, ColumnMetaData &b) noexcept; std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); @@ -2862,7 +2880,7 @@ class EncryptionWithFooterKey { EncryptionWithFooterKey& operator=(EncryptionWithFooterKey&&) noexcept; EncryptionWithFooterKey() noexcept; - virtual ~EncryptionWithFooterKey() noexcept; + ~EncryptionWithFooterKey() noexcept; bool operator == (const EncryptionWithFooterKey & /* rhs */) const; bool operator != (const EncryptionWithFooterKey &rhs) const { @@ -2876,10 +2894,10 @@ class EncryptionWithFooterKey { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b); +void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) noexcept; std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj); @@ -2897,7 +2915,7 @@ class EncryptionWithColumnKey { EncryptionWithColumnKey& operator=(EncryptionWithColumnKey&&) noexcept; EncryptionWithColumnKey() noexcept; - virtual ~EncryptionWithColumnKey() noexcept; + 
~EncryptionWithColumnKey() noexcept; /** * Column path in schema * */ @@ -2925,10 +2943,10 @@ class EncryptionWithColumnKey { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b); +void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) noexcept; std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj); @@ -2947,7 +2965,7 @@ class ColumnCryptoMetaData { ColumnCryptoMetaData& operator=(ColumnCryptoMetaData&&) noexcept; ColumnCryptoMetaData() noexcept; - virtual ~ColumnCryptoMetaData() noexcept; + ~ColumnCryptoMetaData() noexcept; EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY; @@ -2969,10 +2987,10 @@ class ColumnCryptoMetaData { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b); +void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) noexcept; std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj); @@ -2997,15 +3015,30 @@ class ColumnChunk { ColumnChunk& operator=(ColumnChunk&&) noexcept; ColumnChunk() noexcept; - virtual ~ColumnChunk() noexcept; + ~ColumnChunk() noexcept; /** * File where column data is stored. If not set, assumed to be same file as * metadata. This path is relative to the current file. * + * As of December 2025, the only known use-case for this field is writing summary + * parquet files (i.e. "_metadata" files). These files consolidate footers from + * multiple parquet files to allow for efficient reading of footers to avoid file + * listing costs and prune out files that do not need to be read based on statistics. + * + * These files do not appear to have ever been formally specified in the specification. 
+ * They are potentially problematic from a correctness perspective [1]. + * + * [1] https://lists.apache.org/thread/ootf2kmyg3p01b1bvplpvp4ftd1bt72d + * + * There is no other known usage of this field. Specifically, there are no known + * reference implementations that will read externally stored column data if this field is populated + * within a standard parquet file. Making use of the field for this purpose is + * not considered part of the Parquet specification. + * */ std::string file_path; /** - * Deprecated: Byte offset in file_path to the ColumnMetaData + * DEPRECATED: Byte offset in file_path to the ColumnMetaData * * Past use of this field has been inconsistent, with some implementations * using it to point to the ColumnMetaData and some using it to point to @@ -3080,10 +3113,10 @@ class ColumnChunk { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ColumnChunk &a, ColumnChunk &b); +void swap(ColumnChunk &a, ColumnChunk &b) noexcept; std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj); @@ -3104,7 +3137,7 @@ class RowGroup { RowGroup& operator=(RowGroup&&) noexcept; RowGroup() noexcept; - virtual ~RowGroup() noexcept; + ~RowGroup() noexcept; /** * Metadata for each column chunk in this row group. * This list must have the same order as the SchemaElement list in FileMetaData.
@@ -3167,10 +3200,10 @@ class RowGroup { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(RowGroup &a, RowGroup &b); +void swap(RowGroup &a, RowGroup &b) noexcept; std::ostream& operator<<(std::ostream& out, const RowGroup& obj); @@ -3187,7 +3220,7 @@ class TypeDefinedOrder { TypeDefinedOrder& operator=(TypeDefinedOrder&&) noexcept; TypeDefinedOrder() noexcept; - virtual ~TypeDefinedOrder() noexcept; + ~TypeDefinedOrder() noexcept; bool operator == (const TypeDefinedOrder & /* rhs */) const; bool operator != (const TypeDefinedOrder &rhs) const { @@ -3201,16 +3234,51 @@ class TypeDefinedOrder { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b); +void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) noexcept; std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj); + +/** + * Empty struct to signal IEEE 754 total order for floating point types + */ +class IEEE754TotalOrder { + public: + + IEEE754TotalOrder(const IEEE754TotalOrder&) noexcept; + IEEE754TotalOrder(IEEE754TotalOrder&&) noexcept; + IEEE754TotalOrder& operator=(const IEEE754TotalOrder&) noexcept; + IEEE754TotalOrder& operator=(IEEE754TotalOrder&&) noexcept; + IEEE754TotalOrder() noexcept; + + ~IEEE754TotalOrder() noexcept; + + bool operator == (const IEEE754TotalOrder & /* rhs */) const; + bool operator != (const IEEE754TotalOrder &rhs) const { + return !(*this == rhs); + } + + bool operator < (const IEEE754TotalOrder & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + void printTo(std::ostream& out) const; +}; + +void swap(IEEE754TotalOrder &a, IEEE754TotalOrder &b) noexcept; + +std::ostream& operator<<(std::ostream& out, const IEEE754TotalOrder& obj); + typedef struct 
_ColumnOrder__isset { - _ColumnOrder__isset() : TYPE_ORDER(false) {} + _ColumnOrder__isset() : TYPE_ORDER(false), IEEE_754_TOTAL_ORDER(false) {} bool TYPE_ORDER :1; + bool IEEE_754_TOTAL_ORDER :1; } _ColumnOrder__isset; /** @@ -3221,6 +3289,7 @@ typedef struct _ColumnOrder__isset { * Possible values are: * * TypeDefinedOrder - the column uses the order defined by its logical or * physical type (if there is no logical type). + * * IEEE754TotalOrder - the floating point column uses IEEE 754 total order. * * If the reader does not support the value of this union, min and max stats * for this column should be ignored. @@ -3234,7 +3303,7 @@ class ColumnOrder { ColumnOrder& operator=(ColumnOrder&&) noexcept; ColumnOrder() noexcept; - virtual ~ColumnOrder() noexcept; + ~ColumnOrder() noexcept; /** * The sort orders for logical types are: * UTF8 - unsigned byte-wise comparison @@ -3248,6 +3317,7 @@ class ColumnOrder { * UINT64 - unsigned comparison * DECIMAL - signed comparison of the represented value * DATE - signed comparison + * FLOAT16 - signed comparison of the represented value (*) * TIME_MILLIS - signed comparison * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison @@ -3266,34 +3336,73 @@ class ColumnOrder { * BOOLEAN - false, true * INT32 - signed comparison * INT64 - signed comparison - * INT96 (only used for legacy timestamps) - undefined + * INT96 (only used for legacy timestamps) - undefined(+) * FLOAT - signed comparison of the represented value (*) * DOUBLE - signed comparison of the represented value (*) * BYTE_ARRAY - unsigned byte-wise comparison * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison * - * (*) Because the sorting order is not specified properly for floating - * point values (relations vs. total ordering) the following + * (+) While the INT96 type has been deprecated, at the time of writing it is + * still used in many legacy systems. 
If a Parquet implementation chooses + * to write statistics for INT96 columns, it is recommended to order them + * according to the legacy rules: + * - compare the last 4 bytes (days) as a little-endian 32-bit signed integer + * - if equal last 4 bytes, compare the first 8 bytes as a little-endian + * 64-bit signed integer (nanos) + * See https://github.com/apache/parquet-format/issues/502 for more details + * + * (*) Because TYPE_ORDER is ambiguous for floating point types due to + * underspecified handling of NaN and -0/+0, it is recommended that writers + * use IEEE_754_TOTAL_ORDER for these types. + * + * If TYPE_ORDER is used for floating point types, then the following * compatibility rules should be applied when reading statistics: * - If the min is a NaN, it should be ignored. * - If the max is a NaN, it should be ignored. + * - If the nan_count field is set, a reader can compute + * nan_count + null_count == num_values to deduce whether all non-null + * values are NaN. * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. + * If the nan_count field is set, it can be used to check whether + * NaNs are present. * - * When writing statistics the following rules should be followed: - * - NaNs should not be written to min or max statistics fields. + * When writing page or column chunk statistics for columns with + * TYPE_ORDER order, the following rules must be followed: + * - The nan_count field must be set for floating point types, even if + * it is zero. + * - If the nan_count field is set, min and max statistics fields, when + * present, must not contain NaN values and must be computed from + * non-NaN values only. This signals to readers that the min and max + * statistics are reliable for non-NaN values. + * - If all non-null values are NaN, min and max statistics must not be + * written. 
* - If the computed max value is zero (whether negative or positive), * `+0.0` should be written into the max statistics field. * - If the computed min value is zero (whether negative or positive), * `-0.0` should be written into the min statistics field. + * + * When writing column indexes for columns with TYPE_ORDER order, the + * following rules must be followed: + * - NaNs must not be written to min_values or max_values. + * - If all non-null values of a page are NaN, a column index must not + * be written for this column chunk because min_values and max_values + * are required. + * - If the computed max value is zero (whether negative or positive), + * `+0.0` should be written into the corresponding max_values entry. + * - If the computed min value is zero (whether negative or positive), + * `-0.0` should be written into the corresponding min_values entry. */ TypeDefinedOrder TYPE_ORDER; + IEEE754TotalOrder IEEE_754_TOTAL_ORDER; _ColumnOrder__isset __isset; void __set_TYPE_ORDER(const TypeDefinedOrder& val); + void __set_IEEE_754_TOTAL_ORDER(const IEEE754TotalOrder& val); + bool operator == (const ColumnOrder & rhs) const; bool operator != (const ColumnOrder &rhs) const { return !(*this == rhs); @@ -3306,10 +3415,10 @@ class ColumnOrder { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ColumnOrder &a, ColumnOrder &b); +void swap(ColumnOrder &a, ColumnOrder &b) noexcept; std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); @@ -3323,14 +3432,14 @@ class PageLocation { PageLocation& operator=(PageLocation&&) noexcept; PageLocation() noexcept; - virtual ~PageLocation() noexcept; + ~PageLocation() noexcept; /** * Offset of the page in the file * */ int64_t offset; /** - * Size of the page, including header. Sum of compressed_page_size and header - * length + * Size of the page, including header. 
Equal to the sum of the page's + * PageHeader.compressed_page_size and the size of the serialized PageHeader. */ int32_t compressed_page_size; /** @@ -3358,10 +3467,10 @@ class PageLocation { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(PageLocation &a, PageLocation &b); +void swap(PageLocation &a, PageLocation &b) noexcept; std::ostream& operator<<(std::ostream& out, const PageLocation& obj); @@ -3386,7 +3495,7 @@ class OffsetIndex { OffsetIndex& operator=(OffsetIndex&&) noexcept; OffsetIndex() noexcept; - virtual ~OffsetIndex() noexcept; + ~OffsetIndex() noexcept; /** * PageLocations, ordered by increasing PageLocation.offset. It is required * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. @@ -3395,7 +3504,7 @@ class OffsetIndex { /** * Unencoded/uncompressed size for BYTE_ARRAY types. * - * See documention for unencoded_byte_array_data_bytes in SizeStatistics for + * See documentation for unencoded_byte_array_data_bytes in SizeStatistics for * more details on this field. 
*/ std::vector unencoded_byte_array_data_bytes; @@ -3418,18 +3527,19 @@ class OffsetIndex { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(OffsetIndex &a, OffsetIndex &b); +void swap(OffsetIndex &a, OffsetIndex &b) noexcept; std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false) {} + _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false), nan_counts(false) {} bool null_counts :1; bool repetition_level_histograms :1; bool definition_level_histograms :1; + bool nan_counts :1; } _ColumnIndex__isset; /** @@ -3451,7 +3561,7 @@ class ColumnIndex { ColumnIndex& operator=(ColumnIndex&&) noexcept; ColumnIndex() noexcept; - virtual ~ColumnIndex() noexcept; + ~ColumnIndex() noexcept; /** * A list of Boolean values to determine the validity of the corresponding * min and max values. If true, a page contains only null values, and writers @@ -3464,11 +3574,23 @@ class ColumnIndex { * Two lists containing lower and upper bounds for the values of each page * determined by the ColumnOrder of the column. These may be the actual * minimum and maximum values found on a page, but can also be (more compact) - * values that do not exist on a page. For example, instead of storing ""Blart + * values that do not exist on a page. For example, instead of storing "Blart * Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". * Such more compact values must still be valid values within the column's * logical type. Readers must make sure that list entries are populated before * using them by inspecting null_pages. + * + * For columns of physical type FLOAT or DOUBLE, or logical type FLOAT16, + * NaN values are not to be included in these bounds. 
If all non-null values + * of a page are NaN, then a writer must do the following: + * - If the order of this column is TYPE_ORDER, then a column index must + * not be written for this column chunk. While this is unfortunate for + * performance, it is necessary to avoid conflict with legacy files that + * still included NaN in min_values and max_values even if the page had + * non-NaN values. To mitigate this, IEEE754_TOTAL_ORDER is recommended. + * - If the order of this column is IEEE754_TOTAL_ORDER, then min_values[i] + * and max_values[i] of that page must be set to the smallest and largest + * NaN values as defined by IEEE 754 total order. */ std::vector min_values; std::vector max_values; @@ -3511,6 +3633,13 @@ class ColumnIndex { * */ std::vector definition_level_histograms; + /** + * A list containing the number of NaN values for each page. Only present + * for columns of physical type FLOAT or DOUBLE, or logical type FLOAT16. + * If this field is not present, readers MUST assume that there might be + * NaN values in any page. 
+ */ + std::vector nan_counts; _ColumnIndex__isset __isset; @@ -3528,6 +3657,8 @@ class ColumnIndex { void __set_definition_level_histograms(const std::vector & val); + void __set_nan_counts(const std::vector & val); + bool operator == (const ColumnIndex & rhs) const; bool operator != (const ColumnIndex &rhs) const { return !(*this == rhs); @@ -3540,10 +3671,10 @@ class ColumnIndex { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(ColumnIndex &a, ColumnIndex &b); +void swap(ColumnIndex &a, ColumnIndex &b) noexcept; std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj); @@ -3563,7 +3694,7 @@ class AesGcmV1 { AesGcmV1& operator=(AesGcmV1&&) noexcept; AesGcmV1() noexcept; - virtual ~AesGcmV1() noexcept; + ~AesGcmV1() noexcept; /** * AAD prefix * */ @@ -3598,10 +3729,10 @@ class AesGcmV1 { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(AesGcmV1 &a, AesGcmV1 &b); +void swap(AesGcmV1 &a, AesGcmV1 &b) noexcept; std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj); @@ -3621,7 +3752,7 @@ class AesGcmCtrV1 { AesGcmCtrV1& operator=(AesGcmCtrV1&&) noexcept; AesGcmCtrV1() noexcept; - virtual ~AesGcmCtrV1() noexcept; + ~AesGcmCtrV1() noexcept; /** * AAD prefix * */ @@ -3656,10 +3787,10 @@ class AesGcmCtrV1 { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b); +void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) noexcept; std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj); @@ -3678,7 +3809,7 @@ class EncryptionAlgorithm { EncryptionAlgorithm& operator=(EncryptionAlgorithm&&) noexcept; EncryptionAlgorithm() noexcept; - virtual ~EncryptionAlgorithm() noexcept; + ~EncryptionAlgorithm() noexcept; AesGcmV1 
AES_GCM_V1; AesGcmCtrV1 AES_GCM_CTR_V1; @@ -3700,10 +3831,10 @@ class EncryptionAlgorithm { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b); +void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) noexcept; std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj); @@ -3728,9 +3859,15 @@ class FileMetaData { FileMetaData& operator=(FileMetaData&&) noexcept; FileMetaData() noexcept; - virtual ~FileMetaData() noexcept; + ~FileMetaData() noexcept; /** - * Version of this file * + * Version of this file + * + * As of December 2025, there is no agreed upon consensus of what constitutes + * version 2 of the file. For maximum compatibility with readers, writers should + * always populate "1" for version. For maximum compatibility with writers, + * readers should accept "1" and "2" interchangeably. All other versions are + * reserved for potential future use-cases. */ int32_t version; /** @@ -3765,7 +3902,7 @@ class FileMetaData { * Sort order used for the min_value and max_value fields in the Statistics * objects and the min_values and max_values fields in the ColumnIndex * objects of each column in this file. Sort orders are listed in the order - * matching the columns in the schema. The indexes are not necessary the same + * matching the columns in the schema. The indexes are not necessarily the same * though, because only leaf nodes of the schema are represented in the list * of sort orders. 
* @@ -3822,10 +3959,10 @@ class FileMetaData { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(FileMetaData &a, FileMetaData &b); +void swap(FileMetaData &a, FileMetaData &b) noexcept; std::ostream& operator<<(std::ostream& out, const FileMetaData& obj); @@ -3846,7 +3983,7 @@ class FileCryptoMetaData { FileCryptoMetaData& operator=(FileCryptoMetaData&&) noexcept; FileCryptoMetaData() noexcept; - virtual ~FileCryptoMetaData() noexcept; + ~FileCryptoMetaData() noexcept; /** * Encryption algorithm. This field is only used for files * with encrypted footer. Files with plaintext footer store algorithm id @@ -3877,10 +4014,10 @@ class FileCryptoMetaData { template uint32_t write(Protocol_* oprot) const; - virtual void printTo(std::ostream& out) const; + void printTo(std::ostream& out) const; }; -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); +void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) noexcept; std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index 78e3e2549394..01559f897372 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.21.0) + * Autogenerated by Thrift Compiler (0.23.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -469,6 +469,14 @@ uint32_t Statistics::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->nan_count); + this->__isset.nan_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -527,6 +535,11 @@ uint32_t Statistics::write(Protocol_* oprot) const { xfer += oprot->writeBool(this->is_min_value_exact); xfer += 
oprot->writeFieldEnd(); } + if (this->__isset.nan_count) { + xfer += oprot->writeFieldBegin("nan_count", ::apache::thrift::protocol::T_I64, 9); + xfer += oprot->writeI64(this->nan_count); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -4262,6 +4275,46 @@ uint32_t TypeDefinedOrder::write(Protocol_* oprot) const { return xfer; } +template +uint32_t IEEE754TotalOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t IEEE754TotalOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IEEE754TotalOrder"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + template uint32_t ColumnOrder::read(Protocol_* iprot) { @@ -4292,6 +4345,14 @@ uint32_t ColumnOrder::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->IEEE_754_TOTAL_ORDER.read(iprot); + this->__isset.IEEE_754_TOTAL_ORDER = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -4315,6 +4376,11 @@ uint32_t ColumnOrder::write(Protocol_* oprot) const { xfer += this->TYPE_ORDER.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.IEEE_754_TOTAL_ORDER) { + xfer += oprot->writeFieldBegin("IEEE_754_TOTAL_ORDER", 
::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->IEEE_754_TOTAL_ORDER.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -4437,14 +4503,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size279; - ::apache::thrift::protocol::TType _etype282; - xfer += iprot->readListBegin(_etype282, _size279); - this->page_locations.resize(_size279); - uint32_t _i283; - for (_i283 = 0; _i283 < _size279; ++_i283) + uint32_t _size283; + ::apache::thrift::protocol::TType _etype286; + xfer += iprot->readListBegin(_etype286, _size283); + this->page_locations.resize(_size283); + uint32_t _i287; + for (_i287 = 0; _i287 < _size283; ++_i287) { - xfer += this->page_locations[_i283].read(iprot); + xfer += this->page_locations[_i287].read(iprot); } xfer += iprot->readListEnd(); } @@ -4457,14 +4523,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size284; - ::apache::thrift::protocol::TType _etype287; - xfer += iprot->readListBegin(_etype287, _size284); - this->unencoded_byte_array_data_bytes.resize(_size284); - uint32_t _i288; - for (_i288 = 0; _i288 < _size284; ++_i288) + uint32_t _size288; + ::apache::thrift::protocol::TType _etype291; + xfer += iprot->readListBegin(_etype291, _size288); + this->unencoded_byte_array_data_bytes.resize(_size288); + uint32_t _i292; + for (_i292 = 0; _i292 < _size288; ++_i292) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i288]); + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i292]); } xfer += iprot->readListEnd(); } @@ -4496,10 +4562,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter289; - for (_iter289 = this->page_locations.begin(); _iter289 != this->page_locations.end(); ++_iter289) + std::vector ::const_iterator _iter293; + for (_iter293 = this->page_locations.begin(); _iter293 != this->page_locations.end(); ++_iter293) { - xfer += (*_iter289).write(oprot); + xfer += (*_iter293).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4509,10 +4575,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter290; - for (_iter290 = this->unencoded_byte_array_data_bytes.begin(); _iter290 != this->unencoded_byte_array_data_bytes.end(); ++_iter290) + std::vector ::const_iterator _iter294; + for (_iter294 = this->unencoded_byte_array_data_bytes.begin(); _iter294 != this->unencoded_byte_array_data_bytes.end(); ++_iter294) { - xfer += oprot->writeI64((*_iter290)); + xfer += oprot->writeI64((*_iter294)); } xfer += oprot->writeListEnd(); } @@ -4553,14 +4619,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size295; - ::apache::thrift::protocol::TType _etype298; - xfer += iprot->readListBegin(_etype298, _size295); - this->null_pages.resize(_size295); - uint32_t _i299; - for (_i299 = 0; _i299 < _size295; ++_i299) + uint32_t _size299; + ::apache::thrift::protocol::TType _etype302; + xfer += iprot->readListBegin(_etype302, _size299); + this->null_pages.resize(_size299); + uint32_t _i303; + for (_i303 = 0; _i303 < _size299; ++_i303) { - xfer += iprot->readBool(this->null_pages[_i299]); + xfer += iprot->readBool(this->null_pages[_i303]); } xfer += 
iprot->readListEnd(); } @@ -4573,14 +4639,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size300; - ::apache::thrift::protocol::TType _etype303; - xfer += iprot->readListBegin(_etype303, _size300); - this->min_values.resize(_size300); - uint32_t _i304; - for (_i304 = 0; _i304 < _size300; ++_i304) + uint32_t _size304; + ::apache::thrift::protocol::TType _etype307; + xfer += iprot->readListBegin(_etype307, _size304); + this->min_values.resize(_size304); + uint32_t _i308; + for (_i308 = 0; _i308 < _size304; ++_i308) { - xfer += iprot->readBinary(this->min_values[_i304]); + xfer += iprot->readBinary(this->min_values[_i308]); } xfer += iprot->readListEnd(); } @@ -4593,14 +4659,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size305; - ::apache::thrift::protocol::TType _etype308; - xfer += iprot->readListBegin(_etype308, _size305); - this->max_values.resize(_size305); - uint32_t _i309; - for (_i309 = 0; _i309 < _size305; ++_i309) + uint32_t _size309; + ::apache::thrift::protocol::TType _etype312; + xfer += iprot->readListBegin(_etype312, _size309); + this->max_values.resize(_size309); + uint32_t _i313; + for (_i313 = 0; _i313 < _size309; ++_i313) { - xfer += iprot->readBinary(this->max_values[_i309]); + xfer += iprot->readBinary(this->max_values[_i313]); } xfer += iprot->readListEnd(); } @@ -4611,9 +4677,9 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast310; - xfer += iprot->readI32(ecast310); - this->boundary_order = static_cast(ecast310); + int32_t ecast314; + xfer += iprot->readI32(ecast314); + this->boundary_order = static_cast(ecast314); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -4623,14 +4689,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == 
::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size311; - ::apache::thrift::protocol::TType _etype314; - xfer += iprot->readListBegin(_etype314, _size311); - this->null_counts.resize(_size311); - uint32_t _i315; - for (_i315 = 0; _i315 < _size311; ++_i315) + uint32_t _size315; + ::apache::thrift::protocol::TType _etype318; + xfer += iprot->readListBegin(_etype318, _size315); + this->null_counts.resize(_size315); + uint32_t _i319; + for (_i319 = 0; _i319 < _size315; ++_i319) { - xfer += iprot->readI64(this->null_counts[_i315]); + xfer += iprot->readI64(this->null_counts[_i319]); } xfer += iprot->readListEnd(); } @@ -4643,14 +4709,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->repetition_level_histograms.clear(); - uint32_t _size316; - ::apache::thrift::protocol::TType _etype319; - xfer += iprot->readListBegin(_etype319, _size316); - this->repetition_level_histograms.resize(_size316); - uint32_t _i320; - for (_i320 = 0; _i320 < _size316; ++_i320) + uint32_t _size320; + ::apache::thrift::protocol::TType _etype323; + xfer += iprot->readListBegin(_etype323, _size320); + this->repetition_level_histograms.resize(_size320); + uint32_t _i324; + for (_i324 = 0; _i324 < _size320; ++_i324) { - xfer += iprot->readI64(this->repetition_level_histograms[_i320]); + xfer += iprot->readI64(this->repetition_level_histograms[_i324]); } xfer += iprot->readListEnd(); } @@ -4663,14 +4729,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->definition_level_histograms.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->definition_level_histograms.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) + uint32_t _size325; + ::apache::thrift::protocol::TType _etype328; + xfer += iprot->readListBegin(_etype328, _size325); + 
this->definition_level_histograms.resize(_size325); + uint32_t _i329; + for (_i329 = 0; _i329 < _size325; ++_i329) { - xfer += iprot->readI64(this->definition_level_histograms[_i325]); + xfer += iprot->readI64(this->definition_level_histograms[_i329]); } xfer += iprot->readListEnd(); } @@ -4679,6 +4745,26 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 8: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->nan_counts.clear(); + uint32_t _size330; + ::apache::thrift::protocol::TType _etype333; + xfer += iprot->readListBegin(_etype333, _size330); + this->nan_counts.resize(_size330); + uint32_t _i334; + for (_i334 = 0; _i334 < _size330; ++_i334) + { + xfer += iprot->readI64(this->nan_counts[_i334]); + } + xfer += iprot->readListEnd(); + } + this->__isset.nan_counts = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -4708,10 +4794,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter326; - for (_iter326 = this->null_pages.begin(); _iter326 != this->null_pages.end(); ++_iter326) + std::vector ::const_iterator _iter335; + for (_iter335 = this->null_pages.begin(); _iter335 != this->null_pages.end(); ++_iter335) { - xfer += oprot->writeBool((*_iter326)); + xfer += oprot->writeBool((*_iter335)); } xfer += oprot->writeListEnd(); } @@ -4720,10 +4806,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter327; - for (_iter327 = this->min_values.begin(); _iter327 != this->min_values.end(); ++_iter327) + 
std::vector ::const_iterator _iter336; + for (_iter336 = this->min_values.begin(); _iter336 != this->min_values.end(); ++_iter336) { - xfer += oprot->writeBinary((*_iter327)); + xfer += oprot->writeBinary((*_iter336)); } xfer += oprot->writeListEnd(); } @@ -4732,10 +4818,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter328; - for (_iter328 = this->max_values.begin(); _iter328 != this->max_values.end(); ++_iter328) + std::vector ::const_iterator _iter337; + for (_iter337 = this->max_values.begin(); _iter337 != this->max_values.end(); ++_iter337) { - xfer += oprot->writeBinary((*_iter328)); + xfer += oprot->writeBinary((*_iter337)); } xfer += oprot->writeListEnd(); } @@ -4749,10 +4835,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter329; - for (_iter329 = this->null_counts.begin(); _iter329 != this->null_counts.end(); ++_iter329) + std::vector ::const_iterator _iter338; + for (_iter338 = this->null_counts.begin(); _iter338 != this->null_counts.end(); ++_iter338) { - xfer += oprot->writeI64((*_iter329)); + xfer += oprot->writeI64((*_iter338)); } xfer += oprot->writeListEnd(); } @@ -4762,10 +4848,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter330; - for (_iter330 = this->repetition_level_histograms.begin(); _iter330 != 
this->repetition_level_histograms.end(); ++_iter330) + std::vector ::const_iterator _iter339; + for (_iter339 = this->repetition_level_histograms.begin(); _iter339 != this->repetition_level_histograms.end(); ++_iter339) { - xfer += oprot->writeI64((*_iter330)); + xfer += oprot->writeI64((*_iter339)); } xfer += oprot->writeListEnd(); } @@ -4775,10 +4861,23 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter331; - for (_iter331 = this->definition_level_histograms.begin(); _iter331 != this->definition_level_histograms.end(); ++_iter331) + std::vector ::const_iterator _iter340; + for (_iter340 = this->definition_level_histograms.begin(); _iter340 != this->definition_level_histograms.end(); ++_iter340) { - xfer += oprot->writeI64((*_iter331)); + xfer += oprot->writeI64((*_iter340)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.nan_counts) { + xfer += oprot->writeFieldBegin("nan_counts", ::apache::thrift::protocol::T_LIST, 8); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->nan_counts.size())); + std::vector ::const_iterator _iter341; + for (_iter341 = this->nan_counts.begin(); _iter341 != this->nan_counts.end(); ++_iter341) + { + xfer += oprot->writeI64((*_iter341)); } xfer += oprot->writeListEnd(); } @@ -5066,14 +5165,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size348; - ::apache::thrift::protocol::TType _etype351; - xfer += iprot->readListBegin(_etype351, _size348); - this->schema.resize(_size348); - uint32_t _i352; - for (_i352 = 0; _i352 < _size348; ++_i352) + uint32_t _size358; + ::apache::thrift::protocol::TType 
_etype361; + xfer += iprot->readListBegin(_etype361, _size358); + this->schema.resize(_size358); + uint32_t _i362; + for (_i362 = 0; _i362 < _size358; ++_i362) { - xfer += this->schema[_i352].read(iprot); + xfer += this->schema[_i362].read(iprot); } xfer += iprot->readListEnd(); } @@ -5094,14 +5193,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size353; - ::apache::thrift::protocol::TType _etype356; - xfer += iprot->readListBegin(_etype356, _size353); - this->row_groups.resize(_size353); - uint32_t _i357; - for (_i357 = 0; _i357 < _size353; ++_i357) + uint32_t _size363; + ::apache::thrift::protocol::TType _etype366; + xfer += iprot->readListBegin(_etype366, _size363); + this->row_groups.resize(_size363); + uint32_t _i367; + for (_i367 = 0; _i367 < _size363; ++_i367) { - xfer += this->row_groups[_i357].read(iprot); + xfer += this->row_groups[_i367].read(iprot); } xfer += iprot->readListEnd(); } @@ -5114,14 +5213,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size358; - ::apache::thrift::protocol::TType _etype361; - xfer += iprot->readListBegin(_etype361, _size358); - this->key_value_metadata.resize(_size358); - uint32_t _i362; - for (_i362 = 0; _i362 < _size358; ++_i362) + uint32_t _size368; + ::apache::thrift::protocol::TType _etype371; + xfer += iprot->readListBegin(_etype371, _size368); + this->key_value_metadata.resize(_size368); + uint32_t _i372; + for (_i372 = 0; _i372 < _size368; ++_i372) { - xfer += this->key_value_metadata[_i362].read(iprot); + xfer += this->key_value_metadata[_i372].read(iprot); } xfer += iprot->readListEnd(); } @@ -5142,14 +5241,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size363; - ::apache::thrift::protocol::TType _etype366; - xfer += 
iprot->readListBegin(_etype366, _size363); - this->column_orders.resize(_size363); - uint32_t _i367; - for (_i367 = 0; _i367 < _size363; ++_i367) + uint32_t _size373; + ::apache::thrift::protocol::TType _etype376; + xfer += iprot->readListBegin(_etype376, _size373); + this->column_orders.resize(_size373); + uint32_t _i377; + for (_i377 = 0; _i377 < _size373; ++_i377) { - xfer += this->column_orders[_i367].read(iprot); + xfer += this->column_orders[_i377].read(iprot); } xfer += iprot->readListEnd(); } @@ -5207,10 +5306,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter368; - for (_iter368 = this->schema.begin(); _iter368 != this->schema.end(); ++_iter368) + std::vector ::const_iterator _iter378; + for (_iter378 = this->schema.begin(); _iter378 != this->schema.end(); ++_iter378) { - xfer += (*_iter368).write(oprot); + xfer += (*_iter378).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5223,10 +5322,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter369; - for (_iter369 = this->row_groups.begin(); _iter369 != this->row_groups.end(); ++_iter369) + std::vector ::const_iterator _iter379; + for (_iter379 = this->row_groups.begin(); _iter379 != this->row_groups.end(); ++_iter379) { - xfer += (*_iter369).write(oprot); + xfer += (*_iter379).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5236,10 +5335,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter370; - for (_iter370 = this->key_value_metadata.begin(); _iter370 != this->key_value_metadata.end(); ++_iter370) + std::vector ::const_iterator _iter380; + for (_iter380 = this->key_value_metadata.begin(); _iter380 != this->key_value_metadata.end(); ++_iter380) { - xfer += (*_iter370).write(oprot); + xfer += (*_iter380).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5254,10 +5353,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter371; - for (_iter371 = this->column_orders.begin(); _iter371 != this->column_orders.end(); ++_iter371) + std::vector ::const_iterator _iter381; + for (_iter381 = this->column_orders.begin(); _iter381 != this->column_orders.end(); ++_iter381) { - xfer += (*_iter371).write(oprot); + xfer += (*_iter381).write(oprot); } xfer += oprot->writeListEnd(); } diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index e3cc5adb9648..9603cefed388 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -34,7 +34,7 @@ enum Type { BOOLEAN = 0; INT32 = 1; INT64 = 2; - INT96 = 3; // deprecated, only used by legacy implementations. + INT96 = 3; // deprecated, new Parquet writers should not write data in INT96 FLOAT = 4; DOUBLE = 5; BYTE_ARRAY = 6; @@ -42,7 +42,7 @@ enum Type { } /** - * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. + * DEPRECATED: Common types used by frameworks (e.g. Hive, Pig) using parquet. * ConvertedType is superseded by LogicalType. This enum should not be extended. * * See LogicalTypes.md for conversion between ConvertedType and LogicalType. 
@@ -281,7 +281,7 @@ struct Statistics { */ 1: optional binary max; 2: optional binary min; - /** + /** * Count of null values in the column. * * Writers SHOULD always write this field even if it is zero (i.e. no null value) @@ -310,16 +310,23 @@ struct Statistics { 7: optional bool is_max_value_exact; /** If true, min_value is the actual minimum value for a column */ 8: optional bool is_min_value_exact; + /** + * Count of NaN values in the column; only present if physical type is FLOAT + * or DOUBLE, or logical type is FLOAT16. + * If this field is not present, readers MUST assume NaNs may be present + * (i.e. MUST assume nan_count > 0 and MAY NOT assume nan_count == 0). + */ + 9: optional i64 nan_count; } /** Empty structs to use as logical type annotations */ struct StringType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 -struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes +struct UUIDType {} // allowed for FIXED[16], must be encoded as raw UUID bytes struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md struct EnumType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct DateType {} // allowed for INT32 -struct Float16Type {} // allowed for FIXED[2], must encoded raw FLOAT16 bytes +struct Float16Type {} // allowed for FIXED[2], must be encoded as raw FLOAT16 bytes (see LogicalTypes.md) /** * Logical type to annotate a column that is always null. @@ -425,7 +432,7 @@ enum EdgeInterpolationAlgorithm { /** * Embedded Geometry logical type annotation * - * Geospatial features in the Well-Known Binary (WKB) format and edges interpolation + * Geospatial features in the Well-Known Binary (WKB) format and `edges` interpolation * is always linear/planar. * * A custom CRS can be set by the crs field. 
If unset, it defaults to "OGC:CRS84", @@ -444,13 +451,13 @@ struct GeometryType { * Embedded Geography logical type annotation * * Geospatial features in the WKB format with an explicit (non-linear/non-planar) - * edges interpolation algorithm. + * `edges` interpolation algorithm. * * A custom geographic CRS can be set by the crs field, where longitudes are * bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS * defaults to "OGC:CRS84". * - * An optional algorithm can be set to correctly interpret edges interpolation + * An optional algorithm can be set to correctly interpret `edges` interpolation * of the geometries. If unset, the algorithm defaults to SPHERICAL. * * Allowed for physical type: BYTE_ARRAY. @@ -498,7 +505,7 @@ union LogicalType { } /** - * Represents a element inside a schema definition. + * Represents an element inside a schema definition. * - if it is a group (inner node) then type is undefined and num_children is defined * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. @@ -577,15 +584,15 @@ enum Encoding { PLAIN = 0; /** Group VarInt encoding for INT32/INT64. - * This encoding is deprecated. It was never used + * This encoding is deprecated. It was never used. */ // GROUP_VAR_INT = 1; /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * DEPRECATED: Dictionary encoding. The values in the dictionary are encoded in the * plain type. - * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead + * For a data page use RLE_DICTIONARY instead. + * For a Dictionary page use PLAIN instead. */ PLAIN_DICTIONARY = 2; @@ -594,8 +601,9 @@ enum Encoding { */ RLE = 3; - /** Bit packed encoding. This can only be used if the data has a known max + /** DEPRECATED: Bit packed encoding. This can only be used if the data has a known max * width. 
Usable for definition/repetition levels encoding. + * Superseded by RLE (which is a hybrid of RLE and bit packing); see Encodings.md. */ BIT_PACKED = 4; @@ -673,7 +681,7 @@ struct DataPageHeader { /** * Number of values, including NULLs, in this data page. * - * If a OffsetIndex is present, a page must begin at a row + * If an OffsetIndex is present, a page must begin at a row * boundary (repetition_level = 0). Otherwise, pages may begin * within a row (repetition_level > 0). **/ @@ -713,9 +721,14 @@ struct DictionaryPageHeader { } /** - * New page format allowing reading levels without decompressing the data + * Alternate page format allowing reading levels without decompressing the data * Repetition and definition levels are uncompressed * The remaining section containing the data is compressed if is_compressed is true + * + * Implementation note - this header is not necessarily a strict improvement over + * `DataPageHeader` (in particular the original header might provide better compression + * in some scenarios). Page indexes require pages to start and end at row boundaries, + * regardless of which page header is used. **/ struct DataPageHeaderV2 { /** Number of values, including NULLs, in this data page. **/ @@ -741,7 +754,7 @@ struct DataPageHeaderV2 { /** Whether the values are compressed. Which means the section of the page between - definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) + definition_levels_byte_length + repetition_levels_byte_length and compressed_page_size (included) is compressed with the compression_codec. 
If missing it is considered compressed */ 7: optional bool is_compressed = true; @@ -805,10 +818,10 @@ struct PageHeader { /** Compressed (and potentially encrypted) page size in bytes, not including this header **/ 3: required i32 compressed_page_size - /** The 32-bit CRC checksum for the page, to be be calculated as follows: + /** The 32-bit CRC checksum for the page, to be calculated as follows: * * - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, - * the same as in e.g. GZip). + * the same as in e.g. GZIP). * - All page types can have a CRC (v1 and v2 data pages, dictionary pages, * etc.). * - The CRC is computed on the serialization binary representation of the page @@ -893,7 +906,7 @@ struct ColumnMetaData { /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ 6: required i64 total_uncompressed_size - /** total byte size of all compressed, and potentially encrypted, pages + /** total byte size of all compressed, and potentially encrypted, pages * in this column chunk (including the headers) **/ 7: required i64 total_compressed_size @@ -959,10 +972,25 @@ union ColumnCryptoMetaData { struct ColumnChunk { /** File where column data is stored. If not set, assumed to be same file as * metadata. This path is relative to the current file. + * + * As of December 2025, the only known use-case for this field is writing summary + * parquet files (i.e. "_metadata" files). These files consolidate footers from + * multiple parquet files to allow for efficient reading of footers to avoid file + * listing costs and prune out files that do not need to be read based on statistics. + * + * These files do not appear to have ever been formally specified in the specification. + * and are potentially problematic from a correctness perspective [1]. + * + * [1] https://lists.apache.org/thread/ootf2kmyg3p01b1bvplpvp4ftd1bt72d + * + * There is no other known usage of this field. 
Specifically, there are no known + * reference implementations that will read externally stored column data if this field is populated + * within a standard parquet file. Making use of the field for this purpose is + * not considered part of the Parquet specification. **/ 1: optional string file_path - /** Deprecated: Byte offset in file_path to the ColumnMetaData + /** DEPRECATED: Byte offset in file_path to the ColumnMetaData * * Past use of this field has been inconsistent, with some implementations * using it to point to the ColumnMetaData and some using it to point to @@ -1020,10 +1048,10 @@ struct RowGroup { * in this row group **/ 5: optional i64 file_offset - /** Total byte size of all compressed (and potentially encrypted) column data + /** Total byte size of all compressed (and potentially encrypted) column data * in this row group **/ 6: optional i64 total_compressed_size - + /** Row group ordinal in the file **/ 7: optional i16 ordinal } @@ -1031,6 +1059,9 @@ struct RowGroup { /** Empty struct to signal the order defined by the physical or logical type */ struct TypeDefinedOrder {} +/** Empty struct to signal IEEE 754 total order for floating point types */ +struct IEEE754TotalOrder {} + /** * Union to specify the order used for the min_value and max_value fields for a * column. This union takes the role of an enhanced enum that allows rich @@ -1039,6 +1070,7 @@ struct TypeDefinedOrder {} * Possible values are: * * TypeDefinedOrder - the column uses the order defined by its logical or * physical type (if there is no logical type). + * * IEEE754TotalOrder - the floating point column uses IEEE 754 total order. * * If the reader does not support the value of this union, min and max stats * for this column should be ignored. 
@@ -1058,6 +1090,7 @@ union ColumnOrder { * UINT64 - unsigned comparison * DECIMAL - signed comparison of the represented value * DATE - signed comparison + * FLOAT16 - signed comparison of the represented value (*) * TIME_MILLIS - signed comparison * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison @@ -1076,29 +1109,93 @@ union ColumnOrder { * BOOLEAN - false, true * INT32 - signed comparison * INT64 - signed comparison - * INT96 (only used for legacy timestamps) - undefined + * INT96 (only used for legacy timestamps) - undefined(+) * FLOAT - signed comparison of the represented value (*) * DOUBLE - signed comparison of the represented value (*) * BYTE_ARRAY - unsigned byte-wise comparison * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison * - * (*) Because the sorting order is not specified properly for floating - * point values (relations vs. total ordering) the following + * (+) While the INT96 type has been deprecated, at the time of writing it is + * still used in many legacy systems. If a Parquet implementation chooses + * to write statistics for INT96 columns, it is recommended to order them + * according to the legacy rules: + * - compare the last 4 bytes (days) as a little-endian 32-bit signed integer + * - if equal last 4 bytes, compare the first 8 bytes as a little-endian + * 64-bit signed integer (nanos) + * See https://github.com/apache/parquet-format/issues/502 for more details + * + * (*) Because TYPE_ORDER is ambiguous for floating point types due to + * underspecified handling of NaN and -0/+0, it is recommended that writers + * use IEEE_754_TOTAL_ORDER for these types. + * + * If TYPE_ORDER is used for floating point types, then the following * compatibility rules should be applied when reading statistics: * - If the min is a NaN, it should be ignored. * - If the max is a NaN, it should be ignored. 
+ * - If the nan_count field is set, a reader can compute + * nan_count + null_count == num_values to deduce whether all non-null + * values are NaN. * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. - * - * When writing statistics the following rules should be followed: - * - NaNs should not be written to min or max statistics fields. + * If the nan_count field is set, it can be used to check whether + * NaNs are present. + * + * When writing page or column chunk statistics for columns with + * TYPE_ORDER order, the following rules must be followed: + * - The nan_count field must be set for floating point types, even if + * it is zero. + * - If the nan_count field is set, min and max statistics fields, when + * present, must not contain NaN values and must be computed from + * non-NaN values only. This signals to readers that the min and max + * statistics are reliable for non-NaN values. + * - If all non-null values are NaN, min and max statistics must not be + * written. * - If the computed max value is zero (whether negative or positive), * `+0.0` should be written into the max statistics field. * - If the computed min value is zero (whether negative or positive), * `-0.0` should be written into the min statistics field. + * + * When writing column indexes for columns with TYPE_ORDER order, the + * following rules must be followed: + * - NaNs must not be written to min_values or max_values. + * - If all non-null values of a page are NaN, a column index must not + * be written for this column chunk because min_values and max_values + * are required. + * - If the computed max value is zero (whether negative or positive), + * `+0.0` should be written into the corresponding max_values entry. 
+ * - If the computed min value is zero (whether negative or positive), + * `-0.0` should be written into the corresponding min_values entry. */ 1: TypeDefinedOrder TYPE_ORDER; + + /* + * The floating point type is ordered according to the totalOrder predicate, + * as defined in section 5.10 of IEEE-754 (2008 revision). Only columns of + * physical type FLOAT or DOUBLE, or logical type FLOAT16 may use this ordering. + * + * Intuitively, this orders floats mathematically, but defines -0 to be less + * than +0, -NaN to be less than anything else, and +NaN to be greater than + * anything else. It also defines an order between different bit representations + * of the same value. + * + * When writing statistics for columns with IEEE_754_TOTAL_ORDER order, the + * following rules must be followed: + * - Writing the nan_count field is mandatory when using this ordering. + * - Min and max statistics must contain the smallest and largest non-NaN + * values respectively, or if all non-null values are NaN, the smallest and + * largest NaN values as defined by IEEE 754 total order. + * + * When reading statistics for columns with this order, the following rules + * should be followed: + * - Readers should consult the nan_count field to determine whether NaNs + * are present. + * - A reader can compute nan_count + null_count == num_values to deduce + * whether all non-null values are NaN. In the page index, which does not + * have a num_values field, the presence of a NaN value in min_values + * or max_values indicates that all non-null values are NaN. + */ + 2: IEEE754TotalOrder IEEE_754_TOTAL_ORDER; } struct PageLocation { @@ -1106,8 +1203,8 @@ struct PageLocation { 1: required i64 offset /** - * Size of the page, including header. Sum of compressed_page_size and header - * length + * Size of the page, including header. Equal to the sum of the page's + * PageHeader.compressed_page_size and the size of the serialized PageHeader.
*/ 2: required i32 compressed_page_size @@ -1135,7 +1232,7 @@ struct OffsetIndex { /** * Unencoded/uncompressed size for BYTE_ARRAY types. * - * See documention for unencoded_byte_array_data_bytes in SizeStatistics for + * See documentation for unencoded_byte_array_data_bytes in SizeStatistics for * more details on this field. */ 2: optional list unencoded_byte_array_data_bytes @@ -1165,11 +1262,23 @@ struct ColumnIndex { * Two lists containing lower and upper bounds for the values of each page * determined by the ColumnOrder of the column. These may be the actual * minimum and maximum values found on a page, but can also be (more compact) - * values that do not exist on a page. For example, instead of storing ""Blart + * values that do not exist on a page. For example, instead of storing "Blart * Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". * Such more compact values must still be valid values within the column's * logical type. Readers must make sure that list entries are populated before * using them by inspecting null_pages. + * + * For columns of physical type FLOAT or DOUBLE, or logical type FLOAT16, + * NaN values are not to be included in these bounds. If all non-null values + * of a page are NaN, then a writer must do the following: + * - If the order of this column is TYPE_ORDER, then a column index must + * not be written for this column chunk. While this is unfortunate for + * performance, it is necessary to avoid conflict with legacy files that + * still included NaN in min_values and max_values even if the page had + * non-NaN values. To mitigate this, IEEE_754_TOTAL_ORDER is recommended. + * - If the order of this column is IEEE_754_TOTAL_ORDER, then min_values[i] + * and max_values[i] of that page must be set to the smallest and largest + * NaN values as defined by IEEE 754 total order.
*/ 2: required list min_values 3: required list max_values @@ -1183,13 +1292,13 @@ struct ColumnIndex { 4: required BoundaryOrder boundary_order /** - * A list containing the number of null values for each page + * A list containing the number of null values for each page * * Writers SHOULD always write this field even if no null values * are present or the column is not nullable. - * Readers MUST distinguish between null_counts not being present + * Readers MUST distinguish between null_counts not being present * and null_count being 0. - * If null_counts are not present, readers MUST NOT assume all + * If null_counts are not present, readers MUST NOT assume all * null counts are 0. */ 5: optional list null_counts @@ -1211,6 +1320,15 @@ struct ColumnIndex { * Same as repetition_level_histograms except for definitions levels. **/ 7: optional list definition_level_histograms; + + /** + * A list containing the number of NaN values for each page. Only present + * for columns of physical type FLOAT or DOUBLE, or logical type FLOAT16. + * If this field is not present, readers MUST assume that there might be + * NaN values in any page. + */ + 8: optional list nan_counts + } struct AesGcmV1 { @@ -1246,7 +1364,14 @@ union EncryptionAlgorithm { * Description for file metadata */ struct FileMetaData { - /** Version of this file **/ + /** Version of this file + * + * As of December 2025, there is no agreed upon consensus of what constitutes + * version 2 of the file. For maximum compatibility with readers, writers should + * always populate "1" for version. For maximum compatibility with writers, + * readers should accept "1" and "2" interchangeably. All other versions are + * reserved for potential future use-cases. + */ 1: required i32 version /** Parquet schema for this file. This schema contains metadata for all the columns. 
@@ -1276,7 +1401,7 @@ struct FileMetaData { * Sort order used for the min_value and max_value fields in the Statistics * objects and the min_values and max_values fields in the ColumnIndex * objects of each column in this file. Sort orders are listed in the order - * matching the columns in the schema. The indexes are not necessary the same + * matching the columns in the schema. The indexes are not necessarily the same * though, because only leaf nodes of the schema are represented in the list * of sort orders. * @@ -1290,30 +1415,30 @@ struct FileMetaData { */ 7: optional list column_orders; - /** + /** * Encryption algorithm. This field is set only in encrypted files * with plaintext footer. Files with encrypted footer store algorithm id * in FileCryptoMetaData structure. */ 8: optional EncryptionAlgorithm encryption_algorithm - /** - * Retrieval metadata of key used for signing the footer. - * Used only in encrypted files with plaintext footer. - */ + /** + * Retrieval metadata of key used for signing the footer. + * Used only in encrypted files with plaintext footer. + */ 9: optional binary footer_signing_key_metadata } /** Crypto metadata for files with encrypted footer **/ struct FileCryptoMetaData { - /** + /** * Encryption algorithm. This field is only used for files * with encrypted footer. Files with plaintext footer store algorithm id * inside footer (FileMetaData structure). */ 1: required EncryptionAlgorithm encryption_algorithm - - /** Retrieval metadata of key used for encryption of footer, + + /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ 2: optional binary key_metadata }