From e21fd8bac657400db8f82400c3822be053a82dcc Mon Sep 17 00:00:00 2001 From: Ante Kresic Date: Mon, 22 Jun 2026 22:07:40 +0000 Subject: [PATCH] Add compact_chunk function This new function will compact the chunk by looking for overlapping batches and combining them together in order to produce globally ordered chunks. This change is a first step towards supporting direct compress in production workloads. --- .unreleased/pr_9957 | 1 + sql/maintenance_utils.sql | 4 + sql/updates/reverse-dev.sql | 1 + src/cross_module_fn.c | 2 + src/cross_module_fn.h | 1 + tsl/src/compression/COMPACT_CHUNK.md | 139 ++ tsl/src/compression/compression.c | 14 +- tsl/src/compression/compression.h | 1 + tsl/src/compression/recompress.c | 756 +++++++++ tsl/src/compression/recompress.h | 7 + tsl/src/init.c | 1 + tsl/test/expected/compact_chunk.out | 1495 +++++++++++++++++ .../expected/compact_chunk_concurrent.out | 246 +++ tsl/test/isolation/specs/CMakeLists.txt | 1 + .../specs/compact_chunk_concurrent.spec | 99 ++ tsl/test/shared/expected/extension.out | 1 + tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/compact_chunk.sql | 1068 ++++++++++++ 18 files changed, 3834 insertions(+), 4 deletions(-) create mode 100644 .unreleased/pr_9957 create mode 100644 tsl/src/compression/COMPACT_CHUNK.md create mode 100644 tsl/test/expected/compact_chunk.out create mode 100644 tsl/test/isolation/expected/compact_chunk_concurrent.out create mode 100644 tsl/test/isolation/specs/compact_chunk_concurrent.spec create mode 100644 tsl/test/sql/compact_chunk.sql diff --git a/.unreleased/pr_9957 b/.unreleased/pr_9957 new file mode 100644 index 00000000000..a69caa00ca5 --- /dev/null +++ b/.unreleased/pr_9957 @@ -0,0 +1 @@ +Implements: #9957 Add compact_chunk function diff --git a/sql/maintenance_utils.sql b/sql/maintenance_utils.sql index f02dd9091dd..57f15eb0c1e 100644 --- a/sql/maintenance_utils.sql +++ b/sql/maintenance_utils.sql @@ -89,6 +89,10 @@ CREATE OR REPLACE FUNCTION 
_timescaledb_functions.recompress_chunk_segmentwise( if_compressed BOOLEAN = true ) RETURNS REGCLASS AS '@MODULE_PATHNAME@', 'ts_recompress_chunk_segmentwise' LANGUAGE C STRICT VOLATILE; +CREATE OR REPLACE FUNCTION _timescaledb_functions.compact_chunk( + uncompressed_chunk REGCLASS +) RETURNS REGCLASS AS '@MODULE_PATHNAME@', 'ts_compact_chunk' LANGUAGE C STRICT VOLATILE; + -- find the index on the compressed chunk that can be used to recompress efficiently -- this index must contain all the segmentby columns and the meta_sequence_number column last CREATE OR REPLACE FUNCTION _timescaledb_functions.get_compressed_chunk_index_for_recompression( diff --git a/sql/updates/reverse-dev.sql b/sql/updates/reverse-dev.sql index 82ec132c050..71004093f3a 100644 --- a/sql/updates/reverse-dev.sql +++ b/sql/updates/reverse-dev.sql @@ -1,2 +1,3 @@ DROP FUNCTION IF EXISTS _timescaledb_functions.decompress_batch(record); DROP FUNCTION IF EXISTS _timescaledb_functions.estimate_uncompressed_size(regclass, double precision); +DROP FUNCTION IF EXISTS _timescaledb_functions.compact_chunk(REGCLASS); diff --git a/src/cross_module_fn.c b/src/cross_module_fn.c index 67a43627a86..59ab75290e1 100644 --- a/src/cross_module_fn.c +++ b/src/cross_module_fn.c @@ -100,6 +100,7 @@ CROSSMODULE_WRAPPER(chunk_freeze_chunk); CROSSMODULE_WRAPPER(chunk_unfreeze_chunk); CROSSMODULE_WRAPPER(recompress_chunk_segmentwise); +CROSSMODULE_WRAPPER(compact_chunk); CROSSMODULE_WRAPPER(get_compressed_chunk_index_for_recompression); CROSSMODULE_WRAPPER(merge_chunks); CROSSMODULE_WRAPPER(split_chunk); @@ -397,6 +398,7 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = { .chunk_freeze_chunk = error_no_default_fn_pg_community, .chunk_unfreeze_chunk = error_no_default_fn_pg_community, .recompress_chunk_segmentwise = error_no_default_fn_pg_community, + .compact_chunk = error_no_default_fn_pg_community, .get_compressed_chunk_index_for_recompression = error_no_default_fn_pg_community, .preprocess_query_tsl = 
preprocess_query_tsl_default_fn_community, diff --git a/src/cross_module_fn.h b/src/cross_module_fn.h index 35fa113a99d..3f4d23c8dd8 100644 --- a/src/cross_module_fn.h +++ b/src/cross_module_fn.h @@ -174,6 +174,7 @@ typedef struct CrossModuleFunctions PGFunction chunk_freeze_chunk; PGFunction chunk_unfreeze_chunk; PGFunction recompress_chunk_segmentwise; + PGFunction compact_chunk; PGFunction get_compressed_chunk_index_for_recompression; void (*preprocess_query_tsl)(Query *parse, int *cursor_opts); diff --git a/tsl/src/compression/COMPACT_CHUNK.md b/tsl/src/compression/COMPACT_CHUNK.md new file mode 100644 index 00000000000..ba0f6820435 --- /dev/null +++ b/tsl/src/compression/COMPACT_CHUNK.md @@ -0,0 +1,139 @@ +# compact_chunk + +Merges overlapping compressed batches within a chunk. Only touches batches +that need fixing — correctly ordered batches are left as-is. + +Overlap detection reads the firstlast sparse metadata, which stores the exact +orderby values of each batch's first and last rows. Two adjacent batches overlap +when the current batch's first row sorts before the previous batch's last row. +Because the metadata holds the real boundary rows for every orderby column, no +decompression is needed, even for multi-column orderby. + +## How It Works + +``` + Phase 1: FIND Phase 2: RECOMPRESS Phase 3: VERIFY + ┌──────────────┐ ┌──────────────────┐ ┌────────────────┐ + │ Index scan │──────▶│ Decompress+merge │─────▶│ Re-scan with │ + │ Stop at │ │ overlapping │ │ fresh snapshot │ + │ first issue │ │ batches, continue│ │ Clear UNORDERED│ + └──────────────┘ │ scanning for more│ │ if clean │ + └──────────────────┘ └────────────────┘ +``` + +## Handling specific compression and batch configurations + +### 1. 
No overlaps (no-op) + +``` + ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ + │ 1..100 │ │101..200│ │201..300│ ───▶ │ 1..100 │ │101..200│ │201..300│ + └────────┘ └────────┘ └────────┘ └────────┘ └────────┘ └────────┘ + (unchanged, UNORDERED cleared) +``` + +### 2. Overlapping batches + +``` + ┌──────────┐ ┌───────┐┌───────┐┌───────┐┌──┐┌────────┐ + │ 1..100 │ │ 1..50 ││51..100││101 ││ ││201..300│ + └──────────┘ │ ││ ││ ..150 ││… │└────────┘ + ┌──────────┐ ───▶ └───────┘└───────┘└───────┘└──┘ + │ 50..150 │ ◄────── merged + re-sorted ──► ◄ kept ► + └──────────┘ + ┌────────┐ + │201..300│ + └────────┘ +``` + +### 3. Segmentby — independent per segment + +``` + d1: ┌──────┐ ┌───────┐ d1: ┌──────────────┐ + │1..100│ │50..200│ ──▶ │ 1..200 merged│ + └──────┘ └───────┘ └──────────────┘ + d2: ┌──────┐ ┌───────┐ d2: ┌──────────────┐ + │1..100│ │50..200│ ──▶ │ 1..200 merged│ + └──────┘ └───────┘ └──────────────┘ +``` + +### 4. DESC orderby + +``` + orderby='time DESC' max◄────────────────────►min + + ┌──────────┐ ┌──────────────────┐ + │ 200..100 │ │ 200..........100 │ + └──────────┘ ──▶ │ merged │ + ┌──────────┐ └──────────────────┘ + │ 150..50 │ + └──────────┘ +``` + +### 5. Multi-column orderby — boundary tie resolution + +When col1 first/last tie, compare the secondary columns straight from the +first/last metadata — no decompression. + +``` + orderby='device,time' Both batches tie on device (first=d2, last=d2) + + Batch 1 last row: (d2, 08:20) ◄─ from last metadata + Batch 2 first row: (d2, 08:21) ◄─ from first metadata + 08:20 < 08:21 → no overlap ✓ + + Batch 1 last row: (d2, 08:20) + Batch 2 first row: (d2, 08:11) + 08:20 > 08:11 → OVERLAP → merge +``` + +### 6. Mixed-null batch overlaps a neighbor + +A batch with both NULL and non-NULL values in the first orderby column has a +NULL boundary row. With NULLS LAST its last row is NULL, which sorts after a +following non-null batch — so the two batches overlap. 
The merge re-sorts the +rows and the NULLs settle at the end (NULLS LAST). (A mixed-null batch with no +neighbor to overlap is already ordered and is left as-is.) + +``` + orderby='value NULLS LAST' last row of batch 1 is NULL, sorts after batch 2 + + ┌─────────────────────┐ ┌──────────┐ ┌────────────────────────────────┐ + │ 1001..1800, NULL×200│ │1801..2800│ ──▶ │ 1001..2800 re-sorted, NULL×200 │ + └─────────────────────┘ └──────────┘ └────────────────────────────────┘ + first=1001, last=NULL ──▶ overlap merged, NULLs at end (NULLS LAST) +``` + +### 7. Overlap merge preserving NULLs + +When overlapping batches with nullable first orderby are merged, the re-sort +keeps the NULL rows in their correct ordered position. + +``` + orderby='value NULLS LAST' + + ┌──────────────────┐ ┌──────────────────────────┐ + │ 1..400, NULL×100 │ ──▶ │ 1..699 re-sorted, NULL×100│ + └──────────────────┘ └──────────────────────────┘ + ┌──────────┐ overlap NULLs kept at end (NULLS LAST) + │ 200..699 │ on 200..400 + └──────────┘ +``` + +### 8. Secondary column NULLs at boundary tie + +The boundary comparison uses the column's sort order, which already places NULLs +per its NULLS FIRST/LAST setting, so a NULL boundary value compares like any +other value. + +``` + orderby='time, value NULLS LAST' + + Batch 1 last row: (08:20, NULL) CORRECT: NULL with NULLS LAST + Batch 2 first row: (08:20, 1001) means NULL > 1001 → OVERLAP → merge + + + ┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐ + │ ..., (08:20,NULL)│ │(08:20,1001), ... 
│ ──▶ │ merged + correctly sorted│ + └──────────────────┘ └──────────────────┘ └──────────────────────────┘ +``` diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index a50fa33d327..4704fa2ee79 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -3235,10 +3235,10 @@ tsl_compressed_data_info(PG_FUNCTION_ARGS) return HeapTupleGetDatum(tuple); } -extern Datum -tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS) +bool +compressed_data_has_nulls(Datum compressed_data) { - const CompressedDataHeader *header = get_compressed_data_header(PG_GETARG_DATUM(0)); + const CompressedDataHeader *header = get_compressed_data_header(compressed_data); bool has_nulls = false; switch (header->compression_algorithm) @@ -3269,7 +3269,13 @@ tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS) break; } - return BoolGetDatum(has_nulls); + return has_nulls; +} + +extern Datum +tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS) +{ + return BoolGetDatum(compressed_data_has_nulls(PG_GETARG_DATUM(0))); } extern CompressionStorage diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index f8114a172bd..033b8d6c91b 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -349,6 +349,7 @@ extern Datum tsl_compressed_data_in(PG_FUNCTION_ARGS); extern Datum tsl_compressed_data_out(PG_FUNCTION_ARGS); extern Datum tsl_compressed_data_info(PG_FUNCTION_ARGS); extern Datum tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS); +extern bool compressed_data_has_nulls(Datum compressed_data); extern Datum tsl_compressed_data_column_size(PG_FUNCTION_ARGS); extern Datum tsl_compressed_data_to_array(PG_FUNCTION_ARGS); extern Datum tsl_decompress_batch(PG_FUNCTION_ARGS); diff --git a/tsl/src/compression/recompress.c b/tsl/src/compression/recompress.c index aaaa3a34b89..6459bc4dbb8 100644 --- a/tsl/src/compression/recompress.c +++ b/tsl/src/compression/recompress.c @@ -51,6 +51,77 @@ #define 
RECOMPRESS_EXCLUSIVE_LOCK_TIMEOUT 5000 /* ms */ #endif +/* + * Scan state saved by compact_chunk_find_overlapping_batches. The caller can + * inspect the result and then pass the same state to + * compact_chunk_recompress_overlapping_batches to continue without restarting + * the scan. + */ +typedef struct CompactChunkScanState +{ + ItemPointerData previous_tid; /* TID of the batch processed just before the current one */ + ItemPointerData first_overlap_tid; /* TID of the first overlapping batch */ + + /* Segmentby key values of the current batch, for segment-group detection. */ + Datum *seg_values; + bool *seg_isnull; + + /* First-row and last-row orderby tuples of the current batch, read straight + * from the index. */ + Datum *curr_first; + bool *curr_first_isnull; + Datum *curr_last; + bool *curr_last_isnull; + + /* Last-row orderby tuple of the batch processed just before the current + * one. Holds copies so it survives advancing the index scan. */ + Datum *prev_last; + bool *prev_last_isnull; + + bool *isdesc; /* orderby column DESC settings */ +} CompactChunkScanState; + +static CompactChunkScanState * +compact_chunk_scan_state_init(RecompressContext *recompress_ctx, CompressionSettings *settings) +{ + CompactChunkScanState *state = palloc(sizeof(CompactChunkScanState)); + ItemPointerSetInvalid(&state->previous_tid); + ItemPointerSetInvalid(&state->first_overlap_tid); + state->seg_values = palloc(sizeof(Datum) * recompress_ctx->num_segmentby); + state->seg_isnull = palloc(sizeof(bool) * recompress_ctx->num_segmentby); + state->curr_first = palloc(sizeof(Datum) * recompress_ctx->num_orderby); + state->curr_first_isnull = palloc(sizeof(bool) * recompress_ctx->num_orderby); + state->curr_last = palloc(sizeof(Datum) * recompress_ctx->num_orderby); + state->curr_last_isnull = palloc(sizeof(bool) * recompress_ctx->num_orderby); + state->prev_last = palloc0(sizeof(Datum) * recompress_ctx->num_orderby); + state->prev_last_isnull = palloc(sizeof(bool) * 
recompress_ctx->num_orderby); + state->isdesc = palloc(sizeof(bool) * recompress_ctx->num_orderby); + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + state->prev_last_isnull[i] = true; + state->isdesc[i] = ts_array_get_element_bool(settings->fd.orderby_desc, i + 1); + } + return state; +} + +static void +compact_chunk_scan_state_reset(CompactChunkScanState *state, RecompressContext *recompress_ctx) +{ + ItemPointerSetInvalid(&state->previous_tid); + ItemPointerSetInvalid(&state->first_overlap_tid); + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + int key = recompress_ctx->num_segmentby + i; + if (!state->prev_last_isnull[i] && !recompress_ctx->key_byval[key] && + PointerIsValid(DatumGetPointer(state->prev_last[i]))) + { + pfree(DatumGetPointer(state->prev_last[i])); + } + state->prev_last[i] = (Datum) 0; + state->prev_last_isnull[i] = true; + } +} + static bool fetch_uncompressed_chunk_into_tuplesort(Tuplesortstate *tuplesortstate, Relation uncompressed_chunk_rel, Snapshot snapshot); @@ -73,6 +144,26 @@ static bool check_changed_group(CompressedSegmentInfo *current_segment, Datum *v bool *isnulls, int nsegmentby_cols); static void recompress_segment(Tuplesortstate *tuplesortstate, Relation compressed_chunk_rel, RowCompressor *row_compressor, BulkWriter *writer); +static IndexScanDesc compact_chunk_begin_index_scan(Relation compressed_chunk_rel, + Relation index_rel, Snapshot snapshot); +static void read_batch_firstlast(IndexScanDesc index_scan, RecompressContext *recompress_ctx, + CompactChunkScanState *state); +static void save_prev_last(CompactChunkScanState *state, RecompressContext *recompress_ctx); +static bool batches_overlap_firstlast(RecompressContext *recompress_ctx, Datum *prev_last, + bool *prev_last_isnull, Datum *curr_first, + bool *curr_first_isnull); +static void decompress_batch_to_tuplesort(TupleTableSlot *slot, TupleDesc tupdesc, + RowDecompressor *decompressor, + Tuplesortstate *recompress_tuplesortstate, + Relation 
compressed_chunk_rel, Snapshot snapshot); +static bool compact_chunk_find_overlapping_batches(Relation compressed_chunk_rel, + IndexScanDesc index_scan, + RecompressContext *recompress_ctx, + CompactChunkScanState *state); +static bool compact_chunk_recompress_overlapping_batches( + Relation compressed_chunk_rel, IndexScanDesc index_scan, Snapshot snapshot, + RecompressContext *recompress_ctx, CompactChunkScanState *state, RowCompressor *compressor, + RowDecompressor *decompressor, Tuplesortstate *recompress_tuplesortstate, BulkWriter *writer); static void try_updating_chunk_status(Chunk *uncompressed_chunk, Relation uncompressed_chunk_rel); /* @@ -137,6 +228,45 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) PG_RETURN_OID(uncompressed_relid); } +/* + * Compact a chunk by recombining overlapping batches + * + * 0 uncompressed_chunk_id REGCLASS + */ +Datum +tsl_compact_chunk(PG_FUNCTION_ARGS) +{ + Oid uncompressed_relid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0); + + ts_feature_flag_check(FEATURE_HYPERTABLE_COMPRESSION); + TS_PREVENT_FUNC_IF_READ_ONLY(); + Chunk *chunk = ts_chunk_get_by_relid(uncompressed_relid, true); + + ts_hypertable_permissions_check(chunk->hypertable_relid, GetUserId()); + + if (!ts_chunk_is_compressed(chunk)) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("trying to compact an uncompressed chunk %s.%s", + NameStr(chunk->fd.schema_name), + NameStr(chunk->fd.table_name)))); + } + + if (ts_chunk_is_partial(chunk)) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("trying to compact a partially compressed chunk %s.%s", + NameStr(chunk->fd.schema_name), + NameStr(chunk->fd.table_name)))); + } + + uncompressed_relid = compact_chunk_impl(chunk); + + PG_RETURN_OID(uncompressed_relid); +} + static RecompressContext * compress_chunk_populate_recompress_ctx(CompressionSettings *settings, Relation uncompressed_chunk_rel, @@ -204,6 +334,22 @@ 
compress_chunk_populate_recompress_ctx(CompressionSettings *settings, compressed_chunk_rel, recompress_ctx->orderby_scankeys); + /* Cache the sort support for each orderby column, used to compare batch + * boundary values during compaction. The ordering operator fills in the + * reverse flag (DESC) and the comparator, while the collation and nulls + * ordering let ApplySortComparator place NULLs in total order for us. */ + recompress_ctx->orderby_ssup = palloc0(sizeof(SortSupportData) * recompress_ctx->num_orderby); + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + int key = recompress_ctx->num_segmentby + i; + SortSupport ssup = &recompress_ctx->orderby_ssup[i]; + + ssup->ssup_cxt = CurrentMemoryContext; + ssup->ssup_collation = recompress_ctx->sort_collations[key]; + ssup->ssup_nulls_first = recompress_ctx->nulls_first[key]; + PrepareSortSupportFromOrderingOp(recompress_ctx->sort_operators[key], ssup); + } + return recompress_ctx; } @@ -237,6 +383,7 @@ free_chunk_recompress_ctx(RecompressContext *recompress_ctx) } } + pfree(recompress_ctx->orderby_ssup); pfree(recompress_ctx); } @@ -746,6 +893,615 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk, table_close(compressed_chunk_rel, NoLock); } +static IndexScanDesc +compact_chunk_begin_index_scan(Relation compressed_chunk_rel, Relation index_rel, Snapshot snapshot) +{ + IndexScanDesc index_scan = + index_beginscan_compat(compressed_chunk_rel, index_rel, snapshot, NULL, 0, 0); + /* We use index tuples directly to fetch the values */ + index_scan->xs_want_itup = true; + index_rescan(index_scan, NULL, 0, NULL, 0); + return index_scan; +} + +/* + * Read the current batch's segmentby key values and the first-row / last-row + * orderby tuples from the index tuple into the scan state. + * + * Index key order is [segby1, ...segbyN, orderby_lower_1, orderby_upper_1, ...]. + * The index stores a (lower, upper) metadata pair per orderby column. 
With + * firstlast metadata the lower/upper columns are the values of that column in + * the batch's boundary rows: ascending stores (first, last), descending stores + * (last, first). So curr_first/curr_last are exactly the orderby values in the + * batch's first and last rows. + */ +static void +read_batch_firstlast(IndexScanDesc index_scan, RecompressContext *recompress_ctx, + CompactChunkScanState *state) +{ + for (int i = 0; i < recompress_ctx->num_segmentby; i++) + { + state->seg_values[i] = index_getattr(index_scan->xs_itup, + AttrOffsetGetAttrNumber(i), + index_scan->xs_itupdesc, + &state->seg_isnull[i]); + } + + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + AttrNumber lower = AttrOffsetGetAttrNumber(recompress_ctx->num_segmentby + i * 2); + AttrNumber upper = lower + 1; + AttrNumber first_attno = state->isdesc[i] ? upper : lower; + AttrNumber last_attno = state->isdesc[i] ? lower : upper; + + state->curr_first[i] = index_getattr(index_scan->xs_itup, + first_attno, + index_scan->xs_itupdesc, + &state->curr_first_isnull[i]); + state->curr_last[i] = index_getattr(index_scan->xs_itup, + last_attno, + index_scan->xs_itupdesc, + &state->curr_last_isnull[i]); + } +} + +/* + * Remember the current batch's last-row orderby tuple as the predecessor for + * the next batch. The index tuple is only valid for the current scan position, + * so pass-by-reference values are deep-copied to survive advancing the scan. + */ +static void +save_prev_last(CompactChunkScanState *state, RecompressContext *recompress_ctx) +{ + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + int key = recompress_ctx->num_segmentby + i; + + if (!state->prev_last_isnull[i] && !recompress_ctx->key_byval[key] && + PointerIsValid(DatumGetPointer(state->prev_last[i]))) + { + pfree(DatumGetPointer(state->prev_last[i])); + } + + state->prev_last_isnull[i] = state->curr_last_isnull[i]; + state->prev_last[i] = state->curr_last_isnull[i] ? 
+ (Datum) 0 : + datumCopy(state->curr_last[i], + recompress_ctx->key_byval[key], + recompress_ctx->key_typlen[key]); + } +} + +/* + * Decide whether two adjacent batches overlap from their boundary metadata. + * + * Batches are scanned in sort order, so the previous batch's last row and the + * current batch's first row are the touching boundaries. They overlap when the + * current batch's first row sorts strictly before the previous batch's last + * row. A boundary touch (the tuples are equal) is not an overlap. + * + * ApplySortComparator gives a total order that already accounts for the column + * direction (ASC/DESC) and the NULLS FIRST/LAST placement. + */ +static bool +batches_overlap_firstlast(RecompressContext *recompress_ctx, Datum *prev_last, + bool *prev_last_isnull, Datum *curr_first, bool *curr_first_isnull) +{ + for (int i = 0; i < recompress_ctx->num_orderby; i++) + { + int cmp = ApplySortComparator(prev_last[i], + prev_last_isnull[i], + curr_first[i], + curr_first_isnull[i], + &recompress_ctx->orderby_ssup[i]); + + if (cmp < 0) + { + return false; /* prev sorts before curr: no overlap */ + } + + if (cmp > 0) + { + return true; /* curr sorts before prev: overlap */ + } + + /* Equal on this column, move on to the next one. */ + } + + /* The boundary tuples are equal across all columns: a touch, not an overlap. */ + return false; +} + +/* + * Decompress a compressed batch into the tuplesort and delete the original. + * + * The tuplesort sorts by the orderby keys, including their NULLS FIRST/LAST + * setting, so NULL orderby values land at the correct end of the sort with no + * special handling. 
+ */ +static void +decompress_batch_to_tuplesort(TupleTableSlot *slot, TupleDesc tupdesc, + RowDecompressor *decompressor, + Tuplesortstate *recompress_tuplesortstate, + Relation compressed_chunk_rel, Snapshot snapshot) +{ + bool should_free; + HeapTuple compressed_tuple = ExecFetchSlotHeapTuple(slot, false, &should_free); + + heap_deform_tuple(compressed_tuple, + tupdesc, + decompressor->compressed_datums, + decompressor->compressed_is_nulls); + + int n_rows = decompress_batch(decompressor); + + for (int i = 0; i < n_rows; i++) + { + tuplesort_puttupleslot(recompress_tuplesortstate, decompressor->decompressed_slots[i]); + } + + row_decompressor_reset(decompressor); + + if (!delete_tuple_for_recompression(compressed_chunk_rel, &slot->tts_tid, snapshot)) + { + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("aborting compaction due to concurrent updates on " + "compressed data, retrying with next policy run"))); + } + + if (should_free) + { + heap_freetuple(compressed_tuple); + } +} + +/* + * Scan the compressed chunk index in order, looking for the first batch that + * overlaps its predecessor in the same segment group. + * + * Returns true on the first overlap found without modifying the heap, recording + * the overlapping pair (previous_tid and first_overlap_tid) and the current + * batch's boundary tuples in state so the recompress pass can resume from here. + */ +static bool +compact_chunk_find_overlapping_batches(Relation compressed_chunk_rel, IndexScanDesc index_scan, + RecompressContext *recompress_ctx, + CompactChunkScanState *state) +{ + TupleTableSlot *compressed_slot = table_slot_create(compressed_chunk_rel, NULL); + + while (index_getnext_slot(index_scan, ForwardScanDirection, compressed_slot)) + { + read_batch_firstlast(index_scan, recompress_ctx, state); + + /* First batch overall or first batch of a new segment group: nothing to + * compare against, just remember it as the predecessor. 
*/ + if (!ItemPointerIsValid(&state->previous_tid) || + check_changed_group(recompress_ctx->current_segment, + state->seg_values, + state->seg_isnull, + recompress_ctx->num_segmentby)) + { + ItemPointerCopy(&index_scan->xs_heaptid, &state->previous_tid); + update_current_segment(recompress_ctx->current_segment, + state->seg_values, + state->seg_isnull, + recompress_ctx->num_segmentby); + save_prev_last(state, recompress_ctx); + continue; + } + + if (batches_overlap_firstlast(recompress_ctx, + state->prev_last, + state->prev_last_isnull, + state->curr_first, + state->curr_first_isnull)) + { + ItemPointerCopy(&index_scan->xs_heaptid, &state->first_overlap_tid); + ExecDropSingleTupleTableSlot(compressed_slot); + return true; + } + + /* No overlap: this batch becomes the predecessor for the next one. */ + ItemPointerCopy(&index_scan->xs_heaptid, &state->previous_tid); + save_prev_last(state, recompress_ctx); + } + + ExecDropSingleTupleTableSlot(compressed_slot); + return false; +} + +/* + * Recompress all overlapping batches in the compressed chunk. + * + * The caller runs a find pass first, so state->first_overlap_tid and + * state->previous_tid identify the first overlapping pair. They are fetched by + * TID and merged, then the main scan loop continues from where the find pass + * stopped, absorbing further overlapping batches in the same group. + * + * Overlapping batches are decompressed into recompress_tuplesortstate and + * re-compressed once the group ends. The tuplesort orders by the orderby keys + * including their NULLS FIRST/LAST setting, so NULL orderby values sort to the + * correct end on their own. + * + * Returns true if any overlapping batches were found and recompressed. 
+ */ +static bool +compact_chunk_recompress_overlapping_batches( + Relation compressed_chunk_rel, IndexScanDesc index_scan, Snapshot snapshot, + RecompressContext *recompress_ctx, CompactChunkScanState *state, RowCompressor *compressor, + RowDecompressor *decompressor, Tuplesortstate *recompress_tuplesortstate, BulkWriter *writer) +{ + TupleTableSlot *previous_compressed_slot = table_slot_create(compressed_chunk_rel, NULL); + TupleTableSlot *compressed_slot = table_slot_create(compressed_chunk_rel, NULL); + + TupleDesc compressed_rel_tupdesc = RelationGetDescr(compressed_chunk_rel); + bool overlapping = false; + bool found_overlaps = false; + + /* + * The find pass identified the first overlapping pair. Fetch both batches by + * TID and decompress them into the shared tuplesort to start a merge group. + */ + if (ItemPointerIsValid(&state->first_overlap_tid)) + { + bool found pg_attribute_unused(); + bool call_again = false; + bool all_dead = false; + + found = table_index_fetch_tuple(index_scan->xs_heapfetch, + &state->first_overlap_tid, + index_scan->xs_snapshot, + previous_compressed_slot, + &call_again, + &all_dead); + Assert(found); + decompress_batch_to_tuplesort(previous_compressed_slot, + compressed_rel_tupdesc, + decompressor, + recompress_tuplesortstate, + compressed_chunk_rel, + snapshot); + + found = table_index_fetch_tuple(index_scan->xs_heapfetch, + &state->previous_tid, + index_scan->xs_snapshot, + previous_compressed_slot, + &call_again, + &all_dead); + Assert(found); + decompress_batch_to_tuplesort(previous_compressed_slot, + compressed_rel_tupdesc, + decompressor, + recompress_tuplesortstate, + compressed_chunk_rel, + snapshot); + + overlapping = true; + found_overlaps = true; + CommandCounterIncrement(); + + /* The overlapping batch becomes the predecessor for the scan loop. 
*/ + ItemPointerCopy(&state->first_overlap_tid, &state->previous_tid); + update_current_segment(recompress_ctx->current_segment, + state->seg_values, + state->seg_isnull, + recompress_ctx->num_segmentby); + save_prev_last(state, recompress_ctx); + } + + while (index_getnext_slot(index_scan, ForwardScanDirection, compressed_slot)) + { + read_batch_firstlast(index_scan, recompress_ctx, state); + + /* First batch of a new segment group: close any open merge group before + * starting fresh. */ + if (!ItemPointerIsValid(&state->previous_tid) || + check_changed_group(recompress_ctx->current_segment, + state->seg_values, + state->seg_isnull, + recompress_ctx->num_segmentby)) + { + if (overlapping) + { + recompress_segment(recompress_tuplesortstate, + compressed_chunk_rel, + compressor, + writer); + overlapping = false; + } + + ItemPointerCopy(&index_scan->xs_heaptid, &state->previous_tid); + update_current_segment(recompress_ctx->current_segment, + state->seg_values, + state->seg_isnull, + recompress_ctx->num_segmentby); + save_prev_last(state, recompress_ctx); + continue; + } + + /* A batch joins the current group when it overlaps its predecessor; the + * first batch that no longer overlaps closes the group. 
*/ + bool batch_overlaps = batches_overlap_firstlast(recompress_ctx, + state->prev_last, + state->prev_last_isnull, + state->curr_first, + state->curr_first_isnull); + + if (batch_overlaps) + { + if (!overlapping) + { + bool found pg_attribute_unused() = + table_index_fetch_tuple(index_scan->xs_heapfetch, + &state->previous_tid, + index_scan->xs_snapshot, + previous_compressed_slot, + &index_scan->xs_heap_continue, + NULL); + Assert(found); + + decompress_batch_to_tuplesort(previous_compressed_slot, + compressed_rel_tupdesc, + decompressor, + recompress_tuplesortstate, + compressed_chunk_rel, + snapshot); + + overlapping = true; + found_overlaps = true; + } + + decompress_batch_to_tuplesort(compressed_slot, + compressed_rel_tupdesc, + decompressor, + recompress_tuplesortstate, + compressed_chunk_rel, + snapshot); + + CommandCounterIncrement(); + } + else if (overlapping) + { + /* This batch no longer overlaps the group; recompress what we + * gathered and let it start a fresh comparison. 
+ recompress_segment(recompress_tuplesortstate, compressed_chunk_rel, compressor, writer); + overlapping = false; + CommandCounterIncrement(); + } + + ItemPointerCopy(&index_scan->xs_heaptid, &state->previous_tid); + save_prev_last(state, recompress_ctx); + } + + if (overlapping) + { + recompress_segment(recompress_tuplesortstate, compressed_chunk_rel, compressor, writer); + } + + ExecDropSingleTupleTableSlot(previous_compressed_slot); + ExecDropSingleTupleTableSlot(compressed_slot); + + return found_overlaps; +} + +Oid +compact_chunk_impl(Chunk *uncompressed_chunk) +{ + Oid uncompressed_chunk_id = uncompressed_chunk->table_id; + + if (!ts_chunk_is_compressed(uncompressed_chunk)) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("unexpected chunk status %d in chunk %s.%s", + uncompressed_chunk->fd.status, + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)))); + } + + Chunk *compressed_chunk = ts_chunk_get_by_id(uncompressed_chunk->fd.compressed_chunk_id, true); + Ensure(compressed_chunk != NULL, + "compressed chunk not found for chunk \"%s\"", + get_rel_name(uncompressed_chunk->table_id)); + + ereport(DEBUG1, + (errmsg("acquiring locks for recompression: \"%s.%s\"", + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)))); + + /* Taking a ShareUpdateExclusiveLock on compressed chunk mostly to block DDL, + * this could potentially be a RowExclusiveLock with enough testing. + * + * For uncompressed chunk, we just need to read so AccessShareLock is fine.
+ */ + Relation uncompressed_chunk_rel = table_open(uncompressed_chunk->table_id, AccessShareLock); + Relation compressed_chunk_rel = + table_open(compressed_chunk->table_id, ShareUpdateExclusiveLock); + + int count; + LOCKTAG locktag; + SET_LOCKTAG_RELATION(locktag, MyDatabaseId, uncompressed_chunk_id); + + /* Check if any backends currently hold locks on the uncompressed chunk + * that would conflict with ExclusiveLock. This detects concurrent DML + * (which holds RowExclusiveLock) without actually acquiring ExclusiveLock + * ourselves. If conflicts exist, we skip compaction to avoid blocking. */ + GetLockConflicts(&locktag, ExclusiveLock, &count); + + if (count > 0) + { + elog(WARNING, + "delaying compaction on chunk %s.%s due to concurrent DML", + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)); + + /* Nothing was changed; close the relations, locks are kept until transaction end */ + table_close(uncompressed_chunk_rel, NoLock); + table_close(compressed_chunk_rel, NoLock); + + return uncompressed_chunk_id; + } + + TupleDesc uncompressed_rel_tupdesc = RelationGetDescr(uncompressed_chunk_rel); + CompressionSettings *settings = ts_compression_settings_get(uncompressed_chunk->table_id); + + /* + * Compaction compares batches on their orderby boundary values, so the + * chunk must have at least one orderby column. + */ + int num_orderby = ts_array_length(settings->fd.orderby); + Ensure(num_orderby > 0, + "trying to compact chunk \"%s\" with no orderby columns", + get_rel_name(uncompressed_chunk->table_id)); + + /* + * Compaction reads each batch's exact boundary rows from the firstlast + * sparse metadata. Skip chunks whose orderby columns lack it, which is only + * possible with a legacy sparse index configuration.
+ */ + for (int pos = 1; pos <= num_orderby; pos++) + { + if (orderby_sparse_kind(settings, pos) != ORDERBY_SPARSE_FIRSTLAST) + { + ereport(WARNING, + (errmsg("skipping compaction on chunk %s.%s", + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)), + errdetail("Orderby column \"%s\" has no firstlast sparse index.", + ts_array_get_element_text(settings->fd.orderby, pos)), + errhint("Recompress the chunk to add firstlast sparse index metadata for its " + "orderby columns."))); + table_close(uncompressed_chunk_rel, NoLock); + table_close(compressed_chunk_rel, NoLock); + return uncompressed_chunk_id; + } + } + + BulkWriter writer = bulk_writer_build(compressed_chunk_rel, 0); + Oid index_oid = get_compressed_chunk_index(writer.indexstate, settings); + Relation index_rel = index_open(index_oid, RowExclusiveLock); + ereport(DEBUG1, + (errmsg("locks acquired for compaction: \"%s.%s\"", + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)))); + + RecompressContext *recompress_ctx = + compress_chunk_populate_recompress_ctx(settings, + uncompressed_chunk_rel, + compressed_chunk_rel, + index_rel, + true); + + Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot()); + IndexScanDesc index_scan = + compact_chunk_begin_index_scan(compressed_chunk_rel, index_rel, snapshot); + + CompactChunkScanState *state = compact_chunk_scan_state_init(recompress_ctx, settings); + + bool found_overlaps = compact_chunk_find_overlapping_batches(compressed_chunk_rel, + index_scan, + recompress_ctx, + state); + + if (found_overlaps) + { + /* Recompress the overlaps */ + RowCompressor compressor; + RowDecompressor decompressor; + Tuplesortstate *recompress_tuplesortstate; + + row_compressor_init(&compressor, + settings, + RelationGetDescr(uncompressed_chunk_rel), + RelationGetDescr(compressed_chunk_rel)); + decompressor = build_decompressor(RelationGetDescr(compressed_chunk_rel), + 
RelationGetDescr(uncompressed_chunk_rel), + RelationGetRelid(compressed_chunk_rel), + RelationGetRelid(uncompressed_chunk_rel)); + /* Used for gathering and resorting the tuples that should be recompressed together. + * Since we are working on a per-segment level here, we only need to sort them + * based on the orderby settings. + */ + recompress_tuplesortstate = + tuplesort_begin_heap(uncompressed_rel_tupdesc, + recompress_ctx->num_orderby, + &recompress_ctx->sort_keys[recompress_ctx->num_segmentby], + &recompress_ctx->sort_operators[recompress_ctx->num_segmentby], + &recompress_ctx->sort_collations[recompress_ctx->num_segmentby], + &recompress_ctx->nulls_first[recompress_ctx->num_segmentby], + maintenance_work_mem, + NULL, + false); + + compact_chunk_recompress_overlapping_batches(compressed_chunk_rel, + index_scan, + snapshot, + recompress_ctx, + state, + &compressor, + &decompressor, + recompress_tuplesortstate, + &writer); + row_compressor_close(&compressor); + row_decompressor_close(&decompressor); + tuplesort_end(recompress_tuplesortstate); + } + + /* At this point, we have resolved all the overlaps. + * Try to switch the chunk status if we can get the exclusive lock + */ + if (ConditionalLockRelation(compressed_chunk_rel, ExclusiveLock)) + { + /* + * Use a fresh snapshot for the verification scan. If recompression + * happened, the original snapshot predates the CommandCounterIncrement() + * calls made during recompression, so it would still see the deleted + * batches and miss the newly inserted ones. A fresh snapshot correctly + * reflects the post-recompression state. 
+ */ + index_endscan(index_scan); + UnregisterSnapshot(snapshot); + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + index_scan = compact_chunk_begin_index_scan(compressed_chunk_rel, index_rel, snapshot); + compact_chunk_scan_state_reset(state, recompress_ctx); + found_overlaps = compact_chunk_find_overlapping_batches(compressed_chunk_rel, + index_scan, + recompress_ctx, + state); + if (!found_overlaps) + { + /* + * Only clear UNORDERED status from chunk. + */ + if (ts_chunk_clear_status(uncompressed_chunk, CHUNK_STATUS_COMPRESSED_UNORDERED)) + { + ereport(DEBUG1, + (errmsg("cleared unordered chunk status for compaction: \"%s.%s\"", + NameStr(uncompressed_chunk->fd.schema_name), + NameStr(uncompressed_chunk->fd.table_name)))); + } + + /* changed chunk status, so invalidate any plans involving this chunk */ + CacheInvalidateRelcacheByRelid(uncompressed_chunk->table_id); + } + } + + index_endscan(index_scan); + UnregisterSnapshot(snapshot); + index_close(index_rel, NoLock); + + bulk_writer_close(&writer); + + free_chunk_recompress_ctx(recompress_ctx); + + table_close(uncompressed_chunk_rel, NoLock); + table_close(compressed_chunk_rel, NoLock); + + return uncompressed_chunk_id; +} + /* * perform_recompression expects appropriate permissions and checks have already been done. * Relations must have appropriate locks and the CompressionSettings of compressed_chunk and diff --git a/tsl/src/compression/recompress.h b/tsl/src/compression/recompress.h index af6cba3419c..1748a56475b 100644 --- a/tsl/src/compression/recompress.h +++ b/tsl/src/compression/recompress.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "chunk.h" @@ -26,11 +27,17 @@ typedef struct RecompressContext ScanKeyData orderby_scankeys[INDEX_MAX_KEYS * 2]; /* for min and max */ bool key_byval[INDEX_MAX_KEYS]; int16 key_typlen[INDEX_MAX_KEYS]; + + /* Cached sort support per orderby column, used to compare batch boundary + * values during compaction. 
*/ + SortSupportData *orderby_ssup; } RecompressContext; extern Datum tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS); +extern Datum tsl_compact_chunk(PG_FUNCTION_ARGS); void recompress_chunk_segmentwise_impl(Chunk *chunk, bool fullrecompress); +Oid compact_chunk_impl(Chunk *chunk); bool recompress_chunk_in_memory_impl(Chunk *uncompressed_chunk); void rebuild_sparse_index_impl(Chunk *uncompressed_chunk, bool force); diff --git a/tsl/src/init.c b/tsl/src/init.c index 84ed220b61f..7d38c5c22ee 100644 --- a/tsl/src/init.c +++ b/tsl/src/init.c @@ -183,6 +183,7 @@ CrossModuleFunctions tsl_cm_functions = { .chunk_freeze_chunk = chunk_freeze_chunk, .chunk_unfreeze_chunk = chunk_unfreeze_chunk, .recompress_chunk_segmentwise = tsl_recompress_chunk_segmentwise, + .compact_chunk = tsl_compact_chunk, .get_compressed_chunk_index_for_recompression = tsl_get_compressed_chunk_index_for_recompression, .preprocess_query_tsl = tsl_preprocess_query, diff --git a/tsl/test/expected/compact_chunk.out b/tsl/test/expected/compact_chunk.out new file mode 100644 index 00000000000..65d5b33eda6 --- /dev/null +++ b/tsl/test/expected/compact_chunk.out @@ -0,0 +1,1495 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +-- Tests for the compact_chunk function. +-- compact_chunk merges overlapping compressed batches within a chunk +-- without decompressing/recompressing batches that are already ordered. 
+CREATE TABLE metrics (time TIMESTAMPTZ NOT NULL, device TEXT, value float) WITH (tsdb.hypertable, tsdb.orderby='time'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- compact_chunk with no overlapping batches (no-op case) +-- Insert 3000 ordered rows: Jan 2 00:00 to Jan 4 02:00 PST. +-- Direct compress insert creates 3 batches: 3x1000 rows. +INSERT INTO metrics +SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,3000) i; +-- Status should be COMPRESSED,UNORDERED after direct compress insert +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- Get the first compressed chunk for inspection +SELECT comp_ch.table_name AS "CHUNK_NAME", + comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- Show batch metadata: 3 non-overlapping batches (ordered by min time) +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | 1000 | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST + (0,2) | 1000 | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST + (0,3) | 1000 | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST + +-- compact_chunk on non-overlapping batches is a no-op; returns the chunk regclass +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk + +-- Show batch metadata: 3 non-overlapping batches with same ctids as before +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | 1000 | Thu Jan 02 
00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST + (0,2) | 1000 | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST + (0,3) | 1000 | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST + +-- Status UNORDERED removed: compact_chunk clears the UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- compact_chunk with overlapping batches +-- Insert data that overlaps with existing batches. +-- Jan 1 to Jan 3 data overlaps with the existing Jan 2 to Jan 4 batches +-- in the compressed chunk. +INSERT INTO metrics +SELECT '2025-01-01'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,3000) i; +-- Show compressed chunk metadata: overlapping batches are now visible. +-- The new batches from the Jan 1 insert interleave with the old Jan 2 batches. 
+SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,4) | 1000 | Wed Jan 01 16:00:00 2025 PST | Thu Jan 02 08:39:00 2025 PST | Wed Jan 01 16:00:00 2025 PST | Thu Jan 02 08:39:00 2025 PST + (0,1) | 1000 | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST + (0,5) | 1000 | Thu Jan 02 08:40:00 2025 PST | Fri Jan 03 01:19:00 2025 PST | Thu Jan 02 08:40:00 2025 PST | Fri Jan 03 01:19:00 2025 PST + (0,2) | 1000 | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST | Thu Jan 02 16:41:00 2025 PST | Fri Jan 03 09:20:00 2025 PST + (0,6) | 41 | Fri Jan 03 01:20:00 2025 PST | Fri Jan 03 02:00:00 2025 PST | Fri Jan 03 01:20:00 2025 PST | Fri Jan 03 02:00:00 2025 PST + (0,3) | 1000 | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST + +-- Status should contain UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- compact_chunk should identify and merge the overlapping batches. +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk + _timescaledb_internal._hyper_1_3_chunk + +-- Show compressed chunk metadata: no overlapping batches anymore. +-- We should see new ctids of the newly merged batches. 
+SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,7) | 1000 | Wed Jan 01 16:00:00 2025 PST | Thu Jan 02 04:20:00 2025 PST | Wed Jan 01 16:00:00 2025 PST | Thu Jan 02 04:20:00 2025 PST + (0,8) | 1000 | Thu Jan 02 04:20:00 2025 PST | Thu Jan 02 12:40:00 2025 PST | Thu Jan 02 04:20:00 2025 PST | Thu Jan 02 12:40:00 2025 PST + (0,9) | 1000 | Thu Jan 02 12:40:00 2025 PST | Thu Jan 02 21:00:00 2025 PST | Thu Jan 02 12:40:00 2025 PST | Thu Jan 02 21:00:00 2025 PST + (0,10) | 1000 | Thu Jan 02 21:00:00 2025 PST | Fri Jan 03 08:39:00 2025 PST | Thu Jan 02 21:00:00 2025 PST | Fri Jan 03 08:39:00 2025 PST + (1,1) | 41 | Fri Jan 03 08:40:00 2025 PST | Fri Jan 03 09:20:00 2025 PST | Fri Jan 03 08:40:00 2025 PST | Fri Jan 03 09:20:00 2025 PST + (0,3) | 1000 | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST | Fri Jan 03 09:21:00 2025 PST | Sat Jan 04 02:00:00 2025 PST + +-- Status should not contain UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- compact an uncompressed chunk +-- Create a new uncompressed chunk for a different time range by +-- inserting with direct compress insert disabled. 
+SET timescaledb.enable_direct_compress_insert = false; +INSERT INTO metrics VALUES ('2025-12-01', 'd1', -1.0); +SET timescaledb.enable_direct_compress_insert = true; +-- The new chunk for Dec 2025 should be uncompressed (empty status array) +SELECT _timescaledb_functions.chunk_status_text(chunk) AS chunk_status, + chunk::text AS chunk_name +FROM show_chunks('metrics') chunk +WHERE NOT (_timescaledb_functions.chunk_status_text(chunk) && ARRAY['COMPRESSED']) +ORDER BY chunk; + chunk_status | chunk_name +--------------+---------------------------------------- + {} | _timescaledb_internal._hyper_1_5_chunk + +-- compact_chunk on an uncompressed chunk must fail +SELECT chunk AS "UNCOMPRESSED_CHUNK" +FROM show_chunks('metrics') chunk +WHERE NOT (_timescaledb_functions.chunk_status_text(chunk) && ARRAY['COMPRESSED']) +LIMIT 1 \gset +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(:'UNCOMPRESSED_CHUNK'); +ERROR: trying to compact an uncompressed chunk _timescaledb_internal._hyper_1_5_chunk +\set ON_ERROR_STOP 1 +-- compact a partially compressed chunk +-- Insert an uncompressed row into the already-compressed Jan 2 chunk. +-- This makes chunk 1 PARTIAL (it has both compressed and uncompressed rows). 
+SET timescaledb.enable_direct_compress_insert = false; +INSERT INTO metrics VALUES ('2025-01-02 12:00', 'd1', -1.0); +SET timescaledb.enable_direct_compress_insert = true; +-- Chunk 1 should now show PARTIAL status +SELECT _timescaledb_functions.chunk_status_text(chunk) AS chunk_status, + chunk::text AS chunk_name +FROM show_chunks('metrics') chunk +WHERE _timescaledb_functions.chunk_status_text(chunk) && ARRAY['PARTIAL'] +ORDER BY chunk; + chunk_status | chunk_name +----------------------+---------------------------------------- + {COMPRESSED,PARTIAL} | _timescaledb_internal._hyper_1_1_chunk + +-- compact_chunk on a partially compressed chunk must fail +SELECT chunk AS "PARTIAL_CHUNK" +FROM show_chunks('metrics') chunk +WHERE _timescaledb_functions.chunk_status_text(chunk) && ARRAY['PARTIAL'] +LIMIT 1 \gset +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(:'PARTIAL_CHUNK'); +ERROR: trying to compact a partially compressed chunk _timescaledb_internal._hyper_1_1_chunk +\set ON_ERROR_STOP 1 +-- compact_chunk is STRICT, a NULL argument returns NULL +SELECT _timescaledb_functions.compact_chunk(NULL::regclass); + compact_chunk +--------------- + + +-- compact_chunk in read-only mode must fail +SET default_transaction_read_only TO on; +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; +ERROR: cannot execute compact_chunk() in a read-only transaction +\set ON_ERROR_STOP 1 +SET default_transaction_read_only TO off; +-- Create a hypertable with a segmentby column. +-- Each segment ('d1', 'd2') will have its own set of batches, and +-- compact_chunk should handle overlaps per segment independently. 
+CREATE TABLE metrics_seg (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert 2000 rows for each of two devices: non-overlapping within each segment +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- Get the compressed chunk for metrics_seg +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "SEG_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_seg' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 4 batches: 2 per segment, no overlaps +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + +-- compact_chunk with no overlaps: no-op for both segments +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_seg') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_3_6_chunk + +-- Same ctids: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time 
+-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + +-- status should be updated to COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Insert overlapping data +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; +-- 8 batches: 4 per segment, overlapping +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,5) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST 
| Fri Jan 03 16:39:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (0,6) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (1,1) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (1,2) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + +-- status should be updated to COMPRESSED, UNORDERED +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- compact_chunk with overlaps: combines the batches +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_seg') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_3_6_chunk + +-- New ctids: overlapping batches merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +--------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (1,3) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | 
Fri Jan 03 08:19:00 2025 PST + (1,4) | d1 | 1000 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (1,5) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST + (1,6) | d1 | 1000 | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + (1,7) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 08:19:00 2025 PST + (1,8) | d2 | 1000 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + (1,9) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST + (1,10) | d2 | 1000 | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST + +-- status should be COMPRESSED only (UNORDERED cleared after compaction) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- compact_chunk with nullable secondary orderby column (NULLS FIRST) +-- The second batch starts at a boundary tie on col1 (time) with a NULL +-- in col2 (value). With NULLS FIRST, (time=T, value=NULL) should sort +-- BEFORE (time=T, value=non-null), but the second batch is positioned +-- after the first in the index — an ordering violation. +CREATE TABLE metrics_nullable (time TIMESTAMPTZ, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value NULLS FIRST'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +-- Insert batch 1 (1000 rows): time [00:01..16:40], all non-null values. +-- Last row: (16:40, 1000). 
+INSERT INTO metrics_nullable +SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; +-- compact_chunk on nullable orderby with no nulls in data should succeed (no-op) +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nullable') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_5_8_chunk + +-- Get the compressed chunk for metrics_nullable +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NULLABLE_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nullable' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 1 batch, no nulls +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | 1000 | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | 1 | 1000 | 1 | 1000 + +-- Insert batch 2 (1000 rows): starts at batch 1's max time (i=1000, time=16:40). +-- First row has NULL value: (16:40, NULL). +-- With NULLS FIRST, (16:40, NULL) should sort BEFORE (16:40, 1000) from +-- batch 1's last row. But batch 2 is after batch 1 in the index → unordered. 
+INSERT INTO metrics_nullable +SELECT '2025-01-02'::timestamptz + ((999 + i) || ' minute')::interval, + 'd1', + CASE WHEN i = 1 THEN NULL ELSE (1000 + i)::float END +FROM generate_series(1,1000) i; +-- 2 batches: boundary tie on col1, NULL in col2 at the boundary +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | 1000 | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | 1 | 1000 | 1 | 1000 + (0,2) | 1000 | Thu Jan 02 16:40:00 2025 PST | Fri Jan 03 09:19:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Fri Jan 03 09:19:00 2025 PST | 1002 | 2000 | | 2000 + +-- compact_chunk should detect the boundary-tie overlap via NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nullable') chunk; + compact_chunk +---------------------------------------- + _timescaledb_internal._hyper_5_8_chunk + +-- After compaction: batches merged and re-sorted +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value 
| _ts_meta_v2_last_value +-------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,3) | 1000 | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Thu Jan 02 00:01:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | 1 | 999 | 1 | + (0,4) | 1000 | Thu Jan 02 16:40:00 2025 PST | Fri Jan 03 09:19:00 2025 PST | Thu Jan 02 16:40:00 2025 PST | Fri Jan 03 09:19:00 2025 PST | 1000 | 2000 | 1000 | 2000 + +-- Verify total row count is preserved +SELECT count(*) FROM metrics_nullable; + count +------- + 2000 + +-- Verify NULL rows are accessible +SELECT count(*) AS null_value_count FROM metrics_nullable WHERE value IS NULL; + null_value_count +------------------ + 1 + +-- Verify ordering at boundary: NULL must come before non-null with NULLS FIRST +SELECT time, value FROM metrics_nullable +WHERE time = '2025-01-02'::timestamptz + '1000 minutes'::interval +ORDER BY time, value NULLS FIRST; + time | value +------------------------------+------- + Thu Jan 02 16:40:00 2025 PST | + Thu Jan 02 16:40:00 2025 PST | 1000 + +DROP TABLE metrics_nullable; +-- compact_chunk with DESC orderby column +CREATE TABLE metrics_desc (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time DESC', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert non-overlapping data for two devices +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) 
i; +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- Get the compressed chunk for metrics_desc +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "DESC_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_desc' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 4 batches: 2 per segment, ordered by max time descending +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + +-- No overlaps: compact_chunk is a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_desc') chunk; + compact_chunk +----------------------------------------- + _timescaledb_internal._hyper_7_10_chunk + +-- Same ctids: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, 
_ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + +-- Status should be COMPRESSED only after compaction +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Insert overlapping data +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; +-- 8 batches: 4 per segment, overlapping +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time 
+-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,5) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,2) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (1,1) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (0,3) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (1,2) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,4) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (1,3) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- compact_chunk should merge overlapping batches with DESC ordering +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_desc') chunk; + compact_chunk +----------------------------------------- + _timescaledb_internal._hyper_7_10_chunk + +-- New ctids: overlapping batches merged +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + ctid | device | 
_ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +--------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (1,4) | d1 | 1000 | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 01:00:00 2025 PST + (1,5) | d1 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (1,6) | d1 | 1000 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST + (1,7) | d1 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + (1,8) | d2 | 1000 | Sat Jan 04 01:00:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 09:19:00 2025 PST | Sat Jan 04 01:00:00 2025 PST + (1,9) | d2 | 1000 | Fri Jan 03 16:40:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Sat Jan 04 00:59:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (1,10) | d2 | 1000 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST + (1,11) | d2 | 1000 | Fri Jan 03 00:00:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 08:19:00 2025 PST | Fri Jan 03 00:00:00 2025 PST + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Verify data is correctly ordered (DESC) within each segment +SELECT device, time FROM metrics_desc WHERE device = 'd1' ORDER BY device, time DESC LIMIT 5; + device | time +--------+------------------------------ + d1 | Sat Jan 04 09:19:00 2025 PST + d1 | Sat Jan 04 09:19:00 2025 PST + d1 | Sat Jan 04 09:18:00 2025 PST + d1 | Sat Jan 04 09:18:00 2025 PST + d1 | Sat Jan 04 
09:17:00 2025 PST + +DROP TABLE metrics_desc; +-- compact_chunk with multi-column orderby +-- Tests that overlap detection works correctly when using ORDER BY device, time. +-- The bug: secondary column min/max metadata is a global aggregate across +-- all rows, not scoped to the primary column's value. This causes false +-- negatives where interleaving batches go undetected. +CREATE TABLE metrics_multi (time TIMESTAMPTZ NOT NULL, device TEXT NOT NULL, value float) +WITH (tsdb.hypertable, tsdb.orderby='device,time'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert 3 batches with different devices creating boundary ties on col1: +-- Batch 1 (500 rows): device d1..d2, time [00:01..08:20] +-- Batch 2 (500 rows): device d2..d3, time [08:21..16:40] +-- The d2 rows in both batches create a boundary tie on col1 (device). +-- Col2 (time) ranges are non-overlapping within the d2 group → no actual overlap. +INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd1' ELSE 'd2' END, + i::float +FROM generate_series(1,500) i; +INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + ((500 + i) || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd2' ELSE 'd3' END, + (500 + i)::float +FROM generate_series(1,500) i; +-- Get the compressed chunk for metrics_multi +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "MULTI_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_multi' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 2 batches: boundary tie on col1 (device=d2), non-overlapping time ranges +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | 500 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST + (0,2) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +-- compact_chunk: boundary tie on col1, but no overlap on col2 (time) — should be a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi') chunk; + compact_chunk +----------------------------------------- + _timescaledb_internal._hyper_9_12_chunk + +-- Same ctids: nothing was rewritten +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + ctid | 
_ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | 500 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST + (0,2) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +-- Now insert truly overlapping data within the d2 device group: +-- Batch 3 (500 rows): device d2, time range overlaps with both existing d2 ranges +INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + ((250 + i) || ' minute')::interval, 'd2', (250 + i)::float +FROM generate_series(1,500) i; +-- 3 batches: the new d2 batch overlaps with the d2 portions of existing batches +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,1) | 500 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST + (0,3) | 500 | d2 | 
d2 | d2 | d2 | Fri Jan 03 04:11:00 2025 PST | Fri Jan 03 12:30:00 2025 PST | Fri Jan 03 04:11:00 2025 PST | Fri Jan 03 12:30:00 2025 PST + (0,2) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +-- compact_chunk: boundary tie on col1 with actual overlap on col2 — must detect and merge +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi') chunk; + compact_chunk +----------------------------------------- + _timescaledb_internal._hyper_9_12_chunk + +-- New ctids: overlapping batches merged +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,4) | 1000 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 10:25:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 10:25:00 2025 PST + (0,5) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 10:26:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 10:26:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +-- Total row count preserved +SELECT count(*) FROM metrics_multi; + count +------- + 1500 + +DROP TABLE metrics_multi; +-- compact_chunk with multi-column orderby, second column DESC +-- Same boundary-tie logic but the secondary column uses descending order. 
+-- The sort operator for col2 is now ">" instead of "<", so the boundary +-- decompression must use the correct comparator. +CREATE TABLE metrics_multi_desc (time TIMESTAMPTZ NOT NULL, device TEXT NOT NULL, value float) +WITH (tsdb.hypertable, tsdb.orderby='device,time DESC'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert 2 batches with multiple devices, DESC time ordering: +-- Batch 1 (500 rows): device d2..d3, time [08:21..16:40] DESC +-- Batch 2 (500 rows): device d1..d2, time [00:01..08:20] DESC +-- The d2 rows create a boundary tie on col1. +-- With DESC, batch 1's d2 rows (08:21..12:30) sort before batch 2's d2 rows +-- (04:11..08:20), which follow batch 2's d1 rows → the batches interleave. +INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + ((500 + i) || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd2' ELSE 'd3' END, + (500 + i)::float +FROM generate_series(1,500) i; +INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd1' ELSE 'd2' END, + i::float +FROM generate_series(1,500) i; +-- Get the compressed chunk +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "MULTI_DESC_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_multi_desc' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 2 batches: boundary tie on col1 (device=d2); d2 time ranges are disjoint but interleave under time DESC +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,2) | 500 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 04:10:00 2025 PST | Fri Jan 03 04:11:00 2025 PST + (0,1) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 12:30:00 2025 PST | Fri Jan 03 12:31:00 2025 PST + +-- compact_chunk: boundary tie on col1; batch 1's d2 rows sort before batch 2's d2 rows (DESC) — must merge +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi_desc') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_11_14_chunk + +-- New ctid: both batches were rewritten into a single 1000-row batch +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + ctid | 
_ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,3) | 1000 | d1 | d3 | d1 | d3 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 04:10:00 2025 PST | Fri Jan 03 12:31:00 2025 PST + +-- Now insert overlapping data within the d2 device group +INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + ((250 + i) || ' minute')::interval, 'd2', (250 + i)::float +FROM generate_series(1,500) i; +-- 2 batches: the new d2 batch overlaps the d2 rows of the existing 1000-row batch +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,3) | 1000 | d1 | d3 | d1 | d3 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 04:10:00 2025 PST | Fri Jan 03 12:31:00 2025 PST + (0,4) | 500 | d2 | d2 | d2 | d2 | Fri Jan 03 04:11:00 2025 PST | Fri Jan 03 12:30:00 2025 PST | Fri Jan 03 12:30:00 2025 PST | Fri Jan 03 04:11:00 2025 PST + +-- compact_chunk: boundary tie on col1, actual overlap on col2 (DESC) — must merge +SELECT 
_timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi_desc') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_11_14_chunk + +-- New ctids: overlapping batches merged +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_device | _ts_meta_v2_last_device | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+----------------+----------------+----------------+--------------------------+-------------------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,5) | 1000 | d1 | d2 | d1 | d2 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 12:30:00 2025 PST | Fri Jan 03 04:10:00 2025 PST | Fri Jan 03 06:16:00 2025 PST + (0,6) | 500 | d2 | d3 | d2 | d3 | Fri Jan 03 04:11:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 06:15:00 2025 PST | Fri Jan 03 12:31:00 2025 PST + +-- Total row count preserved +SELECT count(*) FROM metrics_multi_desc; + count +------- + 1500 + +DROP TABLE metrics_multi_desc; +-- compact_chunk with segmentby + multi-column orderby + nullable orderby column +-- Combines all three features: +-- segmentby='device' multiple segments processed independently +-- orderby='time,value' multi-column overlap detection with boundary ties +-- 'value' is nullable boundary tie NULL values must respect NULL ordering +CREATE TABLE metrics_combined (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET 
timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert non-null data for two segments: 1000 rows each, non-overlapping +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; +-- Get the compressed chunk +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "COMBINED_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_combined' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 2 batches: 1 per segment, no overlaps, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1 | 1000 | 1 | 1000 + (0,2) | d2 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1001 | 2000 | 1001 | 2000 + +-- No overlaps, no nulls: compact_chunk 
is a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_combined') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_13_16_chunk + +-- Same ctids as before: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1 | 1000 | 1 | 1000 + (0,2) | d2 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1001 | 2000 | 1001 | 2000 + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Insert overlapping data with NULLs in the nullable orderby column (value) +-- for both segments. Every 5th row has NULL value. 
+INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; +-- 4 batches: original + overlapping with nulls. Both segments overlap on time col1. +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1 | 1000 | 1 | 1000 + (0,3) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 2001 | 2999 | 2001 | + (0,2) | d2 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1001 | 2000 | 1001 | 2000 + (0,4) | d2 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 3001 | 3999 | 3001 | + +-- Status should be UNORDERED +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + 
chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- compact_chunk must: +-- 1. Detect overlaps per segment via multi-column boundary-tie logic +-- 2. Recompress overlapping rows into ordered batches +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_combined') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_13_16_chunk + +-- After compaction: new ctids, overlapping batches merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,5) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | 1 | 2499 | 1 | + (0,6) | d1 | 1000 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 501 | 2999 | 501 | + (0,7) | d2 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | 1001 | 3499 | 1001 | + (0,8) | d2 | 1000 | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | Fri Jan 03 08:21:00 2025 PST | Fri Jan 03 16:40:00 2025 PST | 1501 | 3999 | 1501 | + +-- Status should be COMPRESSED only (UNORDERED cleared). 
+-- NULLs in the secondary orderby column (value) just sort within the batch per +-- NULLS FIRST/LAST; only the first orderby column (time, NOT NULL) affects +-- batch placement. +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Total row count preserved +SELECT count(*) FROM metrics_combined; + count +------- + 4000 + +-- NULL rows preserved per segment +SELECT device, count(*) AS null_value_count FROM metrics_combined WHERE value IS NULL GROUP BY device ORDER BY device; + device | null_value_count +--------+------------------ + d1 | 200 + d2 | 200 + +-- Data integrity per segment +SELECT device, count(*) FROM metrics_combined GROUP BY device ORDER BY device; + device | count +--------+------- + d1 | 2000 + d2 | 2000 + +-- Verify ordering is correct within segment d1 +SELECT device, time, value FROM metrics_combined +WHERE device = 'd1' ORDER BY time, value LIMIT 5; + device | time | value +--------+------------------------------+------- + d1 | Fri Jan 03 00:01:00 2025 PST | 1 + d1 | Fri Jan 03 00:01:00 2025 PST | 2001 + d1 | Fri Jan 03 00:02:00 2025 PST | 2 + d1 | Fri Jan 03 00:02:00 2025 PST | 2002 + d1 | Fri Jan 03 00:03:00 2025 PST | 3 + +SELECT device, time, value FROM metrics_combined +WHERE device = 'd1' AND value IS NULL ORDER BY time LIMIT 5; + device | time | value +--------+------------------------------+------- + d1 | Fri Jan 03 00:05:00 2025 PST | + d1 | Fri Jan 03 00:10:00 2025 PST | + d1 | Fri Jan 03 00:15:00 2025 PST | + d1 | Fri Jan 03 00:20:00 2025 PST | + d1 | Fri Jan 03 00:25:00 2025 PST | + +DROP TABLE metrics_combined; +-- compact_chunk with nullable first orderby column (NULLS LAST, default) +-- A NULL boundary value makes a mixed-null batch overlap its neighbor, so the +-- overlap merge re-sorts both batches; the NULLs land at the end (NULLS LAST). 
+-- A mixed-null batch that overlaps nothing is already ordered and left untouched. +CREATE TABLE metrics_nulls_last (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value,time', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert non-null data for two segments +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NULLS_LAST_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nulls_last' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 2 non-overlapping batches, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,2) | d2 | 1000 | 1001 | 2000 | 1001 | 2000 + +-- Insert overlapping data with NULLs in value (first orderby column) +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; +INSERT INTO 
metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; +-- 4 batches with overlaps and nulls in first orderby column +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,3) | d1 | 1000 | 2001 | 2999 | 2001 | + (0,2) | d2 | 1000 | 1001 | 2000 | 1001 | 2000 + (0,4) | d2 | 1000 | 3001 | 3999 | 3001 | + +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_last') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_15_18_chunk + +-- After compaction: the mixed-null batch does not overlap its neighbor, so it +-- stays as-is (nulls already sort to the end of the batch with NULLS LAST) +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,3) | d1 | 1000 | 2001 | 2999 | 2001 | + (0,2) | d2 | 1000 | 1001 | 2000 | 1001 | 2000 + (0,4) | d2 | 1000 | 3001 | 3999 | 3001 | + +-- UNORDERED cleared: NULLs fold into the end of the segment (NULLS LAST) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_nulls_last') chunk; + chunk_status_text 
+------------------- + {COMPRESSED} + +-- Data integrity +SELECT count(*) FROM metrics_nulls_last; + count +------- + 4000 + +SELECT device, count(*) FROM metrics_nulls_last GROUP BY device ORDER BY device; + device | count +--------+------- + d1 | 2000 + d2 | 2000 + +SELECT device, count(*) AS null_count FROM metrics_nulls_last WHERE value IS NULL GROUP BY device ORDER BY device; + device | null_count +--------+------------ + d1 | 200 + d2 | 200 + +-- Now test the overlap path with NULLs: insert data that truly overlaps +-- on the value range AND contains NULLs, with a nullable first orderby column. +-- The NULL rows must survive the overlap merge and end up ordered correctly. +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 500)::float END +FROM generate_series(1,500) i; +-- Batches before overlap compaction +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS LAST; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,5) | d1 | 500 | 501 | 999 | 501 | + (0,3) | d1 | 1000 | 2001 | 2999 | 2001 | + +-- This overlaps with existing d1 data (value 501..900 overlaps with 1..1000 and 2001..2999) +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_last') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_15_18_chunk + +-- After overlap merge: NULLs from overlapping batches preserved +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK 
+WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS LAST; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,6) | d1 | 1000 | 1 | 778 | 1 | 778 + (0,7) | d1 | 1000 | 778 | 2749 | 778 | 2749 + (0,8) | d1 | 500 | 2751 | 2999 | 2751 | + +-- Data integrity: all rows including NULLs from the overlap merge must survive +SELECT count(*) FROM metrics_nulls_last WHERE device = 'd1'; + count +------- + 2500 + +SELECT count(*) AS null_count FROM metrics_nulls_last WHERE device = 'd1' AND value IS NULL; + null_count +------------ + 300 + +DROP TABLE metrics_nulls_last; +-- compact_chunk with nullable first orderby column (NULLS FIRST) +-- With NULLS FIRST, the re-sort places NULLs at the start of each segment, so a +-- null-containing batch is already correctly positioned. +CREATE TABLE metrics_nulls_first (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value NULLS FIRST,time', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert non-null data for two segments +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "NULLS_FIRST_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nulls_first' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- 2 non-overlapping batches, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,2) | d2 | 1000 | 1001 | 2000 | 1001 | 2000 + +-- Insert overlapping data with NULLs in value (first orderby column) +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; +-- 4 batches with overlaps and nulls in first orderby column +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,3) | d1 | 1000 | 2001 | 2999 | | 2999 + (0,2) | d2 | 1000 | 1001 | 2000 | 1001 | 2000 + (0,4) | d2 | 1000 | 3001 | 3999 | | 3999 + +SELECT 
_timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_first') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_17_20_chunk + +-- After compaction: with NULLS FIRST, null batches at segment start are fine +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,5) | d1 | 1000 | 1 | 800 | | 800 + (0,6) | d1 | 1000 | 801 | 2999 | 801 | 2999 + (0,7) | d2 | 1000 | 1001 | 1800 | | 1800 + (0,8) | d2 | 1000 | 1801 | 3999 | 1801 | 3999 + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_nulls_first') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Data integrity +SELECT count(*) FROM metrics_nulls_first; + count +------- + 4000 + +SELECT device, count(*) FROM metrics_nulls_first GROUP BY device ORDER BY device; + device | count +--------+------- + d1 | 2000 + d2 | 2000 + +SELECT device, count(*) AS null_count FROM metrics_nulls_first WHERE value IS NULL GROUP BY device ORDER BY device; + device | null_count +--------+------------ + d1 | 200 + d2 | 200 + +-- Same overlap+NULLs test for NULLS FIRST +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 500)::float END +FROM generate_series(1,500) i; +-- Batches before overlap compaction +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS FIRST; + ctid | device | _ts_meta_count | _ts_meta_min_1 | 
_ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,5) | d1 | 1000 | 1 | 800 | | 800 + (0,9) | d1 | 500 | 501 | 999 | | 999 + (0,6) | d1 | 1000 | 801 | 2999 | 801 | 2999 + +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_first') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_17_20_chunk + +-- After overlap merge: NULLs from overlapping batches preserved +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS FIRST; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +--------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,10) | d1 | 1000 | 1 | 611 | | 611 + (0,11) | d1 | 1000 | 612 | 2374 | 612 | 2374 + (0,12) | d1 | 500 | 2376 | 2999 | 2376 | 2999 + +-- Data integrity: NULLs from overlap merge must survive +SELECT count(*) FROM metrics_nulls_first WHERE device = 'd1'; + count +------- + 2500 + +SELECT count(*) AS null_count FROM metrics_nulls_first WHERE device = 'd1' AND value IS NULL; + null_count +------------ + 300 + +DROP TABLE metrics_nulls_first; +-- compact_chunk with a mixed-null batch whose NULLs overlap a later batch +-- A batch with both NULL and non-NULL values in the first orderby column has a +-- NULL last row (NULLS LAST) that sorts after a following non-null batch, so the +-- two batches overlap and the merge re-sorts them with the NULLs back at the end. +-- Without firstlast metadata the NULL boundary is invisible and the overlap is +-- missed, leaving NULLs in the wrong index position. 
+CREATE TABLE metrics_mixed_nulls (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value,time', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert 1800 non-null rows + 200 NULL rows for device 'd1'. +-- DCI sorts by (value NULLS LAST, time), producing: +-- Batch 1 (1000 rows): value [1..1000] — all non-null +-- Batch 2 (1000 rows): value [1001..1800] + 200 NULLs — mixed! +-- Batch 2's firstlast metadata: first=1001, last=NULL (NULLS LAST). +INSERT INTO metrics_mixed_nulls +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i > 1800 THEN NULL ELSE i::float END +FROM generate_series(1,2000) i; +-- Insert 1000 more non-null rows with values [1801..2800], forming Batch 3 +-- (first=1801, last=2800). Batch 2's non-null range [1001..1800] sits below +-- Batch 3, but Batch 2's NULL last row sorts after Batch 3 (NULLS LAST), so the +-- two batches overlap and must be merged. +INSERT INTO metrics_mixed_nulls +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + (1800 + i)::float +FROM generate_series(1,1000) i; +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "MIXED_NULLS_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_mixed_nulls' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- Show batch metadata before compaction: 3 batches, batch 2 has mixed nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :MIXED_NULLS_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,2) | d1 | 1000 | 1001 | 1800 | 1001 | + (0,3) | d1 | 1000 | 1801 | 2800 | 1801 | 2800 + +-- compact_chunk must detect the overlap and re-sort the NULLs back to the end +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_mixed_nulls') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_19_22_chunk + +-- After compaction: Batch 2 and Batch 3 merged, NULLs folded into the trailing +-- batch at the end (NULLS LAST). 
+SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :MIXED_NULLS_CHUNK +ORDER BY device, _ts_meta_min_1 NULLS LAST; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 1000 | 1 | 1000 | 1 | 1000 + (0,4) | d1 | 1000 | 1001 | 2000 | 1001 | 2000 + (0,5) | d1 | 1000 | 2001 | 2800 | 2001 | + +-- Status should be COMPRESSED only (UNORDERED cleared correctly this time) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_mixed_nulls') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Data integrity +SELECT count(*) FROM metrics_mixed_nulls; + count +------- + 3000 + +SELECT count(*) AS null_count FROM metrics_mixed_nulls WHERE value IS NULL; + null_count +------------ + 200 + +-- Verify ordering is correct: NULLs must come after all non-null values +-- This query will return wrong results if the bug is present (NULLs between 1800 and 1801) +SELECT value FROM metrics_mixed_nulls +WHERE device = 'd1' +ORDER BY value NULLS LAST, time +LIMIT 5 OFFSET 1795; + value +------- + 1796 + 1797 + 1798 + 1799 + 1800 + +DROP TABLE metrics_mixed_nulls; +-- compact_chunk with nullable SECONDARY orderby column at boundary tie +-- Regression test: when orderby='time,value' and value is nullable, +-- two batches can tie on col1 (time) at the boundary. If the prev batch's +-- last row has NULL in col2 (value), the overlap check must compare using +-- NULLS FIRST/LAST semantics so the tie is correctly seen as an overlap. 
+CREATE TABLE metrics_secondary_null (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert batch 1 (500 rows): time [00:01..08:20], with NULL value at the +-- last timestamp (i=500, time=08:20). With orderby='time,value NULLS LAST', +-- the last row in sorted order is (08:20, NULL). +INSERT INTO metrics_secondary_null +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i = 500 THEN NULL ELSE i::float END +FROM generate_series(1,500) i; +-- Insert batch 2 (500 rows): starts exactly at batch 1's max time (08:20). +-- This creates a boundary tie on col1 (time). All values are non-null. +-- The first row (08:20, 1001) ties with batch 1's last row (08:20, NULL). +-- With NULLS LAST, (08:20, NULL) should sort AFTER (08:20, 1001), but in +-- index order batch 1 comes before batch 2 — an ordering violation. +INSERT INTO metrics_secondary_null +SELECT '2025-01-03'::timestamptz + ((499 + i) || ' minute')::interval, + 'd1', + (1000 + i)::float +FROM generate_series(1,500) i; +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "SEC_NULL_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_secondary_null' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- Show batch metadata: col1 (time) ranges overlap at the boundary +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :SEC_NULL_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 500 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | 1 | 499 | 1 | + (0,2) | d1 | 500 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | 1001 | 1500 | 1001 | 1500 + +-- compact_chunk must detect the boundary-tie overlap caused by NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_secondary_null') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_21_24_chunk + +-- After compaction: batches should be merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEC_NULL_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | 
_ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,3) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_secondary_null') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Data integrity +SELECT count(*) FROM metrics_secondary_null; + count +------- + 1000 + +SELECT count(*) AS null_count FROM metrics_secondary_null WHERE value IS NULL; + null_count +------------ + 1 + +-- Verify ordering: at the boundary time (08:20 = +500 min), non-null value +-- must come before NULL with NULLS LAST ordering. +SELECT time, value FROM metrics_secondary_null +WHERE device = 'd1' AND time = '2025-01-03'::timestamptz + '500 minutes'::interval +ORDER BY time, value NULLS LAST; + time | value +------------------------------+------- + Fri Jan 03 08:20:00 2025 PST | 1001 + Fri Jan 03 08:20:00 2025 PST | + +DROP TABLE metrics_secondary_null; +-- compact_chunk with nullable SECONDARY orderby column at boundary tie (NULLS FIRST) +-- Mirror of the NULLS LAST test above. With NULLS FIRST, the curr batch's +-- first row has NULL in col2, which should sort BEFORE the prev batch's +-- last non-null col2 value — meaning the curr batch actually starts earlier +-- than its non-null min suggests, causing an ordering violation. 
+CREATE TABLE metrics_secondary_null_first (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value NULLS FIRST', tsdb.segmentby='device'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Insert batch 1 (500 rows): time [00:01..08:20], all non-null values. +INSERT INTO metrics_secondary_null_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + i::float +FROM generate_series(1,500) i; +-- Insert batch 2 (500 rows): starts at batch 1's max time (08:20). +-- The first row at time=08:20 has NULL value. With NULLS FIRST, +-- (08:20, NULL) should sort BEFORE (08:20, 500) from batch 1's last row. +-- But batch 2 comes after batch 1 in index order — ordering violation. +INSERT INTO metrics_secondary_null_first +SELECT '2025-01-03'::timestamptz + ((499 + i) || ' minute')::interval, + 'd1', + CASE WHEN i = 1 THEN NULL ELSE (1000 + i)::float END +FROM generate_series(1,500) i; +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "SEC_NF_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_secondary_null_first' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- Show batch metadata: boundary tie on col1 (time) at 08:20 +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :SEC_NF_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | _ts_meta_v2_first_time | _ts_meta_v2_last_time | _ts_meta_min_2 | _ts_meta_max_2 | _ts_meta_v2_first_value | _ts_meta_v2_last_value +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------+----------------+----------------+-------------------------+------------------------ + (0,1) | d1 | 500 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | 1 | 500 | 1 | 500 + (0,2) | d1 | 500 | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 08:20:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | 1002 | 1500 | | 1500 + +-- compact_chunk must detect the boundary-tie overlap caused by NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_secondary_null_first') chunk; + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_23_26_chunk + +-- After compaction: batches should be merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEC_NF_CHUNK +ORDER BY device, _ts_meta_min_1; + ctid | device | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 | 
_ts_meta_v2_first_time | _ts_meta_v2_last_time +-------+--------+----------------+------------------------------+------------------------------+------------------------------+------------------------------ + (0,3) | d1 | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:39:00 2025 PST | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:39:00 2025 PST + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_secondary_null_first') chunk; + chunk_status_text +------------------- + {COMPRESSED} + +-- Data integrity +SELECT count(*) FROM metrics_secondary_null_first; + count +------- + 1000 + +SELECT count(*) AS null_count FROM metrics_secondary_null_first WHERE value IS NULL; + null_count +------------ + 1 + +-- Verify ordering: at the boundary time (08:20), NULL must come before non-null +-- with NULLS FIRST ordering. +SELECT time, value FROM metrics_secondary_null_first +WHERE device = 'd1' AND time = '2025-01-03'::timestamptz + '500 minutes'::interval +ORDER BY time, value NULLS FIRST; + time | value +------------------------------+------- + Fri Jan 03 08:20:00 2025 PST | + Fri Jan 03 08:20:00 2025 PST | 500 + +DROP TABLE metrics_secondary_null_first; +-- compact_chunk skips legacy chunks whose orderby column has no firstlast sparse +-- index. Compaction reads exact batch boundaries from firstlast metadata, so +-- without it the chunk is left untouched (UNORDERED stays set). Configuring minmax +-- (and not firstlast) on the orderby column reproduces a legacy chunk: a +-- user-supplied index for a column suppresses the firstlast that orderby columns +-- get by default. 
+CREATE TABLE metrics_no_firstlast (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time', tsdb.index='minmax(time)'); +NOTICE: using column "time" as partitioning column +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; +-- Two inserts over the same time range (one chunk), leaving the chunk UNORDERED. +INSERT INTO metrics_no_firstlast +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; +INSERT INTO metrics_no_firstlast +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', (1000 + i)::float +FROM generate_series(1,1000) i; +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NO_FL_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_no_firstlast' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset +-- The orderby column only carries minmax metadata (no _ts_meta_v2_first/last). +SELECT count(*) FILTER (WHERE attname LIKE '\_ts\_meta\_v2\_first%') AS first_cols, + count(*) FILTER (WHERE attname LIKE '\_ts\_meta\_v2\_last%') AS last_cols +FROM pg_attribute +WHERE attrelid = :'NO_FL_CHUNK'::regclass AND attnum > 0; + first_cols | last_cols +------------+----------- + 0 | 0 + +-- Overlapping batches; chunk is UNORDERED. 
+SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1 +FROM :NO_FL_CHUNK ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 +-------+----------------+------------------------------+------------------------------ + (0,1) | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,2) | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_no_firstlast') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +-- compact_chunk is a no-op and warns with a hint on how to enable it: there is +-- no firstlast metadata to read batch boundaries from. +\set VERBOSITY default +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_no_firstlast') chunk; +WARNING: skipping compaction on chunk _timescaledb_internal._hyper_25_28_chunk +DETAIL: Orderby column "time" has no firstlast sparse index. +HINT: Recompress the chunk to add firstlast sparse index metadata for its orderby columns. + compact_chunk +------------------------------------------ + _timescaledb_internal._hyper_25_28_chunk + +\set VERBOSITY terse +-- Batches unchanged (same ctids) and UNORDERED is still set. 
+SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1 +FROM :NO_FL_CHUNK ORDER BY _ts_meta_min_1; + ctid | _ts_meta_count | _ts_meta_min_1 | _ts_meta_max_1 +-------+----------------+------------------------------+------------------------------ + (0,1) | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + (0,2) | 1000 | Fri Jan 03 00:01:00 2025 PST | Fri Jan 03 16:40:00 2025 PST + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_no_firstlast') chunk; + chunk_status_text +------------------------ + {COMPRESSED,UNORDERED} + +DROP TABLE metrics_no_firstlast; diff --git a/tsl/test/isolation/expected/compact_chunk_concurrent.out b/tsl/test/isolation/expected/compact_chunk_concurrent.out new file mode 100644 index 00000000000..a286196745c --- /dev/null +++ b/tsl/test/isolation/expected/compact_chunk_concurrent.out @@ -0,0 +1,246 @@ +Parsed test spec with 2 sessions + +starting permutation: s2_begin s2_select s1_compact s2_commit s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_select: + SELECT count(*) FROM metrics; + +count +----- + 4000 + +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 + +step s2_commit: + COMMIT; + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------ +{COMPRESSED} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4000 + + +starting permutation: s2_begin s2_insert s1_compact s2_commit s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_insert: + INSERT INTO metrics VALUES ('2025-01-02 12:00', 'd1', -1.0); + +s1: WARNING: delaying compaction on chunk _timescaledb_internal._hyper_X_X_chunk due to concurrent DML +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 
+ +step s2_commit: + COMMIT; + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------------------------ +{COMPRESSED,UNORDERED,PARTIAL} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4001 + + +starting permutation: s2_begin s2_update s1_compact s2_commit s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_update: + UPDATE metrics SET value = -1.0 WHERE value = 1.0; + +s1: WARNING: delaying compaction on chunk _timescaledb_internal._hyper_X_X_chunk due to concurrent DML +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 + +step s2_commit: + COMMIT; + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------------------------ +{COMPRESSED,UNORDERED,PARTIAL} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4000 + + +starting permutation: s2_begin s2_delete s1_compact s2_commit s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_delete: + DELETE FROM metrics WHERE value = 1.0; + +s1: WARNING: delaying compaction on chunk _timescaledb_internal._hyper_X_X_chunk due to concurrent DML +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 + +step s2_commit: + COMMIT; + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------------------------ +{COMPRESSED,UNORDERED,PARTIAL} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 3999 + + +starting permutation: s2_begin s2_insert s2_commit s1_compact s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_insert: + INSERT INTO metrics VALUES ('2025-01-02 12:00', 'd1', -1.0); + +step 
s2_commit: + COMMIT; + +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +ERROR: trying to compact a partially compressed chunk _timescaledb_internal._hyper_X_X_chunk +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------------------------ +{COMPRESSED,UNORDERED,PARTIAL} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4001 + + +starting permutation: s2_begin s2_insert s2_rollback s1_compact s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_insert: + INSERT INTO metrics VALUES ('2025-01-02 12:00', 'd1', -1.0); + +step s2_rollback: + ROLLBACK; + +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------ +{COMPRESSED} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4000 + + +starting permutation: s2_begin s2_direct_insert s2_commit s1_compact s1_show_status s1_count +step s2_begin: + BEGIN; + +step s2_direct_insert: + SET timescaledb.enable_direct_compress_insert = true; + INSERT INTO metrics + SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', (i + 0.1)::float + FROM generate_series(1,100) i; + +step s2_commit: + COMMIT; + +step s1_compact: + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; + +compact +------- + 1 + +step s1_show_status: + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; + +status +------------ +{COMPRESSED} + +step s1_count: + SELECT count(*) FROM metrics; + +count +----- + 4100 + diff --git a/tsl/test/isolation/specs/CMakeLists.txt 
b/tsl/test/isolation/specs/CMakeLists.txt index e74ee485d3b..17317fe41d1 100644 --- a/tsl/test/isolation/specs/CMakeLists.txt +++ b/tsl/test/isolation/specs/CMakeLists.txt @@ -45,6 +45,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) cagg_hierarchical_concurrent_refresh.spec cagg_incremental_concurrent.spec cagg_refresh_cleanup_register.spec + compact_chunk_concurrent.spec compression_chunk_race.spec direct_compress_copy.spec compression_freeze.spec diff --git a/tsl/test/isolation/specs/compact_chunk_concurrent.spec b/tsl/test/isolation/specs/compact_chunk_concurrent.spec new file mode 100644 index 00000000000..39b05bb300a --- /dev/null +++ b/tsl/test/isolation/specs/compact_chunk_concurrent.spec @@ -0,0 +1,99 @@ +# This file and its contents are licensed under the Timescale License. +# Please see the included NOTICE for copyright information and +# LICENSE-TIMESCALE for a copy of the license. + +### +# Test compact_chunk conflicts with concurrent DML transactions +### + +setup { + CREATE TABLE metrics (time TIMESTAMPTZ NOT NULL, device TEXT, value float) + WITH (tsdb.hypertable, tsdb.orderby='time'); + + SET timescaledb.enable_direct_compress_insert = true; + + -- Insert overlapping batches so compact_chunk has work to do + INSERT INTO metrics + SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', i::float + FROM generate_series(1,2000) i; + + INSERT INTO metrics + SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', (i + 0.5)::float + FROM generate_series(1,2000) i; +} + +teardown { + DROP TABLE metrics; +} + +session "s1" +step "s1_compact" { + SELECT count(_timescaledb_functions.compact_chunk(chunk)) AS compact + FROM show_chunks('metrics') chunk; +} + +step "s1_show_status" { + SELECT _timescaledb_functions.chunk_status_text(chunk) AS status + FROM show_chunks('metrics') chunk; +} + +step "s1_count" { + SELECT count(*) FROM metrics; +} + +session "s2" +step "s2_begin" { + BEGIN; +} + +step "s2_insert" { + INSERT INTO metrics VALUES 
('2025-01-02 12:00', 'd1', -1.0); +} + +step "s2_direct_insert" { + SET timescaledb.enable_direct_compress_insert = true; + INSERT INTO metrics + SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', (i + 0.1)::float + FROM generate_series(1,100) i; +} + +step "s2_update" { + UPDATE metrics SET value = -1.0 WHERE value = 1.0; +} + +step "s2_delete" { + DELETE FROM metrics WHERE value = 1.0; +} + +step "s2_select" { + SELECT count(*) FROM metrics; +} + +step "s2_commit" { + COMMIT; +} + +step "s2_rollback" { + ROLLBACK; +} + +# compact_chunk should not block concurrent reads +permutation "s2_begin" "s2_select" "s1_compact" "s2_commit" "s1_show_status" "s1_count" + +# compact_chunk should detect concurrent insert, emit a warning, and skip compaction (chunk stays UNORDERED) +permutation "s2_begin" "s2_insert" "s1_compact" "s2_commit" "s1_show_status" "s1_count" + +# compact_chunk should detect concurrent update, emit a warning, and skip compaction (chunk stays UNORDERED) +permutation "s2_begin" "s2_update" "s1_compact" "s2_commit" "s1_show_status" "s1_count" + +# compact_chunk should detect concurrent delete, emit a warning, and skip compaction (chunk stays UNORDERED) +permutation "s2_begin" "s2_delete" "s1_compact" "s2_commit" "s1_show_status" "s1_count" + +# compact_chunk doesn't run on partial chunks; committed insert makes the chunk partial +permutation "s2_begin" "s2_insert" "s2_commit" "s1_compact" "s1_show_status" "s1_count" + +# compact_chunk should succeed after concurrent DML rolls back +permutation "s2_begin" "s2_insert" "s2_rollback" "s1_compact" "s1_show_status" "s1_count" + +# compact_chunk should succeed after committed direct compress insert (chunk stays fully compressed) +permutation "s2_begin" "s2_direct_insert" "s2_commit" "s1_compact" "s1_show_status" "s1_count" diff --git a/tsl/test/shared/expected/extension.out b/tsl/test/shared/expected/extension.out index bc376920f20..42b9a0d0685 100644 --- a/tsl/test/shared/expected/extension.out +++ b/tsl/test/shared/expected/extension.out @@ -45,6 +45,7 @@ ORDER BY pronamespace::regnamespace::text COLLATE "C", p.oid::regprocedure::text 
_timescaledb_functions.chunk_status_text(integer) _timescaledb_functions.chunk_status_text(regclass) _timescaledb_functions.chunks_local_size(name,name) + _timescaledb_functions.compact_chunk(regclass) _timescaledb_functions.compressed_chunk_local_stats(name,name) _timescaledb_functions.compressed_data_column_size(_timescaledb_internal.compressed_data,anyelement) _timescaledb_functions.compressed_data_has_nulls(_timescaledb_internal.compressed_data) diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index be7f45b4b2f..3347e3f6d49 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -27,6 +27,7 @@ set(TEST_FILES chunk_publication_compression.sql columnar_scan_cost.sql columnstore_aliases.sql + compact_chunk.sql compress_auto_sparse_index.sql compress_batch_size.sql compress_bitmap_scan.sql diff --git a/tsl/test/sql/compact_chunk.sql b/tsl/test/sql/compact_chunk.sql new file mode 100644 index 00000000000..ed038da3dda --- /dev/null +++ b/tsl/test/sql/compact_chunk.sql @@ -0,0 +1,1068 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +-- Tests for the compact_chunk function. +-- compact_chunk merges overlapping compressed batches within a chunk +-- without decompressing/recompressing batches that are already ordered. + +CREATE TABLE metrics (time TIMESTAMPTZ NOT NULL, device TEXT, value float) WITH (tsdb.hypertable, tsdb.orderby='time'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- compact_chunk with no overlapping batches (no-op case) +-- Insert 3000 ordered rows: Jan 2 00:00 to Jan 4 02:00 PST. +-- Direct compress insert creates 3 batches: 3x1000 rows. 
+INSERT INTO metrics +SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,3000) i; + +-- Status should be COMPRESSED,UNORDERED after direct compress insert +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + +-- Get the first compressed chunk for inspection +SELECT comp_ch.table_name AS "CHUNK_NAME", + comp_ch.schema_name || '.' || comp_ch.table_name AS "CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- Show batch metadata: 3 non-overlapping batches (ordered by min time) +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- compact_chunk on non-overlapping batches is a no-op; returns the chunk regclass +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; + +-- Show batch metadata: 3 non-overlapping batches with same ctids as before +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- Status UNORDERED removed: compact_chunk clears the UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + +-- compact_chunk with overlapping batches +-- Insert data that overlaps with existing batches. +-- Jan 1 to Jan 3 data overlaps with the existing Jan 2 to Jan 4 batches +-- in the compressed chunk. +INSERT INTO metrics +SELECT '2025-01-01'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,3000) i; + +-- Show compressed chunk metadata: overlapping batches are now visible. 
+-- The new batches from the Jan 1 insert interleave with the old Jan 2 batches. +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- Status should contain UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + +-- compact_chunk should identify and merge the overlapping batches. +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; + +-- Show compressed chunk metadata: no overlapping batches anymore. +-- We should see new ctids of the newly merged batches. +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- Status should not contain UNORDERED flag +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics') chunk; + +-- compact an uncompressed chunk +-- Create a new uncompressed chunk for a different time range by +-- inserting with direct compress insert disabled. 
+SET timescaledb.enable_direct_compress_insert = false; +INSERT INTO metrics VALUES ('2025-12-01', 'd1', -1.0); +SET timescaledb.enable_direct_compress_insert = true; + +-- The new chunk for Dec 2025 should be uncompressed (empty status array) +SELECT _timescaledb_functions.chunk_status_text(chunk) AS chunk_status, + chunk::text AS chunk_name +FROM show_chunks('metrics') chunk +WHERE NOT (_timescaledb_functions.chunk_status_text(chunk) && ARRAY['COMPRESSED']) +ORDER BY chunk; + +-- compact_chunk on an uncompressed chunk must fail +SELECT chunk AS "UNCOMPRESSED_CHUNK" +FROM show_chunks('metrics') chunk +WHERE NOT (_timescaledb_functions.chunk_status_text(chunk) && ARRAY['COMPRESSED']) +LIMIT 1 \gset + +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(:'UNCOMPRESSED_CHUNK'); +\set ON_ERROR_STOP 1 + +-- compact a partially compressed chunk +-- Insert an uncompressed row into the already-compressed Jan 2 chunk. +-- This makes chunk 1 PARTIAL (it has both compressed and uncompressed rows). 
+SET timescaledb.enable_direct_compress_insert = false; +INSERT INTO metrics VALUES ('2025-01-02 12:00', 'd1', -1.0); +SET timescaledb.enable_direct_compress_insert = true; + +-- Chunk 1 should now show PARTIAL status +SELECT _timescaledb_functions.chunk_status_text(chunk) AS chunk_status, + chunk::text AS chunk_name +FROM show_chunks('metrics') chunk +WHERE _timescaledb_functions.chunk_status_text(chunk) && ARRAY['PARTIAL'] +ORDER BY chunk; + +-- compact_chunk on a partially compressed chunk must fail +SELECT chunk AS "PARTIAL_CHUNK" +FROM show_chunks('metrics') chunk +WHERE _timescaledb_functions.chunk_status_text(chunk) && ARRAY['PARTIAL'] +LIMIT 1 \gset + +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(:'PARTIAL_CHUNK'); +\set ON_ERROR_STOP 1 + +-- compact_chunk is STRICT, a NULL argument returns NULL +SELECT _timescaledb_functions.compact_chunk(NULL::regclass); + +-- compact_chunk in read-only mode must fail +SET default_transaction_read_only TO on; + +\set ON_ERROR_STOP 0 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics') chunk; +\set ON_ERROR_STOP 1 + +SET default_transaction_read_only TO off; + +-- Create a hypertable with a segmentby column. +-- Each segment ('d1', 'd2') will have its own set of batches, and +-- compact_chunk should handle overlaps per segment independently. 
+CREATE TABLE metrics_seg (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert 2000 rows for each of two devices: non-overlapping within each segment +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; + +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + +-- Get the compressed chunk for metrics_seg +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "SEG_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_seg' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 4 batches: 2 per segment, no overlaps +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + +-- compact_chunk with no overlaps: no-op for both segments +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_seg') chunk; + +-- Same ctids: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + +-- status should be updated to COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + +-- Insert overlapping data +INSERT INTO metrics_seg 
+SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; + +INSERT INTO metrics_seg +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; + +-- 8 batches: 4 per segment, overlapping +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + +-- status should be updated to COMPRESSED, UNORDERED +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + +-- compact_chunk with overlaps: combines the batches +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_seg') chunk; + +-- New ctids: overlapping batches merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEG_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_min_1; + +-- status should be COMPRESSED only (UNORDERED cleared after compaction) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_seg') chunk; + +-- compact_chunk with nullable secondary orderby column (NULLS FIRST) +-- The second batch starts at a boundary tie on col1 (time) with a NULL +-- in col2 (value). With NULLS FIRST, (time=T, value=NULL) should sort +-- BEFORE (time=T, value=non-null), but the second batch is positioned +-- after the first in the index — an ordering violation. +CREATE TABLE metrics_nullable (time TIMESTAMPTZ, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value NULLS FIRST'); + +SET timescaledb.enable_direct_compress_insert = true; + +-- Insert batch 1 (1000 rows): time [00:01..16:40], all non-null values. +-- Last row: (16:40, 1000). 
+INSERT INTO metrics_nullable +SELECT '2025-01-02'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; + +-- compact_chunk on nullable orderby with no nulls in data should succeed (no-op) +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nullable') chunk; + +-- Get the compressed chunk for metrics_nullable +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NULLABLE_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nullable' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 1 batch, no nulls +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- Insert batch 2 (1000 rows): starts at batch 1's max time (i=1000, time=16:40). +-- First row has NULL value: (16:40, NULL). +-- With NULLS FIRST, (16:40, NULL) should sort BEFORE (16:40, 1000) from +-- batch 1's last row. But batch 2 is after batch 1 in the index → unordered. 
+INSERT INTO metrics_nullable +SELECT '2025-01-02'::timestamptz + ((999 + i) || ' minute')::interval, + 'd1', + CASE WHEN i = 1 THEN NULL ELSE (1000 + i)::float END +FROM generate_series(1,1000) i; + +-- 2 batches: boundary tie on col1, NULL in col2 at the boundary +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- compact_chunk should detect the boundary-tie overlap via NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nullable') chunk; + +-- After compaction: batches merged and re-sorted +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLABLE_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1; + +-- Verify total row count is preserved +SELECT count(*) FROM metrics_nullable; + +-- Verify NULL rows are accessible +SELECT count(*) AS null_value_count FROM metrics_nullable WHERE value IS NULL; + +-- Verify ordering at boundary: NULL must come before non-null with NULLS FIRST +SELECT time, value FROM metrics_nullable +WHERE time = '2025-01-02'::timestamptz + '1000 minutes'::interval +ORDER BY time, value NULLS FIRST; + +DROP TABLE metrics_nullable; + +-- compact_chunk with DESC orderby column +CREATE TABLE metrics_desc (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time DESC', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert non-overlapping data for two devices +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float 
+FROM generate_series(0,1999) i; + +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + +-- Get the compressed chunk for metrics_desc +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "DESC_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_desc' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 4 batches: 2 per segment, ordered by max time descending +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + +-- No overlaps: compact_chunk is a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_desc') chunk; + +-- Same ctids: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + +-- Status should be COMPRESSED only after compaction +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + +-- Insert overlapping data +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(0,1999) i; + +INSERT INTO metrics_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', i::float +FROM generate_series(0,1999) i; + +-- 8 batches: 4 per segment, overlapping +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + +SELECT DISTINCT 
_timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + +-- compact_chunk should merge overlapping batches with DESC ordering +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_desc') chunk; + +-- New ctids: overlapping batches merged +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :DESC_CHUNK_FULL_NAME +ORDER BY device, _ts_meta_max_1 DESC; + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_desc') chunk; + +-- Verify data is correctly ordered (DESC) within each segment +SELECT device, time FROM metrics_desc WHERE device = 'd1' ORDER BY device, time DESC LIMIT 5; + +DROP TABLE metrics_desc; + +-- compact_chunk with multi-column orderby +-- Tests that overlap detection works correctly when using ORDER BY device, time. +-- The bug: secondary column min/max metadata is a global aggregate across +-- all rows, not scoped to the primary column's value. This causes false +-- negatives where interleaving batches go undetected. +CREATE TABLE metrics_multi (time TIMESTAMPTZ NOT NULL, device TEXT NOT NULL, value float) +WITH (tsdb.hypertable, tsdb.orderby='device,time'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert 3 batches with different devices creating boundary ties on col1: +-- Batch 1 (500 rows): device d1..d2, time [00:01..08:20] +-- Batch 2 (500 rows): device d2..d3, time [08:21..16:40] +-- The d2 rows in both batches create a boundary tie on col1 (device). +-- Col2 (time) ranges are non-overlapping within the d2 group → no actual overlap. 
+INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd1' ELSE 'd2' END, + i::float +FROM generate_series(1,500) i; + +INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + ((500 + i) || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd2' ELSE 'd3' END, + (500 + i)::float +FROM generate_series(1,500) i; + +-- Get the compressed chunk for metrics_multi +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "MULTI_CHUNK_FULL_NAME" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_multi' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 2 batches: boundary tie on col1 (device=d2), non-overlapping time ranges +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + +-- compact_chunk: boundary tie on col1, but no overlap on col2 (time) — should be a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi') chunk; + +-- Same ctids: nothing was rewritten +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + +-- Now insert truly overlapping data within the d2 device group: +-- Batch 3 (500 rows): device d2, time range overlaps with both existing d2 ranges +INSERT INTO metrics_multi +SELECT '2025-01-03'::timestamptz + ((250 + i) || ' minute')::interval, 'd2', (250 + i)::float +FROM generate_series(1,500) i; + +-- 3 batches: the new d2 batch overlaps with the d2 portions of existing batches +SELECT 
ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + +-- compact_chunk: boundary tie on col1 with actual overlap on col2 — must detect and merge +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi') chunk; + +-- New ctids: overlapping batches merged +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_CHUNK_FULL_NAME +ORDER BY _ts_meta_min_1, _ts_meta_min_2; + +-- Total row count preserved +SELECT count(*) FROM metrics_multi; + +DROP TABLE metrics_multi; + +-- compact_chunk with multi-column orderby, second column DESC +-- Same boundary-tie logic but the secondary column uses descending order. +-- The sort operator for col2 is now ">" instead of "<", so the boundary +-- decompression must use the correct comparator. +CREATE TABLE metrics_multi_desc (time TIMESTAMPTZ NOT NULL, device TEXT NOT NULL, value float) +WITH (tsdb.hypertable, tsdb.orderby='device,time DESC'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert 2 batches with multiple devices, DESC time ordering: +-- Batch 1 (500 rows): device d2..d3, time [08:21..16:40] DESC +-- Batch 2 (500 rows): device d1..d2, time [00:01..08:20] DESC +-- The d2 rows create a boundary tie on col1. +-- With DESC, batch 1's d2 trailing edge (min time=08:21) and batch 2's +-- d2 leading edge (max time=08:20) are non-overlapping → no actual overlap. 
+INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + ((500 + i) || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd2' ELSE 'd3' END, + (500 + i)::float +FROM generate_series(1,500) i; + +INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + CASE WHEN i <= 250 THEN 'd1' ELSE 'd2' END, + i::float +FROM generate_series(1,500) i; + +-- Get the compressed chunk +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "MULTI_DESC_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_multi_desc' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 2 batches: boundary tie on col1 (device=d2), non-overlapping DESC time ranges +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + +-- compact_chunk: boundary tie on col1, no overlap on col2 (DESC) — should be a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi_desc') chunk; + +-- Same ctids: nothing was rewritten +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + +-- Now insert overlapping data within the d2 device group +INSERT INTO metrics_multi_desc +SELECT '2025-01-03'::timestamptz + ((250 + i) || ' minute')::interval, 'd2', (250 + i)::float +FROM generate_series(1,500) i; + +-- 3 batches: the new d2 batch overlaps with existing d2 portions +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, 
_ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + +-- compact_chunk: boundary tie on col1, actual overlap on col2 (DESC) — must merge +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_multi_desc') chunk; + +-- New ctids: overlapping batches merged +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_device, _ts_meta_v2_last_device, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :MULTI_DESC_CHUNK +ORDER BY _ts_meta_min_1, _ts_meta_max_2 DESC; + +-- Total row count preserved +SELECT count(*) FROM metrics_multi_desc; + +DROP TABLE metrics_multi_desc; + +-- compact_chunk with segmentby + multi-column orderby + nullable orderby column +-- Combines all three features: +-- segmentby='device' multiple segments processed independently +-- orderby='time,value' multi-column overlap detection with boundary ties +-- 'value' is nullable boundary tie NULL values must respect NULL ordering +CREATE TABLE metrics_combined (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert non-null data for two segments: 1000 rows each, non-overlapping +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; + +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; + +-- Get the compressed chunk +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "COMBINED_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_combined' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 2 batches: 1 per segment, no overlaps, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- No overlaps, no nulls: compact_chunk is a no-op +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_combined') chunk; + +-- Same ctids as before: nothing was rewritten +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + +-- Insert overlapping data with NULLs in the nullable orderby column (value) +-- for both segments. Every 5th row has NULL value. +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; + +INSERT INTO metrics_combined +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; + +-- 4 batches: original + overlapping with nulls. Both segments overlap on time col1. 
+SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Status should be UNORDERED +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + +-- compact_chunk must: +-- 1. Detect overlaps per segment via multi-column boundary-tie logic +-- 2. Recompress overlapping rows into ordered batches +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_combined') chunk; + +-- After compaction: new ctids, overlapping batches merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :COMBINED_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Status should be COMPRESSED only (UNORDERED cleared). +-- NULLs in the secondary orderby column (value) just sort within the batch per +-- NULLS FIRST/LAST; only the first orderby column (time, NOT NULL) affects +-- batch placement. 
+SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_combined') chunk; + +-- Total row count preserved +SELECT count(*) FROM metrics_combined; + +-- NULL rows preserved per segment +SELECT device, count(*) AS null_value_count FROM metrics_combined WHERE value IS NULL GROUP BY device ORDER BY device; + +-- Data integrity per segment +SELECT device, count(*) FROM metrics_combined GROUP BY device ORDER BY device; + +-- Verify ordering is correct within segment d1 +SELECT device, time, value FROM metrics_combined +WHERE device = 'd1' ORDER BY time, value LIMIT 5; + +SELECT device, time, value FROM metrics_combined +WHERE device = 'd1' AND value IS NULL ORDER BY time LIMIT 5; + +DROP TABLE metrics_combined; + +-- compact_chunk with nullable first orderby column (NULLS LAST, default) +-- A NULL boundary value makes a mixed-null batch overlap its neighbor, so the +-- overlap merge re-sorts both batches; the NULLs land at the end (NULLS LAST). +-- A mixed-null batch that overlaps nothing is already ordered and left untouched. +CREATE TABLE metrics_nulls_last (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value,time', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert non-null data for two segments +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; + +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; + +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "NULLS_LAST_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nulls_last' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 2 non-overlapping batches, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Insert overlapping data with NULLs in value (first orderby column) +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; + +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; + +-- 4 batches with overlaps and nulls in first orderby column +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_last') chunk; + +-- After compaction: the mixed-null batch does not overlap its neighbor, so it +-- stays as-is (nulls already sort to the end of the batch with NULLS LAST) +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- UNORDERED cleared: NULLs fold into the end of the segment (NULLS LAST) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_nulls_last') chunk; + +-- Data integrity +SELECT count(*) FROM metrics_nulls_last; +SELECT device, count(*) FROM metrics_nulls_last GROUP BY device ORDER BY device; 
+SELECT device, count(*) AS null_count FROM metrics_nulls_last WHERE value IS NULL GROUP BY device ORDER BY device; + +-- Now test the overlap path with NULLs: insert data that truly overlaps +-- on the value range AND contains NULLs, with a nullable first orderby column. +-- The NULL rows must survive the overlap merge and end up ordered correctly. +INSERT INTO metrics_nulls_last +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 500)::float END +FROM generate_series(1,500) i; + +-- Batches before overlap compaction +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS LAST; + +-- This overlaps with existing d1 data (non-null values 501..999 fall inside 1..1000; its NULLs sort after 2001..2999 under NULLS LAST) +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_last') chunk; + +-- After overlap merge: NULLs from overlapping batches preserved +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_LAST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS LAST; + +-- Data integrity: all rows including NULLs from the overlap merge must survive +SELECT count(*) FROM metrics_nulls_last WHERE device = 'd1'; +SELECT count(*) AS null_count FROM metrics_nulls_last WHERE device = 'd1' AND value IS NULL; + +DROP TABLE metrics_nulls_last; + +-- compact_chunk with nullable first orderby column (NULLS FIRST) +-- With NULLS FIRST, the re-sort places NULLs at the start of each segment, so a +-- null-containing batch is already correctly positioned.
+CREATE TABLE metrics_nulls_first (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value NULLS FIRST,time', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert non-null data for two segments +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; + +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd2', (i + 1000)::float +FROM generate_series(1,1000) i; + +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NULLS_FIRST_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_nulls_first' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- 2 non-overlapping batches, no nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Insert overlapping data with NULLs in value (first orderby column) +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 2000)::float END +FROM generate_series(1,1000) i; + +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd2', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 3000)::float END +FROM generate_series(1,1000) i; + +-- 4 batches with overlaps and nulls in first orderby column +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + 
+SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_first') chunk; + +-- After compaction: with NULLS FIRST, null batches at segment start are fine +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +ORDER BY device, _ts_meta_min_1; + +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_nulls_first') chunk; + +-- Data integrity +SELECT count(*) FROM metrics_nulls_first; +SELECT device, count(*) FROM metrics_nulls_first GROUP BY device ORDER BY device; +SELECT device, count(*) AS null_count FROM metrics_nulls_first WHERE value IS NULL GROUP BY device ORDER BY device; + +-- Same overlap+NULLs test for NULLS FIRST +INSERT INTO metrics_nulls_first +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + CASE WHEN i % 5 = 0 THEN NULL ELSE (i + 500)::float END +FROM generate_series(1,500) i; + +-- Batches before overlap compaction +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS FIRST; + +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_nulls_first') chunk; + +-- After overlap merge: NULLs from overlapping batches preserved +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :NULLS_FIRST_CHUNK +WHERE device = 'd1' +ORDER BY _ts_meta_min_1 NULLS FIRST; + +-- Data integrity: NULLs from overlap merge must survive +SELECT count(*) FROM metrics_nulls_first WHERE device = 'd1'; +SELECT count(*) AS null_count FROM metrics_nulls_first WHERE device = 'd1' AND value IS NULL; + +DROP TABLE metrics_nulls_first; + +-- compact_chunk with a mixed-null batch whose NULLs overlap a later batch +-- A batch with both NULL and non-NULL values in the first orderby 
column has a +-- NULL last row (NULLS LAST) that sorts after a following non-null batch, so the +-- two batches overlap and the merge re-sorts them with the NULLs back at the end. +-- Without firstlast metadata the NULL boundary is invisible and the overlap is +-- missed, leaving NULLs in the wrong index position. +CREATE TABLE metrics_mixed_nulls (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='value,time', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert 1800 non-null rows + 200 NULL rows for device 'd1'. +-- DCI sorts by (value NULLS LAST, time), producing: +-- Batch 1 (1000 rows): value [1..1000] — all non-null +-- Batch 2 (1000 rows): value [1001..1800] + 200 NULLs — mixed! +-- Batch 2's firstlast metadata: first=1001, last=NULL (NULLS LAST). +INSERT INTO metrics_mixed_nulls +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i > 1800 THEN NULL ELSE i::float END +FROM generate_series(1,2000) i; + +-- Insert 1000 more non-null rows with values [1801..2800], forming Batch 3 +-- (first=1801, last=2800). Batch 2's non-null range [1001..1800] sits below +-- Batch 3, but Batch 2's NULL last row sorts after Batch 3 (NULLS LAST), so the +-- two batches overlap and must be merged. +INSERT INTO metrics_mixed_nulls +SELECT '2025-01-03'::timestamptz + ((2000 + i) || ' minute')::interval, + 'd1', + (1800 + i)::float +FROM generate_series(1,1000) i; + +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "MIXED_NULLS_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_mixed_nulls' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- Show batch metadata before compaction: 3 batches, batch 2 has mixed nulls +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :MIXED_NULLS_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- compact_chunk must detect the overlap and re-sort the NULLs back to the end +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_mixed_nulls') chunk; + +-- After compaction: Batch 2 and Batch 3 merged, NULLs folded into the trailing +-- batch at the end (NULLS LAST). +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :MIXED_NULLS_CHUNK +ORDER BY device, _ts_meta_min_1 NULLS LAST; + +-- Status should be COMPRESSED only (UNORDERED cleared correctly this time) +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_mixed_nulls') chunk; + +-- Data integrity +SELECT count(*) FROM metrics_mixed_nulls; +SELECT count(*) AS null_count FROM metrics_mixed_nulls WHERE value IS NULL; + +-- Verify ordering is correct: NULLs must come after all non-null values +-- This query will return wrong results if the bug is present (NULLs between 1800 and 1801) +SELECT value FROM metrics_mixed_nulls +WHERE device = 'd1' +ORDER BY value NULLS LAST, time +LIMIT 5 OFFSET 1795; + +DROP TABLE metrics_mixed_nulls; + +-- compact_chunk with nullable SECONDARY orderby column at boundary tie +-- Regression test: when orderby='time,value' and value is nullable, +-- two batches can tie on col1 (time) at the boundary. 
If the prev batch's +-- last row has NULL in col2 (value), the overlap check must compare using +-- NULLS FIRST/LAST semantics so the tie is correctly seen as an overlap. +CREATE TABLE metrics_secondary_null (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert batch 1 (500 rows): time [00:01..08:20], with NULL value at the +-- last timestamp (i=500, time=08:20). With orderby='time,value NULLS LAST', +-- the last row in sorted order is (08:20, NULL). +INSERT INTO metrics_secondary_null +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + CASE WHEN i = 500 THEN NULL ELSE i::float END +FROM generate_series(1,500) i; + +-- Insert batch 2 (500 rows): starts exactly at batch 1's max time (08:20). +-- This creates a boundary tie on col1 (time). All values are non-null. +-- The first row (08:20, 1001) ties with batch 1's last row (08:20, NULL). +-- With NULLS LAST, (08:20, NULL) should sort AFTER (08:20, 1001), but in +-- index order batch 1 comes before batch 2 — an ordering violation. +INSERT INTO metrics_secondary_null +SELECT '2025-01-03'::timestamptz + ((499 + i) || ' minute')::interval, + 'd1', + (1000 + i)::float +FROM generate_series(1,500) i; + +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "SEC_NULL_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_secondary_null' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- Show batch metadata: col1 (time) ranges overlap at the boundary +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :SEC_NULL_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- compact_chunk must detect the boundary-tie overlap caused by NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_secondary_null') chunk; + +-- After compaction: batches should be merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEC_NULL_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_secondary_null') chunk; + +-- Data integrity +SELECT count(*) FROM metrics_secondary_null; +SELECT count(*) AS null_count FROM metrics_secondary_null WHERE value IS NULL; + +-- Verify ordering: at the boundary time (08:20 = +500 min), non-null value +-- must come before NULL with NULLS LAST ordering. +SELECT time, value FROM metrics_secondary_null +WHERE device = 'd1' AND time = '2025-01-03'::timestamptz + '500 minutes'::interval +ORDER BY time, value NULLS LAST; + +DROP TABLE metrics_secondary_null; + +-- compact_chunk with nullable SECONDARY orderby column at boundary tie (NULLS FIRST) +-- Mirror of the NULLS LAST test above. 
With NULLS FIRST, the curr batch's +-- first row has NULL in col2, which should sort BEFORE the prev batch's +-- last non-null col2 value — meaning the curr batch actually starts earlier +-- than its non-null min suggests, causing an ordering violation. +CREATE TABLE metrics_secondary_null_first (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time,value NULLS FIRST', tsdb.segmentby='device'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Insert batch 1 (500 rows): time [00:01..08:20], all non-null values. +INSERT INTO metrics_secondary_null_first +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, + 'd1', + i::float +FROM generate_series(1,500) i; + +-- Insert batch 2 (500 rows): starts at batch 1's max time (08:20). +-- The first row at time=08:20 has NULL value. With NULLS FIRST, +-- (08:20, NULL) should sort BEFORE (08:20, 500) from batch 1's last row. +-- But batch 2 comes after batch 1 in index order — ordering violation. +INSERT INTO metrics_secondary_null_first +SELECT '2025-01-03'::timestamptz + ((499 + i) || ' minute')::interval, + 'd1', + CASE WHEN i = 1 THEN NULL ELSE (1000 + i)::float END +FROM generate_series(1,500) i; + +SELECT comp_ch.schema_name || '.' 
|| comp_ch.table_name AS "SEC_NF_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_secondary_null_first' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- Show batch metadata: boundary tie on col1 (time) at 08:20 +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time, _ts_meta_min_2, _ts_meta_max_2, _ts_meta_v2_first_value, _ts_meta_v2_last_value +FROM :SEC_NF_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- compact_chunk must detect the boundary-tie overlap caused by NULL in col2 +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_secondary_null_first') chunk; + +-- After compaction: batches should be merged and re-sorted +SELECT ctid, device, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1, _ts_meta_v2_first_time, _ts_meta_v2_last_time +FROM :SEC_NF_CHUNK +ORDER BY device, _ts_meta_min_1; + +-- Status should be COMPRESSED only +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_secondary_null_first') chunk; + +-- Data integrity +SELECT count(*) FROM metrics_secondary_null_first; +SELECT count(*) AS null_count FROM metrics_secondary_null_first WHERE value IS NULL; + +-- Verify ordering: at the boundary time (08:20), NULL must come before non-null +-- with NULLS FIRST ordering. +SELECT time, value FROM metrics_secondary_null_first +WHERE device = 'd1' AND time = '2025-01-03'::timestamptz + '500 minutes'::interval +ORDER BY time, value NULLS FIRST; + +DROP TABLE metrics_secondary_null_first; + +-- compact_chunk skips legacy chunks whose orderby column has no firstlast sparse +-- index. Compaction reads exact batch boundaries from firstlast metadata, so +-- without it the chunk is left untouched (UNORDERED stays set). 
Configuring minmax +-- (and not firstlast) on the orderby column reproduces a legacy chunk: a +-- user-supplied index for a column suppresses the firstlast that orderby columns +-- get by default. +CREATE TABLE metrics_no_firstlast (time TIMESTAMPTZ NOT NULL, device TEXT, value float) +WITH (tsdb.hypertable, tsdb.orderby='time', tsdb.index='minmax(time)'); + +SET timescaledb.enable_direct_compress_insert = true; +SET timescaledb.enable_direct_compress_insert_sort_batches = true; +SET timescaledb.enable_direct_compress_insert_client_sorted = false; + +-- Two inserts over the same time range (one chunk), leaving the chunk UNORDERED. +INSERT INTO metrics_no_firstlast +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', i::float +FROM generate_series(1,1000) i; + +INSERT INTO metrics_no_firstlast +SELECT '2025-01-03'::timestamptz + (i || ' minute')::interval, 'd1', (1000 + i)::float +FROM generate_series(1,1000) i; + +SELECT comp_ch.schema_name || '.' || comp_ch.table_name AS "NO_FL_CHUNK" +FROM _timescaledb_catalog.chunk ch1, + _timescaledb_catalog.chunk comp_ch, + _timescaledb_catalog.hypertable ht +WHERE ch1.hypertable_id = ht.id + AND ht.table_name = 'metrics_no_firstlast' + AND ch1.compressed_chunk_id = comp_ch.id +ORDER BY ch1.id LIMIT 1 \gset + +-- The orderby column only carries minmax metadata (no _ts_meta_v2_first/last). +SELECT count(*) FILTER (WHERE attname LIKE '\_ts\_meta\_v2\_first%') AS first_cols, + count(*) FILTER (WHERE attname LIKE '\_ts\_meta\_v2\_last%') AS last_cols +FROM pg_attribute +WHERE attrelid = :'NO_FL_CHUNK'::regclass AND attnum > 0; + +-- Overlapping batches; chunk is UNORDERED. 
+SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1 +FROM :NO_FL_CHUNK ORDER BY _ts_meta_min_1; +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_no_firstlast') chunk; + +-- compact_chunk is a no-op and warns with a hint on how to enable it: there is +-- no firstlast metadata to read batch boundaries from. VERBOSITY is switched to default around the call so psql prints the HINT (terse omits it). +\set VERBOSITY default +SELECT _timescaledb_functions.compact_chunk(chunk) FROM show_chunks('metrics_no_firstlast') chunk; +\set VERBOSITY terse + +-- Batches unchanged (same ctids) and UNORDERED is still set. +SELECT ctid, _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1 +FROM :NO_FL_CHUNK ORDER BY _ts_meta_min_1; +SELECT DISTINCT _timescaledb_functions.chunk_status_text(chunk) FROM show_chunks('metrics_no_firstlast') chunk; + +DROP TABLE metrics_no_firstlast;