Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .unreleased/pr_9957
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #9957 Add compact_chunk function
4 changes: 4 additions & 0 deletions sql/maintenance_utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ CREATE OR REPLACE FUNCTION _timescaledb_functions.recompress_chunk_segmentwise(
if_compressed BOOLEAN = true
) RETURNS REGCLASS AS '@MODULE_PATHNAME@', 'ts_recompress_chunk_segmentwise' LANGUAGE C STRICT VOLATILE;

-- Merge overlapping compressed batches within a chunk; the behavior is described
-- in tsl/src/compression/COMPACT_CHUNK.md. Implemented in C by ts_compact_chunk.
-- Declared STRICT, so a NULL argument returns NULL without invoking the C code.
-- NOTE(review): which relation the returned REGCLASS identifies is not visible
-- here -- confirm against the C implementation.
CREATE OR REPLACE FUNCTION _timescaledb_functions.compact_chunk(
uncompressed_chunk REGCLASS
) RETURNS REGCLASS AS '@MODULE_PATHNAME@', 'ts_compact_chunk' LANGUAGE C STRICT VOLATILE;

-- find the index on the compressed chunk that can be used to recompress efficiently
-- this index must contain all the segmentby columns and the meta_sequence_number column last
CREATE OR REPLACE FUNCTION _timescaledb_functions.get_compressed_chunk_index_for_recompression(
Expand Down
1 change: 1 addition & 0 deletions sql/updates/reverse-dev.sql
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
DROP FUNCTION IF EXISTS _timescaledb_functions.decompress_batch(record);
DROP FUNCTION IF EXISTS _timescaledb_functions.estimate_uncompressed_size(regclass, double precision);
DROP FUNCTION IF EXISTS _timescaledb_functions.compact_chunk(REGCLASS);
2 changes: 2 additions & 0 deletions src/cross_module_fn.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ CROSSMODULE_WRAPPER(chunk_freeze_chunk);
CROSSMODULE_WRAPPER(chunk_unfreeze_chunk);

CROSSMODULE_WRAPPER(recompress_chunk_segmentwise);
CROSSMODULE_WRAPPER(compact_chunk);
CROSSMODULE_WRAPPER(get_compressed_chunk_index_for_recompression);
CROSSMODULE_WRAPPER(merge_chunks);
CROSSMODULE_WRAPPER(split_chunk);
Expand Down Expand Up @@ -397,6 +398,7 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
.chunk_freeze_chunk = error_no_default_fn_pg_community,
.chunk_unfreeze_chunk = error_no_default_fn_pg_community,
.recompress_chunk_segmentwise = error_no_default_fn_pg_community,
.compact_chunk = error_no_default_fn_pg_community,
.get_compressed_chunk_index_for_recompression = error_no_default_fn_pg_community,

.preprocess_query_tsl = preprocess_query_tsl_default_fn_community,
Expand Down
1 change: 1 addition & 0 deletions src/cross_module_fn.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ typedef struct CrossModuleFunctions
PGFunction chunk_freeze_chunk;
PGFunction chunk_unfreeze_chunk;
PGFunction recompress_chunk_segmentwise;
PGFunction compact_chunk;
PGFunction get_compressed_chunk_index_for_recompression;

void (*preprocess_query_tsl)(Query *parse, int *cursor_opts);
Expand Down
139 changes: 139 additions & 0 deletions tsl/src/compression/COMPACT_CHUNK.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# compact_chunk

Merges overlapping compressed batches within a chunk. Only touches batches
that need fixing — correctly ordered batches are left as-is.

Overlap detection reads the firstlast sparse metadata, which stores the exact
orderby values of each batch's first and last rows. Two adjacent batches overlap
when the current batch's first row sorts before the previous batch's last row.
Because the metadata holds the real boundary rows for every orderby column, no
decompression is needed, even for multi-column orderby.

## How It Works

```
Phase 1: FIND Phase 2: RECOMPRESS Phase 3: VERIFY
┌──────────────┐ ┌──────────────────┐ ┌────────────────┐
│ Index scan │──────▶│ Decompress+merge │─────▶│ Re-scan with │
│ Stop at │ │ overlapping │ │ fresh snapshot │
│ first issue │ │ batches, continue│ │ Clear UNORDERED│
└──────────────┘ │ scanning for more│ │ if clean │
└──────────────────┘ └────────────────┘
```

## Handling specific compression and batch configurations

### 1. No overlaps (no-op)

```
┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐
│ 1..100 │ │101..200│ │201..300│ ───▶ │ 1..100 │ │101..200│ │201..300│
└────────┘ └────────┘ └────────┘ └────────┘ └────────┘ └────────┘
(unchanged, UNORDERED cleared)
```

### 2. Overlapping batches

```
┌──────────┐ ┌───────┐┌───────┐┌───────┐┌──┐┌────────┐
│ 1..100 │ │ 1..50 ││51..100││101 ││ ││201..300│
└──────────┘ │ ││ ││ ..150 ││… │└────────┘
┌──────────┐ ───▶ └───────┘└───────┘└───────┘└──┘
│ 50..150 │ ◄────── merged + re-sorted ──► ◄ kept ►
└──────────┘
┌────────┐
│201..300│
└────────┘
```

### 3. Segmentby — independent per segment

```
d1: ┌──────┐ ┌───────┐ d1: ┌──────────────┐
│1..100│ │50..200│ ──▶ │ 1..200 merged│
└──────┘ └───────┘ └──────────────┘
d2: ┌──────┐ ┌───────┐ d2: ┌──────────────┐
│1..100│ │50..200│ ──▶ │ 1..200 merged│
└──────┘ └───────┘ └──────────────┘
```

### 4. DESC orderby

```
orderby='time DESC' max◄────────────────────►min

┌──────────┐ ┌──────────────────┐
│ 200..100 │ │ 200..........100 │
└──────────┘ ──▶ │ merged │
┌──────────┐ └──────────────────┘
│ 150..50 │
└──────────┘
```

### 5. Multi-column orderby — boundary tie resolution

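When two batches tie on the first orderby column at their boundary (the previous
batch's last value equals the current batch's first value), the secondary columns
are compared straight from the first/last metadata — no decompression.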

```
orderby='device,time' Both batches tie on device (first=d2, last=d2)

Batch 1 last row: (d2, 08:20) ◄─ from last metadata
Batch 2 first row: (d2, 08:21) ◄─ from first metadata
08:20 < 08:21 → no overlap ✓

Batch 1 last row: (d2, 08:20)
Batch 2 first row: (d2, 08:11)
08:20 > 08:11 → OVERLAP → merge
```

### 6. Mixed-null batch overlaps a neighbor
Comment thread
antekresic marked this conversation as resolved.

A batch with both NULL and non-NULL values in the first orderby column has a
NULL boundary row. With NULLS LAST its last row is NULL, which sorts after the
first row of a following non-null batch — so the two batches overlap. The merge
re-sorts the rows and the NULLs settle at the end (NULLS LAST). (A mixed-null
batch with no neighbor to overlap is already ordered and is left as-is.)

```
orderby='value NULLS LAST' last row of batch 1 is NULL, sorts after batch 2

┌─────────────────────┐ ┌──────────┐ ┌────────────────────────────────┐
│ 1001..1800, NULL×200│ │1801..2800│ ──▶ │ 1001..2800 re-sorted, NULL×200 │
└─────────────────────┘ └──────────┘ └────────────────────────────────┘
first=1001, last=NULL ──▶ overlap merged, NULLs at end (NULLS LAST)
```

### 7. Overlap merge preserving NULLs

When overlapping batches with a nullable first orderby column are merged, the
re-sort keeps the NULL rows in their correct ordered position.

```
orderby='value NULLS LAST'

┌──────────────────┐ ┌──────────────────────────┐
│ 1..400, NULL×100 │ ──▶ │ 1..699 re-sorted, NULL×100│
└──────────────────┘ └──────────────────────────┘
┌──────────┐ overlap NULLs kept at end (NULLS LAST)
│ 200..699 │ on 200..400
└──────────┘
```

### 8. Secondary column NULLs at boundary tie

The boundary comparison uses the column's sort order, which already places NULLs
per its NULLS FIRST/LAST setting, so a NULL boundary value compares like any
other value.

```
orderby='time, value NULLS LAST'

Batch 1 last row: (08:20, NULL) CORRECT: NULL with NULLS LAST
Batch 2 first row: (08:20, 1001) means NULL > 1001 → OVERLAP → merge


┌──────────────────┐ ┌──────────────────┐ ┌──────────────────────────┐
│ ..., (08:20,NULL)│ │(08:20,1001), ... │ ──▶ │ merged + correctly sorted│
└──────────────────┘ └──────────────────┘ └──────────────────────────┘
```
14 changes: 10 additions & 4 deletions tsl/src/compression/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -3235,10 +3235,10 @@ tsl_compressed_data_info(PG_FUNCTION_ARGS)
return HeapTupleGetDatum(tuple);
}

extern Datum
tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS)
bool
compressed_data_has_nulls(Datum compressed_data)
{
const CompressedDataHeader *header = get_compressed_data_header(PG_GETARG_DATUM(0));
const CompressedDataHeader *header = get_compressed_data_header(compressed_data);
bool has_nulls = false;

switch (header->compression_algorithm)
Expand Down Expand Up @@ -3269,7 +3269,13 @@ tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS)
break;
}

return BoolGetDatum(has_nulls);
return has_nulls;
}

extern Datum
tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS)
{
return BoolGetDatum(compressed_data_has_nulls(PG_GETARG_DATUM(0)));
}

extern CompressionStorage
Expand Down
1 change: 1 addition & 0 deletions tsl/src/compression/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ extern Datum tsl_compressed_data_in(PG_FUNCTION_ARGS);
extern Datum tsl_compressed_data_out(PG_FUNCTION_ARGS);
extern Datum tsl_compressed_data_info(PG_FUNCTION_ARGS);
extern Datum tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS);
extern bool compressed_data_has_nulls(Datum compressed_data);
extern Datum tsl_compressed_data_column_size(PG_FUNCTION_ARGS);
extern Datum tsl_compressed_data_to_array(PG_FUNCTION_ARGS);
extern Datum tsl_decompress_batch(PG_FUNCTION_ARGS);
Expand Down
Loading
Loading