diff --git a/.gitignore b/.gitignore index 259888fdff..00320283df 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,9 @@ log.txt load_out.txt server.log server.pid +server.endpoint +server.endpoint.tmp +hyperd.log arc_token.txt data-size.txt .doris_home @@ -20,9 +23,9 @@ data-size.txt # Per-system data files hits.db mydb -hits.hyper +hits*.hyper hits.vortex *.vortex # Python venvs created by install scripts -myenv/ +myenv diff --git a/hyper-parquet-partitioned/benchmark.sh b/hyper-parquet-partitioned/benchmark.sh new file mode 100755 index 0000000000..0aec537e79 --- /dev/null +++ b/hyper-parquet-partitioned/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned" +export BENCH_DURABLE=yes +export BENCH_RESTARTABLE=yes +exec ../lib/benchmark-common.sh diff --git a/hyper-parquet-partitioned/check b/hyper-parquet-partitioned/check new file mode 100755 index 0000000000..f3fdc4a1f4 --- /dev/null +++ b/hyper-parquet-partitioned/check @@ -0,0 +1,24 @@ +#!/bin/bash +# Readiness probe: connect to the persistent Hyper server (via the descriptor +# ./start published to server.endpoint) and run SELECT 1. Non-zero exit means +# "not up yet" — the benchmark driver polls this in a loop after ./start and +# uses its transition to failing as the "server is really stopped" signal in +# the cold cycle. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# No endpoint published => server isn't up. 
+[ -s server.endpoint ] || exit 1 + +python3 - <<'PY' +from tableauhyperapi import Connection, Endpoint + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_list_query("SELECT 1") +PY diff --git a/hyper-parquet/create.sql b/hyper-parquet-partitioned/create.sql similarity index 95% rename from hyper-parquet/create.sql rename to hyper-parquet-partitioned/create.sql index 79cb815bb5..de9c2c8097 100644 --- a/hyper-parquet/create.sql +++ b/hyper-parquet-partitioned/create.sql @@ -1,4 +1,4 @@ -create temp external table hits +create external table hits for array[ 'hits_0.parquet', 'hits_10.parquet', @@ -101,4 +101,4 @@ for array[ 'hits_99.parquet', 'hits_9.parquet' ] -with (format => 'parquet', binary_as_text => true); +with (format => 'parquet', binary_as_text => true, immutable => true); diff --git a/hyper-parquet/data-size b/hyper-parquet-partitioned/data-size similarity index 100% rename from hyper-parquet/data-size rename to hyper-parquet-partitioned/data-size diff --git a/hyper-parquet/install b/hyper-parquet-partitioned/install similarity index 100% rename from hyper-parquet/install rename to hyper-parquet-partitioned/install diff --git a/hyper-parquet-partitioned/load b/hyper-parquet-partitioned/load new file mode 100755 index 0000000000..b5ee76311f --- /dev/null +++ b/hyper-parquet-partitioned/load @@ -0,0 +1,35 @@ +#!/bin/bash +# Create hits_parquet.hyper holding a persistent external table over the +# partitioned parquet files. The data stays in the parquet files; this .hyper +# DB only persists the external table *catalog* so its compiled query plans +# survive across the per-try connections the benchmark driver opens (./query). +# Without a persistent catalog kept attached, every connection close tears the +# catalog down and evicts its plan-cache entries, forcing a 2s recompile on every try. 
+# +# Loading through the already-running server (descriptor in server.endpoint) +# avoids briefly running two hyperd instances. BENCH_DURABLE=yes, so this runs +# once (not per query); the file persists across the cold-cycle restarts and +# ./start re-attaches it via a keep-alive connection after each restart. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +rm -f hits_parquet.hyper + +python3 - <<'PY' +from tableauhyperapi import Connection, Endpoint, CreateMode + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +# experimental_persisted_external_tables must already be on for the server +# (set as a HyperProcess startup parameter in ./start); creating/opening a +# persistent external table otherwise errors with "Persisted external tables +# are disabled". +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, "hits_parquet.hyper", CreateMode.CREATE_AND_REPLACE) as connection: + connection.execute_command(open("create.sql").read()) +PY + +sync diff --git a/hyper-parquet/queries.sql b/hyper-parquet-partitioned/queries.sql similarity index 67% rename from hyper-parquet/queries.sql rename to hyper-parquet-partitioned/queries.sql index 00114a30a7..2230946c78 100644 --- a/hyper-parquet/queries.sql +++ b/hyper-parquet-partitioned/queries.sql @@ -27,7 +27,7 @@ SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhras SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM("ResolutionWidth"::bigint), 
SUM("ResolutionWidth"::bigint + 1), SUM("ResolutionWidth"::bigint + 2), SUM("ResolutionWidth"::bigint + 3), SUM("ResolutionWidth"::bigint + 4), SUM("ResolutionWidth"::bigint + 5), SUM("ResolutionWidth"::bigint + 6), SUM("ResolutionWidth"::bigint + 7), SUM("ResolutionWidth"::bigint + 8), SUM("ResolutionWidth"::bigint + 9), SUM("ResolutionWidth"::bigint + 10), SUM("ResolutionWidth"::bigint + 11), SUM("ResolutionWidth"::bigint + 12), SUM("ResolutionWidth"::bigint + 13), SUM("ResolutionWidth"::bigint + 14), SUM("ResolutionWidth"::bigint + 15), SUM("ResolutionWidth"::bigint + 16), SUM("ResolutionWidth"::bigint + 17), SUM("ResolutionWidth"::bigint + 18), SUM("ResolutionWidth"::bigint + 19), SUM("ResolutionWidth"::bigint + 20), SUM("ResolutionWidth"::bigint + 21), SUM("ResolutionWidth"::bigint + 22), SUM("ResolutionWidth"::bigint + 23), SUM("ResolutionWidth"::bigint + 24), SUM("ResolutionWidth"::bigint + 25), SUM("ResolutionWidth"::bigint + 26), SUM("ResolutionWidth"::bigint + 27), SUM("ResolutionWidth"::bigint + 28), SUM("ResolutionWidth"::bigint + 29), SUM("ResolutionWidth"::bigint + 30), SUM("ResolutionWidth"::bigint + 31), SUM("ResolutionWidth"::bigint + 32), SUM("ResolutionWidth"::bigint + 33), SUM("ResolutionWidth"::bigint + 34), SUM("ResolutionWidth"::bigint + 35), SUM("ResolutionWidth"::bigint + 36), SUM("ResolutionWidth"::bigint + 37), SUM("ResolutionWidth"::bigint + 38), SUM("ResolutionWidth"::bigint + 39), SUM("ResolutionWidth"::bigint + 40), SUM("ResolutionWidth"::bigint + 41), SUM("ResolutionWidth"::bigint + 42), SUM("ResolutionWidth"::bigint + 43), SUM("ResolutionWidth"::bigint + 44), SUM("ResolutionWidth"::bigint + 45), SUM("ResolutionWidth"::bigint + 46), SUM("ResolutionWidth"::bigint + 47), SUM("ResolutionWidth"::bigint + 48), SUM("ResolutionWidth"::bigint + 49), SUM("ResolutionWidth"::bigint + 50), SUM("ResolutionWidth"::bigint + 51), SUM("ResolutionWidth"::bigint + 52), SUM("ResolutionWidth"::bigint + 53), SUM("ResolutionWidth"::bigint + 54), 
SUM("ResolutionWidth"::bigint + 55), SUM("ResolutionWidth"::bigint + 56), SUM("ResolutionWidth"::bigint + 57), SUM("ResolutionWidth"::bigint + 58), SUM("ResolutionWidth"::bigint + 59), SUM("ResolutionWidth"::bigint + 60), SUM("ResolutionWidth"::bigint + 61), SUM("ResolutionWidth"::bigint + 62), SUM("ResolutionWidth"::bigint + 63), SUM("ResolutionWidth"::bigint + 64), SUM("ResolutionWidth"::bigint + 65), SUM("ResolutionWidth"::bigint + 66), SUM("ResolutionWidth"::bigint + 67), SUM("ResolutionWidth"::bigint + 68), SUM("ResolutionWidth"::bigint + 69), SUM("ResolutionWidth"::bigint + 70), SUM("ResolutionWidth"::bigint + 71), SUM("ResolutionWidth"::bigint + 72), SUM("ResolutionWidth"::bigint + 73), SUM("ResolutionWidth"::bigint + 74), SUM("ResolutionWidth"::bigint + 75), SUM("ResolutionWidth"::bigint + 76), SUM("ResolutionWidth"::bigint + 77), SUM("ResolutionWidth"::bigint + 78), SUM("ResolutionWidth"::bigint + 79), SUM("ResolutionWidth"::bigint + 80), SUM("ResolutionWidth"::bigint + 81), SUM("ResolutionWidth"::bigint + 82), SUM("ResolutionWidth"::bigint + 83), SUM("ResolutionWidth"::bigint + 84), SUM("ResolutionWidth"::bigint + 85), SUM("ResolutionWidth"::bigint + 86), SUM("ResolutionWidth"::bigint + 87), SUM("ResolutionWidth"::bigint + 88), SUM("ResolutionWidth"::bigint + 89) FROM hits; +SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), 
SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; diff --git a/hyper-parquet-partitioned/query b/hyper-parquet-partitioned/query new file mode 100755 index 0000000000..e869d9347d --- /dev/null +++ b/hyper-parquet-partitioned/query @@ -0,0 +1,37 @@ +#!/bin/bash +# Reads a SQL query from stdin, runs it once against the persistent external +# table in hits_parquet.hyper on the long-lived Hyper server started by ./start +# (descriptor in server.endpoint). +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +query_file=$(mktemp) +trap 'rm -f "$query_file"' EXIT +cat > "$query_file" + +python3 - "$query_file" <<'PY' +import sys +import timeit +from tableauhyperapi import Connection, Endpoint + +with open(sys.argv[1]) as f: + query = f.read() + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, "hits_parquet.hyper") as connection: + start = timeit.default_timer() + rows = connection.execute_list_query(query) + end = timeit.default_timer() + +for r in rows: + print(r) + +print(f"{end - start:.3f}", file=sys.stderr) +PY diff --git a/hyper-parquet/results/20240117/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20240117/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20240117/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20240117/c6a.4xlarge.json index 6df7795953..127c7e6503 100644 --- 
a/hyper-parquet/results/20240117/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20240117/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2024-01-17", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20240117/c6a.metal.json b/hyper-parquet-partitioned/results/20240117/c6a.metal.json similarity index 98% rename from hyper-parquet/results/20240117/c6a.metal.json rename to hyper-parquet-partitioned/results/20240117/c6a.metal.json index 7bacbde4b8..6073dbe1c6 100644 --- a/hyper-parquet/results/20240117/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20240117/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2024-01-17", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20250301/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20250301/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20250301/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20250301/c6a.4xlarge.json index 4a7dc18510..50fcb27776 100644 --- a/hyper-parquet/results/20250301/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20250301/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-03-01", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250301/c6a.metal.json b/hyper-parquet-partitioned/results/20250301/c6a.metal.json similarity index 98% rename from hyper-parquet/results/20250301/c6a.metal.json rename to hyper-parquet-partitioned/results/20250301/c6a.metal.json index 94acf3b8e9..7c00c5d8c0 100644 --- a/hyper-parquet/results/20250301/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20250301/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": 
"Salesforce Hyper (Parquet, partitioned)", "date": "2025-03-01", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20250503/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20250503/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20250503/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20250503/c6a.4xlarge.json index 684f48e3c9..d8fea5a610 100644 --- a/hyper-parquet/results/20250503/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20250503/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-05-03", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250503/c6a.metal.json b/hyper-parquet-partitioned/results/20250503/c6a.metal.json similarity index 98% rename from hyper-parquet/results/20250503/c6a.metal.json rename to hyper-parquet-partitioned/results/20250503/c6a.metal.json index 20ec52baff..115211fd4c 100644 --- a/hyper-parquet/results/20250503/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20250503/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-05-03", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20250710/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20250710/c6a.2xlarge.json similarity index 98% rename from hyper-parquet/results/20250710/c6a.2xlarge.json rename to hyper-parquet-partitioned/results/20250710/c6a.2xlarge.json index b52072659a..f6464cbee0 100644 --- a/hyper-parquet/results/20250710/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20250710/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-07-10", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250710/c6a.4xlarge.json 
b/hyper-parquet-partitioned/results/20250710/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20250710/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20250710/c6a.4xlarge.json index f011c8c774..87938e7648 100644 --- a/hyper-parquet/results/20250710/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20250710/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-07-10", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250830/c7a.metal-48xl.json b/hyper-parquet-partitioned/results/20250830/c7a.metal-48xl.json similarity index 98% rename from hyper-parquet/results/20250830/c7a.metal-48xl.json rename to hyper-parquet-partitioned/results/20250830/c7a.metal-48xl.json index 56665c4931..33d3635fc9 100644 --- a/hyper-parquet/results/20250830/c7a.metal-48xl.json +++ b/hyper-parquet-partitioned/results/20250830/c7a.metal-48xl.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-30", "machine": "c7a.metal-48xl", "cluster_size": 1, diff --git a/hyper-parquet/results/20250831/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20250831/c6a.2xlarge.json similarity index 98% rename from hyper-parquet/results/20250831/c6a.2xlarge.json rename to hyper-parquet-partitioned/results/20250831/c6a.2xlarge.json index c47faa115f..0f79565607 100644 --- a/hyper-parquet/results/20250831/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20250831/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-31", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250831/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20250831/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20250831/c6a.4xlarge.json 
rename to hyper-parquet-partitioned/results/20250831/c6a.4xlarge.json index 96cb5f9f8f..2949f56bc1 100644 --- a/hyper-parquet/results/20250831/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20250831/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-31", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250831/c6a.large.json b/hyper-parquet-partitioned/results/20250831/c6a.large.json similarity index 98% rename from hyper-parquet/results/20250831/c6a.large.json rename to hyper-parquet-partitioned/results/20250831/c6a.large.json index daf84af492..06bc9adcc0 100644 --- a/hyper-parquet/results/20250831/c6a.large.json +++ b/hyper-parquet-partitioned/results/20250831/c6a.large.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-31", "machine": "c6a.large", "cluster_size": 1, diff --git a/hyper-parquet/results/20250831/c6a.xlarge.json b/hyper-parquet-partitioned/results/20250831/c6a.xlarge.json similarity index 98% rename from hyper-parquet/results/20250831/c6a.xlarge.json rename to hyper-parquet-partitioned/results/20250831/c6a.xlarge.json index 65139d0e1a..70f5e6a819 100644 --- a/hyper-parquet/results/20250831/c6a.xlarge.json +++ b/hyper-parquet-partitioned/results/20250831/c6a.xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-31", "machine": "c6a.xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20250831/t3a.small.json b/hyper-parquet-partitioned/results/20250831/t3a.small.json similarity index 98% rename from hyper-parquet/results/20250831/t3a.small.json rename to hyper-parquet-partitioned/results/20250831/t3a.small.json index 7909848150..0b1f998b54 100644 --- a/hyper-parquet/results/20250831/t3a.small.json +++ 
b/hyper-parquet-partitioned/results/20250831/t3a.small.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-08-31", "machine": "t3a.small", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20251026/c6a.2xlarge.json similarity index 98% rename from hyper-parquet/results/20251026/c6a.2xlarge.json rename to hyper-parquet-partitioned/results/20251026/c6a.2xlarge.json index 82e7e417dc..e2b4efd6ae 100644 --- a/hyper-parquet/results/20251026/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20251026/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20251026/c6a.4xlarge.json similarity index 98% rename from hyper-parquet/results/20251026/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20251026/c6a.4xlarge.json index 9e74067d15..0dc2ae76d2 100644 --- a/hyper-parquet/results/20251026/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20251026/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c6a.large.json b/hyper-parquet-partitioned/results/20251026/c6a.large.json similarity index 98% rename from hyper-parquet/results/20251026/c6a.large.json rename to hyper-parquet-partitioned/results/20251026/c6a.large.json index a7811a9731..661a830477 100644 --- a/hyper-parquet/results/20251026/c6a.large.json +++ b/hyper-parquet-partitioned/results/20251026/c6a.large.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", 
"date": "2025-10-26", "machine": "c6a.large", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c6a.metal.json b/hyper-parquet-partitioned/results/20251026/c6a.metal.json similarity index 98% rename from hyper-parquet/results/20251026/c6a.metal.json rename to hyper-parquet-partitioned/results/20251026/c6a.metal.json index eaac0fa7f1..4abfc2ef9a 100644 --- a/hyper-parquet/results/20251026/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20251026/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c6a.xlarge.json b/hyper-parquet-partitioned/results/20251026/c6a.xlarge.json similarity index 98% rename from hyper-parquet/results/20251026/c6a.xlarge.json rename to hyper-parquet-partitioned/results/20251026/c6a.xlarge.json index 8762e667fa..66278b14b7 100644 --- a/hyper-parquet/results/20251026/c6a.xlarge.json +++ b/hyper-parquet-partitioned/results/20251026/c6a.xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "c6a.xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/c7a.metal-48xl.json b/hyper-parquet-partitioned/results/20251026/c7a.metal-48xl.json similarity index 98% rename from hyper-parquet/results/20251026/c7a.metal-48xl.json rename to hyper-parquet-partitioned/results/20251026/c7a.metal-48xl.json index b0bccf876d..01da8b297e 100644 --- a/hyper-parquet/results/20251026/c7a.metal-48xl.json +++ b/hyper-parquet-partitioned/results/20251026/c7a.metal-48xl.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "c7a.metal-48xl", "cluster_size": 1, diff --git a/hyper-parquet/results/20251026/t3a.small.json 
b/hyper-parquet-partitioned/results/20251026/t3a.small.json similarity index 98% rename from hyper-parquet/results/20251026/t3a.small.json rename to hyper-parquet-partitioned/results/20251026/t3a.small.json index 91ec634b28..398b85e99b 100644 --- a/hyper-parquet/results/20251026/t3a.small.json +++ b/hyper-parquet-partitioned/results/20251026/t3a.small.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-10-26", "machine": "t3a.small", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20251124/c6a.2xlarge.json similarity index 95% rename from hyper-parquet/results/20251124/c6a.2xlarge.json rename to hyper-parquet-partitioned/results/20251124/c6a.2xlarge.json index 1a107a5bfc..82867c5209 100644 --- a/hyper-parquet/results/20251124/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20251124/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20251124/c6a.4xlarge.json similarity index 95% rename from hyper-parquet/results/20251124/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20251124/c6a.4xlarge.json index d6f54f91c0..d8fc4ddf8d 100644 --- a/hyper-parquet/results/20251124/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20251124/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c6a.large.json b/hyper-parquet-partitioned/results/20251124/c6a.large.json similarity index 95% rename from hyper-parquet/results/20251124/c6a.large.json rename to 
hyper-parquet-partitioned/results/20251124/c6a.large.json index 66d572c883..7ef72237be 100644 --- a/hyper-parquet/results/20251124/c6a.large.json +++ b/hyper-parquet-partitioned/results/20251124/c6a.large.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c6a.large", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c6a.metal.json b/hyper-parquet-partitioned/results/20251124/c6a.metal.json similarity index 95% rename from hyper-parquet/results/20251124/c6a.metal.json rename to hyper-parquet-partitioned/results/20251124/c6a.metal.json index 70baa10fd5..d588a2cf98 100644 --- a/hyper-parquet/results/20251124/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20251124/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c6a.xlarge.json b/hyper-parquet-partitioned/results/20251124/c6a.xlarge.json similarity index 95% rename from hyper-parquet/results/20251124/c6a.xlarge.json rename to hyper-parquet-partitioned/results/20251124/c6a.xlarge.json index f5bd9e9755..4809542c63 100644 --- a/hyper-parquet/results/20251124/c6a.xlarge.json +++ b/hyper-parquet-partitioned/results/20251124/c6a.xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c6a.xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/c7a.metal-48xl.json b/hyper-parquet-partitioned/results/20251124/c7a.metal-48xl.json similarity index 95% rename from hyper-parquet/results/20251124/c7a.metal-48xl.json rename to hyper-parquet-partitioned/results/20251124/c7a.metal-48xl.json index a20de51e1a..93f9a41f56 100644 --- a/hyper-parquet/results/20251124/c7a.metal-48xl.json +++ 
b/hyper-parquet-partitioned/results/20251124/c7a.metal-48xl.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "c7a.metal-48xl", "cluster_size": 1, diff --git a/hyper-parquet/results/20251124/t3a.small.json b/hyper-parquet-partitioned/results/20251124/t3a.small.json similarity index 95% rename from hyper-parquet/results/20251124/t3a.small.json rename to hyper-parquet-partitioned/results/20251124/t3a.small.json index b0a47437f1..075524dbb4 100644 --- a/hyper-parquet/results/20251124/t3a.small.json +++ b/hyper-parquet-partitioned/results/20251124/t3a.small.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2025-11-24", "machine": "t3a.small", "cluster_size": 1, diff --git a/hyper-parquet/results/20260509/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20260509/c6a.4xlarge.json similarity index 96% rename from hyper-parquet/results/20260509/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20260509/c6a.4xlarge.json index 04dd073be1..e31dc7b6b9 100644 --- a/hyper-parquet/results/20260509/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20260509/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-09", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260509/c6a.metal.json b/hyper-parquet-partitioned/results/20260509/c6a.metal.json similarity index 96% rename from hyper-parquet/results/20260509/c6a.metal.json rename to hyper-parquet-partitioned/results/20260509/c6a.metal.json index db98fea7fa..fde33b3438 100644 --- a/hyper-parquet/results/20260509/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20260509/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", 
"date": "2026-05-09", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20260510/c6a.2xlarge.json similarity index 96% rename from hyper-parquet/results/20260510/c6a.2xlarge.json rename to hyper-parquet-partitioned/results/20260510/c6a.2xlarge.json index 7d64e6abf7..8a370d641e 100644 --- a/hyper-parquet/results/20260510/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20260510/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20260510/c6a.4xlarge.json similarity index 96% rename from hyper-parquet/results/20260510/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20260510/c6a.4xlarge.json index 45e6ac65d3..5909e999d9 100644 --- a/hyper-parquet/results/20260510/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20260510/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/c6a.large.json b/hyper-parquet-partitioned/results/20260510/c6a.large.json similarity index 96% rename from hyper-parquet/results/20260510/c6a.large.json rename to hyper-parquet-partitioned/results/20260510/c6a.large.json index cd1f4b6ae2..8ececba9df 100644 --- a/hyper-parquet/results/20260510/c6a.large.json +++ b/hyper-parquet-partitioned/results/20260510/c6a.large.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "c6a.large", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/c6a.metal.json 
b/hyper-parquet-partitioned/results/20260510/c6a.metal.json similarity index 96% rename from hyper-parquet/results/20260510/c6a.metal.json rename to hyper-parquet-partitioned/results/20260510/c6a.metal.json index 756296438a..332ab7ad5b 100644 --- a/hyper-parquet/results/20260510/c6a.metal.json +++ b/hyper-parquet-partitioned/results/20260510/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/c7a.metal-48xl.json b/hyper-parquet-partitioned/results/20260510/c7a.metal-48xl.json similarity index 96% rename from hyper-parquet/results/20260510/c7a.metal-48xl.json rename to hyper-parquet-partitioned/results/20260510/c7a.metal-48xl.json index 3789e8aa12..7cac612f48 100644 --- a/hyper-parquet/results/20260510/c7a.metal-48xl.json +++ b/hyper-parquet-partitioned/results/20260510/c7a.metal-48xl.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "c7a.metal-48xl", "cluster_size": 1, diff --git a/hyper-parquet/results/20260510/t3a.small.json b/hyper-parquet-partitioned/results/20260510/t3a.small.json similarity index 96% rename from hyper-parquet/results/20260510/t3a.small.json rename to hyper-parquet-partitioned/results/20260510/t3a.small.json index 8196dfa430..6772c44e69 100644 --- a/hyper-parquet/results/20260510/t3a.small.json +++ b/hyper-parquet-partitioned/results/20260510/t3a.small.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-10", "machine": "t3a.small", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c6a.2xlarge.json b/hyper-parquet-partitioned/results/20260511/c6a.2xlarge.json similarity index 96% rename from hyper-parquet/results/20260511/c6a.2xlarge.json rename to 
hyper-parquet-partitioned/results/20260511/c6a.2xlarge.json index 41074c43ff..53e41634f0 100644 --- a/hyper-parquet/results/20260511/c6a.2xlarge.json +++ b/hyper-parquet-partitioned/results/20260511/c6a.2xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c6a.2xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c6a.4xlarge.json b/hyper-parquet-partitioned/results/20260511/c6a.4xlarge.json similarity index 96% rename from hyper-parquet/results/20260511/c6a.4xlarge.json rename to hyper-parquet-partitioned/results/20260511/c6a.4xlarge.json index 24b68a6ba8..96c4665da0 100644 --- a/hyper-parquet/results/20260511/c6a.4xlarge.json +++ b/hyper-parquet-partitioned/results/20260511/c6a.4xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c6a.4xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c6a.large.json b/hyper-parquet-partitioned/results/20260511/c6a.large.json similarity index 96% rename from hyper-parquet/results/20260511/c6a.large.json rename to hyper-parquet-partitioned/results/20260511/c6a.large.json index 3b0c13ff4f..f90886215c 100644 --- a/hyper-parquet/results/20260511/c6a.large.json +++ b/hyper-parquet-partitioned/results/20260511/c6a.large.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c6a.large", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c6a.metal.json b/hyper-parquet-partitioned/results/20260511/c6a.metal.json similarity index 96% rename from hyper-parquet/results/20260511/c6a.metal.json rename to hyper-parquet-partitioned/results/20260511/c6a.metal.json index eb2842709a..071148a75e 100644 --- a/hyper-parquet/results/20260511/c6a.metal.json +++ 
b/hyper-parquet-partitioned/results/20260511/c6a.metal.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c6a.metal", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c6a.xlarge.json b/hyper-parquet-partitioned/results/20260511/c6a.xlarge.json similarity index 96% rename from hyper-parquet/results/20260511/c6a.xlarge.json rename to hyper-parquet-partitioned/results/20260511/c6a.xlarge.json index 033fbc6ed7..56c56c6345 100644 --- a/hyper-parquet/results/20260511/c6a.xlarge.json +++ b/hyper-parquet-partitioned/results/20260511/c6a.xlarge.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c6a.xlarge", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/c7a.metal-48xl.json b/hyper-parquet-partitioned/results/20260511/c7a.metal-48xl.json similarity index 96% rename from hyper-parquet/results/20260511/c7a.metal-48xl.json rename to hyper-parquet-partitioned/results/20260511/c7a.metal-48xl.json index db84ffb0c9..d58f490c6c 100644 --- a/hyper-parquet/results/20260511/c7a.metal-48xl.json +++ b/hyper-parquet-partitioned/results/20260511/c7a.metal-48xl.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, partitioned)", "date": "2026-05-11", "machine": "c7a.metal-48xl", "cluster_size": 1, diff --git a/hyper-parquet/results/20260511/t3a.small.json b/hyper-parquet-partitioned/results/20260511/t3a.small.json similarity index 96% rename from hyper-parquet/results/20260511/t3a.small.json rename to hyper-parquet-partitioned/results/20260511/t3a.small.json index 2b3a831f29..5daeec77df 100644 --- a/hyper-parquet/results/20260511/t3a.small.json +++ b/hyper-parquet-partitioned/results/20260511/t3a.small.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, 
partitioned)", "date": "2026-05-11", "machine": "t3a.small", "cluster_size": 1, diff --git a/hyper-parquet-partitioned/start b/hyper-parquet-partitioned/start new file mode 100755 index 0000000000..f13ebd21d7 --- /dev/null +++ b/hyper-parquet-partitioned/start @@ -0,0 +1,77 @@ +#!/bin/bash +# Launch one long-lived Hyper server (hyperd) and publish its connection +# descriptor to server.endpoint. Every ./query invocation then connects to +# this single persistent process instead of spawning its own. +# +# Unlike the earlier temp-external-table approach, the data is exposed through +# a persistent external table in hits_parquet.hyper (created by ./load). The +# supervisor holds a keep-alive connection that keeps that catalog attached for +# the server lifetime. This mirrors hyper/ (native): without the keep-alive the +# catalog is destroyed the moment a ./query process exits. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# Already running? The pidfile + a live process is authoritative. +if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then + exit 0 +fi + +# Clean up stale artifacts from a previous (possibly crashed) server. +rm -f server.pid server.endpoint + +# Background a supervisor that opens HyperProcess, writes the descriptor, +# attaches hits_parquet.hyper (if present) to keep its catalog + plans warm, +# then blocks until ./stop signals it. 
+nohup python3 - >server.log 2>&1 <<'PY' & +import os +import signal +import sys +from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode + + +def _terminate(*_): + sys.exit(0) + + +signal.signal(signal.SIGTERM, _terminate) +signal.signal(signal.SIGINT, _terminate) + +with HyperProcess( + telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, + parameters={"experimental_persisted_external_tables": "on"}, +) as hyper: + # Keep-alive: hold hits_parquet.hyper attached for the server lifetime so + # its external-table catalog (and the plan-cache entries depending on it) + # are not torn down between per-try ./query processes. Skipped on the + # pre-load ./start (file not yet created); established by the next + # cold-cycle ./start once ./load has created it. Done BEFORE publishing the + # endpoint so ./check never sees a "ready" server still mid-attach. + keepalive = None + if os.path.exists("hits_parquet.hyper"): + keepalive = Connection(hyper.endpoint, "hits_parquet.hyper", CreateMode.NONE) + + with open("server.endpoint.tmp", "w") as f: + f.write(hyper.endpoint.connection_descriptor) + os.replace("server.endpoint.tmp", "server.endpoint") + + try: + while True: + signal.pause() + finally: + if keepalive is not None: + keepalive.close() +PY +echo $! > server.pid + +# Give the supervisor a moment to publish the endpoint. +for _ in $(seq 1 60); do + if [ -s server.endpoint ]; then + exit 0 + fi + sleep 1 +done + +echo "hyper-parquet: server did not publish server.endpoint within 60s" >&2 +exit 1 diff --git a/hyper-parquet-partitioned/stop b/hyper-parquet-partitioned/stop new file mode 100755 index 0000000000..049794e87f --- /dev/null +++ b/hyper-parquet-partitioned/stop @@ -0,0 +1,22 @@ +#!/bin/bash +# Stop the persistent Hyper server started by ./start. 
SIGTERM the supervisor, then SIGKILL it if it is still alive after 60s. +set -e + +if [ -f server.pid ]; then + pid="$(cat server.pid 2>/dev/null || true)" + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + # Wait for the supervisor (and thus hyperd) to actually exit so the + # benchmark driver's drop_caches isn't defeated by pages still pinned + # by a live mmap. + for _ in $(seq 1 60); do + kill -0 "$pid" 2>/dev/null || break + sleep 1 + done + # Still alive after 60s? Force it. + kill -9 "$pid" 2>/dev/null || true + fi +fi + +rm -f server.pid server.endpoint +exit 0 diff --git a/hyper-parquet-partitioned/template.json b/hyper-parquet-partitioned/template.json new file mode 100644 index 0000000000..cc3cce23e5 --- /dev/null +++ b/hyper-parquet-partitioned/template.json @@ -0,0 +1,11 @@ +{ + "system": "Salesforce Hyper (Parquet, partitioned)", + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "C++", + "column-oriented", + "stateless" + ] +} diff --git a/hyper-parquet-single/benchmark.sh b/hyper-parquet-single/benchmark.sh new file mode 100755 index 0000000000..0225254a25 --- /dev/null +++ b/hyper-parquet-single/benchmark.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single" +export BENCH_DURABLE=yes +export BENCH_RESTARTABLE=yes +exec ../lib/benchmark-common.sh diff --git a/hyper-parquet-single/check b/hyper-parquet-single/check new file mode 100755 index 0000000000..f3fdc4a1f4 --- /dev/null +++ b/hyper-parquet-single/check @@ -0,0 +1,24 @@ +#!/bin/bash +# Readiness probe: connect to the persistent Hyper server (via the descriptor +# ./start published to server.endpoint) and run SELECT 1. Non-zero exit means +# "not up yet" — the benchmark driver polls this in a loop after ./start and +# uses its transition to failing as the "server is really stopped" signal in +# the cold cycle. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# No endpoint published => server isn't up.
+[ -s server.endpoint ] || exit 1 + +python3 - <<'PY' +from tableauhyperapi import Connection, Endpoint + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_list_query("SELECT 1") +PY diff --git a/hyper-parquet-single/create.sql b/hyper-parquet-single/create.sql new file mode 100644 index 0000000000..eca786d8fa --- /dev/null +++ b/hyper-parquet-single/create.sql @@ -0,0 +1,3 @@ +create external table hits +for 'hits.parquet' +with (format => 'parquet', binary_as_text => true, immutable => true); diff --git a/hyper-parquet-single/data-size b/hyper-parquet-single/data-size new file mode 100755 index 0000000000..ff90d09f61 --- /dev/null +++ b/hyper-parquet-single/data-size @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +du -bcsL hits.parquet | awk '/total$/ { print $1 }' diff --git a/hyper-parquet-single/install b/hyper-parquet-single/install new file mode 100755 index 0000000000..cd102fae47 --- /dev/null +++ b/hyper-parquet-single/install @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +# See hyper/install — tableauhyperapi has no Linux arm64 wheel. +if [ "$(uname -m)" != "x86_64" ] && [ "$(uname -m)" != "amd64" ]; then + echo "hyper-parquet: tableauhyperapi has no Linux $(uname -m) wheel; skipping" >&2 + exit 1 +fi + +sudo apt-get update -y +sudo apt-get install -y python3-pip python3-venv + +if [ ! -d myenv ]; then + python3 -m venv myenv +fi + +# shellcheck disable=SC1091 +source myenv/bin/activate + +pip install --upgrade pip +pip install tableauhyperapi diff --git a/hyper-parquet-single/load b/hyper-parquet-single/load new file mode 100755 index 0000000000..2cc1da5abe --- /dev/null +++ b/hyper-parquet-single/load @@ -0,0 +1,35 @@ +#!/bin/bash +# Create hits_parquet.hyper holding a persistent external table over the +#
The data stays in the parquet files; this .hyper +# DB only persists the external table *catalog* so its compiled query plans +# survive across the per-try connections the benchmark driver opens (./query). +# Without a persistent catalog kept attached, every connection close tears the +# catalog down and evicts its plan-cache entries.2s recompile on every try. +# +# Loading through the already-running server (descriptor in server.endpoint) +# avoids briefly running two hyperd instances. BENCH_DURABLE=yes, so this runs +# once (not per query); the file persists across the cold-cycle restarts and +# ./start re-attaches it via a keep-alive connection after each restart. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +rm -f hits_parquet.hyper + +python3 - <<'PY' +from tableauhyperapi import Connection, Endpoint, CreateMode + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +# experimental_persisted_external_tables must already be on for the server +# (set as a HyperProcess startup parameter in ./start); creating/opening a +# persistent external table otherwise errors with "Persisted external tables +# are disabled". 
+endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, "hits_parquet.hyper", CreateMode.CREATE_AND_REPLACE) as connection: + connection.execute_command(open("create.sql").read()) +PY + +sync diff --git a/hyper-parquet-single/queries.sql b/hyper-parquet-single/queries.sql new file mode 100644 index 0000000000..2230946c78 --- /dev/null +++ b/hyper-parquet-single/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; +SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; +SELECT AVG("UserID") FROM hits; +SELECT COUNT(DISTINCT "UserID") FROM hits; +SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; +SELECT (date '1970-01-01' + MIN("EventDate")), (date '1970-01-01' + MAX("EventDate")) FROM hits; +SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; +SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; +SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; +SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; +SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "UserID", COUNT(*) FROM 
hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; +SELECT "UserID", extract(minute FROM to_timestamp("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; +SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; +SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), 
SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), 
SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; +SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; +SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; +SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; +SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10; +SELECT "Title", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10; +SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= 
'2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000; +SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000; +SELECT "URLHash", (date '1970-01-01' + "EventDate"), COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", (date '1970-01-01' + "EventDate") ORDER BY "PageViews" DESC LIMIT 10 OFFSET 100; +SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', to_timestamp("EventTime")) AS M, COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-14' AND (date '1970-01-01' + "EventDate") <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp("EventTime")) ORDER BY DATE_TRUNC('minute', to_timestamp("EventTime")) LIMIT 10 OFFSET 1000; diff --git a/hyper-parquet-single/query b/hyper-parquet-single/query new file mode 100755 index 0000000000..dcf31a9980 --- /dev/null +++ b/hyper-parquet-single/query @@ -0,0 +1,37 @@ 
+#!/bin/bash +# Reads a SQL query from stdin, runs it once against the PERSISTENT external +# table in hits_parquet.hyper on the long-lived Hyper server started by ./start +# (descriptor in server.endpoint). +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +query_file=$(mktemp) +trap 'rm -f "$query_file"' EXIT +cat > "$query_file" + +python3 - "$query_file" <<'PY' +import sys +import timeit +from tableauhyperapi import Connection, Endpoint + +with open(sys.argv[1]) as f: + query = f.read() + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, "hits_parquet.hyper") as connection: + start = timeit.default_timer() + rows = connection.execute_list_query(query) + end = timeit.default_timer() + +for r in rows: + print(r) + +print(f"{end - start:.3f}", file=sys.stderr) +PY diff --git a/hyper-parquet-single/start b/hyper-parquet-single/start new file mode 100755 index 0000000000..f13ebd21d7 --- /dev/null +++ b/hyper-parquet-single/start @@ -0,0 +1,77 @@ +#!/bin/bash +# Launch one long-lived Hyper server (hyperd) and publish its connection +# descriptor to server.endpoint. Every ./query invocation then connects to +# this single persistent process instead of spawning its own. +# +# Unlike the earlier temp-external-table approach, the data is exposed through +# a persistent external table in hits_parquet.hyper (created by ./load). The +# supervisor holds a keep-alive connection that keeps that catalog attached for +# the server lifetime. This mirrors hyper/ (native): without the keep-alive the +# catalog is destroyed the moment a ./query process exits. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# Already running? The pidfile + a live process is authoritative. 
+if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then + exit 0 +fi + +# Clean up stale artifacts from a previous (possibly crashed) server. +rm -f server.pid server.endpoint + +# Background a supervisor that opens HyperProcess, writes the descriptor, +# attaches hits_parquet.hyper (if present) to keep its catalog + plans warm, +# then blocks until ./stop signals it. +nohup python3 - >server.log 2>&1 <<'PY' & +import os +import signal +import sys +from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode + + +def _terminate(*_): + sys.exit(0) + + +signal.signal(signal.SIGTERM, _terminate) +signal.signal(signal.SIGINT, _terminate) + +with HyperProcess( + telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, + parameters={"experimental_persisted_external_tables": "on"}, +) as hyper: + # Keep-alive: hold hits_parquet.hyper attached for the server lifetime so + # its external-table catalog (and the plan-cache entries depending on it) + # are not torn down between per-try ./query processes. Skipped on the + # pre-load ./start (file not yet created); established by the next + # cold-cycle ./start once ./load has created it. Done BEFORE publishing the + # endpoint so ./check never sees a "ready" server still mid-attach. + keepalive = None + if os.path.exists("hits_parquet.hyper"): + keepalive = Connection(hyper.endpoint, "hits_parquet.hyper", CreateMode.NONE) + + with open("server.endpoint.tmp", "w") as f: + f.write(hyper.endpoint.connection_descriptor) + os.replace("server.endpoint.tmp", "server.endpoint") + + try: + while True: + signal.pause() + finally: + if keepalive is not None: + keepalive.close() +PY +echo $! > server.pid + +# Give the supervisor a moment to publish the endpoint. 
+for _ in $(seq 1 60); do + if [ -s server.endpoint ]; then + exit 0 + fi + sleep 1 +done + +echo "hyper-parquet: server did not publish server.endpoint within 60s" >&2 +exit 1 diff --git a/hyper-parquet-single/stop b/hyper-parquet-single/stop new file mode 100755 index 0000000000..049794e87f --- /dev/null +++ b/hyper-parquet-single/stop @@ -0,0 +1,22 @@ +#!/bin/bash +# Stop the persistent Hyper server started by ./start. SIGTERM the supervisor, then SIGKILL it if it is still alive after 60s. +set -e + +if [ -f server.pid ]; then + pid="$(cat server.pid 2>/dev/null || true)" + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + # Wait for the supervisor (and thus hyperd) to actually exit so the + # benchmark driver's drop_caches isn't defeated by pages still pinned + # by a live mmap. + for _ in $(seq 1 60); do + kill -0 "$pid" 2>/dev/null || break + sleep 1 + done + # Still alive after 60s? Force it. + kill -9 "$pid" 2>/dev/null || true + fi +fi + +rm -f server.pid server.endpoint +exit 0 diff --git a/hyper-parquet/template.json b/hyper-parquet-single/template.json similarity index 72% rename from hyper-parquet/template.json rename to hyper-parquet-single/template.json index 2a7c93a064..5b34290837 100644 --- a/hyper-parquet/template.json +++ b/hyper-parquet-single/template.json @@ -1,5 +1,5 @@ { - "system": "Salesforce Hyper (Parquet)", + "system": "Salesforce Hyper (Parquet, single)", "proprietary": "yes", "hardware": "cpu", "tuned": "no", diff --git a/hyper-parquet/benchmark.sh b/hyper-parquet/benchmark.sh deleted file mode 100755 index fea177f34d..0000000000 --- a/hyper-parquet/benchmark.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -# Thin shim — actual flow is in lib/benchmark-common.sh.
-export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned" -export BENCH_RESTARTABLE=no -# Single-process engine: each query forks a fresh full-machine process with no -# shared scheduler across connections, so the concurrent-QPS test only -# oversubscribes RAM rather than measuring throughput. Skip it by default; -# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946. -export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" -exec ../lib/benchmark-common.sh diff --git a/hyper-parquet/check b/hyper-parquet/check deleted file mode 100755 index 23ad27458a..0000000000 --- a/hyper-parquet/check +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -# shellcheck disable=SC1091 -source myenv/bin/activate - -python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_list_query("SELECT 1") -PY diff --git a/hyper-parquet/load b/hyper-parquet/load deleted file mode 100755 index 19ff8b994b..0000000000 --- a/hyper-parquet/load +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -# hyper-parquet queries the parquet files directly via a temp external table -# defined in create.sql, recreated per query. No persistent DB to load. -set -e -sync diff --git a/hyper-parquet/query b/hyper-parquet/query deleted file mode 100755 index 36e4c2efb9..0000000000 --- a/hyper-parquet/query +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Reads a SQL query from stdin, runs it via tableau hyperapi against the -# partitioned parquet files (registered as a temp external table from -# create.sql). -# Stdout: query result. -# Stderr: query runtime in fractional seconds on the last line. -set -e - -# shellcheck disable=SC1091 -source myenv/bin/activate - -# Stage stdin into a temp file: `python3 - <<'PY'` already consumes stdin to -# read the program, so sys.stdin.read() inside the heredoc returns "". 
-query_file=$(mktemp) -trap 'rm -f "$query_file"' EXIT -cat > "$query_file" - -python3 - "$query_file" <<'PY' -import sys -import timeit -from tableauhyperapi import HyperProcess, Telemetry, Connection - -with open(sys.argv[1]) as f: - query = f.read() - -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_command(open("create.sql").read()) - start = timeit.default_timer() - rows = connection.execute_list_query(query) - end = timeit.default_timer() - -for r in rows: - print(r) - -print(f"{end - start:.3f}", file=sys.stderr) -PY diff --git a/hyper-parquet/start b/hyper-parquet/start deleted file mode 100755 index 06bd986563..0000000000 --- a/hyper-parquet/start +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -exit 0 diff --git a/hyper-parquet/stop b/hyper-parquet/stop deleted file mode 100755 index 06bd986563..0000000000 --- a/hyper-parquet/stop +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -exit 0 diff --git a/hyper/benchmark.sh b/hyper/benchmark.sh index 1b692a2155..7d4ef95c38 100755 --- a/hyper/benchmark.sh +++ b/hyper/benchmark.sh @@ -1,10 +1,4 @@ #!/bin/bash # Thin shim — actual flow is in lib/benchmark-common.sh. export BENCH_DOWNLOAD_SCRIPT="download-hits-csv" -export BENCH_RESTARTABLE=no -# Single-process engine: each query forks a fresh full-machine process with no -# shared scheduler across connections, so the concurrent-QPS test only -# oversubscribes RAM rather than measuring throughput. Skip it by default; -# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946. -export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" exec ../lib/benchmark-common.sh diff --git a/hyper/check b/hyper/check index 23ad27458a..f3fdc4a1f4 100755 --- a/hyper/check +++ b/hyper/check @@ -1,12 +1,24 @@ #!/bin/bash +# Readiness probe: connect to the persistent Hyper server (via the descriptor +# ./start published to server.endpoint) and run SELECT 1. 
Non-zero exit means +# "not up yet" — the benchmark driver polls this in a loop after ./start and +# uses its transition to failing as the "server is really stopped" signal in +# the cold cycle. set -e # shellcheck disable=SC1091 source myenv/bin/activate +# No endpoint published => server isn't up. +[ -s server.endpoint ] || exit 1 + python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_list_query("SELECT 1") +from tableauhyperapi import Connection, Endpoint + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_list_query("SELECT 1") PY diff --git a/hyper/load b/hyper/load index a4a1d58cb0..9c34e7d839 100755 --- a/hyper/load +++ b/hyper/load @@ -1,4 +1,8 @@ #!/bin/bash +# Create hits.hyper and COPY hits.csv into it, using the PERSISTENT Hyper +# server started by ./start (descriptor in server.endpoint). Loading through +# the already-running server avoids briefly running two hyperd instances +# (each of which would try to claim up to 80% of RAM) during the heavy COPY. 
set -e # shellcheck disable=SC1091 @@ -8,12 +12,15 @@ source myenv/bin/activate rm -f hits.hyper python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode +from tableauhyperapi import Connection, Endpoint, CreateMode -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection: - connection.execute_command(open("create.sql").read()) - connection.execute_command("copy hits from 'hits.csv' with (format csv)") +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection: + connection.execute_command(open("create.sql").read()) + connection.execute_command("copy hits from 'hits.csv' with (format csv)") PY rm -f hits.csv diff --git a/hyper/queries.sql b/hyper/queries.sql index ecfb6b77d0..31f65fc898 100644 --- a/hyper/queries.sql +++ b/hyper/queries.sql @@ -27,7 +27,7 @@ SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIM SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth::bigint), SUM(ResolutionWidth::bigint + 1), SUM(ResolutionWidth::bigint + 2), SUM(ResolutionWidth::bigint + 3), SUM(ResolutionWidth::bigint + 4), SUM(ResolutionWidth::bigint + 5), SUM(ResolutionWidth::bigint + 6), SUM(ResolutionWidth::bigint + 7), SUM(ResolutionWidth::bigint + 8), SUM(ResolutionWidth::bigint + 
9), SUM(ResolutionWidth::bigint + 10), SUM(ResolutionWidth::bigint + 11), SUM(ResolutionWidth::bigint + 12), SUM(ResolutionWidth::bigint + 13), SUM(ResolutionWidth::bigint + 14), SUM(ResolutionWidth::bigint + 15), SUM(ResolutionWidth::bigint + 16), SUM(ResolutionWidth::bigint + 17), SUM(ResolutionWidth::bigint + 18), SUM(ResolutionWidth::bigint + 19), SUM(ResolutionWidth::bigint + 20), SUM(ResolutionWidth::bigint + 21), SUM(ResolutionWidth::bigint + 22), SUM(ResolutionWidth::bigint + 23), SUM(ResolutionWidth::bigint + 24), SUM(ResolutionWidth::bigint + 25), SUM(ResolutionWidth::bigint + 26), SUM(ResolutionWidth::bigint + 27), SUM(ResolutionWidth::bigint + 28), SUM(ResolutionWidth::bigint + 29), SUM(ResolutionWidth::bigint + 30), SUM(ResolutionWidth::bigint + 31), SUM(ResolutionWidth::bigint + 32), SUM(ResolutionWidth::bigint + 33), SUM(ResolutionWidth::bigint + 34), SUM(ResolutionWidth::bigint + 35), SUM(ResolutionWidth::bigint + 36), SUM(ResolutionWidth::bigint + 37), SUM(ResolutionWidth::bigint + 38), SUM(ResolutionWidth::bigint + 39), SUM(ResolutionWidth::bigint + 40), SUM(ResolutionWidth::bigint + 41), SUM(ResolutionWidth::bigint + 42), SUM(ResolutionWidth::bigint + 43), SUM(ResolutionWidth::bigint + 44), SUM(ResolutionWidth::bigint + 45), SUM(ResolutionWidth::bigint + 46), SUM(ResolutionWidth::bigint + 47), SUM(ResolutionWidth::bigint + 48), SUM(ResolutionWidth::bigint + 49), SUM(ResolutionWidth::bigint + 50), SUM(ResolutionWidth::bigint + 51), SUM(ResolutionWidth::bigint + 52), SUM(ResolutionWidth::bigint + 53), SUM(ResolutionWidth::bigint + 54), SUM(ResolutionWidth::bigint + 55), SUM(ResolutionWidth::bigint + 56), SUM(ResolutionWidth::bigint + 57), SUM(ResolutionWidth::bigint + 58), SUM(ResolutionWidth::bigint + 59), SUM(ResolutionWidth::bigint + 60), SUM(ResolutionWidth::bigint + 61), SUM(ResolutionWidth::bigint + 62), SUM(ResolutionWidth::bigint + 63), SUM(ResolutionWidth::bigint + 64), SUM(ResolutionWidth::bigint + 65), SUM(ResolutionWidth::bigint + 66), 
SUM(ResolutionWidth::bigint + 67), SUM(ResolutionWidth::bigint + 68), SUM(ResolutionWidth::bigint + 69), SUM(ResolutionWidth::bigint + 70), SUM(ResolutionWidth::bigint + 71), SUM(ResolutionWidth::bigint + 72), SUM(ResolutionWidth::bigint + 73), SUM(ResolutionWidth::bigint + 74), SUM(ResolutionWidth::bigint + 75), SUM(ResolutionWidth::bigint + 76), SUM(ResolutionWidth::bigint + 77), SUM(ResolutionWidth::bigint + 78), SUM(ResolutionWidth::bigint + 79), SUM(ResolutionWidth::bigint + 80), SUM(ResolutionWidth::bigint + 81), SUM(ResolutionWidth::bigint + 82), SUM(ResolutionWidth::bigint + 83), SUM(ResolutionWidth::bigint + 84), SUM(ResolutionWidth::bigint + 85), SUM(ResolutionWidth::bigint + 86), SUM(ResolutionWidth::bigint + 87), SUM(ResolutionWidth::bigint + 88), SUM(ResolutionWidth::bigint + 89) FROM hits; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), 
SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; diff --git a/hyper/query b/hyper/query index d0f59d1a0e..3465f9df13 100755 --- a/hyper/query +++ b/hyper/query @@ -1,8 +1,13 @@ #!/bin/bash -# Reads a SQL query from stdin, runs it via tableau hyperapi against -# hits.hyper. 
+# Reads a SQL query from stdin, runs it once against hits.hyper on the +# PERSISTENT Hyper server started by ./start (descriptor in server.endpoint). # Stdout: query result. # Stderr: query runtime in fractional seconds on the last line. +# +# The benchmark driver calls this once per try (BENCH_TRIES). Because every +# call connects to the SAME long-lived server, the buffer pool stays warm +# across tries: try 1 (right after the driver's stop/drop_caches/start cold +# cycle) is cold, tries 2..N are genuinely hot. See issue #936. set -e # shellcheck disable=SC1091 @@ -17,16 +22,19 @@ cat > "$query_file" python3 - "$query_file" <<'PY' import sys import timeit -from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode +from tableauhyperapi import Connection, Endpoint with open(sys.argv[1]) as f: query = f.read() -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, 'hits.hyper', CreateMode.NONE) as connection: - start = timeit.default_timer() - rows = connection.execute_list_query(query) - end = timeit.default_timer() +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, 'hits.hyper') as connection: + start = timeit.default_timer() + rows = connection.execute_list_query(query) + end = timeit.default_timer() for r in rows: print(r) diff --git a/hyper/start b/hyper/start index 06bd986563..0dddc07e3f 100755 --- a/hyper/start +++ b/hyper/start @@ -1,2 +1,86 @@ #!/bin/bash -exit 0 +# Launch ONE long-lived Hyper server (hyperd) and publish its connection +# descriptor to server.endpoint. Every ./query invocation then connects to +# this single persistent process instead of spawning its own. 
+# +# To keep the buffer pool warm across the SEPARATE processes that each ./query +# spawns, the supervisor also holds a keep-alive connection that keeps +# hits.hyper attached for the server's whole lifetime. Without it, hits.hyper +# would be detached the moment a per-try ./query process exits and its pages +# evicted, so tries 2..N would re-read from a cold pool. (On the very first +# ./start, before ./load has created hits.hyper, there is nothing to attach; +# the keep-alive is established by the next cold-cycle ./start once the file +# exists.) +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# Already running? The pidfile + a live process is authoritative. +if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then + exit 0 +fi + +# Clean up stale artifacts from a previous (possibly crashed) server. +rm -f server.pid server.endpoint + +# Background a supervisor that opens HyperProcess, writes the descriptor, +# attaches hits.hyper (if present) to keep it warm, then blocks until ./stop +# signals it. nohup so it survives this script exiting; $! is the supervisor +# PID we kill in ./stop. +nohup python3 - >server.log 2>&1 <<'PY' & +import os +import signal +import sys +from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode + + +def _terminate(*_): + # Raise SystemExit so the `with HyperProcess` block exits cleanly and + # hyperd is shut down with us (it is terminated when its controlling + # process exits). + sys.exit(0) + + +signal.signal(signal.SIGTERM, _terminate) +signal.signal(signal.SIGINT, _terminate) + +with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: + # Keep-alive: hold hits.hyper attached for the server's lifetime so its + # buffer pool isn't torn down between per-try ./query processes. Skipped + # on the pre-load ./start (file not yet created). 
Established BEFORE + # publishing the endpoint so ./check never sees a "ready" server that is + # actually still mid-attach (or about to die on a bad attach). + keepalive = None + if os.path.exists("hits.hyper"): + keepalive = Connection(hyper.endpoint, "hits.hyper", CreateMode.NONE) + + # Publish the descriptor atomically so ./check and ./query never read a + # half-written file. + with open("server.endpoint.tmp", "w") as f: + f.write(hyper.endpoint.connection_descriptor) + os.replace("server.endpoint.tmp", "server.endpoint") + + try: + # Block until a signal arrives; loop so a stray signal can't tear the + # server down (only the handler's sys.exit does). + while True: + signal.pause() + finally: + if keepalive is not None: + keepalive.close() +PY +echo $! > server.pid + +# Give the supervisor a moment to publish the endpoint. The benchmark driver +# also runs ./check in a loop afterwards, so this is just a fast-path / clean +# error rather than the authoritative readiness gate. +for _ in $(seq 1 60); do + if [ -s server.endpoint ]; then + exit 0 + fi + sleep 1 +done + +echo "hyper: server did not publish server.endpoint within 60s" >&2 +exit 1 diff --git a/hyper/stop b/hyper/stop index 06bd986563..cb2bbedc17 100755 --- a/hyper/stop +++ b/hyper/stop @@ -1,2 +1,24 @@ #!/bin/bash +# Stop the persistent Hyper server started by ./start. SIGTERM the supervisor +# (see ./start); its handler exits the `with HyperProcess` block, which shuts +# down hyperd. Idempotent: a missing/stale pidfile is not an error. +set -e + +if [ -f server.pid ]; then + pid="$(cat server.pid 2>/dev/null || true)" + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + # Wait for the supervisor (and thus hyperd) to actually exit so the + # benchmark driver's drop_caches isn't defeated by pages still pinned + # by a live mmap. + for _ in $(seq 1 60); do + kill -0 "$pid" 2>/dev/null || break + sleep 1 + done + # Still alive after 60s? Force it. 
+ kill -9 "$pid" 2>/dev/null || true + fi +fi + +rm -f server.pid server.endpoint exit 0