CREATE TABLE sku_usage (
-- create a regular table
time timestamptz NOT NULL,
device_id uuid NOT NULL,
loyalty_program_id text NOT NULL,
sku text NOT NULL,
quantity double precision NOT NULL,
transaction_id text NOT NULL,
PRIMARY KEY (time, device_id, sku, transaction_id)
);
SELECT
create_hypertable ('sku_usage', 'time');
CREATE INDEX ix_device_sku_time ON sku_usage (time, device_id, sku_id DESC);
CREATE MATERIALIZED VIEW sku_usage_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
device_id,
sku,
SUM(quantity)
FROM sku_usage
GROUP BY bucket, device_id, sku
WITH NO DATA;
SELECT add_continuous_aggregate_policy('sku_usage_hourly',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour');
I then threw in 20 million rows
When I run this query
(
SELECT
time_bucket_gapfill ('1 hour', bucket) AS time_bucket,
device_id,
sku,
coalesce(max(sum), 0) AS usage
FROM
sku_usage_hourly
WHERE
bucket < now() - INTERVAL '100 days'
AND bucket > now() - INTERVAL '110 days'
AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
AND sku IN ('10-49-0920')
GROUP BY
time_bucket,
device_id,
sku
);
it always takes about 6-6.5 seconds to run (0.5 CPU, 2 GB RAM). I am shocked it is so slow. It is only producing about 2200 rows.
If I do
select count(*) from sku_usage where
time < now() - INTERVAL '100 days'
AND time > now() - INTERVAL '110 days'
AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
AND sku IN ('10-49-0920')
It returns about 1000 records in 400ms. If I remove the device and sku conditions, there is only ~3500000
Why is my time_bucket_gapfill so slow? How can I improve it?
My understanding is that the materialized view should have the index (bucket, device_id, sku). Is it not being used properly? Or is the time_bucket_gapfill doing something weird?
Thanks
EDIT: changing my query to just
SELECT
device_id,
FROM
sku_usage_hourly
WHERE
bucket < now() - INTERVAL '100 days'
AND bucket > now() - INTERVAL '110 days'
AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
AND sku IN ('10-49-0920')
still takes 6 seconds. so it isn't the time_bucket_gapfill.
Can you share what you have in the query planner?
Have you tried to change the
AND device_id IN (...)toAND device_id =?Same for the other param that uses
IN.