I am currently creating a query with a UDF, that returns similar stores from a restaurant table. I want to utilize the UDF so that I can make life easier for myself when I need to reprint the data for new restaurantid's.
I'm running into this issue everytime I expand the query to have 5+ records in the reference list. It works if I have like 2 or 3 records, but no more.
This is the error, neither myslef or ChatGPT can get to the bottom or find a solution; Query error: Correlated subqueries that reference other tables are not supported unless they can be de-correlated, such as by transforming them into an efficient JOIN
The script has 3 stages.
- Create a core reference list (used so the UDF doesnt send back the same IDs & to trigger the UDF in one select)
- The UDF, it takes 4 parameters and tries to find a similar store
- Run the UDF from a select statement for all in the reference list
I have attached the full query I am using for the project below. Any response, suggestions, workarounds would all be greatly appreciated!
DROP TABLE IF EXISTS `trial_stores_stg1`;
CREATE TABLE `trial_stores_stg1` AS (
SELECT '' AS restaurantid, '' AS country, '' AS launch_date, "Test" AS chain
UNION ALL SELECT "1", "CA", "2022-04-10", "Test"
UNION ALL SELECT "2", "CA", "2022-04-10", "Test"
UNION ALL SELECT "3", "UK", "2022-04-10", "Test"
);
-- Create a temporary UDF
CREATE TEMP FUNCTION FindSimilarStore(var_restaurantid STRING, var_country STRING, var_launch_date DATE, var_chain STRING)
RETURNS STRING
AS (
(SELECT AS STRUCT similar.restaurantid AS similar_restaurantid
FROM (
SELECT
dr.restaurantid,
dr.country,
AVG(nr_of_orders) AS avg_orders
FROM
`dummydata.fact_restaurant_daily_snapshot` ds
JOIN `dummydata.dim_restaurant` dr ON dr.restaurantid = ds.restaurantid
WHERE
-- Exclude the sample restaurants
dr.restaurantid NOT IN (var_restaurantid)
AND dr.restaurantid NOT IN (SELECT restaurantid FROM `trial_stores_stg1`)
AND dr.country = var_country
AND dr.chain = var_chain
AND ds.snapshot_dt BETWEEN DATE_SUB(DATE(var_launch_date), INTERVAL 3 MONTH) AND DATE(var_launch_date)
GROUP BY 1, 2
) similar
-- We want similar based on historic daily orders
ORDER BY ABS(similar.avg_orders - (
SELECT AVG(nr_of_orders) FROM `dummydata.fact_restaurant_daily_snapshot` ds
JOIN `dummydata.dim_restaurant` dr ON dr.restaurantid = ds.restaurantid
WHERE dr.restaurantid = var_restaurantid
)) / (
SELECT AVG(nr_of_orders) FROM `dummydata.fact_restaurant_daily_snapshot` ds
JOIN `dummydata.dim_restaurant` dr ON dr.restaurantid = ds.restaurantid
WHERE dr.restaurantid = var_restaurantid
)
LIMIT 1).similar_restaurantid
);
-- Drop the table if it exists
DROP TABLE IF EXISTS `trial_stores`;
-- Create the table
CREATE TABLE `trial_stores` (
restaurantid STRING,
country STRING,
similar_restaurantid STRING
);
--UK
INSERT INTO `trial_stores`
SELECT
restaurantid,
country,
FindSimilarStore(restaurantid, country, CAST(launch_date as DATE), chain) AS similar_restaurantid
FROM
`trial_stores_stg1`
WHERE country = "GB";
--CA
INSERT INTO `trial_stores`
SELECT
restaurantid,
country,
FindSimilarStore(restaurantid, country, CAST(launch_date as DATE), chain) AS similar_restaurantid
FROM
`trial_stores_stg1`
WHERE country = "CA";