SQL (standard) join count by several columns

67 views Asked by At

I have these tables:

CREATE TABLE poster (
    poster_id SERIAL NOT NULL  PRIMARY KEY,
    country VARCHAR ( 50 ) NOT NULL
);

CREATE TABLE batch (
    batch_id SERIAL NOT NULL PRIMARY KEY,
    price VARCHAR ( 50 ) NOT NULL
);

ALTER TABLE poster 
    ADD COLUMN batch_id SERIAL REFERENCES batch (batch_id);

INSERT INTO batch (price) VALUES (10)
INSERT INTO batch (price) VALUES (11)

INSERT INTO poster (country, batch_id) VALUES ('DEU', 1);
INSERT INTO poster (country, batch_id) VALUES ('DEU', 1);
INSERT INTO poster (country, batch_id) VALUES ('FRA', 1);

I want get how many posters I have for a particular country, for the rest of the world and the total, grouped by batch.

This code solves the problem, but I want to know if there's a more performant query.

SELECT 
    batch.batch_id, 
    germany.count AS germany, 
    other.count AS outside, 
    total.count AS total 
FROM
    batch 
LEFT JOIN
    (SELECT 
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     WHERE
         poster.country = 'DEU' 
     GROUP BY
         poster.batch_id) AS germany ON germany.batch_id = batch.batch_id
LEFT JOIN
    (SELECT
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     WHERE
         poster.country <> 'DEU' 
     GROUP BY
         poster.batch_id) AS other ON other.batch_id = batch.batch_id
LEFT JOIN
    (SELECT
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     GROUP BY
         poster.batch_id) AS total ON total.batch_id = batch.batch_id

Also when the batch doesn't have any poster I want to return 0 instead of null.

2

There are 2 answers

2
Mureinik On BEST ANSWER

In modern (>=9.4) PostgreSQL (that you tagged the question with), you can use a filter clause on an aggregate function and use that to apply conditions on the rows effected by the function. With this technique, you can reduce the query to a single left join:

SELECT    b.batch_id, germany, outside, total
FROM      batch b
LEFT JOIN (SELECT   batch_id,
                    COUNT(*) FILTER (WHERE country = 'DEU') AS germany,
                    COUNT(*) FILTER (WHERE country <> 'DEU') AS outside,
                    COUNT(*) AS total
           FROM     poster
           GROUP BY batch_id) p ON b.batch_id = p.batch_id

SQLFiddle demo

If you don't want to use PostgreSQL-specific syntax, you could use case expressions instead:

SELECT    b.batch_id, germany, outside, total
FROM      batch b
LEFT JOIN (SELECT   batch_id,
                    COUNT(CASE country WHEN 'DEU' THEN 1 END) AS germany,
                    COUNT(CASE country WHEN 'DEU' THEN NULL ELSE 1 END) AS outside,
                    COUNT(*) AS total
           FROM     poster
           GROUP BY batch_id) p ON b.batch_id = p.batch_id

SQLFiddle demo

0
Erwin Brandstetter On

Also when the batch doesn't have any poster I want to return 0 instead of null.

You get 0 instead of null with a LATERAL subquery:

SELECT b.batch_id, germany, total - germany AS other, total
FROM   batch b
LEFT   JOIN LATERAL (
   SELECT count(*) FILTER (WHERE country = 'DEU') AS germany
        , count(*) AS total
   FROM   poster p
   WHERE  p.batch_id = b.batch_id
   ) p ON true;

See:

Also, 2 counts are cheaper than 3.

While processing all batches anyway, a plain subquery is cheaper.
Wrap counts in COALESCE() in the outer SELECT list to get0 instead of null:

SELECT b.batch_id
     , COALESCE(germany, 0) AS germany
     , COALESCE(total - germany, 0) AS other
     , COALESCE(total, 0) AS total
FROM   batch b
LEFT   JOIN (
   SELECT batch_id
        , count(*) FILTER (WHERE country = 'DEU') AS germany
--      , count(country = 'DEU' OR null) AS germany  -- alternative
        , count(*) AS total
   FROM   poster
   GROUP  BY batch_id
   ) p USING (batch_id);

fiddle

About various counting techniques: