Self Join - ignacio-alorre/Hive GitHub Wiki
Consist on joining a table with itself. Some use cases:
Checking which warehouse has more than one transportation type
CREATE TABLE IF NOT EXISTS D_DW
(name String, transp String)
INSERT INTO TABLE D_DW VALUES
('DW1', 'train'),
('DW1', 'airport'),
('DW1', 'road'),
('DW2', 'road'),
('DW3', 'train'),
('DW4', 'train'),
('DW4', 'road'),
('DW5', 'airport')
-- Retrieve which warehouse have more than 1 transport
select distinct r.name
from D_DW r, D_DW l
where r.name = l.name
and r.transp <> l.transp
Looking for duplicates in a table
Required an additional column with some kind of ordinal value
CREATE TABLE IF NOT EXISTS D_DW (id int, name String, transp String)
INSERT INTO TABLE D_DW VALUES (1, 'DW1', 'train'), (2, 'DW1', 'airport'), (3, 'DW1', 'road'), (4, 'DW2', 'road'), (5, 'DW3', 'train'), (6, 'DW4', 'train'), (7, 'DW4', 'road'), (8, 'DW5', 'airport')
select distinct r.name from D_DW r, D_DW l where r.name = l.name and r.id < l.id
Sources