Self Join - ignacio-alorre/Hive GitHub Wiki

Consist on joining a table with itself. Some use cases:

Checking which warehouse has more than one transportation type

CREATE TABLE IF NOT EXISTS D_DW 
(name String, transp String)

INSERT INTO TABLE D_DW VALUES 
('DW1', 'train'), 
('DW1', 'airport'),
('DW1', 'road'),
('DW2', 'road'),
('DW3', 'train'),
('DW4', 'train'),
('DW4', 'road'),
('DW5', 'airport')

-- Retrieve which warehouse have more than 1 transport

select distinct r.name 
from D_DW r, D_DW l
where r.name = l.name
and r.transp <> l.transp

Looking for duplicates in a table

Required an additional column with some kind of ordinal value

CREATE TABLE IF NOT EXISTS D_DW (id int, name String, transp String)

INSERT INTO TABLE D_DW VALUES (1, 'DW1', 'train'), (2, 'DW1', 'airport'), (3, 'DW1', 'road'), (4, 'DW2', 'road'), (5, 'DW3', 'train'), (6, 'DW4', 'train'), (7, 'DW4', 'road'), (8, 'DW5', 'airport')

select distinct r.name from D_DW r, D_DW l where r.name = l.name and r.id < l.id

Sources