upsert - prayagupa/rdb GitHub Wiki

Upsert

bq

sql_to_merge = """
MERGE INTO `pro1.ad_creative` T
USING `pro1.ad_creative_temp` S
ON T.id = S.id
WHEN MATCHED THEN
	UPDATE SET 
    ad_id = S.ad_id,
    ad_type = S.ad_type,
    status = S.status,
    modified_date = S.modified_date
WHEN NOT MATCHED THEN
	INSERT (id, ad_id, name, ad_type, status, created_date, modified_date)
    VALUES(S.id, S.ad_id, S.name, S.ad_type, S.status, S.created_date, S.modified_date);"""

Pg update

On UPDATE, PG rows have infomask, infomask2 and t_ctid, where the latter provides the link between the old and the new version of a row.

INSERT INTO visiting_user(user_name)
SELECT ('upaupaupa')
WHERE NOT EXISTS (SELECT user_name from visiting_user where user_name='upaupaupa');

There is some way for PostgreSQL to find the new version of an updated row. That is why the experiment returned a result row.

-- session1
BEGIN;
UPDATE visiting_user SET user_name = 'new username' WHERE user_id = 1;
COMMIT;
-- one row is returned

--session2
SELECT user_id FROM visiting_user WHERE user_id = 1 FOR UPDATE;
-- hangs

There is no connection between the old, deleted row and the newly inserted one in following exp, that’s why we get no result in this case.

-- 
TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, 'upaupaupa');


-- session1
BEGIN;
 
DELETE FROM visiting_user WHERE user_id = 1;

INSERT INTO visiting_user VALUES (1, 'username 1');
COMMIT;
-- no row is returned


-- session2
SELECT user_id FROM visiting_user WHERE user_name = 'username 1' FOR UPDATE;  
-- hangs

update pageinspect


TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, '42');

CREATE EXTENSION  pageinspect;

SELECT lp,
       t_xmin AS xmin,
       t_xmax AS xmax,
       t_ctid,
       to_hex(t_infomask2) AS infomask2,
       to_hex(t_infomask) AS infomask,
       t_attrs       
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
 lp | xmin | xmax | t_ctid | infomask2 | infomask |                             t_attrs                             
----+------+------+--------+-----------+----------+-----------------------------------------------------------------
  1 |  750 |    0 | (0,1)  | 3         | 902      | {"\\x01000000","\\x15757061757061757061","\\x9d0e1bc952930200"}


--
-- 
UPDATE visiting_user SET user_name = '2' WHERE user_id = 1;

SELECT lp,
       t_xmin AS xmin,
       t_xmax AS xmax,
       t_ctid,
       to_hex(t_infomask2) AS infomask2,
       to_hex(t_infomask) AS infomask,
       t_attrs       
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
 lp | xmin | xmax | t_ctid | infomask2 | infomask |                      t_attrs                      
----+------+------+--------+-----------+----------+---------------------------------------------------
  1 |  753 |  754 | (0,2)  | 4003      | 102      | {"\\x01000000","\\x073432","\\x73299fef52930200"}
  2 |  754 |    0 | (0,2)  | 8003      | 2802     | {"\\x01000000","\\x0532","\\x73299fef52930200"}
(2 rows)

The first entry is the old version of the row, the second the new version.

  • lp is the line pointer number, which stands for the number of the tuple within the data page.
  • t_ctid contains the tuple identifier of the updated version of the row.

delete pageinspect

TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, '42');
 
-- inspect
 lp | xmin | xmax | t_ctid | infomask2 | infomask |                      t_attrs                      
----+------+------+--------+-----------+----------+---------------------------------------------------
  1 |  756 |    0 | (0,1)  | 3         | 802      | {"\\x01000000","\\x073432","\\x3a61041253930200"}

-- 
--
BEGIN;
DELETE FROM visiting_user WHERE user_id = 1;
INSERT INTO visiting_user VALUES (2, '42');
COMMIT;
 
SELECT lp,
       t_xmin AS xmin,
       t_xmax AS xmax,
       t_ctid,
       to_hex(t_infomask2) AS infomask2,
       to_hex(t_infomask) AS infomask,
       t_attrs       
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
 lp | xmin | xmax | t_ctid | infomask2 | infomask |                      t_attrs                      
----+------+------+--------+-----------+----------+---------------------------------------------------
  1 |  756 |  757 | (0,1)  | 2003      | 102      | {"\\x01000000","\\x073432","\\x3a61041253930200"}
  2 |  757 |    0 | (0,2)  | 3         | 802      | {"\\x02000000","\\x073432","\\x1bbd6c1353930200"}
(2 rows)
  • t_ctid column from the old, deleted tuple is unchanged and there is no link to the new tuple.

  • The second tuple is not found by the SELECT ... FOR UPDATE, since it is “invisible” to the “snapshot” used for scanning the table.

  • infomask: HEAP_XMIN_COMMITTED (x01000000) means that the tuple was valid before it was removed

  • infomask2: 2 is the number of columns, and HEAP_KEYS_UPDATED (x02000000) means that the tuple is deleted or updated