ClickHouse Current - ad4game/public GitHub Wiki

CREATE DATABASE IF NOT EXISTS BidRequests;

CREATE TABLE IF NOT EXISTS BidRequests.MediaPlanning
(
    event_date  Date,
    ssp         String,
    siteAppName String,
    country     FixedString(3),
    requestType Enum8 (
        'banner' = 1,
        'exadsBanner' = 2,
        'exadsPop' = 3,
        'native' = 4,
        'popunder' = 5),
    conType     UInt8,
    deviceOs    String,
    deviceType  UInt8,
    carrier     String,
    siteAppId   String,
    isApp       UInt8,
    browser     String,
    language    FixedString(2),
    iab         String,
    size        String,
    minCPM      AggregateFunction(avg, Float32),
    MTOTAL      UInt64,
    lastUpdated AggregateFunction(max, DateTime)
)
    engine = SummingMergeTree()
        PARTITION BY toYYYYMMDD(event_date)
        ORDER BY (event_date, country, requestType, conType, isApp, deviceOs, deviceType,
                  carrier, browser, size, ssp, siteAppName, siteAppId, language, iab);


CREATE TABLE IF NOT EXISTS BidRequests.statsByDay
(
    event_date  Date,
    server      String,
    ssp         String,
    visits      UInt64,
    lastUpdated AggregateFunction(max, DateTime)
)
    engine = SummingMergeTree()
        PARTITION BY toYYYYMM(event_date)
        ORDER BY (event_date, server, ssp);


CREATE DATABASE if not exists DMP;


CREATE TABLE IF NOT EXISTS DMP.users
(
    country     FixedString(3),
    userId      String,
    conType     UInt8,
    carrier     String,
    gender      AggregateFunction(max, FixedString(1)),
    yob         AggregateFunction(max, UInt16),
    visits      UInt64,
    created     AggregateFunction(min, datetime),
    lastUpdated AggregateFunction(max, datetime)
) ENGINE = SummingMergeTree()
      PARTITION BY substring(userId, 1, 3)
      ORDER BY (country, userId, conType, carrier);


CREATE TABLE IF NOT EXISTS DMP.interests
(
    country     FixedString(3),
    iab         String,
    userId      String,
    visits      UInt64,
    lastUpdated AggregateFunction(max, datetime)
) ENGINE = SummingMergeTree()
      PARTITION BY substring(userId, 1, 3)
      ORDER BY (country, iab, userId);


-- Incoming Kafka Queues

CREATE TABLE IF NOT EXISTS BidRequests.usQueue
(
    id             String,
    event_datetime DateTime,
    ssp            String,
    siteAppName    String,
    country        String,
    requestType    String,
    conType        UInt8,
    deviceOs       String,
    deviceType     UInt8,
    carrier        String,
    siteAppId      String,
    isApp          UInt8,
    server         String,
    userId         String,
    lat            Float32,
    long           Float32,
    browser        String,
    language       String,
    iab            String,
    size           String,
    minCPM         Float32,
    gender         String,
    yob            UInt16,
    url            String
)
    engine = Kafka
        SETTINGS kafka_broker_list = '192.168.6.27:9092,192.168.6.28:9092,192.168.6.29:9092',
            kafka_topic_list = 'dsp-BidRequests',
            kafka_group_name = 'clickhouse.a4g.com-BidRequests',
            kafka_skip_broken_messages = 100000,
            kafka_format = 'TSV',
            kafka_row_delimiter = '\n',
            kafka_num_consumers = 11,
            kafka_max_block_size = 131072;

CREATE TABLE IF NOT EXISTS BidRequests.euQueue
(
    id             String,
    event_datetime DateTime,
    ssp            String,
    siteAppName    String,
    country        String,
    requestType    String,
    conType        UInt8,
    deviceOs       String,
    deviceType     UInt8,
    carrier        String,
    siteAppId      String,
    isApp          UInt8,
    server         String,
    userId         String,
    lat            Float32,
    long           Float32,
    browser        String,
    language       String,
    iab            String,
    size           String,
    minCPM         Float32,
    gender         String,
    yob            UInt16,
    url            String
)
    engine = Kafka
        SETTINGS kafka_broker_list = '195.181.167.158:9092,195.181.167.142:9092,195.181.167.140:9092',
            kafka_topic_list = 'dsp-BidRequests',
            kafka_group_name = 'clickhouse.a4g.com-BidRequests',
            kafka_skip_broken_messages = 10000,
            kafka_format = 'TSV',
            kafka_row_delimiter = '\n',
            kafka_num_consumers = 3
;


-- Materialized Views : Processors


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.usTraffic
    (id String,
     event_datetime DateTime,
     ssp String,
     siteAppName String,
     country FixedString(3),
     requestType Enum8 (
         'banner' = 1,
         'exadsBanner' = 2,
         'exadsPop' = 3,
         'native' = 4,
         'popunder' = 5),
     conType UInt8,
     deviceOs String,
     deviceType UInt8,
     carrier String,
     siteAppId String,
     isApp UInt8,
     server String,
     userId String,
     browser String,
     language FixedString(2),
     iab String,
     size String,
     minCPM Float32,
     gender FixedString(1),
     yob UInt16,
     url String)
    ENGINE = MergeTree()
        PARTITION BY toYYYYMMDD(event_datetime)
        ORDER BY (event_datetime,
                  ssp,
                  siteAppName,
                  country,
                  requestType,
                  conType,
                  deviceOs,
                  deviceType,
                  carrier,
                  siteAppId,
                  server,
                  userId,
                  browser,
                  language,
                  iab,
                  size,
                  minCPM,
                  gender,
                  yob,
                  url)
AS
SELECT DISTINCT id,
                event_datetime,
                ssp,
                siteAppName,
                toFixedString(extract(upper(country), '\\w\\w\\w'), 3) as country,
                cast(requestType as Enum8 (
                    'banner' = 1,
                    'exadsBanner' = 2,
                    'exadsPop' = 3,
                    'native' = 4,
                    'popunder' = 5))                                  as requestType,
                conType,
                deviceOs,
                deviceType,
                carrier,
                siteAppId,
                isApp,
                server,
                extract(userId, '\\w{3,}:.{8,}')                      as userId,
                browser,
                toFixedString(extract(lower(language), '\\w\\w'), 2)  as language,
                iab,
                size,
                minCPM,
                toFixedString(extract(gender, 'M|F'), 1)              as gender,
                yob,
                url
FROM BidRequests.usQueue;


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.euTraffic
    (id String,
     event_datetime DateTime,
     ssp String,
     siteAppName String,
     country FixedString(3),
     requestType Enum8 (
         'banner' = 1,
         'exadsBanner' = 2,
         'exadsPop' = 3,
         'native' = 4,
         'popunder' = 5),
     conType UInt8,
     deviceOs String,
     deviceType UInt8,
     carrier String,
     siteAppId String,
     isApp UInt8,
     server String,
     userId String,
     browser String,
     language FixedString(2),
     iab String,
     size String,
     minCPM Float32,
     gender FixedString(1),
     yob UInt16,
     url String)
    ENGINE = MergeTree()
        PARTITION BY toYYYYMMDD(event_datetime)
        ORDER BY (event_datetime,
                  ssp,
                  siteAppName,
                  country,
                  requestType,
                  conType,
                  deviceOs,
                  deviceType,
                  carrier,
                  siteAppId,
                  server,
                  userId,
                  browser,
                  language,
                  iab,
                  size,
                  minCPM,
                  gender,
                  yob,
                  url)
AS
SELECT DISTINCT id,
                event_datetime,
                ssp,
                siteAppName,
                toFixedString(extract(upper(country), '\\w\\w\\w'), 3) as country,
                cast(requestType as Enum8 (
                    'banner' = 1,
                    'exadsBanner' = 2,
                    'exadsPop' = 3,
                    'native' = 4,
                    'popunder' = 5))                                  as requestType,
                conType,
                deviceOs,
                deviceType,
                carrier,
                siteAppId,
                isApp,
                server,
                extract(userId, '\\w{3,}:.{8,}')                      as userId,
                browser,
                toFixedString(extract(lower(language), '\\w\\w'), 2)  as language,
                iab,
                size,
                minCPM,
                toFixedString(extract(gender, 'M|F'), 1)              as gender,
                yob,
                url
FROM BidRequests.euQueue;


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.usMediaPlanning
    TO BidRequests.MediaPlanning
AS
SELECT toDate(event_datetime)   AS event_date,
       ssp,
       siteAppName,
       country,
       requestType,
       conType,
       deviceOs,
       deviceType,
       carrier,
       siteAppId,
       isApp,
       browser,
       language,
       iab,
       size,
       avgState(minCPM)         AS minCPM,
       count()                  AS MTOTAL,
       maxState(event_datetime) AS lastUpdated
FROM BidRequests.usTraffic
GROUP BY event_date, ssp, siteAppName, country, requestType, conType, deviceOs, deviceType, carrier, siteAppId, isApp,
         browser, language, size, iab;


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.euMediaPlanning
    TO BidRequests.MediaPlanning
AS
SELECT toDate(event_datetime)   AS event_date,
       ssp,
       siteAppName,
       country,
       requestType,
       conType,
       deviceOs,
       deviceType,
       carrier,
       siteAppId,
       isApp,
       browser,
       language,
       iab,
       size,
       avgState(minCPM)         AS minCPM,
       count()                  AS MTOTAL,
       maxState(event_datetime) AS lastUpdated
FROM BidRequests.euTraffic
GROUP BY event_date, ssp, siteAppName, country, requestType, conType, deviceOs, deviceType, carrier, siteAppId, isApp,
         browser, language, size, iab;


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.usStats
    TO BidRequests.statsByDay
AS
SELECT toDate(event_datetime)   AS event_date,
       server,
       ssp,
       count()                  AS visits,
       maxState(event_datetime) AS lastUpdated
FROM BidRequests.usTraffic
GROUP BY event_date, server, ssp;


CREATE MATERIALIZED VIEW IF NOT EXISTS BidRequests.euStats
    TO BidRequests.statsByDay
AS
SELECT toDate(event_datetime)   AS event_date,
       server,
       ssp,
       count()                  AS visits,
       maxState(event_datetime) AS lastUpdated
FROM BidRequests.euTraffic
GROUP BY event_date, server, ssp;


CREATE MATERIALIZED VIEW IF NOT EXISTS DMP.usUsers
    TO DMP.users
AS
SELECT country,
       userId,
       conType,
       carrier,
       maxState(gender)         as gender,
       maxState(yob)            as yob,
       count()                  as visits,
       minState(event_datetime) as created,
       maxState(event_datetime) as lastUpdated
FROM BidRequests.usTraffic
WHERE requestType != 'popunder'
  AND notEmpty(country)
  AND notEmpty(userId)
GROUP BY country, userId, conType, carrier;

CREATE MATERIALIZED VIEW IF NOT EXISTS DMP.euUsers
    TO DMP.users
AS
SELECT country,
       userId,
       conType,
       carrier,
       maxState(gender)         as gender,
       maxState(yob)            as yob,
       count()                  as visits,
       minState(event_datetime) as created,
       maxState(event_datetime) as lastUpdated
FROM BidRequests.euTraffic
WHERE requestType != 'popunder'
  AND notEmpty(country)
  AND notEmpty(userId)
GROUP BY country, userId, conType, carrier;


CREATE MATERIALIZED VIEW IF NOT EXISTS DMP.usInterests
    TO DMP.interests
AS
SELECT country,
       arrayJoin(splitByChar(',', iab)) as iab,
       userId,
       count()                          as visits,
       maxState(event_datetime)         as lastUpdated
FROM BidRequests.usTraffic
WHERE notEmpty(iab)
  AND notEmpty(userId)
  and notEmpty(country)
GROUP BY country, iab, userId
HAVING iab like 'IAB_%';


CREATE MATERIALIZED VIEW IF NOT EXISTS DMP.euInterests
    TO DMP.interests
AS
SELECT country,
       arrayJoin(splitByChar(',', iab)) as iab,
       userId,
       count()                          as visits,
       maxState(event_datetime)         as lastUpdated
FROM BidRequests.euTraffic
WHERE notEmpty(iab)
  AND notEmpty(userId)
  and notEmpty(country)
GROUP BY country, iab, userId
HAVING iab like 'IAB_%';