ETL Device Monitoring
==
:::info
this work still in progress (WIP)
:::
## Table
dd_device_gobiz_mobile_kong_60d_dinata
## Notes
- `bi-gojek.stream.fr_beast_kong_log_proto_01` does not contain device id for `goresto_service`. Therefore it can't get device-level data
- This will be used to add universe GoResto device if Clevertap and Kong could not cover it
## To Do
- [ ] add merchant device which does not have merchant_id using gobiz user data which has been filtered (only paired to 1 merchant or non admin)
## Design
## Implementation
### SQL Script
```flow
WITH _DECLARE AS (
SELECT
'2020
DATE_SUB(end_date, INTERVAL 65 day) AS start_date,
end_date,
DATE_SUB(end_date, INTERVAL 2 day) AS max_partition_date
FROM (
SELECT CURRENT_DATE("Asia/Jakarta") AS end_date
)
),
_GOBIZ_USER AS (
SELECT
DISTINCT gobiz_user_id,
phone_normalized AS phone,
merchant_id,
one_to_one_flag
FROM
`bi-gojek.spi_reference_gomerchant.dd_device_gobiz_user`
),
-- _GOBIZ_USER AS (
-- SELECT
-- id AS gobiz_user_id,
-- `bi-gojek.spi_playground_gomerchant.function_dinata_normalizePhone`(phone) AS phone,
-- merchant_id
-- FROM
-- `bi-gojek.spi_reference_gomerchant.dd_gojek_hermes_user`
-- ),
-- Select only GoBiz users that have 1 to 1 relationship with outlet.
_GOBIZ_USER_FILTERED AS (
WITH _BASE AS (
SELECT
id AS gobiz_user_id,
merchant_id,
`bi-gojek.spi_playground_gomerchant.function_dinata_normalizePhone`(phone) AS phone
FROM
`bi-gojek.spi_reference_gomerchant.dd_gojek_hermes_user`
WHERE
approved IS TRUE AND phone IS NOT NULL
),
_EXCLUDE AS (
SELECT
phone,
COUNT(DISTINCT merchant_id) AS total_outlets,
COUNT(DISTINCT gobiz_user_id) AS total_user_id
FROM
_BASE
WHERE
phone IS NOT NULL
GROUP BY
1
)
SELECT
*
FROM
_BASE
WHERE
phone NOT IN (SELECT DISTINCT phone FROM _EXCLUDE)
),
_CMP AS (
SELECT
merchant_id,
goresto_restaurant_id
FROM
`bi-gojek.spi_reference_midtrans.dd_gojek_saudagar_merchant`, _DECLARE
WHERE
_PARTITIONDATE = DATE_SUB(_DECLARE.end_date, INTERVAL 1 day)
),
_RAW_KONG AS (
WITH _RAW_KONG AS (
SELECT
time,
api_request_uri,
gobiz_user_id,
app_version,
LOWER(device_os_split[offset(0)]) AS os_type,
CASE
WHEN device_os_split[offset(1)] IS NULL THEN NULL
WHEN device_os_split[offset(1)] = '1' THEN '1.0'
WHEN device_os_split[offset(1)] = '2' THEN '1.1'
WHEN device_os_split[offset(1)] = '3' THEN '1.5'
WHEN device_os_split[offset(1)] = '4' THEN '1.6'
WHEN device_os_split[offset(1)] = '5' THEN '2.0'
WHEN device_os_split[offset(1)] = '8' THEN '2.2'
WHEN device_os_split[offset(1)] = '9' THEN '2.3'
WHEN device_os_split[offset(1)] = '11' THEN '3.0'
WHEN device_os_split[offset(1)] = '14' THEN '4.0'
WHEN device_os_split[offset(1)] = '16' THEN '4.1'
WHEN device_os_split[offset(1)] = '19' THEN '4.4'
WHEN device_os_split[offset(1)] = '21' THEN '5.0'
WHEN device_os_split[offset(1)] = '23' THEN '6.0'
WHEN device_os_split[offset(1)] = '24' THEN '7.0'
WHEN device_os_split[offset(1)] = '26' THEN '8.0'
WHEN device_os_split[offset(1)] = '28' THEN '9.0'
WHEN device_os_split[offset(1)] = '29' THEN '10.0'
WHEN device_os_split[offset(1)] = '30' THEN '11.0'
ELSE '9999.0'
END AS os_version
FROM (
SELECT
time,
api_request_uri,
user_owner_id AS gobiz_user_id,
app_version,
SPLIT(device_os, ' ') AS device_os_split
FROM
`bi-gojek.staging_gomerchant.fd_kong_log_goresto_service` _KONG, _DECLARE
WHERE
time >= TIMESTAMP(_DECLARE.start_date)
)
),
_GOFOOD_ORDER AS (
SELECT
order_no,
_CMP.merchant_id AS merchant_id
FROM
`bi-gojek.access.sd_gofood_booking` _GF_TRX, _DECLARE
LEFT JOIN `bi-gojek.reference.dd_merchant` _GF_MERCHANT ON _GF_TRX.merchant_id = _GF_MERCHANT.id
LEFT JOIN _CMP ON _GF_MERCHANT.uuid = _CMP.goresto_restaurant_id
WHERE
_PARTITIONTIME >= TIMESTAMP(_DECLARE.start_date)
),
_RAW_KONG_URI_EXTRACTED AS (
SELECT
time,
`bi-gojek.spi_playground_gomerchant.function_dinata_extractKongOrderID`(api_request_uri) AS order_no,
`bi-gojek.spi_playground_gomerchant.function_dinata_extractKongRestoID`(api_request_uri) AS resto_id,
gobiz_user_id,
app_version,
os_type,
os_version
FROM
_RAW_KONG, _DECLARE
WHERE
time >= TIMESTAMP(_DECLARE.start_date)
)
SELECT
_RAW_KONG_URI_EXTRACTED.time,
COALESCE(_GOFOOD_ORDER.merchant_id, _CMP.merchant_id, _GOBIZ_USER.merchant_id) AS merchant_id,
_RAW_KONG_URI_EXTRACTED.order_no,
_RAW_KONG_URI_EXTRACTED.resto_id AS goresto_restaurant_id,
_RAW_KONG_URI_EXTRACTED.gobiz_user_id,
app_version,
os_type,
os_version
FROM
_RAW_KONG_URI_EXTRACTED
LEFT JOIN _GOFOOD_ORDER ON _RAW_KONG_URI_EXTRACTED.order_no = _GOFOOD_ORDER.order_no
LEFT JOIN _CMP ON _RAW_KONG_URI_EXTRACTED.resto_id = _CMP.goresto_restaurant_id
--LEFT JOIN _GOBIZ_USER ON _RAW_KONG_URI_EXTRACTED.gobiz_user_id = CAST(_GOBIZ_USER.gobiz_user_id AS STRING)
LEFT JOIN _GOBIZ_USER AS _GOBIZ_USER_ONE_TO_ONE ON
_RAW_KONG_URI_EXTRACTED.gobiz_user_id = CAST(_GOBIZ_USER_ONE_TO_ONE.gobiz_user_id AS STRING)
AND _GOBIZ_USER_ONE_TO_ONE.one_to_one_flag IS TRUE
LEFT JOIN _GOBIZ_USER ON
_RAW_KONG_URI_EXTRACTED.gobiz_user_id = CAST(_GOBIZ_USER.gobiz_user_id AS STRING)
WHERE
_GOFOOD_ORDER.order_no IS NOT NULL
),
_BASE_KONG AS (
SELECT
NULL AS device_brand_model_std,
merchant_id,
gobiz_user_id,
app_version,
os_type,
os_version,
-- MIN(time) AS min_updated_at,
MAX(time) AS max_updated_at,
COUNT(*) AS goresto_total_event,
SUM(CASE WHEN DATE(time,"Asia/Jakarta") >= DATE_SUB(CURRENT_DATE("Asia/Jakarta"), INTERVAL 7 day) THEN 1 ELSE 0 END) AS goresto_total_event_7days,
SUM(CASE WHEN DATE(time,"Asia/Jakarta") >= DATE_SUB(CURRENT_DATE("Asia/Jakarta"), INTERVAL 14 day) THEN 1 ELSE 0 END) AS goresto_total_event_14days,
SUM(CASE WHEN DATE(time,"Asia/Jakarta") >= DATE_SUB(CURRENT_DATE("Asia/Jakarta"), INTERVAL 30 day) THEN 1 ELSE 0 END) AS goresto_total_event_30days,
SUM(CASE WHEN DATE(time,"Asia/Jakarta") >= DATE_SUB(CURRENT_DATE("Asia/Jakarta"), INTERVAL 60 day) THEN 1 ELSE 0 END) AS goresto_total_event_60days
FROM
_RAW_KONG, _DECLARE
WHERE
time >= TIMESTAMP(_DECLARE.start_date)
GROUP BY
1,2,3,4,5,6
),
_SUMMARY_KONG AS (
SELECT
UPPER(CONCAT(COALESCE(_BASE_KONG.merchant_id, CAST(_BASE_KONG.gobiz_user_id AS STRING)), '.', IFNULL(CAST(device_brand_model_std AS STRING), 'brand_model_null'))) AS device_kong_id,
device_brand_model_std,
merchant_id,
gobiz_user_id,
app_version,
os_type,
os_version,
-- MIN(min_updated_at) AS min_updated_at,
MAX(max_updated_at) AS max_updated_at,
SUM(goresto_total_event) AS goresto_total_event,
SUM(goresto_total_event_7days) AS goresto_total_event_7days,
SUM(goresto_total_event_14days) AS goresto_total_event_14days,
SUM(goresto_total_event_30days) AS goresto_total_event_30days,
SUM(goresto_total_event_60days) AS goresto_total_event_60days
FROM
_BASE_KONG
GROUP BY
1,2,3,4,5,6,7
),
----------------------------------------------------------------------------------------
-- B. GET MAX DEVICE APP VERSION
-- This will calculate each merchant product event.
----------------------------------------------------------------------------------------
_MAX_VERSION_DEV_MID_TIMESTAMP AS (
WITH _APP_VERSION AS (
SELECT
device_kong_id,
merchant_id,
app_version,
max_updated_at
FROM
_SUMMARY_KONG
),
_MAX_TS AS (
SELECT
device_kong_id,
merchant_id,
MAX(max_updated_at) AS max_updated_at
FROM
_APP_VERSION
GROUP BY
1,2
),
_APP_VERSION_MAX AS (
SELECT
DISTINCT _APP_VERSION.device_kong_id,
_APP_VERSION.merchant_id,
_APP_VERSION.app_version,
_APP_VERSION.max_updated_at
FROM
_APP_VERSION
LEFT JOIN _MAX_TS ON
_MAX_TS.device_kong_id = _APP_VERSION.device_kong_id
AND _MAX_TS.max_updated_at = _APP_VERSION.max_updated_at
WHERE
_MAX_TS.device_kong_id IS NOT NULL
AND _MAX_TS.max_updated_at IS NOT NULL
)
SELECT DISTINCT
device_kong_id,
merchant_id,
app_version,
CASE WHEN ARRAY_LENGTH(app_version_split) > 0 THEN CAST(app_version_split[OFFSET(0)] AS INT64) ELSE NULL
END AS app_version_major,
CASE WHEN ARRAY_LENGTH(app_version_split) > 1 THEN CAST(app_version_split[OFFSET(1)] AS INT64) ELSE 0
END AS app_version_minor,
CASE WHEN ARRAY_LENGTH(app_version_split) > 2 THEN CAST(app_version_split[OFFSET(2)] AS INT64) ELSE 0
END AS app_version_patch,
max_updated_at
FROM (
SELECT
device_kong_id,
merchant_id,
app_version,
max_updated_at,
SPLIT(app_version, '.') AS app_version_split
FROM
_APP_VERSION_MAX
) _TA
),
_SUMMARY_CT_UNION_MAX_VERSION_MAJOR AS (
SELECT
device_kong_id,
merchant_id,
max_updated_at,
app_version,
app_version_major,
app_version_minor,
app_version_patch,
MAX(app_version_major) OVER (PARTITION BY device_kong_id) AS max_major_device,
MAX(app_version_major) OVER (PARTITION BY device_kong_id, merchant_id) AS max_major_device_outlet
FROM
_MAX_VERSION_DEV_MID_TIMESTAMP
),
_SUMMARY_CT_UNION_MAX_VERSION_MINOR AS (
SELECT
_TB.*,
MAX(CASE WHEN _TB.max_major_device = _TB.app_version_major THEN _TB.app_version_minor ELSE NULL END) OVER (PARTITION BY _TB.device_kong_id) AS max_minor_device,
MAX(CASE WHEN _TB.max_major_device_outlet = _TB.app_version_major THEN _TB.app_version_minor ELSE NULL END) OVER (PARTITION BY _TB.device_kong_id, _TB.merchant_id) AS max_minor_device_outlet
FROM (
SELECT
_TA.*
FROM
_SUMMARY_CT_UNION_MAX_VERSION_MAJOR _TA
) _TB
),
_SUMMARY_CT_UNION_MAX_VERSION_PATCH AS (
SELECT
_TB.*,
MAX(
CASE
WHEN _TB.max_major_device = _TB.app_version_major AND _TB.max_minor_device = _TB.app_version_minor THEN _TB.app_version_patch
ELSE NULL
END
) OVER (PARTITION BY _TB.device_kong_id) AS max_patch_device,
MAX(
CASE
WHEN _TB.max_major_device_outlet = _TB.app_version_major AND _TB.max_minor_device_outlet = _TB.app_version_minor THEN _TB.app_version_patch
ELSE NULL
END
) OVER (PARTITION BY _TB.device_kong_id, _TB.merchant_id) AS max_patch_device_outlet
FROM (
SELECT
_TA.*
FROM
_SUMMARY_CT_UNION_MAX_VERSION_MINOR _TA
) _TB
),
_SUMMARY_CT_UNION_MAX_VERSION AS (
SELECT DISTINCT
_TA.*,
CONCAT(_TA.max_major_device, '.', _TA.max_minor_device, '.', _TA.max_patch_device) AS max_app_version_device,
CONCAT(_TA.max_major_device_outlet, '.', _TA.max_minor_device_outlet, '.', _TA.max_patch_device_outlet) AS max_app_version_device_outlet
FROM
_SUMMARY_CT_UNION_MAX_VERSION_PATCH _TA
),
_MAX_VERSION_DEVICE AS (
SELECT DISTINCT
device_kong_id,
merchant_id,
max_updated_at AS max_updated_at_app_version,
max_app_version_device,
max_app_version_device_outlet,
max_major_device AS max_app_version_device_major,
max_minor_device AS max_app_version_device_minor,
max_patch_device AS max_app_version_device_patch,
max_major_device_outlet AS max_app_version_device_outlet_major,
max_minor_device_outlet AS max_app_version_device_outlet_minor,
max_patch_device_outlet AS max_app_version_device_outlet_patch
FROM
_SUMMARY_CT_UNION_MAX_VERSION
WHERE
max_app_version_device_outlet = app_version
),
----------------------------------------------------------------------------------------
-- C. GET MAX DEVICE OS VERSION
----------------------------------------------------------------------------------------
_MAX_OS_VERSION_DEV_MID_TIMESTAMP AS (
WITH _OS_VERSION AS (
SELECT
device_kong_id,
merchant_id,
os_type,
os_version,
max_updated_at
FROM
_SUMMARY_KONG
),
_MAX_TS AS (
SELECT
device_kong_id,
merchant_id,
MAX(max_updated_at) AS max_updated_at
FROM
_OS_VERSION
GROUP BY
1,2
),
_OS_VERSION_MAX AS (
SELECT
DISTINCT _OS_VERSION.device_kong_id,
_OS_VERSION.merchant_id,
_OS_VERSION.os_type,
_OS_VERSION.os_version,
_OS_VERSION.max_updated_at
FROM
_OS_VERSION
LEFT JOIN _MAX_TS ON
_MAX_TS.device_kong_id = _OS_VERSION.device_kong_id
AND _MAX_TS.max_updated_at = _OS_VERSION.max_updated_at
WHERE
_MAX_TS.device_kong_id IS NOT NULL
AND _MAX_TS.max_updated_at IS NOT NULL
)
SELECT DISTINCT
device_kong_id,
merchant_id,
os_type,
-- This is to standardized into x.x the os_version since some live tracker use x.x.x.
-- It will affect with the final logic 'max_os_version_device_outlet = os_version'.
CONCAT(os_version_major, '.', os_version_minor) AS os_version,
os_version_major,
os_version_minor,
max_updated_at
FROM (
SELECT DISTINCT
device_kong_id,
merchant_id,
os_type,
os_version,
CASE WHEN ARRAY_LENGTH(os_version_split) > 0 THEN CAST(os_version_split[OFFSET(0)] AS INT64) ELSE NULL
END AS os_version_major,
CASE WHEN ARRAY_LENGTH(os_version_split) > 1 THEN CAST(os_version_split[OFFSET(1)] AS INT64) ELSE 0
END AS os_version_minor,
max_updated_at
FROM (
SELECT
device_kong_id,
merchant_id,
os_type,
os_version,
max_updated_at,
SPLIT(os_version, '.') AS os_version_split
FROM
_OS_VERSION_MAX
) _TA
) _TB
),
_SUMMARY_KONG_UNION_MAX_OS_VERSION_MAJOR AS (
SELECT
device_kong_id,
merchant_id,
max_updated_at,
os_type,
os_version,
os_version_major,
os_version_minor,
MAX(os_version_major) OVER (PARTITION BY device_kong_id) AS max_major_device,
MAX(os_version_major) OVER (PARTITION BY device_kong_id, merchant_id) AS max_major_device_outlet
FROM
_MAX_OS_VERSION_DEV_MID_TIMESTAMP
),
_SUMMARY_KONG_UNION_MAX_OS_VERSION_MINOR AS (
SELECT
_TB.*,
MAX(CASE WHEN _TB.max_major_device = _TB.os_version_major THEN _TB.os_version_minor ELSE NULL END) OVER (PARTITION BY _TB.device_kong_id) AS max_minor_device,
MAX(CASE WHEN _TB.max_major_device_outlet = _TB.os_version_major THEN _TB.os_version_minor ELSE NULL END) OVER (PARTITION BY _TB.device_kong_id, _TB.merchant_id) AS max_minor_device_outlet
FROM (
SELECT
_TA.*
FROM
_SUMMARY_KONG_UNION_MAX_OS_VERSION_MAJOR _TA
) _TB
),
_SUMMARY_KONG_UNION_MAX_OS_VERSION AS (
SELECT DISTINCT
_TA.*,
CONCAT(_TA.max_major_device, '.', _TA.max_minor_device) AS max_os_version_device,
CONCAT(_TA.max_major_device_outlet, '.', _TA.max_minor_device_outlet) AS max_os_version_device_outlet
FROM
_SUMMARY_KONG_UNION_MAX_OS_VERSION_MINOR _TA
),
_MAX_OS_VERSION_DEVICE AS (
SELECT DISTINCT
device_kong_id,
merchant_id,
max_updated_at AS max_updated_at_os_version,
os_type,
max_os_version_device,
max_os_version_device_outlet,
max_major_device AS max_os_version_device_major,
max_minor_device AS max_os_version_device_minor,
max_major_device_outlet AS max_os_version_device_outlet_major,
max_minor_device_outlet AS max_os_version_device_outlet_minor
FROM
_SUMMARY_KONG_UNION_MAX_OS_VERSION
WHERE
max_os_version_device_outlet = os_version
)
SELECT
UPPER(_SUMMARY_KONG.device_kong_id) AS device_kong_id,
_SUMMARY_KONG.device_brand_model_std,
UPPER(_SUMMARY_KONG.merchant_id) AS merchant_id,
_SUMMARY_KONG.gobiz_user_id,
_MAX_VERSION_DEVICE.max_app_version_device,
_MAX_VERSION_DEVICE.max_app_version_device_outlet,
_MAX_VERSION_DEVICE.max_app_version_device_major,
_MAX_VERSION_DEVICE.max_app_version_device_minor,
_MAX_VERSION_DEVICE.max_app_version_device_patch,
_MAX_VERSION_DEVICE.max_app_version_device_outlet_major,
_MAX_VERSION_DEVICE.max_app_version_device_outlet_minor,
_MAX_VERSION_DEVICE.max_app_version_device_outlet_patch,
_MAX_OS_VERSION_DEVICE.os_type,
_MAX_OS_VERSION_DEVICE.max_os_version_device,
_MAX_OS_VERSION_DEVICE.max_os_version_device_outlet,
_MAX_OS_VERSION_DEVICE.max_os_version_device_major,
_MAX_OS_VERSION_DEVICE.max_os_version_device_minor,
_MAX_OS_VERSION_DEVICE.max_os_version_device_outlet_major,
_MAX_OS_VERSION_DEVICE.max_os_version_device_outlet_minor,
--MIN(_SUMMARY_KONG.min_updated_at) AS min_updated_at,
MAX(_SUMMARY_KONG.max_updated_at) AS max_updated_at,
IFNULL(SUM(goresto_total_event),0) AS goresto_total_event,
IFNULL(SUM(goresto_total_event_7days),0) AS goresto_total_event_7days,
IFNULL(SUM(goresto_total_event_14days),0) AS goresto_total_event_14days,
IFNULL(SUM(goresto_total_event_30days),0) AS goresto_total_event_30days,
IFNULL(SUM(goresto_total_event_60days),0) AS goresto_total_event_60days
FROM
_SUMMARY_KONG
LEFT JOIN _MAX_VERSION_DEVICE ON _SUMMARY_KONG.device_kong_id = _MAX_VERSION_DEVICE.device_kong_id AND _SUMMARY_KONG.merchant_id = _MAX_VERSION_DEVICE.merchant_id
LEFT JOIN _MAX_OS_VERSION_DEVICE ON _SUMMARY_KONG.device_kong_id = _MAX_OS_VERSION_DEVICE.device_kong_id AND _SUMMARY_KONG.merchant_id = _MAX_OS_VERSION_DEVICE.merchant_id
WHERE
_SUMMARY_KONG.merchant_id IS NOT NULL
GROUP BY
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
```