###### tags: `Data Science`

# BigQuery "u-find"

## SQL basics

[Visual Representation of SQL Joins](https://www.codeproject.com/Articles/33052/Visual-Representation-of-SQL-Joins)

Google BigQuery: [Geography Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_contains)

## SQL queries

```sql=
SELECT clientMac, manufacturer, ssid
FROM `indoor-meraki.locationIndoor.scanapiv3`
WHERE DATE(seenTime) = "2020-05-27"
  AND customerId = "C000100"
GROUP BY clientMac, manufacturer, ssid
LIMIT 100
```

```sql=
SELECT clientMac, manufacturer, username
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
GROUP BY clientMac, manufacturer, username
LIMIT 100
```

List of manufacturers:

```sql
SELECT manufacturer, COUNT(DISTINCT clientMac) AS total_count
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE DATE(seenTime) = "2020-05-27"
  AND customerId = "C000100"
GROUP BY manufacturer
ORDER BY total_count DESC
LIMIT 1000
```

List of Apple laptops:

```sql
SELECT manufacturer, COUNT(DISTINCT clientMac) AS total_count
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE DATE(seenTime) = "2020-05-27"
  AND customerId = "C000100"
  AND manufacturer = "Apple"
GROUP BY manufacturer
ORDER BY total_count DESC
LIMIT 1000
```

```sql=
SELECT macAddress, COUNT(DISTINCT macAddress) AS total_count
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE DATE(time) = "2020-10-01"
GROUP BY macAddress
ORDER BY total_count DESC
LIMIT 1000
```

Look for duplicate rows:

```sql
SELECT
  (SELECT COUNT(1)
   FROM (SELECT DISTINCT time, macAddress, username
         FROM `indoor-meraki.locationIndoor.scanapi_v3`)) AS distinct_rows,
  (SELECT COUNT(1)
   FROM `indoor-meraki.locationIndoor.scanapi_v3`) AS total_rows
```

Create a new table with the deduplicated result:

```sql
CREATE OR REPLACE TABLE `indoor-meraki.locationIndoor.dedup_scanapi_v3` AS
SELECT DISTINCT *
FROM `indoor-meraki.locationIndoor.scanapi_v3`
```

https://dataform.co/blog/removing-duplicates-bigquery

To summarize, to deduplicate records with BigQuery, follow these steps:

* Identify whether your dataset contains duplicates.
* Create a SELECT query that aggregates the desired column using a GROUP BY clause.
* Materialize the result to a new table using `CREATE OR REPLACE TABLE [tablename] AS [SELECT STATEMENT]`.
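For step 2, when rows should be unique per key rather than being exact duplicates, a common BigQuery pattern is to aggregate whole rows with ARRAY_AGG. A minimal sketch, assuming (time, macAddress, username) as the dedup key (borrowed from the duplicate check above); adjust the key columns to whatever defines "the same record":

```sql
-- Sketch of the GROUP BY / ARRAY_AGG variant of step 2: keep one full row per key.
-- The key columns are an assumption taken from the duplicate check above.
CREATE OR REPLACE TABLE `indoor-meraki.locationIndoor.dedup_scanapi_v3` AS
SELECT any_row.*
FROM (
  SELECT ARRAY_AGG(t LIMIT 1)[OFFSET(0)] AS any_row
  FROM `indoor-meraki.locationIndoor.scanapi_v3` AS t
  GROUP BY t.time, t.macAddress, t.username
)
```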
List of people in one day @Location grouped by floorPlanName:

```sql
SELECT floorPlanName, netName, username
FROM `indoor-meraki.locationIndoor.scanapi_v3_names`
WHERE DATE(time) = "2020-10-15"
GROUP BY floorPlanName, username, netName
ORDER BY floorPlanName
LIMIT 1000
```

Busiest timeframes, count #people per day:

```sql=
SELECT TIMESTAMP_TRUNC(time, DAY) date, COUNT(DISTINCT username) AS people
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
GROUP BY date
ORDER BY date DESC
```

Busiest floors, count #people per day and per floor:

```sql
SELECT TIMESTAMP_TRUNC(time, DAY) date, COUNT(DISTINCT username) AS people, floorPlanName
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
GROUP BY date, floorPlanName
ORDER BY date DESC
```

Select a specific date range:

```sql=
SELECT TIMESTAMP_TRUNC(time, DAY) date, COUNT(DISTINCT username) AS people, floorPlanName
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
  AND time >= '2020-10-01' AND time <= '2020-10-15'
GROUP BY date, floorPlanName
ORDER BY date DESC
```

Count number of unique people per hour / per day (on each net):

```sql=
SELECT TIMESTAMP_TRUNC(time, HOUR) date, COUNT(DISTINCT username) AS people, floorPlanName, netName
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
  AND time >= '2020-10-13' AND time <= '2020-10-15'
  AND netName = 'London'
GROUP BY netName, floorPlanName, date
```

Same but with full timestamp info:

```sql=
SELECT TIMESTAMP_TRUNC(time, HOUR) date, COUNT(DISTINCT username) AS people
FROM `indoor-meraki.locationIndoor.scanapi_C000001`
WHERE time >= '2020-11-25' AND time <= '2020-11-26'
GROUP BY date
ORDER BY date ASC
```

Count minutes seen by the Meraki infrastructure:

```sql
SELECT username, floorPlanName, COUNT(time) AS seen
FROM `indoor-meraki.locationIndoor.scanapi_v3_names`
WHERE customerId = "C000100"
  AND time >= '2020-10-15' AND time < '2020-10-16'
GROUP BY username, floorPlanName
ORDER BY seen DESC
```

## Extract date or hour from datetime

`EXTRACT(DATE FROM curr_dt)`

```sql=
SELECT EXTRACT(DATE FROM time) AS date, COUNT(DISTINCT username) AS people
FROM `indoor-meraki.locationIndoor.scanapi_v3`
WHERE customerId = "C000100"
GROUP BY date
ORDER BY people DESC
```

## Combine query to get hours per day of week

```sql=
SELECT hourly.hour, AVG(hourly.people) AS avg_people
FROM (
  SELECT EXTRACT(DAYOFWEEK FROM time) AS dayofweek,
    EXTRACT(HOUR FROM time) AS hour,
    COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_v3`
  WHERE customerId = "C000100"
    AND time >= '2020-09-01' AND time <= '2020-09-30'
  GROUP BY hour, dayofweek
  ORDER BY hour ASC
) AS hourly
GROUP BY hourly.hour
ORDER BY hourly.hour ASC
```

## Weekly per day of week per hour (only works for one week, otherwise it sums up; a multi-week variant is sketched at the end of this section)

```sql=
SELECT dayofweek, hourly.hour, AVG(hourly.people) AS avg_people
FROM (
  SELECT EXTRACT(DAYOFWEEK FROM time) AS dayofweek,
    EXTRACT(HOUR FROM time) AS hour,
    COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_v3`
  WHERE customerId = "C000100"
    AND time >= '2020-09-01' AND time <= '2020-09-07'
  GROUP BY dayofweek, hour
  ORDER BY dayofweek, hour ASC
) AS hourly
GROUP BY dayofweek, hourly.hour
```

```sql=
SELECT MAX(people) max, EXTRACT(HOUR FROM date) AS hour
FROM (
  SELECT TIMESTAMP_TRUNC(time, HOUR) date, COUNT(DISTINCT username) AS people, floorPlanName, netName
  FROM `indoor-meraki.locationIndoor.scanapi_v3`
  WHERE customerId = "C000100"
    AND time >= '2020-10-13' AND time <= '2020-10-15'
    AND netName = 'London'
  GROUP BY netName, floorPlanName, date
)
GROUP BY date
```
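A minimal sketch of the multi-week version, assuming the same `scanapi_v3` table and columns used above: counting unique users per calendar date first and only then averaging per day of week and hour avoids the summing-up problem.

```sql
-- Sketch, assuming the same scanapi_v3 table and columns as above: count unique users
-- per calendar date first, then average per (day of week, hour), so several weeks of
-- data no longer add up.
SELECT dayofweek, hour, AVG(people) AS avg_people
FROM (
  SELECT EXTRACT(DATE FROM time) AS day,
    EXTRACT(DAYOFWEEK FROM time) AS dayofweek,
    EXTRACT(HOUR FROM time) AS hour,
    COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_v3`
  WHERE customerId = "C000100"
    AND time >= '2020-09-01' AND time <= '2020-09-30'
  GROUP BY day, dayofweek, hour
)
GROUP BY dayofweek, hour
ORDER BY dayofweek, hour
```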
## BigQuery Geolocation

```sql=
SELECT geoJSON,
  ST_CONTAINS(ST_GEOGFROMTEXT('POLYGON((-9.158116 38.577876, -9.158059 38.577906, -9.158086 38.577962, -9.158145 38.577934, -9.158116 38.577876))'), geoJSON) AS `contains`
FROM `indoor-meraki.locationIndoor.scanapi_C000001`
WHERE customerID = "C000001"
LIMIT 100
```

```sql=
SELECT Points.geoJSON AS point, Polygons.geoJSON AS zone
FROM `indoor-meraki.locationIndoor.scanapi_C000001` AS Points,
  `ufind-prod.zones.zones_table` AS Polygons
WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = false
LIMIT 1000
```

```sql=
SELECT Points.time AS time, Points.macAddress AS mac, Points.currLocation AS loc, Polygons.name AS zoneName
FROM `indoor-meraki.locationIndoor.scanapi_C000001` AS Points,
  `ufind-prod.zones.zones_table` AS Polygons
WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = true
  AND time >= '2020-10-13' AND time <= '2020-10-14'
```

### Query returning #unique people, hourly, matching zoneName from zones table

```sql=
SELECT TIMESTAMP_TRUNC(Points.time, HOUR) date,
  COUNT(DISTINCT Points.username) AS people,
  Points.floorPlanName AS floorName,
  Polygons.name AS zoneName
FROM `indoor-meraki.locationIndoor.scanapi_C000100` AS Points,
  `ufind-prod.zones.zones_table` AS Polygons
WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = true
  AND Points.floorPlanId = Polygons.floorPlanId
  AND Points.time >= '2020-12-10' AND Points.time <= '2020-12-15'
  AND Polygons.customerId = 'C000100'
GROUP BY date, zoneName, floorName
```

### Query with filling hours not in query (LEFT JOIN)

Support post: [how-to-generate-series-to-avoid-gaps-in-data-in-bigquery](https://popsql.com/learn-sql/bigquery/how-to-generate-series-to-avoid-gaps-in-data-in-bigquery)

Tool to investigate: [get-started-with-bigquery-and-popsql-in-5-minutes](https://popsql.com/learn-sql/bigquery/get-started-with-bigquery-and-popsql-in-5-minutes)

```sql=
WITH hours AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2020-08-10', '2020-08-15', INTERVAL 1 HOUR)) AS hour
),
zones AS (
  SELECT TIMESTAMP_TRUNC(Points.time, HOUR) date,
    COUNT(DISTINCT Points.username) AS people,
    Points.floorPlanName AS floorName,
    Polygons.name AS zoneName
  FROM `indoor-meraki.locationIndoor.scanapi_C000100` AS Points,
    `ufind-prod.zones.zones_table` AS Polygons
  WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = true
    AND Points.floorPlanId = Polygons.floorPlanId
    AND Points.time >= '2020-08-10' AND Points.time <= '2020-08-15'
    AND Polygons.customerId = 'C000100'
  GROUP BY date, zoneName, floorName
)
SELECT hours.hour, zones.people AS people, zones.floorName AS floorName, zones.zoneName AS zoneName
FROM hours
LEFT JOIN zones ON zones.date = hours.hour
```

## API-gateway: final query to count all users per hour and insert "no fill hours"

```sql=
WITH hours AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2020-08-10T00:00:00', '2020-08-10T23:59:59', INTERVAL 1 HOUR)) AS hour
),
floors AS (
  SELECT TIMESTAMP_TRUNC(time, HOUR) date, COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_C000100`
  WHERE time >= '2020-08-10T00:00:00' AND time <= '2020-08-10T23:59:59'
  GROUP BY date
  ORDER BY date
)
SELECT hours.hour, floors.people AS people, floors.date AS date
FROM hours
LEFT JOIN floors ON floors.date = hours.hour
```

## API-gateway: final query to count all users per day and insert "no fill days"

```sql=
WITH days AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2020-08-10', '2020-08-15', INTERVAL 1 DAY)) AS day
),
floors AS (
  SELECT TIMESTAMP_TRUNC(time, DAY) date, COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_C000100`
  WHERE time >= '2020-08-10' AND time <= '2020-08-15'
  GROUP BY date
  ORDER BY date
)
SELECT days.day, floors.people AS people, floors.date AS date
FROM days
LEFT JOIN floors ON floors.date = days.day
```
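The LEFT JOIN above leaves `people` as NULL for generated days with no rows; a minimal variant, assuming the same table and date range, reports 0 instead:

```sql
-- Sketch, same gap-filling pattern as above, but returning 0 instead of NULL
-- for generated days with no matching rows.
WITH days AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2020-08-10', '2020-08-15', INTERVAL 1 DAY)) AS day
),
floors AS (
  SELECT TIMESTAMP_TRUNC(time, DAY) AS date, COUNT(DISTINCT username) AS people
  FROM `indoor-meraki.locationIndoor.scanapi_C000100`
  WHERE time >= '2020-08-10' AND time <= '2020-08-15'
  GROUP BY date
)
SELECT days.day, IFNULL(floors.people, 0) AS people
FROM days
LEFT JOIN floors ON floors.date = days.day
ORDER BY days.day
```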
## API-gateway: OTHER interesting SQL metrics

### Query to get WiFi Accuracy

```sql=
SELECT floorPlanId, floorPlanName,
  COUNT(*) AS sample_size,
  COUNTIF(accurate) * 100 / COUNT(*) AS location_accuracy
FROM `indoor-meraki.locationIndoor.scanapi_C000100`
WHERE customerID = "C000100"
GROUP BY floorPlanId, floorPlanName
ORDER BY floorPlanId DESC
```

### Query to get WiFi Signal Strength quality

```sql=
SELECT floorPlanId,
  COUNT(*) AS sample_size,
  COUNT(DISTINCT macAddress) AS numMacs,
  TIMESTAMP_TRUNC(time, DAY) date,
  ROUND(COUNTIF(nearestApRssi <= -90) * 100 / COUNT(*), 0) AS terrible,
  ROUND(COUNTIF(nearestApRssi > -90 AND nearestApRssi < -80) * 100 / COUNT(*), 0) AS badSignal,
  ROUND(COUNTIF(nearestApRssi > -80 AND nearestApRssi < -65) * 100 / COUNT(*), 0) AS goodSignal,
  ROUND(COUNTIF(nearestApRssi >= -65) * 100 / COUNT(*), 0) AS excellentSignal
FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
WHERE time >= '2022-05-01 00:00:00' AND time <= '2022-05-31 23:59:59'
GROUP BY floorPlanId, date
ORDER BY date ASC
```

## API-gateway: final query to get WiFi Signal Strength quality per day and insert "no fill days"

```sql=
WITH days AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2020-08-10', '2020-08-15', INTERVAL 1 DAY)) AS day
),
wifi AS (
  SELECT floorPlanId,
    TIMESTAMP_TRUNC(time, DAY) date,
    COUNT(*) AS sample_size,
    ROUND(COUNTIF(nearestApRssi < -90) * 100 / COUNT(*), 0) AS terribleSignal,
    ROUND(COUNTIF(nearestApRssi > -90 AND nearestApRssi < -80) * 100 / COUNT(*), 0) AS badSignal,
    ROUND(COUNTIF(nearestApRssi > -80 AND nearestApRssi < -65) * 100 / COUNT(*), 0) AS goodSignal,
    ROUND(COUNTIF(nearestApRssi > -65) * 100 / COUNT(*), 0) AS excellentSignal
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time >= '2020-08-10' AND time <= '2020-08-15'
  GROUP BY floorPlanId, date
)
SELECT days.day,
  wifi.floorPlanId AS floorPlanId,
  wifi.sample_size AS sample_size,
  wifi.terribleSignal AS terribleSignal,
  wifi.badSignal AS badSignal,
  wifi.goodSignal AS goodSignal,
  wifi.excellentSignal AS excellentSignal
FROM days
LEFT JOIN wifi ON wifi.date = days.day
```

### Query to get WiFi Signal Strength quality per AP

```sql=
SELECT netName, floorPlanId, nearestApMac,
  COUNT(*) AS sample_size,
  ROUND(COUNTIF(nearestApRssi < -80) * 100 / COUNT(*), 2) AS badSignal,
  ROUND(COUNTIF(nearestApRssi > -80 AND nearestApRssi < -65) * 100 / COUNT(*), 2) AS goodSignal,
  ROUND(COUNTIF(nearestApRssi > -65) * 100 / COUNT(*), 2) AS excellentSignal
FROM `indoor-meraki.locationIndoor.scanapi_C000100`
WHERE customerID = "C000100"
GROUP BY netName, floorPlanId, nearestApMac
ORDER BY netName, floorPlanId DESC
```

### Query with set variables

```sql=
DECLARE size INT64;

SET size = (
  SELECT COUNT(*) AS x
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time >= '2021-04-01 00:00:00' AND time <= '2021-04-01 23:59:59'
);

SELECT floorPlanId,
  COUNTIF(nearestApRssi > -90) AS sample_size,
  COUNT(DISTINCT macAddress) AS numMacs,
  TIMESTAMP_TRUNC(time, DAY) date,
  ROUND(COUNTIF(nearestApRssi <= -90) * 100 / COUNT(*), 0) AS terrible,
  ROUND(COUNTIF(nearestApRssi > -90 AND nearestApRssi < -80) * 100 / COUNT(*), 0) AS badSignal,
  ROUND(COUNTIF(nearestApRssi > -80 AND nearestApRssi < -65) * 100 / COUNT(*), 0) AS goodSignal,
  ROUND(COUNTIF(nearestApRssi >= -65) * 100 / COUNT(*), 0) AS excellentSignal
FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
WHERE time >= '2021-04-01 00:00:00' AND time <= '2021-04-01 23:59:59'
GROUP BY floorPlanId, date
ORDER BY date ASC
```

Two stages are processed, one per SELECT.

### Query with time interval (still to test)

https://stackoverflow.com/questions/52890538/how-to-get-count-of-average-calls-every-5-minutes-using-datetime-sql

This is the MySQL syntax from the Stack Overflow post; it does not run in BigQuery as-is (a BigQuery version is sketched below).

```sql=
SELECT DATE_FORMAT(`DateTime`,'%Y-%m-%d %H:00') + INTERVAL(MINUTE(`DateTime`) - MINUTE(datestamp) MOD 5) interval_beginning,
  DialedNumberID,
  COUNT(*) call_count
FROM Calls
GROUP BY DATE_FORMAT(`DateTime`,'%Y-%m-%d %H:00') + INTERVAL(MINUTE(`DateTime`) - MINUTE(datestamp) MOD 5),
  DialedNumberID
```
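A rough BigQuery equivalent of that 5-minute bucketing; table, columns, and the date range are placeholders borrowed from the earlier scanapi queries:

```sql
-- Sketch of 5-minute bucketing in BigQuery standard SQL: truncate each timestamp
-- down to a multiple of 300 seconds and group on it. Table, columns and the date
-- range are assumptions borrowed from the queries above.
SELECT
  TIMESTAMP_SECONDS(DIV(UNIX_SECONDS(time), 300) * 300) AS interval_beginning,
  COUNT(DISTINCT macAddress) AS devices
FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
WHERE time >= '2021-04-01 00:00:00' AND time <= '2021-04-01 23:59:59'
GROUP BY interval_beginning
ORDER BY interval_beginning
```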
### Query with DECLARE ... two-step query

```sql=
DECLARE size INT64;

SET size = (
  SELECT COUNT(*) AS x
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time >= '2021-04-01 00:00:00' AND time <= '2021-04-01 23:59:59'
);

SELECT floorPlanId,
  COUNTIF(nearestApRssi > -90) AS sample_size,
  COUNT(DISTINCT macAddress) AS numMacs,
  TIMESTAMP_TRUNC(time, DAY) date,
  ROUND(COUNTIF(nearestApRssi <= -90) * 100 / COUNT(*), 0) AS terrible,
  ROUND(COUNTIF(nearestApRssi > -90 AND nearestApRssi < -80) * 100 / COUNT(*), 0) AS badSignal,
  ROUND(COUNTIF(nearestApRssi > -80 AND nearestApRssi < -65) * 100 / COUNT(*), 0) AS goodSignal,
  ROUND(COUNTIF(nearestApRssi >= -65) * 100 / COUNT(*), 0) AS excellentSignal
FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
WHERE time >= '2021-04-01 00:00:00' AND time <= '2021-04-01 23:59:59'
GROUP BY floorPlanId, date
ORDER BY date ASC
```

### Query visitors seen for more than 10 min (10x)

#### Note: more than 10 minutes across all the days combined, not per day ...

```sql=
SELECT visitors.macAddress,
  DIV(visitors.count, 60) AS hours,
  MOD(visitors.count, 60) AS minutes
FROM (
  SELECT macAddress, COUNT(macAddress) AS count
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time BETWEEN '2021-04-20 07:00:00' AND '2021-04-20 23:59:00.000059'
    AND networkId IN UNNEST(['L_644014746713983665'])
    AND ssid IN UNNEST([''])
    AND nearestApRssi >= -90
  GROUP BY macAddress
  ORDER BY count DESC
) AS visitors
WHERE visitors.count >= 10
```

### Query AVG time per Guest (or other SSID)

```sql=
SELECT visitors.date,
  CAST(AVG(visitors.count) AS INT64) AS avgMinutes,
  DIV(CAST(AVG(visitors.count) AS INT64), 60) AS hours,
  MOD(CAST(AVG(visitors.count) AS INT64), 60) AS minutes,
  COUNT(visitors.macAddress) AS devices
FROM (
  SELECT TIMESTAMP_TRUNC(time, DAY) date, macAddress, COUNT(macAddress) AS count
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time BETWEEN '2021-04-20 00:00:00' AND '2021-04-27 23:59:00.000059'
    AND networkId IN UNNEST(['L_644014746713983665'])
    AND ssid IN UNNEST(['Browns-Guest'])
    AND nearestApRssi >= -90
  GROUP BY date, macAddress
  ORDER BY date, count DESC
) AS visitors
WHERE visitors.count >= 10
GROUP BY date
ORDER BY date
```

### Exclude MAC addresses

#### Rules

* month: more than 15 days (>15 days)
* day: more than 2 hours (>2 hours)

A combined version of both rules is sketched after this first query.

```sql=
SELECT macDays.macAddress AS macAddress, macDays.days AS days
FROM (
  SELECT visitors.macAddress AS macAddress, COUNT(visitors.macAddress) AS days
  FROM (
    SELECT macAddress, TIMESTAMP_TRUNC(time, DAY) date, COUNT(macAddress) AS count
    FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
    WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
      AND networkId IN UNNEST(['L_644014746713983665'])
      AND nearestApRssi >= -90
    GROUP BY macAddress, date
    ORDER BY macAddress
  ) AS visitors
  WHERE visitors.count >= 240
  GROUP BY macAddress
) AS macDays
WHERE macDays.days > 15
GROUP BY macAddress, days
```
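A sketch that applies both rules in a single pass; like the queries above, it assumes roughly one row per MAC per minute, so 120 rows on a day stand in for the 2-hour threshold:

```sql
-- Sketch combining both exclusion rules, assuming (as above) roughly one row per
-- MAC per minute: first keep days where a MAC was seen for more than ~2 hours,
-- then keep MACs with more than 15 such days in the month.
SELECT macAddress
FROM (
  SELECT macAddress, TIMESTAMP_TRUNC(time, DAY) AS day, COUNT(*) AS minutes
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
    AND networkId IN UNNEST(['L_644014746713983665'])
    AND nearestApRssi >= -90
  GROUP BY macAddress, day
)
WHERE minutes > 120
GROUP BY macAddress
HAVING COUNT(*) > 15
```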
#### Suggestion from Stack Overflow

```sql=
SELECT macAddress AS macAddress,
  COUNT(DISTINCT TIMESTAMP_TRUNC(time, DAY)) AS days
FROM `my_table`
WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
  AND networkId IN UNNEST(['L_644014746713983665'])
  AND nearestApRssi >= -90
GROUP BY macAddress
HAVING COUNT(DISTINCT TIMESTAMP_TRUNC(time, DAY)) > 15
ORDER BY macAddress
```

#### Stack Overflow alternative

```sql=
SELECT COUNT(*) cnt, macAddress, mnth
FROM (
  SELECT DISTINCT macAddress,
    TIMESTAMP_TRUNC(time, DAY) dt,
    TIMESTAMP_TRUNC(time, MONTH) mnth
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
)
GROUP BY macAddress, mnth
HAVING COUNT(*) > 15
ORDER BY cnt DESC
```

```sql=
SELECT visitors.macAddress AS macAddress, COUNT(visitors.macAddress) AS days
FROM (
  SELECT macAddress, TIMESTAMP_TRUNC(time, DAY) date, COUNT(macAddress) AS count
  FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
  WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
    AND networkId IN UNNEST(['L_644014746713983665'])
    AND nearestApRssi >= -90
  GROUP BY macAddress, date
  ORDER BY macAddress
) AS visitors
WHERE visitors.count >= 300
GROUP BY macAddress
```

#### Query excluding items from a SQL table

```sql=
SELECT TIMESTAMP_TRUNC(time, DAY) date, COUNT(DISTINCT macAddress) AS people
FROM `ufind-prod.location.scanapi_farfetch-ymqgt`
WHERE time BETWEEN '2021-06-01 00:00:00' AND '2021-06-30 23:59:00.000059'
  AND networkId IN UNNEST(['L_644014746713983665'])
  AND nearestApRssi >= -90
  AND macAddress NOT IN (
    SELECT macAddress
    FROM `ufind-prod.excludeMacs.farfetch-ymqgt`)
GROUP BY date
ORDER BY date DESC
```

## Contact Tracing Queries

### OLD way

```sql=
SELECT
  # TIMESTAMP_TRUNC(Points.time, MINUTE) date,
  Points.time AS date,
  COUNT(*) AS seen,
  Points.floorPlanId AS floorPlanId,
  Points.macAddress AS macAddress,
  Polygons.name AS zoneName
FROM `ufind-prod.merakiWiFi.deloitte-7q734` AS Points,
  `ufind-prod.zones.zones_deloitte-7q734` AS Polygons
WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = TRUE
  AND Points.floorPlanId = Polygons.floorPlanId
  AND time BETWEEN '2022-03-06T00:00:00' AND '2022-03-22T23:59:59'
  AND macAddress IN UNNEST(['4101030fe1d29cee9c6b2dfcf22be18543a10c06caeb71412f5d08271434d42861'])
GROUP BY macAddress, date, floorPlanId, zoneName
ORDER BY date
```

### NEW way

A new visit starts whenever the gap since the previous sighting in a zone exceeds 5 minutes or the zone changes; consecutive sightings are then grouped into visits and their duration computed.

```sql=
WITH zonetable AS (
  SELECT Points.time AS date,
    Points.floorPlanId AS floorPlanId,
    Polygons.name AS zoneName,
    Points.macAddress AS macAddress
  FROM `ufind-prod.merakiWiFi.deloitte-7q734` AS Points,
    `ufind-prod.zones.zones_deloitte-7q734` AS Polygons
  WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = TRUE
    AND Points.floorPlanId = Polygons.floorPlanId
    AND time BETWEEN '2022-03-06T00:00:00' AND '2022-03-22T23:59:59'
    AND macAddress IN UNNEST(['4101030fe1d29cee9c6b2dfcf22be18543a10c06caeb71412f5d08271434d42861'])
  GROUP BY macAddress, date, floorPlanId, zoneName
  ORDER BY date
)
SELECT macAddress,
  floorPlanId,
  MIN(date) AS startDate,
  # MAX(date) AS endDate,
  DATE_DIFF(MAX(date), MIN(date), minute) + 1 AS time,
  zoneName
FROM (
  SELECT *,
    COUNTIF(new_zone) OVER (PARTITION BY zoneName ORDER BY date) AS zone_number
  FROM (
    SELECT *,
      IFNULL(date - LAG(date) OVER (PARTITION BY zoneName ORDER BY date) > MAKE_INTERVAL(minute => 5)
        OR zoneName != LAG(zoneName) OVER (ORDER BY date), TRUE) AS new_zone
    FROM zonetable
  )
)
GROUP BY macAddress,
  # startDate,
  floorPlanId,
  zoneName,
  zone_number
ORDER BY startDate
```
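To make the visit-splitting easier to test, here is a small self-contained sketch of the same LAG / COUNTIF pattern over a few inline sample rows (illustrative only; it uses made-up timestamps and TIMESTAMP_DIFF instead of the production tables):

```sql
-- Illustrative sketch of the visit-splitting used in the "NEW way" query, run on a
-- few inline sample rows instead of the production tables. A new visit starts when
-- the gap since the previous sighting in the zone exceeds 5 minutes or the zone changes.
WITH zonetable AS (
  SELECT *
  FROM UNNEST([
    STRUCT(TIMESTAMP '2022-03-06 10:00:00' AS date, 'Lobby' AS zoneName),
    (TIMESTAMP '2022-03-06 10:02:00', 'Lobby'),
    (TIMESTAMP '2022-03-06 10:20:00', 'Lobby'),    -- gap > 5 min: starts a second Lobby visit
    (TIMESTAMP '2022-03-06 10:21:00', 'MeetingRoom')
  ])
)
SELECT zoneName, zone_number,
  MIN(date) AS startDate,
  TIMESTAMP_DIFF(MAX(date), MIN(date), MINUTE) + 1 AS minutes
FROM (
  SELECT *,
    COUNTIF(new_zone) OVER (PARTITION BY zoneName ORDER BY date) AS zone_number
  FROM (
    SELECT *,
      IFNULL(date - LAG(date) OVER (PARTITION BY zoneName ORDER BY date) > MAKE_INTERVAL(minute => 5)
        OR zoneName != LAG(zoneName) OVER (ORDER BY date), TRUE) AS new_zone
    FROM zonetable
  )
)
GROUP BY zoneName, zone_number
ORDER BY startDate
```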
### floorPlans query

```sql=
SELECT COUNT(*) AS seen,
  TIMESTAMP_TRUNC(time, DAY) date,
  floorPlanId,
  floorPlanName
FROM `ufind-prod.location.scanapi_C000001`
WHERE time BETWEEN '2021-01-06 00:00:00' AND '2021-01-06 23:59:59'
  AND macAddress = "28a0730852493c8c"
GROUP BY date, macAddress, floorPlanId, floorPlanName
```

### Zones query result

```sql=
SELECT TIMESTAMP_TRUNC(Points.time, DAY) date,
  COUNT(*) AS num,
  Points.floorPlanName AS floorName,
  Points.floorPlanId AS floorPlanId,
  Points.macAddress AS macAddress,
  Polygons.name AS zoneName
FROM `ufind-prod.location.scanapi_C000001` AS Points,
  `ufind-prod.zones.zones_C000001` AS Polygons
WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = TRUE
  AND Points.floorPlanId = Polygons.floorPlanId
  AND time BETWEEN '2021-01-06 00:00:00' AND '2021-01-06 23:59:59'
  AND macAddress = "28a0730852493c8c"
  AND Polygons.customerId = 'C000001'
GROUP BY date, macAddress, floorName, floorPlanId, zoneName
```

### Bookings query - with insert days and filter by user

```sql=
WITH days AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2022-01-01', '2022-01-31', INTERVAL 1 DAY)) AS day
),
bookings AS (
  SELECT DATE(bookingStart) AS date,
    COUNTIF(status = 'booked') AS booked,
    COUNTIF(status = 'confirmed') AS confirmed,
    COUNTIF(status = 'deleted') AS deleted
  FROM `ufind-dev.bookings.C000001`
  WHERE DATE(bookingStart) BETWEEN '2022-01-01' AND '2022-01-31'
    AND (createdBy IN UNNEST(['diogomagalhaes@u-factor.io', 'ruibras@u-factor.io'])
      OR updatedBy IN UNNEST(['diogomagalhaes@u-factor.io', 'ruibras@u-factor.io']))
  GROUP BY date
  ORDER BY date
)
SELECT days.day,
  bookings.booked AS booked,
  bookings.confirmed AS confirmed,
  bookings.deleted AS deleted
FROM days
LEFT JOIN bookings ON bookings.date = DATE(days.day)
```

#### base

```sql=
SELECT DATE(bookingStart) AS date,
  COUNTIF(status = 'booked') AS booked,
  COUNTIF(status = 'confirmed') AS confirmed,
  COUNTIF(status = 'deleted') AS deleted
FROM `ufind-dev.bookings.C000001`
WHERE DATE(bookingStart) BETWEEN '2022-01-01' AND '2022-01-31'
  # bookingStart BETWEEN '2022-01-01' AND '2022-01-31'
  AND (createdBy IN UNNEST(['diogomagalhaes@u-factor.io', 'ruibras@u-factor.io'])
    OR updatedBy IN UNNEST(['diogomagalhaes@u-factor.io', 'ruibras@u-factor.io']))
GROUP BY date
ORDER BY date
```

### NEW - Avg time spent per floor

```sql=
SELECT TIMESTAMP_TRUNC(startDate, DAY) day,
  macAddress,
  floorPlanId,
  SUM(time) AS minutes
FROM (
  WITH zonetable AS (
    SELECT Points.time AS date,
      # TIMESTAMP_TRUNC(Points.time, DAY) day,
      Points.floorPlanId AS floorPlanId,
      Polygons.name AS zoneName,
      Points.macAddress AS macAddress
    FROM `ufind-prod.merakiWiFi.deloitte-7q734` AS Points,
      `ufind-prod.zones.zones_deloitte-7q734` AS Polygons
    WHERE ST_CONTAINS(Polygons.geoJSON, Points.geoJSON) = TRUE
      AND Points.floorPlanId = Polygons.floorPlanId
      AND time BETWEEN '2022-03-06T00:00:00' AND '2022-03-22T23:59:59'
      AND macAddress IN UNNEST(['4101030fe1d29cee9c6b2dfcf22be18543a10c06caeb71412f5d08271434d42861'])
    GROUP BY macAddress, date, floorPlanId, zoneName
    ORDER BY date
  )
  SELECT macAddress,
    floorPlanId,
    # day,
    MIN(date) AS startDate,
    MAX(date) AS endDate,
    DATE_DIFF(MAX(date), MIN(date), minute) + 1 AS time,
    zoneName
  FROM (
    SELECT *,
      COUNTIF(new_zone) OVER (PARTITION BY zoneName ORDER BY date) AS zone_number
    FROM (
      SELECT *,
        IFNULL(date - LAG(date) OVER (PARTITION BY zoneName ORDER BY date) > MAKE_INTERVAL(minute => 5)
          OR zoneName != LAG(zoneName) OVER (ORDER BY date), TRUE) AS new_zone
      FROM zonetable
    )
  )
  GROUP BY macAddress,
    # day,
    floorPlanId,
    zoneName,
    zone_number
  ORDER BY startDate
)
GROUP BY day, macAddress, floorPlanId
ORDER BY day
```
### MAX simultaneous users per AP

```sql=
WITH ap AS (
  SELECT networkId,
    TIMESTAMP_TRUNC(time, MINUTE) minute,
    floorPlanId,
    nearestApMac,
    COUNT(DISTINCT macAddress) AS people
  FROM `ufind-prod.merakiWiFi.farfetch-ymqgt`
  WHERE time >= '2022-07-05' AND time <= '2022-07-06'
    AND networkId = 'L_644014746713982610'
    AND ssid != ""
    AND nearestApMac IN UNNEST(['e0:cb:bc:8e:aa:21'])
  GROUP BY networkId, floorPlanId, nearestApMac, minute
),
genhours AS (
  SELECT *
  FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2022-07-05', '2022-07-06', INTERVAL 1 HOUR)) AS genhour
),
maximum AS (
  SELECT ap.networkId AS networkId,
    ap.nearestApMac AS nearestApMac,
    TIMESTAMP_TRUNC(ap.minute, HOUR) hour,
    MAX(ap.people) AS maxpeople
  FROM ap
  GROUP BY networkId, nearestApMac, hour
)
SELECT maximum.networkId,
  genhours.genhour,
  maximum.nearestApMac,
  maximum.maxpeople
FROM genhours
LEFT JOIN maximum ON maximum.hour = genhours.genhour
```