# Ahana Evaluation
## Cluster Configuration

## Scenario 1
```
SELECT
date,
publisherName,
mediaType,
seriesName,
episodeName,
ROUND(SUM((CASE WHEN coViewId = '0' THEN secondsViewed ELSE 0 END) / 60)) AS personPrimaryMinutes,
ROUND(SUM((CASE WHEN coViewId != '0' THEN secondsViewed ELSE 0 END) / 60)) AS personCoViewerMinutes,
ROUND(SUM(secondsViewed / 60)) AS personTotalMinutes, ROUND(SUM(secondsViewed/mediaDuration)) AS bvodAudience
FROM
campaign.oztam_demo_event_v310
WHERE
mediaType = 'live'
AND date >= '20210101'
AND date <= '20210930'
AND seriesName != ''
AND mediaDuration >= 600
GROUP BY
date, mediaType, publisherName, seriesName, episodeName
ORDER BY
bvodAudience desc
LIMIT
1000;
```
### Ahana - 29 Nodes
```
Query 20211004_012712_00002_pm5uz, FINISHED, 20 nodes
Splits: 6,827 total, 6,827 done (100.00%)
0:18 [1.47B rows, 15.5GB] [82.7M rows/s, 894MB/s]
```
### Athena

## Larger Query
```
with by_ppl as (
select
a.date,
a.voz_id_ppl,
arbitrary(d.r) as r,
a.year,
a.week,
c.p_age_sex_group as demo,
c.h_met_mkt * 10 + c.h_reg_mkt as market,
count(*) as reach
from
jima.remapped_spots_1 as b, voz_v4.voz_respondentleveldata as a, voz_v4.voz_dailydynamicuniverse as d, voz_v4.voz_population as c
where
b.cimf_event_id = a.cimf_event_id
and a.date between b.from_ad_date and b.to_ad_date
and a.viewing_session_type in ('Live TV', 'Playback TV')
and a.network in ('SEVEN', 'SBS', 'NINE', 'TEN')
and b.ad_offset between a.eventstartseconds and a.eventendseconds
and d.voz_id_ppl = a.voz_id_ppl
and d.date = a.date
and c.voz_id_ppl = a.voz_id_ppl
and c.year = a.year
and c.q = '3'
group by
a.date,
a.voz_id_ppl,
c.h_met_mkt * 10 + c.h_reg_mkt,
c.p_age_sex_group,
a.week,
a.year
)
select
year,
week,
sum(reach) as grossAudience,
count_if(r = 0 and reach = 1) as r1,
count_if(r = 0 and reach = 2) as r2,
count_if(r = 0 and reach = 3) as r3,
count_if(r = 0 and reach = 4) as r4,
count_if(r = 0 and reach = 5) as r5,
count_if(r = 0 and reach = 6) as r6,
count_if(r = 0 and reach = 7) as r7,
count_if(r = 0 and reach = 8) as r8,
count_if(r = 0 and reach = 9) as r9,
count_if(r = 0 and reach = 10) as r10,
count_if(r = 0 and reach = 11) as r11,
count_if(r = 0 and reach = 12) as r12,
count_if(r = 0 and reach = 13) as r13,
count_if(r = 0 and reach = 14) as r14,
count_if(r = 0 and reach >= 15) as r15,
count_if(r = 0 and reach >= 1) as c1,
count_if(r = 0 and reach >= 2) as c2,
count_if(r = 0 and reach >= 3) as c3,
count_if(r = 0 and reach >= 4) as c4,
count_if(r = 0 and reach >= 5) as c5,
count_if(r = 0 and reach >= 6) as c6,
count_if(r = 0 and reach >= 7) as c7,
count_if(r = 0 and reach >= 8) as c8,
count_if(r = 0 and reach >= 9) as c9,
count_if(r = 0 and reach >= 10) as c10,
count_if(r = 0 and reach >= 11) as c11,
count_if(r = 0 and reach >= 12) as c12,
count_if(r = 0 and reach >= 13) as c13,
count_if(r = 0 and reach >= 14) as c14,
count_if(r = 0 and reach >= 15) as c15
from
by_ppl
group by
year,
week
```
### Ahana - 29 nodes
```
Query 20211004_013251_00004_pm5uz, FINISHED, 20 nodes
Splits: 145,366 total, 145,366 done (100.00%)
1:24 [15.1B rows, 42.9GB] [181M rows/s, 525MB/s]
```
### Athena

## Scenario 2
```
SELECT
b.sessionId,
b.publisherId,
b.deviceId,
b.publisherName,
IF(TRIM(a.browser) != '', TRIM(a.browser), '') as browser,
IF(TRIM(a.os) != '', TRIM(a.os), '') as os,
IF(TRIM(a.platform) != '', TRIM(a.platform), '') as platform,
IF(TRIM(a.version) != '', TRIM(a.version), '') as version,
IF(TRIM(a.plugin) != '', TRIM(a.plugin), '') as plugin,
IF(TRIM(a.vendorVersion) != '', TRIM(a.vendorVersion), '') as vendorVersion,
IF(a.timezoneOffset IS NOT NULL, TRY_CAST(a.timezoneOffset AS VARCHAR), '') as timezoneOffset,
IF(TRIM(a.deviceType) != '', TRIM(a.deviceType), '') as deviceType,
IF(TRIM(a.clientDeviceId) != '', TRIM(a.clientDeviceId), '') as clientDeviceId,
IF(TRIM(a.remoteAddress) != '', TRIM(a.remoteAddress), '') as remoteAddress,
IF(TRIM(b.altMediaId) != '', TRIM(b.altMediaId), '') as altMediaId,
IF(TRIM(b.originMediaId) != '', TRIM(b.originMediaId), '') as originMediaId,
IF(TRIM(a.registrationId) != '', TRIM(a.registrationId), '') as registrationId,
IF(TRIM(a.demo2) != '', TRIM(a.demo2), '') as demo2,
IF(TRIM(a.demo3) != '', TRIM(a.demo3), '') as demo3,
IF(TRIM(a.connectionType) != '', TRIM(a.connectionType), '') as connectionType,
IF(TRIM(a.streamingType) != '', TRIM(a.streamingType), '') as streamingType,
IF(TRIM(a.longitude) != '', TRIM(a.longitude), IF(TRIM(c.longitude) != '', TRIM(c.longitude), '')) as longitude,
IF(TRIM(a.latitude) != '', TRIM(a.latitude), IF(TRIM(c.latitude) != '', TRIM(c.latitude), '')) as latitude,
b.firstEvent,
b.firstEventTime,
b.lastEvent,
b.lastEventTime,
IF(TRIM(b.channel) != '', TRIM(b.channel), '') as channel,
'' AS channelGroup,
IF(TRIM(b.programId) != '', TRIM(b.programId), '') as programId,
IF(TRIM(b.programName) != '', TRIM(b.programName), '') as programName,
IF(TRIM(b.seriesId) != '', TRIM(b.seriesId), '') as seriesId,
IF(TRIM(b.seriesName) != '', TRIM(b.seriesName), '') as seriesName,
IF(TRIM(b.episodeId) != '', TRIM(b.episodeId), '') as episodeId,
IF(TRIM(b.episodeName) != '', TRIM(b.episodeName), '') as episodeName,
IF(TRIM(b.mediaId) != '', TRIM(b.mediaId), '') as mediaId,
'vod' as mediaType,
IF(b.mediaDuration IS NOT NULL, b.mediaDuration, 0) AS mediaDuration,
IF(TRIM(b.genre) != '', TRIM(b.genre), '') as genre,
'' AS startTime,
'' AS endTime,
b.recordCount,
b.loadCount,
b.beginCount,
b.progressCount,
b.completeCount,
b.adBeginCount,
b.adCompleteCount,
b.streamStart,
b.secondsViewed,
b.adSecondsViewed AS adSecondsViewed,
IF(TRIM(b.telProgrammeId) != '', TRIM(b.telProgrammeId), '') as telProgrammeId,
IF(TRIM(b.telUniqueProgrammeId) != '', TRIM(b.telUniqueProgrammeId), '') as telUniqueProgrammeId,
IF(TRIM(a.postCode) != '', TRIM(a.postCode), IF(TRIM(c.postalCode) != '', TRIM(c.postalCode), '')) AS postCode,
IF(TRIM(a.usr1) != '', TRIM(a.usr1), '') as usr1,
IF(TRIM(a.usr2) != '', TRIM(a.usr2), '') as usr2,
IF(TRIM(a.usr3) != '', TRIM(a.usr3), '') as usr3,
b.viewVersion AS viewVersion,
b.date AS firstEventDate,
b.hour AS firstEventHour,
IF(TRIM(a.viewingMarket) != '', TRIM(a.viewingMarket), IF(TRIM(c.viewingMarket) != '', TRIM(c.viewingMarket), '')) AS viewingMarket,
'' as homeMarket,
IF(TRIM(c.ipOrg) != '', TRIM(c.ipOrg), '') AS ipOrg,
IF(TRIM(c.ipIsp) != '', TRIM(c.ipIsp), '') AS ipIsp,
IF(TRIM(b.mainGenre) != '', TRIM(b.mainGenre), '') AS mainGenre,
IF(b.mediaDuration IS NOT NULL, b.mediaDuration, 0) AS vpmDuration,
'' AS liveEpochFrom,
'' AS liveEpochTo,
IF(TRIM(b.classification) != '', TRIM(b.classification), '') AS classification
FROM
vod_ved_20211003 as b
LEFT JOIN
vod_smd_20211003 as a
ON
LOWER(b.sessionId) = LOWER(a.sessionId) and LOWER(b.mediaId) = LOWER(a.mediaId)
LEFT JOIN
vod_dmd_20211003 as c
ON
LOWER(b.deviceId) = LOWER(c.deviceId)
WHERE
b.sessionId NOT IN (
SELECT
distinct(sessionId)
FROM
vod_ved_20211003
GROUP BY
sessionId
HAVING
sum(secondsViewed + adSecondsViewed) > 86400
)
```
### Ahana - 29 nodes
```
Query 20211004_014938_00005_pm5uz, RUNNING, 29 nodes
Splits: 9,453 total, 8,493 done (89.84%)
0:23 [30.2M rows, 2.73GB] [1.31M rows/s, 122MB/s]
Query aborted by user
```
### Athena

## Notes
1. Spots instance support is important
2. Query retry on the server, particularly when spot instances are replaced
3. We can roughly run 12 spots r5.4xlarge instances for around 5K AUD
4. Is there multi-AZ support?
###### tags: `ahana` `prestodb
`