# icedyn solid mechanics (ridging/rafting) module
<!--
-gpu=mem:unified:managedalloc
AJAZZ AKP153
-->
Regions:
- functions tab_Xd_Yd() -- 1 loop each
- BLOCK_A (*)
- BLOCK_B (fully depends on: tab_Xd_Yd())
- BLOCK_C - 1 loop
- BLOCK_D (*)
- BLOCK_E - 1 loop
- function rdgrft_prep() -- 11 loops, 3 WHEREs
- function ice_var_roundoff() -- 10 WHEREs
- function rdgrft_shift() -- 2 loops
- function ice_dyn_1d2d() (fully depends on: tab_Xd_Yd())
- function ice_var_agg() -- 1 loop and 5 WHEREs
```
Baseline for eORCA12/16 nodes MN5-ACC:
region avg_us max_us min_us count
---------------------- ------ ------ ------ -----
ice_dyn_rdgrft_BLOCK_A 1128 1128 1128 1 100 220 +
ice_dyn_rdgrft_BLOCK_B 67 80 60 10 10 100
ice_dyn_rdgrft_BLOCK_C 9 15 2 10 20 20 +
ice_dyn_rdgrft_PREP_1 114 150 81 10 20 300
ice_dyn_rdgrft_BLOCK_D 82 82 82 1 12 250
ice_dyn_rdgrft_1D2D_1 345 396 284 10 30 700
ice_dyn_rdgrft_1D2D_2 182 321 99 10 15 700
ice_dyn_rdgrft_PREP_2 68 255 41 173 15 300
ice_dyn_rdgrft_SHIFT 148 618 93 173 25 1000
ice_dyn_rdgrft_AGG 7049 7049 7049 1 600 130 +
```
```
!!! BLOCK_A: counter based serial loop (npti = npti + 1)
at_i(:,:) = SUM( a_i, dim=3 )
!
npti = 0 ; nptidx(:) = 0
ipti = 0 ; iptidx(:) = 0
DO jj = 1, jpj
DO ji = 1, jpi
IF ( at_i(ji,jj) > epsi10 ) THEN
npti = npti + 1
nptidx( npti ) = (jj - 1) * jpi + ji
ENDIF
END DO
END DO
```
```
!!! BLOCK_A: accelerator parallel version (Fortran syntax might be wrong)
INTEGER, ALLOCATABLE, DIMENSION(:) :: scan_idxflags
INTEGER, ALLOCATABLE, DIMENSION(:) :: scan_idxoffsets
INTEGER :: scan_idx = 0
INTEGER :: scan_offset = 0
ALLOCATE scan_idxflags(size(nptidx))
ALLOCATE scan_idxoffsets(size(nptidx)+1)
scan_idxflags(:)=0
scan_idxoffsets(:)=0
!@acc loop collapse(2)
DO jj = 1, jpj
DO ji = 1, jpi
IF ( at_i(ji,jj) > epsi10 ) THEN
scan_idx = (jj - 1) * jpi + ji
scan_idxflags(scan_idx) = 1
ENDIF
END DO
END DO
call thrust_scan_iface(scan_idxflags, scan_idxoffsets)
!@acc loop collapse(2)
DO jj = 1, jpj
DO ji = 1, jpi
scan_idx = (jj - 1) * jpi + ji
IF (scan_idxflags(scan_idx))
scan_offset = scan_idxoffsets(scan_idx)
nptidx( scan_offset ) = scan_idx
ENDIF
END DO
END DO
npti = scan_idxoffsets(size(scan_idxoffsets))
DEALLOCATE scan_idxflags
DEALLOCATE scan_idxoffsets
```
```
ji1 = 1
ji2 = npti
jpti = npti
```
```
!!! BLOCK_B
CALL tab_2d_1d( jpti, nptidx(ji1:ji2), zdelt (ji1:ji2) , delta_i )
CALL tab_2d_1d( jpti, nptidx(ji1:ji2), zdivu (ji1:ji2) , divu_i) ! zdivu is used as a work array here (no change in divu_i)
CALL tab_3d_2d( jpti, nptidx(ji1:ji2), a_i_2d (ji1:ji2,1:jpl), a_i )
CALL tab_3d_2d( jpti, nptidx(ji1:ji2), v_i_2d (ji1:ji2,1:jpl), v_i )
CALL tab_2d_1d( jpti, nptidx(ji1:ji2), ato_i_1d(ji1:ji2) , ato_i )
```
```
!!! BLOCK_C
DO ji = ji1, ji2
closing_net(ji) = rn_csrdg * 0.5_wp * ( zdelt(ji) - ABS( zdivu(ji) ) ) - MIN( zdivu(ji), 0._wp )
IF( zdivu(ji) < 0._wp ) closing_net(ji) = MAX( closing_net(ji), -zdivu(ji) )
opning(ji) = closing_net(ji) + zdivu(ji)
END DO
```
```
CALL rdgrft_prep( ji1, ji2, a_i_2d, v_i_2d, ato_i_1d, closing_net )
```
```
!!! BLOCK_D: counter based serial loop (ipti = ipti + 1)
DO ji = 1, npti
IF( SUM( apartf(ji,1:jpl) ) > 0._wp .AND. closing_gross(ji) > 0._wp ) THEN
ipti = ipti + 1
iptidx (ipti) = nptidx (ji)
a_i_2d (ipti,:) = a_i_2d (ji,:)
v_i_2d (ipti,:) = v_i_2d (ji,:)
ato_i_1d (ipti) = ato_i_1d (ji)
closing_net(ipti) = closing_net(ji)
zdivu (ipti) = zdivu (ji)
opning (ipti) = opning (ji)
ENDIF
END DO
```
```
!!! BLOCK_D: accelerator parallel version (Fortran syntax might be wrong)
ALLOCATE scan_idxflags(npti)
ALLOCATE scan_idxoffsets(npti+1)
scan_idxflags(:)=0
scan_idxoffsets(:)=0
!@acc loop
DO ji = 1, npti
IF( SUM( apartf(ji,1:jpl) ) > 0._wp .AND. closing_gross(ji) > 0._wp ) THEN
scan_idx = ji
scan_idxflags(scan_idx) = 1
ENDIF
END DO
call thrust_scan_iface(scan_idxflags, scan_idxoffsets)
!@acc loop
DO ji = 1, npti
scan_idx = ji
IF (scan_idxflags(scan_idx))
scan_offset = scan_idxoffsets(scan_idx)
iptidx (scan_offset) = nptidx (ji)
a_i_2d (scan_offset,:) = a_i_2d (ji,:)
v_i_2d (scan_offset,:) = v_i_2d (ji,:)
ato_i_1d (scan_offset) = ato_i_1d (ji)
closing_net(scan_offset) = closing_net(ji)
zdivu (scan_offset) = zdivu (ji)
opning (scan_offset) = opning (ji)
ENDIF
END DO
ipti = scan_idxoffsets(size(scan_idxoffsets))
DEALLOCATE scan_idxflags
DEALLOCATE scan_idxoffsets
```
```
! Reconsider counters and indexes after BLOCK_D
nptidx(:) = iptidx(:)
npti = ipti
ji1 = 1
ji2 = npti
jpti = npti
```
```
! Lot of tab_XD_YD() calls inside
CALL ice_dyn_1d2d( 1, ji1, ji2, jpti )
```
```
DO WHILE( iterate_ridging > 0 .AND. iter < jp_itermax )
CALL rdgrft_prep( ji1, ji2, a_i_2d, v_i_2d, ato_i_1d, closing_net )
CALL rdgrft_shift( ji1, ji2, jpti )
!!! BLOCK_E
iterate_ridging = 0
DO ji = ji1, ji2
zfac = 1._wp - ( ato_i_1d(ji) + SUM( a_i_2d(ji,:) ) )
IF( ABS( zfac ) < epsi10 ) THEN
closing_net(ji) = 0._wp
opning (ji) = 0._wp
ato_i_1d (ji) = MAX( 0._wp, 1._wp - SUM( a_i_2d(ji,:) ) )
ELSE
iterate_ridging = 1
zdivu (ji) = zfac * r1_rdtice
closing_net(ji) = MAX( 0._wp, -zdivu(ji) )
opning (ji) = MAX( 0._wp, zdivu(ji) )
ENDIF
END DO
iter = iter + 1
END DO
```
```
! Lot of tab_XD_YD() calls inside
CALL ice_dyn_1d2d( 2, ji1, ji2, jpti )
```
```
CALL ice_var_agg( 1 )
```