# Training
## Forward
# Forward pass with bias over the floating-point data types. The post-ops
# list has two entries: an empty one (no post-ops) and a sum+relu chain.
--dir=FWD_B
--dt=f32,bf16,f16
--attr-post-ops=,sum:0.25+relu:0.5
--batch=shapes_ci_gpu
# Reset post-ops to empty so the value set above does not apply to the
# sections that follow.
--attr-post-ops=

## Backward
# Backward-by-data and backward-by-weights-with-bias over the same
# floating-point data types as the forward section; no attributes are set.
--dir=BWD_D,BWD_WB
--dt=f32,bf16,f16
--batch=shapes_ci_gpu

## FP math mode
# f32 problems in all three directions, run once per fpmath mode listed
# (tf32, bf16, f16).
--dir=FWD_B,BWD_D,BWD_WB
--dt=f32
--attr-fpmath=tf32,bf16,f16
--batch=shapes_ci_gpu
# Reset the fpmath mode to empty so it does not apply to later sections.
--attr-fpmath=

# int8
## Type combinations
# Forward inference with every listed src:wei:dst data type triple: s8 or u8
# source, s8 weights, and f32/bf16/f16/s32/s8/u8 destination.
# The --dt list is split over two lines with a trailing backslash.
--dir=FWD_I
--dt=s8:s8:f32,s8:s8:bf16,s8:s8:f16,s8:s8:s32,s8:s8:s8,s8:s8:u8, \
      u8:s8:f32,u8:s8:bf16,u8:s8:f16,u8:s8:s32,u8:s8:s8,u8:s8:u8
--batch=shapes_ci_gpu

## Attributes
# Forward inference on a single u8:s8:s32 configuration, exercising scales,
# post-ops and zero-points. Each attribute list starts with an empty entry,
# so the attribute-free case is included alongside the listed values.
--dir=FWD_I
--dt=u8:s8:s32
--attr-scales=,src:common:0.5+wei:common:2+dst:common:4
--attr-post-ops=,sum:0.5:3+add:f32:per_dim_1+mul:f16:per_tensor
--attr-zero-points=,src:common:2+dst:common:1,src:per_dim_1+wei:common:2+dst:per_dim_1
--batch=shapes_ci_gpu
# Reset all three attributes to empty so they do not apply to later sections.
--attr-scales=
--attr-post-ops=
--attr-zero-points=

# f64
# Double-precision problems in all three directions; no attributes are set.
--dir=FWD_B,BWD_D,BWD_WB
--dt=f64
--batch=shapes_ci_gpu

# f8
## Forward
# Forward pass with FWD_B and FWD_D for both f8 formats and one mixed
# f8_e5m2:f8_e4m3:f32 configuration. The post-ops list (split over several
# lines with trailing backslashes) starts with an empty entry, followed by
# sum+linear, binary add with f32/f8 operands, and per_dim_1 prelu.
--dir=FWD_B,FWD_D
--dt=f8_e5m2,f8_e4m3,f8_e5m2:f8_e4m3:f32
--attr-post-ops=, \
                sum:0.5+linear:2:1, \
                add:f32,add:f8_e5m2,add:f8_e4m3, \
                prelu:per_dim_1
--batch=shapes_ci_gpu
# Reset post-ops to empty so they do not apply to the cases that follow.
--attr-post-ops=

## Backward
# Backward-by-data and backward-by-weights-with-bias for the same f8 data
# type list; no attributes are set here.
--dir=BWD_D,BWD_WB
--dt=f8_e5m2,f8_e4m3,f8_e5m2:f8_e4m3:f32
--batch=shapes_ci_gpu
