# GST test
#
# Preconditions:
#   Set device to all. If you need to run rvs only on a subset of GPUs, run rvs with the -g
#   option, collect the GPU IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify
#   all the GPU IDs separated by white space (e.g.: device: 50599 3245)
#   Set parallel execution to true (the action runs on all selected GPUs at the same time)
#   Set matrix_size_a, matrix_size_b and matrix_size_c to 4096 (for reference, the matrix size
#   recommended for Vega 10 cards is 5760 and for Vega 20 it is 8640)
#   Set run count to 1 (each test will run once)
#   Set copy_matrix to false (the matrices will be copied to GPUs only once)
#
# Run test with:
#   cd bin
#   sudo ./rvs -c conf/gst_1.conf -d 3
#
# Expected result:
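#   The test on each GPU passes (TRUE) if the GPU achieves the configured
#   target_stress (2000 gflops in the action below) and then sustains it
#   for the rest of the test. The duration key is set to 7000, but
#   hot_calls takes precedence over it.
#   If ramp_interval, tolerance or max_violations are not set in the action,
#   the gst module defaults decide the ramp-up time and how many Gflops
#   violations are allowed.
#   FALSE otherwise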

actions:
- name: action_1
  # Devices to test: "all", or a white-space separated list of GPU IDs.
  device: all
  # RVS module that executes this action.
  module: gst
  # true: run on all selected GPUs at the same time; false: one GPU at a time.
  parallel: true
  # Number of times the test is repeated.
  count: 1
  # Test duration (presumably milliseconds - confirm against the RVS docs).
  # hot_calls below takes precedence over this value.
  duration: 7000
  # Number of inner "hot call" iterations used to load the GPU.
  # A value of 5000 is reported to run for roughly 30 minutes or more.
  hot_calls: 20000
  # true: regenerate the matrices before every BLAS operation;
  # false: generate them only once, at the start of the test.
  copy_matrix: false
  # Gflops target; at the end of the test the module checks whether it was reached.
  target_stress: 2000
  # Size of matrix a.
  matrix_size_a: 4096
  # Size of matrix b.
  matrix_size_b: 4096
  # Size of matrix c.
  matrix_size_c: 4096
  # BLAS operation to run: sgemm is the single-precision GEMM.
  ops_type: sgemm
  # Transpose flag for matrix A, can be 0 or 1.
  transa: 0
  # Transpose flag for matrix B, can be 0 or 1.
  transb: 0
  # GEMM scalar alpha.
  alpha: 1
  # GEMM scalar beta.
  beta: 1
  # Leading dimension of matrix A.
  lda: 4096
  # Leading dimension of matrix B.
  ldb: 4096
  # Leading dimension of matrix C.
  ldc: 4096