numactl --interleave=all ./testing_sgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_sgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F
=======================================================================
  100   100     ---   (  ---  )      0.94 (   0.00)     ---
 1000  1000     ---   (  ---  )    104.85 (   0.01)     ---
   10    10     ---   (  ---  )      0.01 (   0.00)     ---
   20    20     ---   (  ---  )      0.04 (   0.00)     ---
   30    30     ---   (  ---  )      0.12 (   0.00)     ---
   40    40     ---   (  ---  )      1.07 (   0.00)     ---
   50    50     ---   (  ---  )      1.48 (   0.00)     ---
   60    60     ---   (  ---  )      2.22 (   0.00)     ---
   70    70     ---   (  ---  )      2.53 (   0.00)     ---
   80    80     ---   (  ---  )      1.46 (   0.00)     ---
   90    90     ---   (  ---  )      1.76 (   0.00)     ---
  100   100     ---   (  ---  )      1.53 (   0.00)     ---
  200   200     ---   (  ---  )      6.48 (   0.00)     ---
  300   300     ---   (  ---  )     14.22 (   0.00)     ---
  400   400     ---   (  ---  )     25.30 (   0.00)     ---
  500   500     ---   (  ---  )     36.48 (   0.00)     ---
  600   600     ---   (  ---  )     52.68 (   0.01)     ---
  700   700     ---   (  ---  )     64.74 (   0.01)     ---
  800   800     ---   (  ---  )     80.14 (   0.01)     ---
  900   900     ---   (  ---  )     97.49 (   0.01)     ---
 1000  1000     ---   (  ---  )    112.62 (   0.01)     ---
 2000  2000     ---   (  ---  )    316.39 (   0.03)     ---
 3000  3000     ---   (  ---  )    568.03 (   0.06)     ---
 4000  4000     ---   (  ---  )    728.14 (   0.12)     ---
 5000  5000     ---   (  ---  )    934.81 (   0.18)     ---
 6000  6000     ---   (  ---  )   1048.53 (   0.27)     ---
 7000  7000     ---   (  ---  )   1116.13 (   0.41)     ---
 8000  8000     ---   (  ---  )   1386.17 (   0.49)     ---
 9000  9000     ---   (  ---  )   1487.87 (   0.65)     ---
10000 10000     ---   (  ---  )   1589.34 (   0.84)     ---
12000 12000     ---   (  ---  )   1726.17 (   1.33)     ---
14000 14000     ---   (  ---  )   1787.23 (   2.05)     ---
16000 16000     ---   (  ---  )   1873.62 (   2.92)     ---
18000 18000     ---   (  ---  )   1906.54 (   4.08)     ---
20000 20000     ---   (  ---  )   1936.86 (   5.51)     ---

numactl --interleave=all ./testing_sgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_sgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||Ax-b||_F/(N*||A||_F*||x||_F)
====================================================================================
  100   100     ---   (  ---  )      0.54 (   0.00)     ---
 1000  1000     ---   (  ---  )     94.77 (   0.01)     ---
   10    10     ---   (  ---  )      0.00 (   0.00)     ---
   20    20     ---   (  ---  )      0.01 (   0.00)     ---
   30    30     ---   (  ---  )      0.04 (   0.00)     ---
   40    40     ---   (  ---  )      0.10 (   0.00)     ---
   50    50     ---   (  ---  )      0.18 (   0.00)     ---
   60    60     ---   (  ---  )      0.27 (   0.00)     ---
   70    70     ---   (  ---  )      0.41 (   0.00)     ---
   80    80     ---   (  ---  )      0.60 (   0.00)     ---
   90    90     ---   (  ---  )      0.79 (   0.00)     ---
  100   100     ---   (  ---  )      0.83 (   0.00)     ---
  200   200     ---   (  ---  )      6.99 (   0.00)     ---
  300   300     ---   (  ---  )     14.65 (   0.00)     ---
  400   400     ---   (  ---  )     25.22 (   0.00)     ---
  500   500     ---   (  ---  )     29.15 (   0.01)     ---
  600   600     ---   (  ---  )     44.10 (   0.01)     ---
  700   700     ---   (  ---  )     55.32 (   0.01)     ---
  800   800     ---   (  ---  )     72.09 (   0.01)     ---
  900   900     ---   (  ---  )     85.90 (   0.01)     ---
 1000  1000     ---   (  ---  )    104.39 (   0.01)     ---
 2000  2000     ---   (  ---  )    304.72 (   0.04)     ---
 3000  3000     ---   (  ---  )    494.96 (   0.07)     ---
 4000  4000     ---   (  ---  )    670.82 (   0.13)     ---
 5000  5000     ---   (  ---  )    870.41 (   0.19)     ---
 6000  6000     ---   (  ---  )   1014.38 (   0.28)     ---
 7000  7000     ---   (  ---  )   1086.34 (   0.42)     ---
 8000  8000     ---   (  ---  )   1324.67 (   0.52)     ---
 9000  9000     ---   (  ---  )   1421.01 (   0.68)     ---
10000 10000     ---   (  ---  )   1531.29 (   0.87)     ---
12000 12000     ---   (  ---  )   1674.26 (   1.38)     ---
14000 14000     ---   (  ---  )   1755.17 (   2.08)     ---
16000 16000     ---   (  ---  )   1855.08 (   2.94)     ---
18000 18000     ---   (  ---  )   1891.68 (   4.11)     ---
20000 20000     ---   (  ---  )   1985.87 (   5.37)     ---
