#!/usr/bin/env python
#
# 20160920
# Wirawan Purwanto
#
"""Back-of-the-envelope estimators for HPL (High-Performance Linpack) runs.

The functions here turn a problem size (or a per-core memory budget) plus a
per-core GFLOPS rating into an estimated run time and operation count.
"""

from math import sqrt


def est_hpl_timing(N, nprocs, proc_gflops, eff=0.8):
    """Estimate the time needed for an HPL run on an (N x N) problem.

    The operation count follows the HPL convention:
    2/3 N^3 - 1/2 N^2 flops for the LU factorization plus 2 N^2 flops
    for the solve.

    Parameters:
      N            matrix order (must be > 0); converted to float internally.
      nprocs       number of processor cores (must be >= 1).
      proc_gflops  GFLOPS rating of a single core (must be > 0).
      eff          fraction of the aggregate rating assumed to be achieved,
                   in the interval (0, 1].

    Returns a 4-tuple `(est, num_gflop, tot_proc_gflops, proc_mem_gb)`:
      est              estimated run time, i.e. `num_gflop` divided by the
                       effective aggregate GFLOPS (seconds, if the rating
                       is in GFLOP per second).
      num_gflop        total operation count, in units of 1e9 flops.
      tot_proc_gflops  aggregate rating, `nprocs * proc_gflops`.
      proc_mem_gb      matrix storage per core in units of 1e9 bytes,
                       counting 8 bytes per matrix element.

    Raises AssertionError if an argument is outside the ranges above.
    """
    assert N > 0, "N must be positive"
    assert nprocs >= 1, "nprocs must be at least 1"
    assert proc_gflops > 0, "proc_gflops must be positive"
    assert 0.0 < eff <= 1.0, "eff must be in (0, 1]"

    # Work in floating point so that N**3 is not an integer power.
    N = float(N)
    # LU factorization (2/3 N^3 - 1/2 N^2) + solve (2 N^2), scaled to GFLOP.
    num_gflop = (2 * N**3 / 3 - 0.5 * N**2 + 2 * N**2) * 1e-9
    tot_proc_gflops = nprocs * proc_gflops
    est = num_gflop / tot_proc_gflops / eff
    # N^2 elements of 8 bytes each, spread evenly over all cores.
    proc_mem_gb = (N**2 * 1e-9 * 8) / nprocs

    return (est, num_gflop, tot_proc_gflops, proc_mem_gb)


def est_hpl_timing2(proc_mem_gb, nprocs, proc_gflops, eff=0.8):
    """Estimate the HPL run time for a given memory budget per core.

    The problem is specified by `proc_mem_gb` RAM per core (in GB),
    `nprocs` processor cores, each having `proc_gflops` GFLOPS.

    We assume the matrix is evenly distributed across processors in
    a square fashion (i.e. P == Q for tile definition).  The matrix order
    N is derived from the memory budget (8 bytes per element) and rounded
    down to a whole number.

    Returns a 4-tuple `(est, num_gflop, tot_proc_gflops, N)` where the
    first three items are those returned by `est_hpl_timing` and `N` is
    the derived matrix order, as a float holding an integral value.
    """
    # Side length of the square tile that fits in one core's memory.
    N1 = sqrt(proc_mem_gb * 1e9 / 8)
    # sqrt(nprocs) tiles along each dimension; truncate to an integer order.
    N = float(int(N1 * sqrt(nprocs)))
    timing = est_hpl_timing(N, nprocs, proc_gflops, eff)
    # Drop the recomputed memory-per-core figure; report N in its place.
    return timing[0], timing[1], timing[2], N


def Test_64core_memscale(proc_mem_gb=(0.15, 0.25, 0.50, 0.780125, 1.25, 2.00, 3.00),
                         proc_gflops=17.6, eff=0.8):
    """[20160920]
    Test: keep at 64 cores, scale up memory and see how much time it takes.

    Prints one table row per entry of `proc_mem_gb` (an iterable of
    per-core memory sizes in GB) with the derived matrix order, the
    estimated time, the operation count and the aggregate GFLOPS.
    The default is a tuple rather than a list so that the default object
    cannot be mutated between calls.
    """
    nproc = 64
    # Imported here so that the estimator functions above stay usable
    # when wpylib is not installed.
    from wpylib.text_tools import str_fmt_heading
    cols = ("N", "mem/proc", "nproc", "time", "numops_gf", "proc_gf")
    fmt = "%8d %8.3f %5d %6.0f %10.2f %10.2f"
    # NOTE(review): str_fmt_heading presumably derives a same-width heading
    # format from the numeric row format -- confirm against wpylib.
    hfmt = str_fmt_heading(fmt)
    print(hfmt % cols)
    for pm1 in proc_mem_gb:
        (est_t, num_gflop, tot_proc_gflops, N) = \
            est_hpl_timing2(pm1, nproc, proc_gflops, eff)
        # N is a float with an integral value; %8d prints it as an integer.
        print(fmt % (N, pm1, nproc, est_t, num_gflop, tot_proc_gflops))