#!/usr/bin/env python
#
# 20160920
# Wirawan Purwanto
#
"""Back-of-the-envelope estimators for HPL (High-Performance Linpack) runs."""


def est_hpl_timing(N, nprocs, proc_gflops, eff=0.8):
    """Estimate the time needed for an HPL calculation on an (N x N) problem.

    Parameters:
      N           -- linear dimension of the matrix (must be > 0).
      nprocs      -- number of processor cores (must be >= 1).
      proc_gflops -- GFLOPS rating of a single core (must be > 0).
      eff         -- fraction of the aggregate rating assumed to be
                     achieved, in the interval (0, 1].

    Returns a 4-tuple ``(est, num_gflop, tot_proc_gflops, proc_mem_gb)``:
      est             -- estimated time, i.e. ``num_gflop`` divided by the
                         aggregate GFLOPS and by ``eff`` (seconds, given
                         that the rate is in GFLOP per second).
      num_gflop       -- total operation count, in units of 1e9 operations.
      tot_proc_gflops -- aggregate rating, ``nprocs * proc_gflops``.
      proc_mem_gb     -- matrix storage per core in units of 1e9 bytes,
                         counting 8 bytes per matrix element.

    Raises AssertionError if any argument is outside the ranges above.
    NOTE: these checks are `assert` statements and are therefore skipped
    when Python runs with -O.
    """
    assert N > 0
    assert nprocs >= 1
    assert proc_gflops > 0
    assert 0.0 < eff <= 1.0

    N = float(N)

    # Number of floating point operations.
    # From HPL code, estimated to be
    # 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve.
    lu_flop = 2 * N**3 / 3 - 0.5 * N**2
    solve_flop = 2 * N**2
    num_gflop = (lu_flop + solve_flop) * 1e-9

    tot_proc_gflops = nprocs * proc_gflops
    est = num_gflop / tot_proc_gflops / eff

    # N*N elements of 8 bytes each, split evenly over all cores.
    proc_mem_gb = (N**2 * 1e-9 * 8) / nprocs

    return (est, num_gflop, tot_proc_gflops, proc_mem_gb)


def est_hpl_timing2(proc_mem_gb, nprocs, proc_gflops, eff=0.8):
    """Estimate the HPL time for a problem sized by the memory per core.

    The problem is specified by `proc_mem_gb` RAM per core (in GB),
    `nprocs` processor cores, each having `proc_gflops` GFLOPS.
    We assume the matrix is evenly distributed across processors in
    a square fashion (i.e. P == Q for tile definition).

    The matrix dimension N is derived by taking the square root of the
    number of 8-byte elements that fit in `proc_mem_gb`, scaling it by
    sqrt(nprocs), and truncating to a whole number.

    Returns a 4-tuple ``(est, num_gflop, tot_proc_gflops, N)`` where the
    first three items are those returned by `est_hpl_timing` and `N` is
    the derived matrix dimension (a float holding an integral value).

    Raises AssertionError (from `est_hpl_timing`) when the derived N is
    not positive or the other arguments are out of range.
    """
    from math import sqrt

    # Linear dimension of the square tile held by a single core.
    local_dim = sqrt(proc_mem_gb * 1e9 / 8)
    # Global dimension for a sqrt(nprocs) x sqrt(nprocs) process grid,
    # truncated to a whole number of rows/columns.
    N = float(int(local_dim * sqrt(nprocs)))

    # The per-core memory recomputed from the truncated N is not needed.
    (est, num_gflop, tot_proc_gflops, _) = \
        est_hpl_timing(N, nprocs, proc_gflops, eff)
    return est, num_gflop, tot_proc_gflops, N


def Test_64core_memscale(proc_mem_gb=(0.15, 0.25, 0.50, 0.780125,
                                      1.25, 2.00, 3.00),
                         proc_gflops=17.6,
                         eff=0.8):
    """[20160920]
    Test: keep at 64 cores, scale up memory and see how much time
    it takes.

    Prints a heading followed by one row per entry of `proc_mem_gb`
    (any iterable of per-core memory sizes in GB), with the columns
    N, mem/proc, nproc, time, numops_gf and proc_gf.

    The default for `proc_mem_gb` is a tuple rather than a list so that
    the default value cannot be modified between calls.

    Requires the `wpylib` package (imported here, not at module level).
    """
    # Project-local dependency; kept function-scoped so that the
    # estimators above remain importable without wpylib.
    from wpylib.text_tools import str_fmt_heading

    nproc = 64
    cols = ("N", "mem/proc", "nproc", "time", "numops_gf", "proc_gf")
    fmt = "%8d %8.3f %5d %6.0f %10.2f %10.2f"
    # Presumably derives a string-only heading format with the same
    # column layout as `fmt` -- confirm against wpylib.text_tools.
    hfmt = str_fmt_heading(fmt)

    print(hfmt % cols)
    for pm1 in proc_mem_gb:
        (est_t, num_gflop, tot_proc_gflops, N) = \
            est_hpl_timing2(pm1, nproc, proc_gflops, eff)
        print(fmt % (N, pm1, nproc, est_t, num_gflop, tot_proc_gflops))