#!/usr/bin/env python
#
# 20160920
# Wirawan Purwanto
#

def est_hpl_timing(N, nprocs, proc_gflops, eff=0.8):
  """Estimate how long an HPL run takes on an (N x N) problem.

  Arguments:
    N            -- matrix dimension; must be positive.
    nprocs       -- number of processor cores; must be at least 1.
    proc_gflops  -- GFLOPS rating of a single core; must be positive.
    eff          -- efficiency factor in (0, 1] used to derate the
                    aggregate rate (default 0.8).

  These preconditions are enforced with `assert` statements, so they are
  skipped when Python runs with optimization (-O) enabled.

  Returns the tuple (est, num_gflop, tot_proc_gflops, proc_mem_gb):
    est              -- estimated time: num_gflop / tot_proc_gflops / eff
                        (seconds, when the rates are GFLOP per second).
    num_gflop        -- operation count of the run, in units of 1e9 flops.
    tot_proc_gflops  -- aggregate rate, nprocs * proc_gflops.
    proc_mem_gb      -- per-core share of the N*N matrix at 8 bytes per
                        element, in units of 1e9 bytes.
  """
  assert N > 0
  assert nprocs >= 1
  assert proc_gflops > 0
  assert 0.0 < eff <= 1.0

  n = float(N)
  n_sq = n**2

  # Operation count, as estimated in the HPL code:
  #    2/3 N^3 - 1/2 N^2 flops for the LU factorization,
  #    2 N^2 flops for the solve.
  lu_flops = 2 * n**3 / 3 - 0.5 * n_sq
  solve_flops = 2 * n_sq
  num_gflop = (lu_flops + solve_flops) * 1e-9

  tot_proc_gflops = nprocs * proc_gflops
  est = num_gflop / tot_proc_gflops / eff

  # Matrix storage (8 bytes per element) divided evenly among the cores.
  proc_mem_gb = (n_sq * 1e-9 * 8) / nprocs

  return (est, num_gflop, tot_proc_gflops, proc_mem_gb)


def est_hpl_timing2(proc_mem_gb, nprocs, proc_gflops, eff=0.8):
  """Estimate the HPL run time for a problem sized by memory per core.

  Arguments:
    proc_mem_gb  -- RAM used per core, in GB (1e9 bytes).
    nprocs       -- number of processor cores.
    proc_gflops  -- GFLOPS rating of a single core.
    eff          -- efficiency factor forwarded to est_hpl_timing
                    (default 0.8).

  The matrix is assumed to be evenly distributed across processors in
  a square fashion (i.e. P == Q for tile definition), so the global
  dimension is the per-core dimension times sqrt(nprocs).

  Returns the tuple (est, num_gflop, tot_proc_flops, N), where the first
  three items come from est_hpl_timing and N is the derived matrix
  dimension, truncated to a whole number but returned as a float.
  """
  from math import sqrt

  # Each matrix element takes 8 bytes, so this is the element count per core.
  elems_per_proc = proc_mem_gb * 1e9 / 8
  local_dim = sqrt(elems_per_proc)

  # Scale the per-core dimension up to the full matrix; drop the fraction.
  N = float(int(local_dim * sqrt(nprocs)))

  # The per-core memory reported back for the truncated N is not needed here.
  (est, num_gflop, tot_proc_flops, _mem_for_n) = \
    est_hpl_timing(N, nprocs, proc_gflops, eff)
  return est, num_gflop, tot_proc_flops, N


def Test_64core_memscale(proc_mem_gb=(0.15, 0.25, 0.50, 0.780125, 1.25, 2.00, 3.00),
                         proc_gflops=17.6, eff=0.8):
  """[20160920]
  Test: keep at 64 cores, scale up memory and see how much time it takes.

  Arguments:
    proc_mem_gb  -- iterable of per-core memory sizes (in GB) to tabulate.
                    The default is an immutable tuple so that it cannot be
                    altered between calls.
    proc_gflops  -- GFLOPS rating of a single core (default 17.6).
    eff          -- efficiency factor passed to est_hpl_timing2
                    (default 0.8).

  Prints a heading line followed by one row per memory size with the
  columns: N, mem/proc, nproc, time, numops_gf, proc_gf.
  Returns nothing.
  """
  # NOTE(review): str_fmt_heading presumably turns the numeric row format
  # into a matching string format for the column titles -- confirm in wpylib.
  from wpylib.text_tools import str_fmt_heading

  nproc = 64
  cols = ("N", "mem/proc", "nproc", "time", "numops_gf", "proc_gf")
  fmt  = "%8d  %8.3f  %5d  %6.0f  %10.2f %10.2f"
  hfmt = str_fmt_heading(fmt)
  print(hfmt % cols)
  for pm1 in proc_mem_gb:
    # N comes back as a whole-valued float; the %8d field accepts it.
    (est_t, num_gflop, tot_proc_gflops, N) = \
      est_hpl_timing2(pm1, nproc, proc_gflops, eff)
    print(fmt % (N, pm1, nproc, est_t, num_gflop, tot_proc_gflops))