From 050e2414ef901f3fd234cf7ad7532fbcb3fa6550 Mon Sep 17 00:00:00 2001
From: Wirawan Purwanto <wirawan0@gmail.com>
Date: Thu, 26 Jan 2012 10:46:55 -0500
Subject: [PATCH] * Added faster LDdec1 for 1-based indexing.   Tested through
 N=1000.

---
 math/symmetrix-array-index.PY | 144 ++++++++++++++++++++++++++++------
 1 file changed, 121 insertions(+), 23 deletions(-)

diff --git a/math/symmetrix-array-index.PY b/math/symmetrix-array-index.PY
index f659dd8..6767064 100644
--- a/math/symmetrix-array-index.PY
+++ b/math/symmetrix-array-index.PY
@@ -43,7 +43,10 @@ Note:
 """
 
 """
-Update 20120124: the jskip formula can be written in similar fashion to
+Update 20120124:
+
+Faster LDdec is possible.
+The jskip formula can be written in similar fashion to
 the 'UD' array format, as shown below.
 
 The endpoint of the array index is N(N+1)/2 .
@@ -59,9 +62,36 @@ N(N+1)/2 - (N+1-j)(N+2-j) / 2 =
   = (j-1)N + (j-2)(j-1)/2
   >>> the same formula as before.
 
+We can now make a similar analysis as in UD case to make a j_guess
+formula:
+
+  j_guess = int( N+1 - sqrt((N(N+1)/2 - ij) * 2) )
+
+Note that:
+  ij = N(N+1)/2 - (N+1-j)(N+2-j)/2 + i-j+1
+
+Now focus on this expression:
+
+  xj := ( N(N+1)/2 - ij) * 2
+      = (N+1-j)*(N+2-j) - 2*(i+1-j)
+
+So the maximum value of xj (for i=j) is:
+
+  xj_max = (N+1-j)*(N+2-j) - 2
+         = (N+1-j)**2 + (N+1-j) - 2
+
+  xj_min = (N+1-j)*(N+2-j) - 2*(N+1-j)
+         = (N+1-j)**2 - (N+1-j)
 
+Again, these values satisfy the inequality
 
+  (N-j)**2 < xj_min <= xj_max < (N+2-j)**2
 
+Thus translates to
+  N-j <= int(sqrt(xj)) <= N+1-j
+
+or
+  j <= j_guess <= j+1
 """
 
 import numpy
@@ -115,10 +145,30 @@ def LD(i,j,N):
     jj = i
 
   iskip = ii - jj # + 1
-  #jskip = (jj-1)*N - (jj-2)*(jj-1)/2  # for 1-based
   jskip = (jj)*N - (jj-1)*(jj)//2  # for 0-based
   return iskip + jskip
 
+def LD1(i,j,N):
+  """python equivalent of gafqmc_LD on nwchem-gafqmc integral
+  dumper module.
+  Translates a lower-diagonal index (ii >= jj) to linear index
+  0, 1, 2, 3, ...
+  This follows Fortran convention; thus 1 <= i <= N, and so also j.
+
+  """
+  # iskip is row traversal, jskip is column traversal.
+  # (iskip+jskip) is the final array index.
+  if i >= j:
+    ii = i
+    jj = j
+  else:
+    ii = j
+    jj = i
+
+  iskip = ii - jj + 1
+  jskip = (jj-1)*N - (jj-2)*(jj-1)//2  # for 1-based
+  return iskip + jskip
+
 
 def LDdec(ij, N):
   """Back-translates linear index 0, 1, 2, 3, ... to a lower-diagonal
@@ -137,6 +187,38 @@ def LDdec(ij, N):
 
   raise ValueError, "LDdec(ij=%d,N=%d): invalid index ij" % (ij,N)
 
+def LDdec1(ij, N):
+  """Back-translates linear index 1, 2, 3, ... to a lower-diagonal
+  index pair (ii >= jj).
+  This is not optimal, but it avoids storing an auxiliary array
+  that is easily computable. Plus, this function is supposed to
+  be called rarely.
+  """
+  jskip = 0
+  for j in xrange(1, N+1):
+    if jskip + (N + 1 - j) >= ij:
+      jj = j
+      ii = ij - jskip + j - 1
+      return (ii,jj)
+    jskip += (N + 1 - j)
+
+  raise ValueError, "LDdec1(ij=%d,N=%d): invalid index ij" % (ij,N)
+
+def LDdec1_v2(ij, N):
+  """Version 2, avoiding loop, but adding sqrt() function
+  """
+  from numpy import sqrt
+  LDsize = N*(N+1) // 2
+  j = N + 1 - int( sqrt((LDsize - ij) * 2) )
+  jskip = (j-1)*N - (j-2)*(j-1)//2
+  if ij > jskip:
+    pass # correct already
+  else:
+    j = j - 1
+    jskip = (j-1)*N - (j-2)*(j-1)//2
+  i = ij - jskip + j - 1
+  return (i,j)
+
 # end reference implementation
 
 def test_LD_enc_dec(N):
@@ -169,21 +251,6 @@ def test_LD_enc_dec_diagonal(N):
         -1, jj2)
       #                      ^^ distance from end of array
 
-
-
-
-
-"""
-Faster LDdec is possible.
-
-Consider:
-  jskip = (jj)*N - (jj-1)*(jj)/2  # for 0-based
-        = jj*(2*N - (jj-1)) / 2
-
-
-
-"""
-
 def Hack2_LD_enc_dec(N):
   """Simple test to check LD encoding and decoding correctness.
   For python-style indexing (0 <= i < N, similarly for j)."""
@@ -194,12 +261,43 @@ def Hack2_LD_enc_dec(N):
       ij = LD(i,j,N)
       (ii,jj) = LDdec(ij,N)
       jj2 = ( sqrt(((LDsize) - ij) * 2) )
+      j_guess = int(N + 1 - jj2)  # for some reason this is the one that works for 0-based index
+      ok1 = (jj <= j_guess)
+      ok2 = (j_guess <= jj+1)
+      ok = ((jj <= j_guess) and (j_guess <= jj+1))
       #print "%3d %3d | %6d | %3d %3d" % (i,j, ij, ii,jj)
-      print "%3d %3d | %6d   %6d  | %3d %3d // %8.4f" % (
-        i,j,
-        ij, (LDsize-ij) * 2,
-        ii,jj,
-        jj2)
+      if not ok:
+        # Verified OK empirically till N=1000.
+        print "%3d %3d | %6d   %6d  | %3d %3d // %8.4f %3d  %c %d %d" % (
+          i,j,
+          ij, (LDsize-ij) * 2,
+          ii,jj,
+          jj2, j_guess, ("." if ok else "X"), ok1,ok2)
+
+def Hack3_LD_enc_dec(N, print_all=False):
+  """Simple test to check LD encoding and decoding correctness.
+  For Fortran-style indexing (1 <= i <= N, similarly for j)."""
+  from numpy import sqrt
+  LDsize = N * (N+1) / 2
+  for j in xrange(1,N+1):
+    for i in xrange(j,N+1):
+      ij = LD1(i,j,N)
+      (ii,jj) = LDdec1(ij,N)
+      (ii,jj) = LDdec1_v2(ij,N)
+      jj2 = ( sqrt(((LDsize) - ij) * 2) )
+      j_guess = N + 1 - int(jj2)
+      OK = (ii==i and jj==j)
+      ok1 = (jj <= j_guess)
+      ok2 = (j_guess <= jj+1)
+      ok = ((jj <= j_guess) and (j_guess <= jj+1))
+      #print "%3d %3d | %6d | %3d %3d" % (i,j, ij, ii,jj)
+      if print_all or not (OK and ok):
+        # Verified OK empirically till N=1000.
+        print "%3d %3d | %6d   %6d  | %3d %3d  %c  // %8.4f %3d  %c %d %d" % (
+          i,j,
+          ij, (LDsize-ij) * 2,
+          ii,jj, ("." if OK else "X"),
+          jj2, j_guess, ("." if ok else "X"), ok1,ok2)
 
 
 
@@ -391,7 +489,7 @@ def test_UD_enc_dec1(N):
 
 def hack1_UD_enc_dec1(N):
   """Simple test to check UD encoding and decoding correctness.
-  For python-style indexing (0 <= i < N, similarly for j)."""
+  For Fortran-style indexing (1 <= i <= N, similarly for j)."""
   from numpy import sqrt
   ok = True
   for j in xrange(1,N+1):