commit ed841982f08459462ab4fbbda059700d2ef8dee0
Author: Leszek Koltunski <leszek@distorted.org>
Date:   Thu Jan 26 21:24:58 2017 +0000

    BLUR: implemented caching of the BLUR kernels.

diff --git a/src/main/java/org/distorted/library/EffectQueuePostprocess.java b/src/main/java/org/distorted/library/EffectQueuePostprocess.java
index 4a485da..abc63d4 100644
--- a/src/main/java/org/distorted/library/EffectQueuePostprocess.java
+++ b/src/main/java/org/distorted/library/EffectQueuePostprocess.java
@@ -43,24 +43,6 @@ import java.nio.FloatBuffer;
 
 class EffectQueuePostprocess extends EffectQueue
   {
-  private static final float GAUSSIAN[] =   // G(0.00), G(0.03), G(0.06), ..., G(3.00), 0
-    {                                       // where G(x)= (1/(sqrt(2*PI))) * e^(-(x^2)/2). The last 0 terminates.
-    0.398948f, 0.398769f, 0.398231f, 0.397336f, 0.396086f, 0.394485f, 0.392537f, 0.390247f, 0.387622f, 0.384668f,
-    0.381393f, 0.377806f, 0.373916f, 0.369733f, 0.365268f, 0.360532f, 0.355538f, 0.350297f, 0.344823f, 0.339129f,
-    0.333229f, 0.327138f, 0.320868f, 0.314436f, 0.307856f, 0.301142f, 0.294309f, 0.287373f, 0.280348f, 0.273248f,
-    0.266089f, 0.258884f, 0.251648f, 0.244394f, 0.237135f, 0.229886f, 0.222657f, 0.215461f, 0.208311f, 0.201217f,
-    0.194189f, 0.187238f, 0.180374f, 0.173605f, 0.166940f, 0.160386f, 0.153951f, 0.147641f, 0.141462f, 0.135420f,
-    0.129520f, 0.123765f, 0.118159f, 0.112706f, 0.107408f, 0.102266f, 0.097284f, 0.092461f, 0.087797f, 0.083294f,
-    0.078951f, 0.074767f, 0.070741f, 0.066872f, 0.063158f, 0.059596f, 0.056184f, 0.052920f, 0.049801f, 0.046823f,
-    0.043984f, 0.041280f, 0.038707f, 0.036262f, 0.033941f, 0.031740f, 0.029655f, 0.027682f, 0.025817f, 0.024056f,
-    0.022395f, 0.020830f, 0.019357f, 0.017971f, 0.016670f, 0.015450f, 0.014305f, 0.013234f, 0.012232f, 0.011295f,
-    0.010421f, 0.009606f, 0.008847f, 0.008140f, 0.007483f, 0.006873f, 0.006307f, 0.005782f, 0.005296f, 0.004847f,
-    0.004432f, 0.000000f
-    };
-  private static final int NUM_GAUSSIAN = GAUSSIAN.length-2;
-
-  private static final int MAX_BLUR = 50;
-
   private static final int POS_DATA_SIZE= 2; // Post Program: size of the position data in elements
   private static final int TEX_DATA_SIZE= 2; // Post Program: size of the texture coordinate data in elements.
 
@@ -92,10 +74,36 @@ class EffectQueuePostprocess extends EffectQueue
   private static float[] mTmpMatrix = new float[16];
 
   // BLUR effect
+  private static final float GAUSSIAN[] =   // G(0.00), G(0.03), G(0.06), ..., G(3.00), 0
+    {                                       // where G(x)= (1/(sqrt(2*PI))) * e^(-(x^2)/2). The last 0 terminates.
+    0.398948f, 0.398769f, 0.398231f, 0.397336f, 0.396086f, 0.394485f, 0.392537f, 0.390247f, 0.387622f, 0.384668f,
+    0.381393f, 0.377806f, 0.373916f, 0.369733f, 0.365268f, 0.360532f, 0.355538f, 0.350297f, 0.344823f, 0.339129f,
+    0.333229f, 0.327138f, 0.320868f, 0.314436f, 0.307856f, 0.301142f, 0.294309f, 0.287373f, 0.280348f, 0.273248f,
+    0.266089f, 0.258884f, 0.251648f, 0.244394f, 0.237135f, 0.229886f, 0.222657f, 0.215461f, 0.208311f, 0.201217f,
+    0.194189f, 0.187238f, 0.180374f, 0.173605f, 0.166940f, 0.160386f, 0.153951f, 0.147641f, 0.141462f, 0.135420f,
+    0.129520f, 0.123765f, 0.118159f, 0.112706f, 0.107408f, 0.102266f, 0.097284f, 0.092461f, 0.087797f, 0.083294f,
+    0.078951f, 0.074767f, 0.070741f, 0.066872f, 0.063158f, 0.059596f, 0.056184f, 0.052920f, 0.049801f, 0.046823f,
+    0.043984f, 0.041280f, 0.038707f, 0.036262f, 0.033941f, 0.031740f, 0.029655f, 0.027682f, 0.025817f, 0.024056f,
+    0.022395f, 0.020830f, 0.019357f, 0.017971f, 0.016670f, 0.015450f, 0.014305f, 0.013234f, 0.012232f, 0.011295f,
+    0.010421f, 0.009606f, 0.008847f, 0.008140f, 0.007483f, 0.006873f, 0.006307f, 0.005782f, 0.005296f, 0.004847f,
+    0.004432f, 0.000000f
+    };
+  private static final int NUM_GAUSSIAN = GAUSSIAN.length-2;
+
+  // Support blurs consisting of the present pixel and up to MAX_BLUR-1 pixels in each direction (radius 0...MAX_BLUR-1)
+  private static final int MAX_BLUR = 50;
+
+  // The (linearly-sampled) Gaussian Blur kernels are of the size k0=1, k1=2, k2=2, k3=3, k4=3, k5=4, k6=4,...
+  // i.e. k(i)=floor((i+3)/2), where the 'i' in k(i) means 'blur taking into account the present pixel and 'i'
+  // pixels in all 4 directions'. The kernel for radius N starts at index sum(i=0...N-1, floor((i+3)/2)) =
+  // N + floor(N*N/4), so room for MAX_BLUR of them (radius 0...MAX_BLUR-1) is MAX_BLUR + floor(MAX_BLUR*MAX_BLUR/4).
+  private static float[] weightsCache = new float[MAX_BLUR + MAX_BLUR*MAX_BLUR/4];
+  private static float[] offsetsCache = new float[MAX_BLUR + MAX_BLUR*MAX_BLUR/4];
+
   private static DistortedProgram mBlurProgram;
   private static int mRadiusH,mOffsetsH,mWeightsH,mObjDH,mMVPMatrixH;
-  private float[] mWeights = new float[MAX_BLUR];
-  private float[] mOffsets = new float[MAX_BLUR];
+  private static float[] mWeights = new float[MAX_BLUR];
+  private static float[] mOffsets = new float[MAX_BLUR];
   // another effect ....
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -171,56 +179,52 @@ class EffectQueuePostprocess extends EffectQueue
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-  private int computeGaussianKernel(int radius)
+  private void computeGaussianKernel(int radius)   // on first use of 'radius', computes its kernel into the caches
     {
-    if( radius>=MAX_BLUR ) radius = MAX_BLUR-1;
+    int offset = radius + radius*radius/4;          // index where the kernel for this radius starts in both caches
 
-    float P = (float)NUM_GAUSSIAN / (radius>3 ? radius:3);
-    float x = 0.0f;
-    mWeights[0] = GAUSSIAN[0];
-    mOffsets[0] = 0.0f;
-    float sum = GAUSSIAN[0];
-    int j;
-    float z;
-
-    for(int i=1; i<=radius; i++)
+    if( weightsCache[offset]==0.0f )                // a normalized centre weight is always >0, so 0 means 'not cached yet'
       {
-      x += P;
-      j = (int)x;
-      z = x-j;
+      float z, x= 0.0f, P= (float)NUM_GAUSSIAN / (radius>3 ? radius:3);
+      mWeights[0] = GAUSSIAN[0];
+      float sum   = GAUSSIAN[0];
+      int j;
 
-      mWeights[i] = (1-z)*GAUSSIAN[j] + z*GAUSSIAN[j+1];
-      sum += 2*mWeights[i];
-      }
+      for(int i=1; i<=radius; i++)
+        {
+        x += P;
+        j = (int)x;
+        z = x-j;
 
-    for(int i=0; i<=radius; i++)
-      {
-      mWeights[i] /= sum;
-      }
+        mWeights[i] = (1-z)*GAUSSIAN[j] + z*GAUSSIAN[j+1];   // linear interpolation between two table entries
+        sum += 2*mWeights[i];                                // every non-centre pixel occurs on both sides
+        }
 
-    // squash the weights and offsets for linear sampling
-    int numloops = radius/2;
+      for(int i=0; i<=radius; i++) mWeights[i] /= sum;
 
-    for(int i=0; i<numloops; i++)
-      {
-      mOffsets[i+1] = mWeights[2*i+1]*(2*i+1) + mWeights[2*i+2]*(2*i+2);
-      mWeights[i+1] = mWeights[2*i+1] + mWeights[2*i+2];
-      mOffsets[i+1] /= mWeights[i+1];
-      }
+      // squash the weights and offsets for linear sampling
+      int numloops = radius/2;
 
-    if( radius%2 == 1 )
-      {
-      int index = radius/2 +1;
-      mOffsets[index]=mOffsets[radius];
-      mWeights[index]=mWeights[radius];
-      radius = numloops+1;
-      }
-    else
-      {
-      radius = numloops;
-      }
+      weightsCache[offset] = mWeights[0];
+      offsetsCache[offset] = 0.0f;
+      // tap i+1 merges pixels 2i+1 and 2i+2: its weight is their sum, its offset their weight-averaged distance
+      for(int i=0; i<numloops; i++)
+        {
+        offsetsCache[offset+i+1] = mWeights[2*i+1]*(2*i+1) + mWeights[2*i+2]*(2*i+2);
+        weightsCache[offset+i+1] = mWeights[2*i+1] + mWeights[2*i+2];
+        offsetsCache[offset+i+1]/= weightsCache[offset+i+1];
+        }
+      // odd radius: the outermost pixel has no partner, so it stays a tap of its own
+      if( radius%2 == 1 )
+        {
+        int index = offset + radius/2 +1;
+        offsetsCache[index]=radius;   // exactly 'radius' pixels out; mOffsets is never filled in this method, do not read it
+        weightsCache[index]=mWeights[radius];
+        }
 
-    return radius;
+
+      android.util.Log.e("post", "computed kernel size "+radius+" put it into cache="+offset);
+      }
     }
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -231,12 +235,15 @@ class EffectQueuePostprocess extends EffectQueue
     {
     mBlurProgram.useProgram();
 
-    int radius = computeGaussianKernel( (int)mUniforms[0] );
+    int radius = (int)mUniforms[0];
+    if( radius>=MAX_BLUR ) radius = MAX_BLUR-1;
+    computeGaussianKernel(radius);
 
-    float adjust = 1/h;
-    for(int i=0; i<=radius; i++) mOffsets[i] *= adjust;
+    int offset = radius + radius*radius/4;
+    radius = (radius+1)/2;
+    for(int i=0; i<=radius; i++) mOffsets[i] = offsetsCache[offset+i]/h;
 
-    GLES30.glUniform1fv( mWeightsH, radius+1, mWeights,0);
+    GLES30.glUniform1fv( mWeightsH, radius+1, weightsCache,offset);
     GLES30.glUniform1i( mRadiusH, radius);
     GLES30.glUniform2f( mObjDH , w, h );
 
@@ -259,8 +266,7 @@ class EffectQueuePostprocess extends EffectQueue
     df.setAsOutput();
     GLES30.glViewport(0, 0, df.mWidth, df.mHeight);
 
-    adjust = h/w;
-    for(int i=0; i<=radius; i++) mOffsets[i] *= adjust;
+    for(int i=0; i<=radius; i++) mOffsets[i] = offsetsCache[offset+i]/w;
 
     // vertical blur
     GLES30.glUniform1fv( mOffsetsH ,radius+1, mOffsets,0);
