commit 178983f48ca114f3b3822c39c828f192ccc2e06c
Author: Leszek Koltunski <leszek@koltunski.pl>
Date:   Wed Oct 26 02:31:11 2022 +0200

    Remove the requirement that not-postprocessed children in the render scene must be in the first bucket.
    Seriously simplify renderChildren().

diff --git a/src/main/java/org/distorted/library/effectqueue/EffectQueuePostprocess.java b/src/main/java/org/distorted/library/effectqueue/EffectQueuePostprocess.java
index 15fde8b..78118dc 100644
--- a/src/main/java/org/distorted/library/effectqueue/EffectQueuePostprocess.java
+++ b/src/main/java/org/distorted/library/effectqueue/EffectQueuePostprocess.java
@@ -165,7 +165,7 @@ public class EffectQueuePostprocess extends EffectQueue
 
   public boolean getRenderDirectly()
     {
-    return mNumEffects > 0 && ((PostprocessEffect) mEffects[0]).getRenderDirectly();
+    return mNumEffects>0 && ((PostprocessEffect) mEffects[0]).getRenderDirectly();
     }
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/main/java/org/distorted/library/main/InternalChildrenList.java b/src/main/java/org/distorted/library/main/InternalChildrenList.java
index b90dae7..96fcf10 100644
--- a/src/main/java/org/distorted/library/main/InternalChildrenList.java
+++ b/src/main/java/org/distorted/library/main/InternalChildrenList.java
@@ -99,24 +99,21 @@ class InternalChildrenList implements InternalMaster.Slave
 // Can make this logarithmic but the typical number of children is very small anyway.
 //
 // We want to keep same buckets next to each other, while avoiding changes in order of the children
-// (if possible!) We want to keep bucket=0 (i.e. the non-postprocessed children) at the beginning.
+// (if possible!)
+// 2022/10/25: bucket 0 (i.e. the non-postprocessed children) is no longer kept at the front -
+// that is not needed anymore, given the fixes to renderChildren().
 
   private void addSortingByBuckets(DistortedNode newChild)
     {
     int i;
     long bucket = newChild.getBucket();
-    boolean sameBucket = false;
+    boolean thisSame,lastSame = false;
 
     for(i=0; i<mNumChildren; i++)
       {
-      if( mChildren.get(i).getBucket() == bucket )
-        {
-        sameBucket=true;
-        }
-      else if( sameBucket || bucket==0 )
-        {
-        break;
-        }
+      thisSame= (mChildren.get(i).getBucket()==bucket);
+      if( lastSame && !thisSame ) break;
+      lastSame = thisSame;
       }
 
     mChildren.add(i,newChild);
diff --git a/src/main/java/org/distorted/library/main/InternalOutputSurface.java b/src/main/java/org/distorted/library/main/InternalOutputSurface.java
index fe659a9..f145f4f 100644
--- a/src/main/java/org/distorted/library/main/InternalOutputSurface.java
+++ b/src/main/java/org/distorted/library/main/InternalOutputSurface.java
@@ -54,6 +54,7 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
 
   // Global buffers used for postprocessing
   private final static DistortedFramebuffer[] mBuffer= new DistortedFramebuffer[EffectQuality.LENGTH];
+  private final boolean[] mBufferInitialized;
 
   float mDistance, mNear, mMipmap;
   float[] mProjectionMatrix;
@@ -105,6 +106,8 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
                           // round of create(), but before we start rendering.
                           // Create an empty FBO and Time here so that setAsOutput() is always safe to call.
 
+    mBufferInitialized = new boolean[EffectQuality.LENGTH];
+
     allocateStuffDependantOnNumFBOS();
     createProjection();
     }
@@ -218,41 +221,6 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
         }
     }
 
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// The postprocessing buffers mBuffer[] are generally speaking too large (there's just one static
-// set of them) so before we use them for output, we need to adjust the Viewport as if they were
-// smaller. That takes care of outputting pixels to them. When we use them as input, we have to
-// adjust the texture coords - see the get{Width|Height}Correction functions.
-//
-// Also, adjust the Buffers so their Projection is the same like the surface we are supposed to be
-// rendering to.
-
-  private static void clonePostprocessingViewportAndProjection(InternalOutputSurface surface, InternalOutputSurface from)
-    {
-    if( surface.mWidth != from.mWidth || surface.mHeight != from.mHeight ||
-        surface.mFOV   != from.mFOV   || surface.mNear   != from.mNear    )
-      {
-      surface.mWidth  = (int)(from.mWidth *surface.mMipmap);
-      surface.mHeight = (int)(from.mHeight*surface.mMipmap);
-      surface.mFOV    = from.mFOV;
-      surface.mNear   = from.mNear;  // Near plane is independent of the mipmap level
-
-      surface.createProjection();
-
-      int maxw = Math.max(surface.mWidth , surface.mRealWidth );
-      int maxh = Math.max(surface.mHeight, surface.mRealHeight);
-
-      if (maxw > surface.mRealWidth || maxh > surface.mRealHeight)
-        {
-        surface.mRealWidth = maxw;
-        surface.mRealHeight = maxh;
-
-        surface.recreate();
-        surface.create();
-        }
-      }
-    }
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
   private int blitWithDepth(long currTime, InternalOutputSurface buffer, int fbo)
@@ -373,6 +341,124 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
     mCurrFBO = fbo;
     }
 
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Render all children from the current bucket to the buffer, apply the postprocessing once to the
+// whole buffer (queue.postprocess) and merge the result into 'this' (oitBuild or blitWithDepth,
+// depending on the type of rendering).
+
+  private int accumulateAndBlit(EffectQueuePostprocess queue, InternalChildrenList children, DistortedFramebuffer buffer,
+                                int begIndex, int endIndex, boolean isFinal, long time, int fbo, boolean oit )
+    {
+    int numRenders = 0;
+
+    for(int j=begIndex; j<endIndex; j++)
+       {
+       DistortedNode node = children.getChild(j);
+
+       if( node.getSurface().setAsInput() )
+         {
+         buffer.setAsOutput();
+         numRenders += queue.preprocess( buffer, node, buffer.mDistance, buffer.mMipmap, buffer.mProjectionMatrix );
+         }
+       }
+    numRenders += queue.postprocess(buffer);
+
+    if( oit )
+      {
+      numRenders += oitBuild(time, buffer, fbo);
+      GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
+      buffer.clearBuffer(fbo);
+      }
+    else
+      {
+      numRenders += blitWithDepth(time, buffer, fbo);
+      if( !isFinal ) buffer.clearBuffer(fbo);
+      }
+
+    return numRenders;
+    }
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  private int renderChildToThisOrToBuffer(DistortedNode child, DistortedFramebuffer buffer, long time, boolean oit, boolean toThis)
+    {
+    int numRenders;
+
+    if( toThis )
+      {
+      setAsOutput(time);
+
+      if( oit )
+        {
+        numRenders = child.drawOIT(time, this);
+        GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
+        }
+      else
+        {
+        numRenders = child.draw(time, this);
+        }
+      }
+    else
+      {
+      buffer.setAsOutput(time);
+      numRenders = child.drawNoBlend(time, buffer);
+      }
+
+    return numRenders;
+    }
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// The postprocessing buffers mBuffer[] are generally speaking too large (there's just one static
+// set of them) so before we use them for output, we need to adjust the Viewport as if they were
+// smaller. That takes care of outputting pixels to them. When we use them as input, we have to
+// adjust the texture coords - see the get{Width|Height}Correction functions.
+//
+// Also, adjust the Buffers so their Projection is the same as that of the surface we are supposed
+// to be rendering to.
+
+  private void clonePostprocessingViewportAndProjection(InternalOutputSurface surface, InternalOutputSurface from)
+    {
+    if( surface.mWidth != from.mWidth || surface.mHeight != from.mHeight ||
+        surface.mFOV   != from.mFOV   || surface.mNear   != from.mNear    )
+      {
+      surface.mWidth  = (int)(from.mWidth *surface.mMipmap);
+      surface.mHeight = (int)(from.mHeight*surface.mMipmap);
+      surface.mFOV    = from.mFOV;
+      surface.mNear   = from.mNear;  // Near plane is independent of the mipmap level
+
+      surface.createProjection();
+
+      int maxw = Math.max(surface.mWidth , surface.mRealWidth );
+      int maxh = Math.max(surface.mHeight, surface.mRealHeight);
+
+      if (maxw > surface.mRealWidth || maxh > surface.mRealHeight)
+        {
+        surface.mRealWidth = maxw;
+        surface.mRealHeight = maxh;
+
+        surface.recreate();
+        surface.create();
+        }
+      }
+    }
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  private DistortedFramebuffer initializeBuffer(EffectQueuePostprocess queue, int fbo )
+    {
+    int currQuality = queue.getQuality();
+    if( mBuffer[currQuality]==null ) createPostprocessingBuffers(currQuality, mWidth, mHeight, mNear);
+    mBuffer[currQuality].setCurrFBO(fbo);
+
+    if( !mBufferInitialized[currQuality] )
+      {
+      mBufferInitialized[currQuality] = true;
+      clonePostprocessingViewportAndProjection(mBuffer[currQuality],this);
+      }
+
+    return mBuffer[currQuality];
+    }
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Render all children, one by one. If there are no postprocessing effects, just render to THIS.
 // Otherwise, render to a buffer and on each change of Postprocessing Bucket, apply the postprocessing
@@ -386,15 +472,12 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
     DistortedFramebuffer buffer=null;
     EffectQueuePostprocess lastQueue=null, currQueue;
     long lastBucket=0, currBucket;
-    boolean renderDirectly=false;
+    boolean toThis=false;
 
     setCurrFBO(fbo);
     if( numChildren==0 ) setAsOutput(time);
-
-    if( oit && numChildren>0 )
-      {
-      oitClear(this);
-      }
+    if( oit && numChildren>0 ) oitClear(this);
+    for(int i=0; i<EffectQuality.LENGTH; i++) mBufferInitialized[i]=false;
 
     for(int i=0; i<numChildren; i++)
       {
@@ -402,120 +485,22 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
       currQueue = (EffectQueuePostprocess)child.getEffects().getQueues()[3];
       currBucket= currQueue.getID();
 
-      if( currBucket==0 )
+      if( currBucket!=0 && lastBucket!=currBucket )
         {
-        setAsOutput(time);
-
-        if( oit )
-          {
-          numRenders += child.drawOIT(time, this);
-          GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
-          }
-        else
-          {
-          numRenders += child.draw(time, this);
-          }
+        buffer = initializeBuffer(currQueue,fbo);
+        if( lastBucket!=0 ) numRenders += accumulateAndBlit(lastQueue,children,buffer,bucketChange,i,false,time,fbo,oit);
+        bucketChange= i;
+        toThis = currQueue.getRenderDirectly();
         }
-      else
-        {
-        int currQuality = currQueue.getQuality();
-
-        if( mBuffer[currQuality]==null ) createPostprocessingBuffers(currQuality, mWidth, mHeight, mNear);
-        mBuffer[currQuality].setCurrFBO(fbo);
-
-        if( lastBucket!=currBucket )
-          {
-          if( lastBucket==0 )
-            {
-            clonePostprocessingViewportAndProjection(mBuffer[currQuality],this);
-            }
-          else
-            {
-            for(int j=bucketChange; j<i; j++)
-              {
-              DistortedNode node = children.getChild(j);
-
-              if( node.getSurface().setAsInput() )
-                {
-                buffer.setAsOutput();
-                numRenders += lastQueue.preprocess( buffer, node, buffer.mDistance, buffer.mMipmap, buffer.mProjectionMatrix );
-                }
-              }
-            numRenders += lastQueue.postprocess(buffer);
-
-            if( oit )
-              {
-              numRenders += oitBuild(time, buffer, fbo);
-              GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
-              }
-            else
-              {
-              numRenders += blitWithDepth(time, buffer, fbo);
-              }
-            buffer.clearBuffer(fbo);
-            }
-
-          buffer= mBuffer[currQuality];
-          bucketChange= i;
-          renderDirectly = currQueue.getRenderDirectly();
-          }
-
-        if( renderDirectly )
-          {
-          setAsOutput(time);
-
-          if( oit )
-            {
-            numRenders += child.drawOIT(time, this);
-            GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
-            }
-          else
-            {
-            numRenders += child.draw(time, this);
-            }
-          }
-        else
-          {
-          buffer.setAsOutput(time);
-          child.drawNoBlend(time, buffer);
-          }
-
-        if( i==numChildren-1 )
-          {
-          for(int j=bucketChange; j<numChildren; j++)
-            {
-            DistortedNode node = children.getChild(j);
-
-            if( node.getSurface().setAsInput() )
-              {
-              buffer.setAsOutput();
-              numRenders += currQueue.preprocess( buffer, node, buffer.mDistance, buffer.mMipmap, buffer.mProjectionMatrix );
-              }
-            }
-          numRenders += currQueue.postprocess(buffer);
-
-          if( oit )
-            {
-            numRenders += oitBuild(time, buffer, fbo);
-            GLES31.glMemoryBarrier(GLES31.GL_SHADER_STORAGE_BARRIER_BIT | GLES31.GL_ATOMIC_COUNTER_BARRIER_BIT);
-            buffer.clearBuffer(fbo);
-            }
-          else
-            {
-            numRenders += blitWithDepth(time, buffer,fbo);
-            }
-          }
-        } // end else (postprocessed child)
+      numRenders += renderChildToThisOrToBuffer(child,buffer,time,oit,currBucket==0 || toThis);
+      if( currBucket!=0 && i==numChildren-1 ) numRenders += accumulateAndBlit(currQueue,children,buffer,bucketChange,numChildren,true,time,fbo,oit);
 
       lastQueue = currQueue;
       lastBucket= currBucket;
-      } // end main for loop
-
-    if( oit && numChildren>0 )
-      {
-      numRenders += oitRender(time, fbo);  // merge the OIT linked list
       }
 
+    if( oit && numChildren>0 ) numRenders += oitRender(time, fbo);  // merge the OIT linked list
+
     return numRenders;
     }
 
@@ -979,4 +964,4 @@ public abstract class InternalOutputSurface extends InternalSurface implements I
     {
     return mHeight;
     }
-}
+}
\ No newline at end of file
