I found a Git repository that instances grass inside the camera frustum using a compute shader and DrawMeshInstancedIndirect. I am not able to figure out how to avoid instancing grass on certain parts of the plane, or, put another way, how to make the instanced grass editable.
Below is the main grass rendering script: '''
using System;
using System.Collections.Generic;
using Unity.Collections;
using Unity.Jobs;
using UnityEngine;
using UnityEngine.Profiling;
/// <summary>
/// Draws one grass blade per entry of <see cref="allGrassPos"/> with a single
/// Graphics.DrawMeshInstancedIndirect call.
/// Positions are binned into XZ cells; each frame the cells are frustum-culled on the CPU,
/// then the compute shader culls the instances of the surviving cells and appends the
/// visible instance IDs to an append buffer that the grass material reads.
/// Only positions present in <see cref="allGrassPos"/> are drawn, so grass is removed from an
/// area by leaving those positions out of the list (call <see cref="MarkGrassPosDirty"/> after
/// editing the list in place).
/// </summary>
[ExecuteAlways]
public class InstancedIndirectGrassRenderer : MonoBehaviour
{
    [Header("Settings")]
    public float drawDistance = 125;//this setting will affect performance a lot!
    public Material instanceMaterial;

    [Header("Internal")]
    public ComputeShader cullingComputeShader;

    // World-space position of every grass blade; user should update this list using C#.
    // A change of Count is detected automatically; for same-count edits call MarkGrassPosDirty().
    [NonSerialized]
    public List<Vector3> allGrassPos = new List<Vector3>();

    //=====================================================
    [HideInInspector]
    public static InstancedIndirectGrassRenderer instance;// global ref to this script

    // must match numthreads.x of kernel 0 in the culling compute shader
    private const int CullingThreadGroupSize = 64;

    private int cellCountX = -1;
    private int cellCountZ = -1;
    private int dispatchCount = -1;

    //smaller the number, CPU needs more time, but GPU is faster
    private float cellSizeX = 10; //unity unit (m)
    private float cellSizeZ = 10; //unity unit (m)

    private int instanceCountCache = -1;
    private Mesh cachedGrassMesh;

    private ComputeBuffer allInstancesPosWSBuffer;
    private ComputeBuffer visibleInstancesOnlyPosWSIDBuffer;
    private ComputeBuffer argsBuffer;

    private List<Vector3>[] cellPosWSsList; //for binning: binning will put each posWS into correct cell
    private int[] cellStartOffsets;         //index of each cell's first instance inside allInstancesPosWSBuffer
    private Bounds[] cellBoundsWS;          //world-space bound of each cell, rebuilt together with the buffers
    private float minX, minY, minZ, maxX, maxY, maxZ;
    private List<int> visibleCellIDList = new List<int>();
    private Plane[] cameraFrustumPlanes = new Plane[6];

    bool shouldBatchDispatch = true;
    //=====================================================

    private void OnEnable()
    {
        instance = this; // assign global ref using this script
    }

    /// <summary>
    /// Forces the GPU buffers to be rebuilt from <see cref="allGrassPos"/> on the next LateUpdate.
    /// Needed after editing positions in place, because the automatic check only compares Count.
    /// </summary>
    public void MarkGrassPosDirty()
    {
        instanceCountCache = -1;
    }

    /// <summary>
    /// Per frame: rebuild buffers if needed, cull cells on the CPU, cull instances on the GPU,
    /// then issue the single indirect draw call.
    /// </summary>
    void LateUpdate()
    {
        visibleCellIDList.Clear();//fill in this cell ID list using CPU frustum culling first
        dispatchCount = 0;

        // nothing can be rendered without these; avoid throwing every frame in edit mode
        if (instanceMaterial == null || cullingComputeShader == null)
        {
            return;
        }

        // recreate all buffers if needed
        UpdateAllInstanceTransformBufferIfNeeded();
        if (allInstancesPosWSBuffer == null)
        {
            return; // there is no grass to draw
        }

        Camera cam = Camera.main;
        if (cam == null)
        {
            return; // no camera tagged MainCamera
        }

        //=====================================================================================================
        // rough quick big cell frustum culling in CPU first
        //=====================================================================================================
        //https://docs.unity3d.com/ScriptReference/GeometryUtility.CalculateFrustumPlanes.html
        //https://docs.unity3d.com/ScriptReference/GeometryUtility.TestPlanesAABB.html
        float cameraOriginalFarPlane = cam.farClipPlane;
        cam.farClipPlane = drawDistance;//allow drawDistance control
        try
        {
            GeometryUtility.CalculateFrustumPlanes(cam, cameraFrustumPlanes);//Ordering: [0] = Left, [1] = Right, [2] = Down, [3] = Up, [4] = Near, [5] = Far
        }
        finally
        {
            cam.farClipPlane = cameraOriginalFarPlane;//revert far plane edit, even if the call above throws
        }

        //slow loop
        //TODO: (A)replace this forloop by a quadtree test?
        //TODO: (B)convert this forloop to job+burst? (UnityException: TestPlanesAABB can only be called from the main thread.)
        Profiler.BeginSample("CPU cell frustum culling (heavy)");
        for (int i = 0; i < cellBoundsWS.Length; i++)
        {
            if (GeometryUtility.TestPlanesAABB(cameraFrustumPlanes, cellBoundsWS[i]))
            {
                visibleCellIDList.Add(i);
            }
        }
        Profiler.EndSample();

        //=====================================================================================================
        // then loop though only visible cells, each visible cell dispatch GPU culling job once
        // at the end compute shader will fill all visible instance into visibleInstancesOnlyPosWSIDBuffer
        //=====================================================================================================
        Matrix4x4 vp = cam.projectionMatrix * cam.worldToCameraMatrix;

        visibleInstancesOnlyPosWSIDBuffer.SetCounterValue(0);

        //set once only
        cullingComputeShader.SetMatrix("_VPMatrix", vp);
        cullingComputeShader.SetFloat("_MaxDrawDistance", drawDistance);

        //dispatch per visible cell
        for (int i = 0; i < visibleCellIDList.Count; i++)
        {
            int targetCellFlattenID = visibleCellIDList[i];
            int jobLength = cellPosWSsList[targetCellFlattenID].Count;

            //============================================================================================
            //batch n dispatchs into 1 dispatch, if memory is continuous in allInstancesPosWSBuffer
            if (shouldBatchDispatch)
            {
                while ((i < visibleCellIDList.Count - 1) && //test this first to avoid out of bound access to visibleCellIDList
                       (visibleCellIDList[i + 1] == visibleCellIDList[i] + 1))
                {
                    //if memory is continuous, append them together into the same dispatch call
                    jobLength += cellPosWSsList[visibleCellIDList[i + 1]].Count;
                    i++;
                }
            }
            //============================================================================================

            if (jobLength == 0)
            {
                continue; // run of empty cells (e.g. an area with no grass): do not dispatch zero thread groups
            }

            //culling read data started at offseted pos, will start from cell's total offset in memory
            //(prefix sum computed once at rebuild instead of being re-summed per visible cell)
            cullingComputeShader.SetInt("_StartOffset", cellStartOffsets[targetCellFlattenID]);
            cullingComputeShader.Dispatch(0, Mathf.CeilToInt(jobLength / (float)CullingThreadGroupSize), 1, 1);
            dispatchCount++;
        }

        //====================================================================================
        // Final 1 big DrawMeshInstancedIndirect draw call
        //====================================================================================
        // GPU per instance culling finished, copy visible count to argsBuffer, to setup DrawMeshInstancedIndirect's draw amount
        // (byte offset 4 = args[1], the instance count)
        ComputeBuffer.CopyCount(visibleInstancesOnlyPosWSIDBuffer, argsBuffer, 4);

        // Render 1 big drawcall using DrawMeshInstancedIndirect
        Bounds renderBound = new Bounds();
        renderBound.SetMinMax(new Vector3(minX, minY, minZ), new Vector3(maxX, maxY, maxZ));//if camera frustum is not overlapping this bound, DrawMeshInstancedIndirect will not even render
        Graphics.DrawMeshInstancedIndirect(GetGrassMeshCache(), 0, instanceMaterial, renderBound, argsBuffer);
    }

    /// <summary>Debug overlay: culling statistics and the batching toggle.</summary>
    private void OnGUI()
    {
        GUI.contentColor = Color.black;
        GUI.Label(new Rect(200, 0, 400, 60),
            $"After CPU cell frustum culling,\n" +
            $"-Visible cell count = {visibleCellIDList.Count}/{cellCountX * cellCountZ}\n" +
            $"-Real compute dispatch count = {dispatchCount} (saved by batching = {visibleCellIDList.Count - dispatchCount})");

        shouldBatchDispatch = GUI.Toggle(new Rect(400, 400, 200, 100), shouldBatchDispatch, "shouldBatchDispatch");
    }

    void OnDisable()
    {
        //release all compute buffers
        ReleaseAllBuffers();

        // only clear the global ref if it still points at this component
        if (instance == this)
        {
            instance = null;
        }
    }

    /// <summary>Releases every compute buffer owned by this component and nulls the fields.</summary>
    void ReleaseAllBuffers()
    {
        allInstancesPosWSBuffer?.Release();
        allInstancesPosWSBuffer = null;

        visibleInstancesOnlyPosWSIDBuffer?.Release();
        visibleInstancesOnlyPosWSIDBuffer = null;

        argsBuffer?.Release();
        argsBuffer = null;
    }

    /// <summary>Returns the shared single-triangle grass mesh, creating it on first use.</summary>
    Mesh GetGrassMeshCache()
    {
        if (!cachedGrassMesh)
        {
            //if not exist, create a 3 vertices hardcode triangle grass mesh
            cachedGrassMesh = new Mesh();

            //single grass (vertices)
            Vector3[] verts = new Vector3[3];
            verts[0] = new Vector3(-0.25f, 0);
            verts[1] = new Vector3(+0.25f, 0);
            verts[2] = new Vector3(-0.0f, 1);

            //single grass (Triangle index)
            int[] trinagles = new int[3] { 2, 1, 0, }; //order to fit Cull Back in grass shader

            cachedGrassMesh.SetVertices(verts);
            cachedGrassMesh.SetTriangles(trinagles, 0);
        }

        return cachedGrassMesh;
    }

    /// <summary>
    /// Updates the per-frame material vectors, then rebuilds the cells and all GPU buffers when
    /// the instance count changed, a rebuild was requested, or a buffer is missing.
    /// With an empty or null position list every buffer is released and nothing is drawn.
    /// </summary>
    void UpdateAllInstanceTransformBufferIfNeeded()
    {
        //always update
        instanceMaterial.SetVector("_PivotPosWS", transform.position);
        instanceMaterial.SetVector("_BoundSize", new Vector2(transform.localScale.x, transform.localScale.z));

        int grassCount = allGrassPos != null ? allGrassPos.Count : 0;
        bool buffersReady = argsBuffer != null &&
                            allInstancesPosWSBuffer != null &&
                            visibleInstancesOnlyPosWSIDBuffer != null;

        //early exit if no need to update buffer (an empty list needs no buffers at all)
        if (instanceCountCache == grassCount && (grassCount == 0 || buffersReady))
        {
            return;
        }

        /////////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////////////////////////////////////////////////////////////////////
        Debug.Log("UpdateAllInstanceTransformBuffer (Slow)");

        ReleaseAllBuffers();

        if (grassCount == 0)
        {
            // a ComputeBuffer cannot be created with zero elements; keep everything released
            cellCountX = 0;
            cellCountZ = 0;
            cellPosWSsList = new List<Vector3>[0];
            cellStartOffsets = new int[0];
            cellBoundsWS = new Bounds[0];
            instanceCountCache = 0;
            return;
        }

        ///////////////////////////
        // allInstancesPosWSBuffer buffer
        ///////////////////////////
        allInstancesPosWSBuffer = new ComputeBuffer(grassCount, sizeof(float) * 3); //float3 posWS only, per grass
        visibleInstancesOnlyPosWSIDBuffer = new ComputeBuffer(grassCount, sizeof(uint), ComputeBufferType.Append); //uint only, per visible grass

        //find all instances's posWS bound min max (Y included so cells sit at the grass height)
        minX = minY = minZ = float.MaxValue;
        maxX = maxY = maxZ = float.MinValue;
        for (int i = 0; i < grassCount; i++)
        {
            Vector3 target = allGrassPos[i];
            minX = Mathf.Min(target.x, minX);
            minY = Mathf.Min(target.y, minY);
            minZ = Mathf.Min(target.z, minZ);
            maxX = Mathf.Max(target.x, maxX);
            maxY = Mathf.Max(target.y, maxY);
            maxZ = Mathf.Max(target.z, maxZ);
        }

        //decide cellCountX,Z here using min max
        //each cell is cellSizeX x cellSizeZ; at least one cell even when all positions share an x or z
        cellCountX = Mathf.Max(1, Mathf.CeilToInt((maxX - minX) / cellSizeX));
        cellCountZ = Mathf.Max(1, Mathf.CeilToInt((maxZ - minZ) / cellSizeZ));
        int cellCount = cellCountX * cellCountZ;

        //init per cell posWS list memory
        cellPosWSsList = new List<Vector3>[cellCount]; //flatten 2D array
        for (int i = 0; i < cellCount; i++)
        {
            cellPosWSsList[i] = new List<Vector3>();
        }

        //binning, put each posWS into the correct cell
        for (int i = 0; i < grassCount; i++)
        {
            Vector3 pos = allGrassPos[i];

            //find cellID
            int xID = Mathf.Min(cellCountX - 1, Mathf.FloorToInt(Mathf.InverseLerp(minX, maxX, pos.x) * cellCountX)); //use min to force within 0~[cellCountX-1]
            int zID = Mathf.Min(cellCountZ - 1, Mathf.FloorToInt(Mathf.InverseLerp(minZ, maxZ, pos.z) * cellCountZ)); //use min to force within 0~[cellCountZ-1]

            cellPosWSsList[xID + zID * cellCountX].Add(pos);
        }

        //per cell world-space bound; X uses the X extent and Z uses the Z extent
        float cellWidthX = (maxX - minX) / cellCountX;
        float cellWidthZ = (maxZ - minZ) / cellCountZ;
        float centerY = (minY + maxY) * 0.5f;
        Vector3 cellSizeWS = new Vector3(cellWidthX, maxY - minY, cellWidthZ);

        //combine to a flatten array for compute buffer, recording where each cell starts
        cellBoundsWS = new Bounds[cellCount];
        cellStartOffsets = new int[cellCount];
        Vector3[] allGrassPosWSSortedByCell = new Vector3[grassCount];
        int offset = 0;
        for (int i = 0; i < cellCount; i++)
        {
            Vector3 centerPosWS = new Vector3(
                minX + (i % cellCountX + 0.5f) * cellWidthX,
                centerY,
                minZ + (i / cellCountX + 0.5f) * cellWidthZ);
            cellBoundsWS[i] = new Bounds(centerPosWS, cellSizeWS);

            cellStartOffsets[i] = offset;
            cellPosWSsList[i].CopyTo(allGrassPosWSSortedByCell, offset);
            offset += cellPosWSsList[i].Count;
        }

        allInstancesPosWSBuffer.SetData(allGrassPosWSSortedByCell);
        instanceMaterial.SetBuffer("_AllInstancesTransformBuffer", allInstancesPosWSBuffer);
        instanceMaterial.SetBuffer("_VisibleInstanceOnlyTransformIDBuffer", visibleInstancesOnlyPosWSIDBuffer);

        ///////////////////////////
        // Indirect args buffer
        ///////////////////////////
        uint[] args = new uint[5] { 0, 0, 0, 0, 0 };
        argsBuffer = new ComputeBuffer(1, args.Length * sizeof(uint), ComputeBufferType.IndirectArguments);

        args[0] = (uint)GetGrassMeshCache().GetIndexCount(0);
        args[1] = (uint)grassCount; // overwritten every frame by ComputeBuffer.CopyCount in LateUpdate
        args[2] = (uint)GetGrassMeshCache().GetIndexStart(0);
        args[3] = (uint)GetGrassMeshCache().GetBaseVertex(0);
        args[4] = 0;

        argsBuffer.SetData(args);

        ///////////////////////////
        // Update Cache
        ///////////////////////////
        //update cache to prevent future no-op buffer update, which waste performance
        instanceCountCache = grassCount;

        //set buffer
        cullingComputeShader.SetBuffer(0, "_AllInstancesPosWSBuffer", allInstancesPosWSBuffer);
        cullingComputeShader.SetBuffer(0, "_VisibleInstancesOnlyPosWSIDBuffer", visibleInstancesOnlyPosWSIDBuffer);
    }
}
''' And this script updates the grass positions when needed: '''
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Example position provider: fills InstancedIndirectGrassRenderer.instance.allGrassPos with
/// <see cref="instanceCount"/> random world-space positions spread over this transform's XZ
/// extent, and pushes <see cref="drawDistance"/> to the renderer from its debug GUI.
/// Positions are regenerated only when <see cref="instanceCount"/> changes.
/// </summary>
[ExecuteAlways]
public class InstancedIndirectGrassPosDefine : MonoBehaviour
{
    [Range(1, 40000000)]
    public int instanceCount = 1000000;
    public float drawDistance = 125;

    // instance count the current positions were generated for; -1 = never generated
    private int cacheCount = -1;

    // Start is called before the first frame update
    void Start()
    {
        UpdatePosIfNeeded();
    }

    private void Update()
    {
        UpdatePosIfNeeded();
    }

    /// <summary>Debug sliders for instance count (whole millions) and draw distance (steps of 25).</summary>
    private void OnGUI()
    {
        GUI.Label(new Rect(300, 50, 200, 30), "Instance Count: " + instanceCount / 1000000 + "Million");
        instanceCount = Mathf.Max(1, (int)(GUI.HorizontalSlider(new Rect(300, 100, 200, 30), instanceCount / 1000000f, 1, 10)) * 1000000);

        GUI.Label(new Rect(300, 150, 200, 30), "Draw Distance: " + drawDistance);
        drawDistance = Mathf.Max(1, (int)(GUI.HorizontalSlider(new Rect(300, 200, 200, 30), drawDistance / 25f, 1, 8)) * 25);

        // the renderer may be disabled or not yet enabled; skip instead of throwing
        InstancedIndirectGrassRenderer grassRenderer = InstancedIndirectGrassRenderer.instance;
        if (grassRenderer != null)
        {
            grassRenderer.drawDistance = drawDistance;
        }
    }

    /// <summary>
    /// Regenerates all grass positions and hands them to the renderer when the requested
    /// instance count differs from the cached one. Does nothing (and retries on the next call)
    /// while no renderer instance is available.
    /// </summary>
    private void UpdatePosIfNeeded()
    {
        if (instanceCount == cacheCount)
        {
            return;
        }

        InstancedIndirectGrassRenderer grassRenderer = InstancedIndirectGrassRenderer.instance;
        if (grassRenderer == null)
        {
            return; // cacheCount stays untouched so generation is retried once a renderer exists
        }

        Debug.Log("UpdatePos (Slow)");

        //same seed to keep grass visual the same; the global random state is restored afterwards
        //so other users of UnityEngine.Random are not reseeded by this component
        UnityEngine.Random.State previousRandomState = UnityEngine.Random.state;
        UnityEngine.Random.InitState(123);

        //auto keep density the same
        float scale = Mathf.Sqrt((instanceCount / 4)) / 2f;
        transform.localScale = new Vector3(scale, transform.localScale.y, scale);

        // read the transform once instead of once per instance
        Vector3 worldScale = transform.lossyScale;
        Vector3 originWS = transform.position;

        //////////////////////////////////////////////////////////////////////////
        //can define any posWS in this section, random is just an example
        //(positions left out of this list are simply not rendered)
        //////////////////////////////////////////////////////////////////////////
        List<Vector3> positions = new List<Vector3>(instanceCount);
        for (int i = 0; i < instanceCount; i++)
        {
            // keep the x-then-z draw order so the seeded layout is unchanged
            float localX = UnityEngine.Random.Range(-1f, 1f) * worldScale.x;
            float localZ = UnityEngine.Random.Range(-1f, 1f) * worldScale.z;

            //transform to posWS in C#
            positions.Add(new Vector3(localX + originWS.x, originWS.y, localZ + originWS.z));
        }

        UnityEngine.Random.state = previousRandomState;

        //send all posWS to renderer
        grassRenderer.allGrassPos = positions;
        cacheCount = positions.Count;
    }
}
''' Compute shader used: '''
#pragma kernel CSMain

// C# side that builds and uploads the view-projection matrix:
//Matrix4x4 v = Camera.main.worldToCameraMatrix;
//Matrix4x4 p = Camera.main.projectionMatrix; //unity C# use opengl standard projection matrix
//cullingComputeShader.SetMatrix("_VPMatrix", p * v); //set from C#
float4x4 _VPMatrix;

// draw distance, set from C# together with _VPMatrix
float _MaxDrawDistance;

// index of the first instance this dispatch should read, set from C# before each dispatch
uint _StartOffset;

StructuredBuffer<float3> _AllInstancesPosWSBuffer; //will not change until instance count change
AppendStructuredBuffer<uint> _VisibleInstancesOnlyPosWSIDBuffer; //will set counter to 0 per frame, then fill in by this compute shader
// Per-instance culling kernel: one thread tests one grass position and appends its index
// to _VisibleInstancesOnlyPosWSIDBuffer when it is inside the (slightly enlarged) frustum
// and within _MaxDrawDistance.
// NOTE(review): there is no upper-bound guard, so when the dispatched instance count is not a
// multiple of 64 the trailing threads also test instances stored right after the requested
// range. A guard would need the range end passed in as an extra constant from C# - confirm
// whether that is wanted.
[numthreads(64,1,1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
    uint instanceID = id.x + _StartOffset;

    //posWS -> posCS
    float4 absPosCS = abs(mul(_VPMatrix, float4(_AllInstancesPosWSBuffer[instanceID], 1.0)));

    //do culling test in clip space, result is the same as doing test in NDC space.
    //prefer clip space here because doing culling test in clip space is faster than doing culling test in NDC, because we can skip 1 division.
    //the test is using OpenGL standard projection matrix, because all matrix from unity C# is OpenGL standard
    //if instance is inside camera frustum, and is within draw distance, we append it to _VisibleInstancesOnlyPosWSIDBuffer
    //y test allow 50% more threshold (hardcode for grass)
    //x test allow 10% more threshold (hardcode for grass)
    if (absPosCS.z <= absPosCS.w &&
        absPosCS.y <= absPosCS.w * 1.5 &&
        absPosCS.x <= absPosCS.w * 1.1 &&
        absPosCS.w <= _MaxDrawDistance)
    {
        _VisibleInstancesOnlyPosWSIDBuffer.Append(instanceID);
    }
}
'''