c++ - OpenGL glGetUniformBlockIndex 在 NVIDIA GPU 上返回 GL_INVALID_INDEX

标签 c++ opengl gpu rendering

我开发这个 OpenGL 游戏引擎已经很长时间了,目前正在用它制作一款使用程序化生成行星的游戏。但是,我遇到了一个奇怪的问题,已经被它卡住三个星期了。

为了实时生成行星,我使用了一个由曲面细分着色器和几何着色器组成的 Material,这样生成速度非常快。在曲面细分评估着色器中,我使用一个 uniform 缓冲块(uniform block)将生成行星的参数发送到 GPU(我记得大小是 48 字节 × 噪声层数)。

现在,这一切在我装有 Radeon R9 390 的台式机以及装有 GTX 1060 的笔记本电脑上都能正常运行。但是,在以下台式机 GPU 上,调用 glGetUniformBlockIndex 时我得到的是 GL_INVALID_INDEX,结果显然无法正常工作,因为它在用错误的参数进行生成:
-gtx 1050
-gtx 1060
-gtx 960
-gtx 970
-rtx 2080

在以下GPU上,一切正常,没有错误:
-radeon r9 390
-RX 580
-HD 7770

我没有其他可用的GPU可以进一步测试。

经过一些研究,我了解到 uniform 缓冲块的大小及其可包含的组件数量都是有限的。但是,即使我把最大层数降到 1 再测试,问题仍然存在。这也不是显存不足的问题,因为 HD 7770 只有 1 GB 显存,而 1060 有 4-6 GB 显存,问题却仍然出现在 1060 上。

我发现的另一件事是,当某个变量对输出没有贡献时,驱动程序可能会把这个着色器变量优化掉;但从着色器代码中可以看到,这个块确实对输出有贡献。

所以,到一些代码

在 Material 类中,有一个 CreateUniformBuffer 函数,该函数获取 uniform 缓冲块的索引,并将其绑定(bind)到一个可供编辑的缓冲区。请注意,程序在得到 GL_INVALID_INDEX 之后就已经崩溃,因此问题必定出在获取索引这一步。

/// Creates a uniform buffer object for the named uniform block of this
/// material's shader program and attaches it to the material's next free
/// uniform-buffer binding point (m_BufferBindIndex).
///
/// @param name        Name of the uniform block as declared in the shader.
/// @param bufferSize  Size in bytes of the data store to allocate.
/// @return The OpenGL name of the new buffer, or 0 when the program reports
///         no uniform block called @p name (an error is logged in that case).
GLuint Material::CreateUniformBuffer(const std::string& name, GLuint bufferSize)
{
    const GLuint uniformBlockIndex = glGetUniformBlockIndex(m_pShader->m_ShaderProgramID, name.c_str());
    Utilities::Debug::LogGLError(glGetError());
    if (uniformBlockIndex == GL_INVALID_INDEX)
    {
        Utilities::Debug::LogError("Material::CreateUniformBuffer > Uniform buffer block with name " + name + " not found!");
        return 0;
    }

    // Route the block to the binding point reserved for it.
    glUniformBlockBinding(m_pShader->m_ShaderProgramID, uniformBlockIndex, m_BufferBindIndex);
    Utilities::Debug::LogGLError(glGetError());

    // Allocate the backing store; contents are uploaded later.
    GLuint bufferID = 0;
    glGenBuffers(1, &bufferID);
    Utilities::Debug::LogGLError(glGetError());
    glBindBuffer(GL_UNIFORM_BUFFER, bufferID);
    Utilities::Debug::LogGLError(glGetError());
    glBufferData(GL_UNIFORM_BUFFER, bufferSize, NULL, GL_DYNAMIC_DRAW);
    Utilities::Debug::LogGLError(glGetError());

    // glBindBufferBase takes a *binding point*, not a block index. It must be
    // the same binding point that was passed to glUniformBlockBinding above;
    // the block index only coincides with it by accident.
    glBindBufferBase(GL_UNIFORM_BUFFER, m_BufferBindIndex, bufferID);
    Utilities::Debug::LogGLError(glGetError());
    glBindBuffer(GL_UNIFORM_BUFFER, 0);
    Utilities::Debug::LogGLError(glGetError());

    m_UniformBufferObjects.push_back(bufferID);

    // Reserve the next binding point for the following buffer.
    ++m_BufferBindIndex;
    return bufferID;
}

这是曲面细分评估着色器。请注意,顶部有一行 #include "SimplexNoise.shader",这在 OpenGL 中本身是不起作用的;不过引擎有一个着色器预处理阶段,它会读取着色器代码,并在编译着色器之前把所有 #include 指令替换为被包含文件的内容。

planet_te.shader
#version 450

// Not a GLSL directive: the engine's shader preprocessing step replaces this
// line with the contents of SimplexNoise.shader before compilation.
#include "SimplexNoise.shader"

// Tessellation evaluation stage: triangle patches, equal spacing, clockwise winding.
layout(triangles, equal_spacing, cw) in;
in vec3 tcPosition[];   // per-control-point positions of the patch
out vec3 tePosition;    // written in main(): normalized interpolated position
out float teElevation;  // written in LayeredEvaluate(): clamped average elevation

// Number of entries of NoiseLayers that are evaluated.
// NOTE(review): nothing in the shader clamps this to MaxNoiseLayers - confirm on the host side.
uniform int NumNoiseLayers;

// Settings for one noise layer. The members form three groups of four
// 4-byte scalars (a vec3 followed by a float shares one 16-byte slot), so one
// element occupies 48 bytes under std140.
struct NoiseLayer
{
    float Strength;       // final multiplier applied to the layer's value
    float BaseRoughness;  // starting frequency of the octave loop
    float Roughness;      // per-octave frequency multiplier
    float Persistance;    // per-octave amplitude multiplier

    vec3 Center;          // offset added to the sample point
    float MinValue;       // subtracted from the accumulated noise

    int NumLayers;            // number of octaves
    int UseFirstLayerAsMask;  // compared against 1 in LayeredEvaluate()
    int NoiseFilterType;      // 0 selects SimpleEvaluate, any other value RigidEvaluate
    float Weight;             // forwarded to RigidEvaluate only
};

// Upper bound on the number of layers stored in the uniform block.
const int MaxNoiseLayers = 4;
// Uniform block filled by the host with the per-layer settings.
layout(std140) uniform NoiseBlock
{
    NoiseLayer NoiseLayers[MaxNoiseLayers];
} _NoiseData;

/// Evaluates the noise layer with index `layer` at point p.
/// A NoiseFilterType of 0 uses SimpleEvaluate; every other value uses RigidEvaluate.
float Evaluate(vec3 p, int layer)
{
    NoiseLayer settings = _NoiseData.NoiseLayers[layer];

    if (settings.NoiseFilterType != 0)
    {
        return RigidEvaluate(p, settings.NumLayers, settings.BaseRoughness, settings.Roughness,
                             settings.Persistance, settings.Center, settings.MinValue,
                             settings.Strength, settings.Weight);
    }

    return SimpleEvaluate(p, settings.NumLayers, settings.BaseRoughness, settings.Roughness,
                          settings.Persistance, settings.Center, settings.MinValue,
                          settings.Strength);
}

/// Returns the sum of Strength over the first NumNoiseLayers layers
/// (0.0 when NumNoiseLayers is not positive).
float CalculateTotalStrength()
{
    float total = 0.0;
    int layer = 0;
    while (layer < NumNoiseLayers)
    {
        total += _NoiseData.NoiseLayers[layer].Strength;
        ++layer;
    }
    return total;
}

/// Combines the first NumNoiseLayers noise layers at point p.
///
/// Side effect: writes teElevation with the summed raw layer values divided
/// by the total layer strength, scaled by 115 and clamped to [-0.99, 0.99].
///
/// Returns the sum of the layer values, each clamped to be >= 0; main() uses
/// this as the height offset.
float LayeredEvaluate(vec3 p)
{
    float firstLayerValue = 0.0;
    float elevationSum = 0.0;
    float noiseValue = 0.0;

    if (NumNoiseLayers > 0)
    {
        float unscaledElevation = Evaluate(p, 0);
        noiseValue = max(0.0, unscaledElevation);
        elevationSum = unscaledElevation;
        firstLayerValue = noiseValue;
    }

    for (int i = 1; i < NumNoiseLayers; i++)
    {
        // Layers flagged with UseFirstLayerAsMask only contribute where the
        // first layer is above zero.
        float mask = (_NoiseData.NoiseLayers[i].UseFirstLayerAsMask == 1) ? firstLayerValue : 1.0;

        // Evaluate layer i (not layer 0 again) and apply its mask.
        float unscaledElevation = Evaluate(p, i) * mask;
        elevationSum += unscaledElevation;
        noiseValue += max(0.0, unscaledElevation);
    }

    // Avoid 0/0 (NaN) when there are no layers or every strength is zero.
    float totalStrength = CalculateTotalStrength();
    float elevationAverage = (totalStrength != 0.0) ? elevationSum / totalStrength : 0.0;

    teElevation = clamp(elevationAverage * 115.0, -0.99, 0.99);

    return noiseValue;
}

/// Interpolates the patch corners with the tessellation coordinate,
/// normalizes the result, and pushes it outwards by the layered noise height.
void main()
{
    vec3 corner0 = gl_TessCoord.x * tcPosition[0];
    vec3 corner1 = gl_TessCoord.y * tcPosition[1];
    vec3 corner2 = gl_TessCoord.z * tcPosition[2];

    vec3 unitPosition = normalize(corner0 + corner1 + corner2);
    tePosition = unitPosition;

    float height = LayeredEvaluate(unitPosition);
    gl_Position = vec4(unitPosition * (1.0 + height), 1.0);
}

SimplexNoise.shader
// NOTE(review): RandomSize is not referenced by the functions visible here.
const int RandomSize = 256;
const float Sqrt3 = 1.7320508075688772935;
const float Sqrt5 = 2.2360679774997896964;
// Table used by Evaluate(vec3) to hash lattice coordinates; supplied by the host.
// NOTE(review): per the accepted answer, this 512-element default-block uniform
// array is what broke the shader on NVIDIA GPUs; moving it into its own uniform
// block fixed it.
uniform int _random[512];

/// Skewing and unskewing factors for 2D, 3D and 4D,
/// some of them pre-multiplied.
const float F2 = 0.5 * (Sqrt3 - 1.0);
const float G2 = (3.0 - Sqrt3) / 6.0;
const float G22 = G2 * 2.0 - 1;
const float F3 = 1.0 / 3.0;
const float G3 = 1.0 / 6.0;
const float F4 = (Sqrt5 - 1.0) / 4.0;
const float G4 = (5.0 - Sqrt5) / 20.0;
const float G42 = G4 * 2.0;
const float G43 = G4 * 3.0;
const float G44 = G4 * 4.0 - 1.0;

// 12 gradient directions stored as flat (x, y, z) triples (36 ints).
// Dot() is called with the gradient number multiplied by 3.
const int[] Grad3 =
{
    1, 1, 0, -1, 1, 0, 1, -1, 0,
    -1, -1, 0, 1, 0, 1, -1, 0, 1,
    1, 0, -1, -1, 0, -1, 0, 1, 1,
    0, -1, 1, 0, 1, -1, 0, -1, -1
};

/// Dot product of (x, y, z) with the gradient triple starting at Grad3[index].
float Dot(int index, float x, float y, float z)
{
    float xTerm = Grad3[index] * x;
    float yTerm = Grad3[index + 1] * y;
    float zTerm = Grad3[index + 2] * z;
    return xTerm + yTerm + zTerm;
}

/// Dot product of (x, y) with the first two components of the gradient
/// triple starting at Grad3[index].
float Dot(int index, float x, float y)
{
    float xTerm = Grad3[index] * x;
    float yTerm = Grad3[index + 1] * y;
    return xTerm + yTerm;
}

/// Returns the largest integer that is <= x.
/// The previous int()-based version truncated toward zero, which returned 0
/// for inputs in (-1, 0) and was off by one for negative whole numbers.
int FastFloor(float x)
{
    return int(floor(x));
}

/// 3D simplex noise sampled at p.
/// Reads the _random table to pick one of the 12 Grad3 gradients for each of
/// the four simplex corners and sums the corner contributions.
/// Returns that sum multiplied by 32.
/// NOTE(review): assumes every _random entry lies in [0, 255] so that all
/// nested lookups stay inside the 512-entry table - confirm on the host side.
float Evaluate(vec3 p)
{
    float x = p.x;
    float y = p.y;
    float z = p.z;
    // Contributions of the four simplex corners; a corner that is too far
    // away keeps its 0.0.
    float n0 = 0.0, n1 = 0.0, n2 = 0.0, n3 = 0.0;

    // Skew the input space to determine which simplex cell we're in
    float s = (x + y + z) * F3;

    // Integer lattice coordinates of the cell origin in skewed space
    int i = FastFloor(x + s);
    int j = FastFloor(y + s);
    int k = FastFloor(z + s);

    // Unskew factor for the cell origin
    float t = (i + j + k) * G3;

    // The x,y,z distances from the cell origin
    float x0 = x - (i - t);
    float y0 = y - (j - t);
    float z0 = z - (k - t);

    // For the 3D case, the simplex shape is a slightly irregular tetrahedron.
    // Determine which simplex we are in.
    // Offsets for second corner of simplex in (i,j,k) coords
    int i1, j1, k1;

    int i2, j2, k2; // Offsets for third corner of simplex in (i,j,k) coords

    if (x0 >= y0)
    {
        if (y0 >= z0)
        {
            // X Y Z order
            i1 = 1;
            j1 = 0;
            k1 = 0;
            i2 = 1;
            j2 = 1;
            k2 = 0;
        }
        else if (x0 >= z0)
        {
            // X Z Y order
            i1 = 1;
            j1 = 0;
            k1 = 0;
            i2 = 1;
            j2 = 0;
            k2 = 1;
        }
        else
        {
            // Z X Y order
            i1 = 0;
            j1 = 0;
            k1 = 1;
            i2 = 1;
            j2 = 0;
            k2 = 1;
        }
    }
    else
    {
        // x0 < y0
        if (y0 < z0)
        {
            // Z Y X order
            i1 = 0;
            j1 = 0;
            k1 = 1;
            i2 = 0;
            j2 = 1;
            k2 = 1;
        }
        else if (x0 < z0)
        {
            // Y Z X order
            i1 = 0;
            j1 = 1;
            k1 = 0;
            i2 = 0;
            j2 = 1;
            k2 = 1;
        }
        else
        {
            // Y X Z order
            i1 = 0;
            j1 = 1;
            k1 = 0;
            i2 = 1;
            j2 = 1;
            k2 = 0;
        }
    }

    // A step of (1,0,0) in (i,j,k) means a step of (1-c,-c,-c) in (x,y,z),
    // a step of (0,1,0) in (i,j,k) means a step of (-c,1-c,-c) in (x,y,z),
    // and
    // a step of (0,0,1) in (i,j,k) means a step of (-c,-c,1-c) in (x,y,z),
    // where c = 1/6.

    // Offsets for second corner in (x,y,z) coords
    float x1 = x0 - i1 + G3;
    float y1 = y0 - j1 + G3;
    float z1 = z0 - k1 + G3;

    // Offsets for third corner in (x,y,z); F3 (1/3) equals 2 * G3 here
    float x2 = x0 - i2 + F3;
    float y2 = y0 - j2 + F3;
    float z2 = z0 - k2 + F3;

    // Offsets for last corner in (x,y,z); -0.5 equals -1 + 3 * G3
    float x3 = x0 - 0.5;
    float y3 = y0 - 0.5;
    float z3 = z0 - 0.5;

    // Work out the hashed gradient indices of the four simplex corners.
    // Masking with 0xff keeps the lattice coordinates in [0, 255].
    int ii = i & 0xff;
    int jj = j & 0xff;
    int kk = k & 0xff;

    // Calculate the contribution from the four corners.
    // Each corner contributes only when its falloff term t is positive;
    // the contribution is t^4 times the gradient dot product.
    float t0 = 0.6 - x0 * x0 - y0 * y0 - z0 * z0;

    if (t0 > 0)
    {
        t0 *= t0;
        // gi0 is a gradient number in [0, 11]; * 3 converts it to a Grad3 offset
        int gi0 = _random[ii + _random[jj + _random[kk]]] % 12;
        n0 = t0 * t0 * Dot(gi0 * 3, x0, y0, z0);
    }

    float t1 = 0.6 - x1 * x1 - y1 * y1 - z1 * z1;
    if (t1 > 0)
    {
        t1 *= t1;
        int gi1 = _random[ii + i1 + _random[jj + j1 + _random[kk + k1]]] % 12;
        n1 = t1 * t1 * Dot(gi1 * 3, x1, y1, z1);
    }

    float t2 = 0.6 - x2 * x2 - y2 * y2 - z2 * z2;
    if (t2 > 0)
    {
        t2 *= t2;
        int gi2 = _random[ii + i2 + _random[jj + j2 + _random[kk + k2]]] % 12;
        n2 = t2 * t2 * Dot(gi2 * 3, x2, y2, z2);
    }

    float t3 = 0.6 - x3 * x3 - y3 * y3 - z3 * z3;
    if (t3 > 0)
    {
        t3 *= t3;
        int gi3 = _random[ii + 1 + _random[jj + 1 + _random[kk + 1]]] % 12;
        n3 = t3 * t3 * Dot(gi3 * 3, x3, y3, z3);
    }

    // Add contributions from each corner to get the final noise value.
    // The result is scaled to stay just inside [-1,1]
    return float(n0 + n1 + n2 + n3) * 32;
}

/// Samples the simplex noise at p * roughness + centre, remaps the value with
/// (v + 1) * 0.5 and multiplies it by strength.
float Evaluate(vec3 p, float strength, float roughness, vec3 centre)
{
    vec3 samplePoint = p * roughness + centre;
    float remapped = (Evaluate(samplePoint) + 1.0) * 0.5;
    return remapped * strength;
}

/// Sums numLayers octaves of simplex noise at point p.
/// The frequency starts at baseRoughness and is multiplied by roughness after
/// every octave; the amplitude starts at 1.0 and is multiplied by persistance.
/// Returns (sum - minValue) * strength. The difference is not clamped, so the
/// result can be negative.
float SimpleEvaluate(vec3 p, int numLayers, float baseRoughness, float roughness, float persistance, vec3 centre, float minValue, float strength)
{
    float sum = 0.0;
    float freq = baseRoughness;
    float amp = 1.0;

    int octave = 0;
    while (octave < numLayers)
    {
        float remapped = (Evaluate(p * freq + centre) + 1) * 0.5;
        sum += remapped * amp;

        freq *= roughness;
        amp *= persistance;
        ++octave;
    }

    return (sum - minValue) * strength;
}

/// Sums numLayers octaves of ridged noise at point p.
/// Each octave value is (1 - |noise|)^2 multiplied by a running weight taken
/// from the previous octave. The frequency starts at baseRoughness and is
/// multiplied by roughness per octave; the amplitude starts at 1.0 and is
/// multiplied by persistance.
///
/// `weight` scales how strongly one octave feeds into the weight of the next.
/// The previous version overwrote the parameter with 1.0, so the caller's
/// value was never used; a weight of 1.0 reproduces that behavior as long as
/// the octave values stay within [0, 1].
///
/// Returns (sum - minValue) * strength; the difference is not clamped.
float RigidEvaluate(vec3 p, int numLayers, float baseRoughness, float roughness, float persistance, vec3 centre, float minValue, float strength, float weight)
{
    float noiseValue = 0.0;
    float frequency = baseRoughness;
    float amplitude = 1.0;
    // Weight applied to the current octave; the first octave is unweighted.
    float layerWeight = 1.0;

    for (int i = 0; i < numLayers; i++)
    {
        float v = 1.0 - abs(Evaluate(p * frequency + centre));
        v *= v;
        v *= layerWeight;
        // The next octave is weighted by this octave's value, scaled by the
        // caller's multiplier and kept within [0, 1].
        layerWeight = clamp(v * weight, 0.0, 1.0);
        noiseValue += v * amplitude;
        frequency *= roughness;
        amplitude *= persistance;
    }

    return (noiseValue - minValue) * strength;
}

请注意,这是我在网上找到的一种噪声算法,并将其转换为glsl代码(向塞巴斯蒂安·拉格(Sebastian Lague)致谢,以表扬他在Unity中程序行星的惊人系列)

我正在使用SDL打开一个窗口并处理输入,音频和文本渲染,OpenGL 4.6和GLEW 2.1.0

我尝试过的事情:
-通过减少层数来减小块的大小(因此数组较小)
-一个统一的数组而不是一个统一的块(相同的结果只是不存在崩溃,但是对于发生崩溃的GPU,它只会错误地渲染所有行星)
-将SDL及其所有插件更新到最新版本,并将OpenGL从3.1更新到4.6,还将GLEW从1.10.0更新到2.1.0
-更改统一块的名称
-调整块内值的数量(同时保证大小是 4 个 float 的倍数)
-在 GLSL 代码中的 std140 旁边加上 binding = 0
-将着色器编译中的任何错误或警告记录到控制台(未找到任何错误或警告)

我下周五有一个截止日期,需要上传一个用于发布的构建版本;如果它只能在 AMD GPU 上运行,对方是不会接受的(这当然可以理解)。所以我希望有人能看出我哪里做错了,或者以前遇到过类似的问题。欢迎任何帮助!

编辑:我尝试改用 SSBO,这对性能造成了严重影响。虽然程序不再崩溃(因为我不再按名称查找块索引),但它在 NVIDIA GPU 上不渲染任何内容,并且在日志文件中输出未知错误。

绑定(bind)SSBO的代码
/// Creates a shader storage buffer, allocates (and optionally fills) its data
/// store, and attaches it to the given shader-storage binding point.
///
/// @param name          Name of the storage block. Currently unused: the
///                      lookup by name below is disabled.
/// @param bufferSize    Size in bytes of the data store.
/// @param data          Initial contents, forwarded unchanged to glBufferData.
/// @param bindingIndex  Shader-storage binding point to attach the buffer to.
/// @param usage         Usage hint forwarded to glBufferData.
/// @return The OpenGL name of the new buffer.
GLuint Material::CreateShaderStorageBuffer(const std::string& name, GLsizeiptr bufferSize, const void* data, GLint bindingIndex, GLenum usage)
{
    GLuint ssbo = 0;
    glGenBuffers(1, &ssbo);
    Utilities::Debug::LogGLError(glGetError());
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    Utilities::Debug::LogGLError(glGetError());
    glBufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, data, usage);
    Utilities::Debug::LogGLError(glGetError());
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, bindingIndex, ssbo);
    Utilities::Debug::LogGLError(glGetError());

    // Disabled: resolving the block by name and assigning its binding from
    // the host side.
    /*GLuint blockIndex = glGetProgramResourceIndex(m_pShader->m_ShaderProgramID, GL_SHADER_STORAGE_BLOCK, name.c_str());
    Utilities::Debug::LogGLError(glGetError());

    if (blockIndex == GL_INVALID_INDEX)
    {
        Utilities::Debug::LogError("Material::CreateShaderStorageBuffer > Shader Storage Buffer Block with name " + name + " not found!");
        return 0;
    }

    glShaderStorageBlockBinding(m_pShader->m_ShaderProgramID, blockIndex, bindingIndex);
    Utilities::Debug::LogGLError(glGetError());*/

    // Buffer names are GLuint values, not pointers: unbind with 0, not NULL.
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    Utilities::Debug::LogGLError(glGetError());
    return ssbo;
}

设置SSBO数据的代码
/// Overwrites the start of a shader storage buffer with caller-provided data.
///
/// @param ssboID  OpenGL name of the buffer to write to.
/// @param data    Source bytes to copy.
/// @param size    Number of bytes to copy from @p data.
///
/// When the buffer cannot be mapped, an error is logged and nothing is copied.
void Material::WriteToShaderStorageBuffer(GLuint ssboID, const void* data, GLsizeiptr size)
{
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboID);
    Utilities::Debug::LogGLError(glGetError());

    GLvoid* bufferData = glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY);
    Utilities::Debug::LogGLError(glGetError());

    // glMapBuffer returns NULL on failure; copying into it would crash.
    if (bufferData != NULL)
    {
        memcpy(bufferData, data, size);
        glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
        Utilities::Debug::LogGLError(glGetError());
    }
    else
    {
        Utilities::Debug::LogError("Material::WriteToShaderStorageBuffer > Failed to map shader storage buffer!");
    }

    // Buffer names are GLuint values, not pointers: unbind with 0, not NULL.
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    Utilities::Debug::LogGLError(glGetError());
}

着色器中的更改将统一块变为SSBO
// Upper bound on the number of layers stored in the block.
const int MaxNoiseLayers = 4;
// Shader storage block variant of NoiseBlock. The binding point is fixed to 0
// in the shader itself, so the host does not have to look the block up by name.
layout(std430, binding = 0) buffer NoiseBlock
{
    NoiseLayer NoiseLayers[MaxNoiseLayers];
} _NoiseData;

最佳答案

我发现问题是由 SimplexNoise 着色器文件中的 int 型 uniform 数组 _random[512] 引起的。我不确定为什么这只在 NVIDIA GPU 上引起问题,但我猜它超出了 NVIDIA GPU 上的某种限制,导致整个着色器失效,而且没有任何警告或错误。

我通过将这个数组变成另一个UBO并将其绑定(bind)到索引1来修复它。

关于c++ - OpenGL glGetUniformBlockIndex在nvidea GPU上返回INVALID_INDEX,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62075315/

相关文章:

c++ - Linux 共享库被加载两次

c++ - 用于读入字符串时 C++ 提取运算符的行为

c++ - pthread_cond_wait 没有从 pthread_cond_broadcast 唤醒

c# - 使用 COM 在 C++ 中实例化 C# 类成员

c - 为什么OpenGL正常使用4个 float 来定义颜色?

c++ - OpenGL 将纹理应用于曲面 segmentation

c++ - 从深度缓冲区值获取世界位置

gpu - DirectX HAL 规范

caching - 全局内存缓存重放率如何可能超过 100%?

CUDA 合并内存访问速度取决于字大小