java - 就内存使用而言，使用 float[] 数组还是 16 个 float 哪个更好？

我有以下Matrix4f类:

/**
 * A 4x4 matrix of floats held in a 16-element array, stored column after column
 * (X column first, then Y, Z and W).
 *
 * <p>No public method of this class modifies an instance after construction; every
 * operation returns a new matrix. Internally the factories and {@link #multiply(Matrix4f)}
 * hand their freshly built array straight to the result instead of copying it, so each
 * operation allocates one {@code float[16]} rather than two.
 */
public class Matrix4f {
    /** Tolerance for the squared-length check on the rotation axis in {@link #rotate}. */
    private final static float EPSILON = 0.01f;

    /** Number of stored elements: 4 columns of 4 values. */
    private final static int LENGTH = 16;

    /** Shared instance handed out by {@link #identity()}. */
    private final static Matrix4f IDENTITY = wrap(new float[] {
        1.0f, 0.0f, 0.0f, 0.0f, //X column
        0.0f, 1.0f, 0.0f, 0.0f, //Y column
        0.0f, 0.0f, 1.0f, 0.0f, //Z column
        0.0f, 0.0f, 0.0f, 1.0f  //W column
    });

    private final float[] elements;

    /** Creates a matrix whose 16 elements are all zero. */
    public Matrix4f() {
        this.elements = new float[LENGTH];
    }

    /**
     * Creates a matrix from the first 16 values of {@code elements}. The values are copied,
     * so later changes to the argument do not affect this matrix.
     *
     * @param elements source values, column after column; must hold at least 16 values
     * @throws NullPointerException if {@code elements} is null
     * @throws IndexOutOfBoundsException if {@code elements} holds fewer than 16 values
     */
    public Matrix4f(final float[] elements) {
        this.elements = new float[LENGTH];
        System.arraycopy(elements, 0, this.elements, 0, LENGTH);
    }

    /**
     * Adopts {@code owned} as the backing array without copying it.
     *
     * @param owned a 16-element array that nothing else keeps a reference to
     * @param adopted marker that only distinguishes this constructor from the copying one
     */
    private Matrix4f(final float[] owned, final boolean adopted) {
        this.elements = owned;
    }

    /** Wraps a freshly created 16-element array in a matrix without the defensive copy. */
    private static Matrix4f wrap(final float[] owned) {
        return new Matrix4f(owned, true);
    }

    /**
     * Returns the product {@code this * other} as a new matrix; neither operand is changed.
     *
     * @param other the right-hand operand
     * @return a new matrix holding the product
     * @throws NullPointerException if {@code other} is null
     */
    public Matrix4f multiply(final Matrix4f other) {
        float[] a = getElements();
        float[] b = other.getElements();
        return wrap(new float[] {
            a[0] * b[0] +   a[4] * b[1] +   a[8] * b[2] +   a[12] * b[3],
            a[1] * b[0] +   a[5] * b[1] +   a[9] * b[2] +   a[13] * b[3],
            a[2] * b[0] +   a[6] * b[1] +   a[10] * b[2] +  a[14] * b[3],
            a[3] * b[0] +   a[7] * b[1] +   a[11] * b[2] +  a[15] * b[3],   //X column

            a[0] * b[4] +   a[4] * b[5] +   a[8] * b[6] +   a[12] * b[7],
            a[1] * b[4] +   a[5] * b[5] +   a[9] * b[6] +   a[13] * b[7],
            a[2] * b[4] +   a[6] * b[5] +   a[10] * b[6] +  a[14] * b[7],
            a[3] * b[4] +   a[7] * b[5] +   a[11] * b[6] +  a[15] * b[7],   //Y column

            a[0] * b[8] +   a[4] * b[9] +   a[8] * b[10] +  a[12] * b[11],
            a[1] * b[8] +   a[5] * b[9] +   a[9] * b[10] +  a[13] * b[11],
            a[2] * b[8] +   a[6] * b[9] +   a[10] * b[10] + a[14] * b[11],
            a[3] * b[8] +   a[7] * b[9] +   a[11] * b[10] + a[15] * b[11],  //Z column

            a[0] * b[12] +  a[4] * b[13] +  a[8] * b[14] +  a[12] * b[15],
            a[1] * b[12] +  a[5] * b[13] +  a[9] * b[14] +  a[13] * b[15],
            a[2] * b[12] +  a[6] * b[13] +  a[10] * b[14] + a[14] * b[15],
            a[3] * b[12] +  a[7] * b[13] +  a[11] * b[14] + a[15] * b[15]  //W column
        });
    }

    /**
     * Copies the 16 elements into a newly created buffer and flips it so it is ready to be read.
     * A new buffer is created on every call; use {@link #writeToFloatBuffer(FloatBuffer)} to
     * reuse an existing one.
     *
     * @return a new buffer containing the 16 elements
     */
    public FloatBuffer asFloatBuffer() {
        FloatBuffer floatBuffer = BufferUtils.createFloatBuffer(elements.length).put(elements);
        floatBuffer.flip();
        return floatBuffer;
    }

    /**
     * Clears {@code floatBuffer}, writes the 16 elements into it and flips it so it is ready
     * to be read. Nothing is allocated.
     *
     * @param floatBuffer destination buffer; needs room for 16 floats
     * @return the same {@code floatBuffer}
     */
    public FloatBuffer writeToFloatBuffer(final FloatBuffer floatBuffer) {
        floatBuffer.clear();
        floatBuffer.put(elements);
        floatBuffer.flip();
        return floatBuffer;
    }

    /**
     * Returns the backing array itself, not a copy. Writing to it changes this matrix,
     * including the shared instance returned by {@link #identity()}.
     */
    float[] getElements() {
        return elements;
    }

    @Override
    public String toString() {
        return Arrays.toString(elements);
    }

    /** Returns the shared identity matrix; the same instance is returned on every call. */
    public static Matrix4f identity() {
        return IDENTITY;
    }

    /**
     * Creates a scaling matrix.
     *
     * @param sx scale factor placed on the diagonal of the X column
     * @param sy scale factor placed on the diagonal of the Y column
     * @param sz scale factor placed on the diagonal of the Z column
     * @return a new scaling matrix
     */
    public static Matrix4f scale(final float sx, final float sy, final float sz) {
        return wrap(new float[] {
            sx, 0.0f, 0.0f, 0.0f,   //X column
            0.0f, sy, 0.0f, 0.0f,   //Y column
            0.0f, 0.0f, sz, 0.0f,   //Z column
            0.0f, 0.0f, 0.0f, 1.0f  //W column
        });
    }

    /**
     * Creates a translation matrix with the offset stored in the W column.
     *
     * @param tx offset along X
     * @param ty offset along Y
     * @param tz offset along Z
     * @return a new translation matrix
     */
    public static Matrix4f translate(final float tx, final float ty, final float tz) {
        return wrap(new float[] {
            1.0f, 0.0f, 0.0f, 0.0f, //X column
            0.0f, 1.0f, 0.0f, 0.0f, //Y column
            0.0f, 0.0f, 1.0f, 0.0f, //Z column
            tx,    ty,    tz, 1.0f  //W column
        });
    }

    /**
     * Creates a matrix that rotates by {@code theta} degrees around the axis {@code (x, y, z)}.
     *
     * @param theta rotation angle in degrees
     * @param x X component of the axis
     * @param y Y component of the axis
     * @param z Z component of the axis
     * @return a new rotation matrix
     * @throws IllegalArgumentException if the squared length of the axis is not within
     *         {@code EPSILON} of 1, which includes any axis with a NaN component
     */
    public static Matrix4f rotate(final float theta, final float x, final float y, final float z) {
        // Written as !(… < EPSILON) so that a NaN axis is rejected too: every comparison
        // involving NaN is false, so the former ">= EPSILON" test let NaN through.
        if (!(Math.abs(x * x + y * y + z * z - 1.0f) < EPSILON)) {
            throw new IllegalArgumentException("(x, y, z) is not a unit vector: x = " + x + ", y = " + y + ", z = " + z);
        }
        float thetaRad = (float)Math.toRadians(theta);
        float cosTheta = (float)Math.cos(thetaRad);
        float sinTheta = (float)Math.sin(thetaRad);
        float cosThetaRes = 1.0f - cosTheta;
        return wrap(new float[] {
            cosTheta + x * x * cosThetaRes,     y * x * cosThetaRes + z * sinTheta, z * x * cosThetaRes - y * sinTheta, 0.0f,   //X column
            x * y * cosThetaRes - z * sinTheta, cosTheta + y * y * cosThetaRes,     z * y * cosThetaRes + x * sinTheta, 0.0f,   //Y column
            x * z * cosThetaRes + y * sinTheta, y * z * cosThetaRes - x * sinTheta, cosTheta + z * z * cosThetaRes,     0.0f,   //Z column
            0.0f,                               0.0f,                               0.0f,                               1.0f    //W column
        });
    }

    /**
     * Creates a perspective projection matrix from the bounds of the near clipping rectangle
     * and the near/far distances. The element layout is the one documented for OpenGL's
     * {@code glFrustum}. The inputs are not validated: {@code right == left},
     * {@code top == bottom} or {@code near == far} leads to a division by zero and therefore
     * to infinite or NaN elements.
     *
     * @param left left bound on the near plane
     * @param right right bound on the near plane
     * @param bottom bottom bound on the near plane
     * @param top top bound on the near plane
     * @param near distance to the near plane
     * @param far distance to the far plane
     * @return a new projection matrix
     */
    public static Matrix4f frustum(final float left, final float right, final float bottom, final float top, final float near, final float far) {
        return wrap(new float[] {
            2 * near / (right - left),          0.0f,                               0.0f,                           0.0f,   //X column
            0.0f,                               2 * near / (top - bottom),          0.0f,                           0.0f,   //Y column
            (right + left) / (right - left),    (top + bottom) / (top - bottom),    (near + far) / (near - far),    -1.0f,  //Z column
            0.0f,                               0.0f,                               2 * near * far / (near - far),  0.0f    //W column
        });
    }

    /**
     * Creates a symmetric perspective projection by deriving the frustum bounds from a
     * vertical field of view and an aspect ratio, then delegating to {@link #frustum}.
     *
     * @param fovy full vertical field of view in degrees
     * @param aspect factor applied to the vertical bounds to obtain the horizontal bounds
     * @param near distance to the near plane
     * @param far distance to the far plane
     * @return a new projection matrix
     */
    public static Matrix4f perspective(final float fovy, final float aspect, final float near, final float far) {
        float y2 = near * (float)Math.tan(Math.toRadians(fovy * 0.5f));
        float y1 = -y2;
        float x1 = y1 * aspect;
        float x2 = y2 * aspect;
        return frustum(x1, x2, y1, y2, near, far);
    }

    /**
     * Multiplies the given matrices from left to right, starting from the identity.
     *
     * @param matrices the factors, leftmost first
     * @return the product; the shared identity instance when no matrices are given
     * @throws NullPointerException if {@code matrices} or one of its entries is null
     */
    public static Matrix4f multiply(final Matrix4f... matrices) {
        Matrix4f output = identity();
        for (Matrix4f matrix : matrices) {
            output = output.multiply(matrix);
        }
        return output;
    }
}

对我的 3D 应用程序进行性能分析(profiling)时，一切看起来基本正常，只是生成了异常大量的 float[] 对象。这可能是正常现象，因为程序中进行了大量的矩阵乘法。

如果我将其更改为使用 16 个 float 而不是 1 个 float[]，性能是否会有显著改善(以及出于什么原因)？

几分钟前我优化了以下部分(如下)，它给我带来了巨大的性能提升:

/**
 * Creates a new buffer sized to the element array, copies the elements into it and
 * flips it so it is ready to be read.
 *
 * @return the newly created, flipped buffer
 */
public FloatBuffer asFloatBuffer() {
    final FloatBuffer freshBuffer = BufferUtils.createFloatBuffer(elements.length);
    freshBuffer.put(elements).flip();
    return freshBuffer;
}

/**
 * Refills a caller-supplied buffer with the elements instead of creating a new one:
 * the buffer is cleared, written, and flipped so it is ready to be read.
 *
 * @param floatBuffer destination buffer to overwrite
 * @return the same {@code floatBuffer}
 */
public FloatBuffer writeToFloatBuffer(final FloatBuffer floatBuffer) {
    floatBuffer.clear();
    floatBuffer.put(elements).flip();
    return floatBuffer;
}

我在那里实际上做的就是避免每次都 new 一个新实例。类似的做法能否也用在 float 上？为什么？

更新:我制作了一个新版本，改进效果非常明显!以前我只能绘制 240 次，而且由于垃圾收集开销，每秒都会出现一次卡顿。现在我采用完全不产生垃圾的方法，可以绘制 24000 次。目前真正的限制因素很可能只是 OpenGL 调用次数太多之类的原因，但这不是问题:如果在真实场景中确实需要向 OpenGL 发送这么多数据，我应该去寻找其他方法。

更新后的代码:

/**
 * Clears the colour and depth buffers, activates the test program, then performs
 * 24000 draw calls. Before each draw call the model-view matrix is rebuilt in place
 * and uploaded through a single reused buffer.
 *
 * @param msDelta not read by this method
 */
@Override
protected void render(final double msDelta) {
    glClearColor(0.0f, 0.25f, 0.0f, 1.0f);
    glClearDepthf(1f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    testProgram.use();

    // One buffer and one matrix are created per call and shared by every iteration below.
    final FloatBuffer uniformData = BufferUtils.createFloatBuffer(16);
    final Matrix4f transform = new Matrix4f();

    for (int drawIndex = 0; drawIndex < 24000; drawIndex++) {
        final float seconds = currentTime / 1000f;
        final float phase = drawIndex + seconds * 0.3f;
        transform.identity()
                .translate(0.0f, 0.0f, -8.0f)               // move away from the origin along -Z
                .rotate(seconds * 45.0f, 0.0f, 1.0f, 0.0f)  // spin around Y
                .rotate(seconds * 21.0f, 1.0f, 0.0f, 0.0f)  // spin around X
                .translate(
                    (float)Math.sin(2.1f * phase) * 2.0f,
                    (float)Math.cos(1.7f * phase) * 2.0f,
                    (float)Math.sin(1.3f * phase) * (float)Math.cos(1.5f * phase) * 2.0f
                );                                          // per-draw offset
        glUniformMatrix4(MODELVIEW_LOCATION, false, transform.writeToFloatBuffer(uniformData));
        glDrawArrays(GL_TRIANGLES, 0, 36);
    }
}

<小时/>

/**
 * A mutable 4x4 matrix of floats kept in 16 separate fields, ordered column after column
 * ({@code elem0..elem3} is the X column, then Y, Z and W).
 *
 * <p>Every transforming method changes this instance in place and returns {@code this},
 * so calls can be chained. Only {@link #asFloatBuffer()} creates a new object.
 */
public class Matrix4f {
    /** Tolerance for the squared-length check on the rotation axis in {@link #rotate}. */
    private final static float EPSILON = 0.01f;    
    /** Number of stored elements: 4 columns of 4 values. */
    private final static int LENGTH = 16;

    private float elem0 = 0.0f, elem1 = 0.0f, elem2 = 0.0f, elem3 = 0.0f,
            elem4 = 0.0f, elem5 = 0.0f, elem6 = 0.0f, elem7 = 0.0f,
            elem8 = 0.0f, elem9 = 0.0f, elem10 = 0.0f, elem11 = 0.0f,
            elem12 = 0.0f, elem13 = 0.0f, elem14 = 0.0f, elem15 = 0.0f;

    /** Creates a matrix whose 16 elements are all zero. */
    public Matrix4f() {

    }

    /** Creates a matrix from 16 values given column after column. */
    public Matrix4f(final float elem0, final float elem1, final float elem2, final float elem3,
            final float elem4, final float elem5, final float elem6, final float elem7,
            final float elem8, final float elem9, final float elem10, final float elem11, 
            final float elem12, final float elem13, final float elem14, final float elem15) {
        set(elem0, elem1, elem2, elem3, elem4, elem5, elem6, elem7, elem8, elem9, elem10, elem11, elem12, elem13, elem14, elem15);
    }

    /**
     * Overwrites this matrix with the identity matrix.
     *
     * @return this matrix
     */
    public Matrix4f identity() {
        set(
            1.0f, 0.0f, 0.0f, 0.0f, //X column
            0.0f, 1.0f, 0.0f, 0.0f, //Y column
            0.0f, 0.0f, 1.0f, 0.0f, //Z column
            0.0f, 0.0f, 0.0f, 1.0f   //W column
        );
        return this;
    }

    /**
     * Replaces this matrix with the product {@code this * other}. Passing this matrix itself
     * as {@code other} is safe because all of its values are read before any field is written.
     *
     * @param other the right-hand operand; it is not modified unless it is this matrix
     * @return this matrix
     * @throws NullPointerException if {@code other} is null
     */
    public Matrix4f multiply(final Matrix4f other) {
        return multiply(
            other.elem0, other.elem1, other.elem2, other.elem3, 
            other.elem4, other.elem5, other.elem6, other.elem7, 
            other.elem8, other.elem9, other.elem10, other.elem11, 
            other.elem12, other.elem13, other.elem14, other.elem15
        );
    }

    /**
     * Replaces this matrix with the product of this matrix and the right-hand matrix whose
     * 16 values are given column after column ({@code mul0..mul3} is its X column).
     *
     * @return this matrix
     */
    public Matrix4f multiply(final float mul0, final float mul1, final float mul2, final float mul3,
            final float mul4, final float mul5, final float mul6, final float mul7,
            final float mul8, final float mul9, final float mul10, final float mul11,
            final float mul12, final float mul13, final float mul14, final float mul15) {
        // All 16 results are evaluated as arguments before set() assigns any field,
        // so the old element values are used throughout the computation.
        set(
            this.elem0 * mul0 +   this.elem4 * mul1 +   this.elem8 * mul2 +   this.elem12 * mul3,
            this.elem1 * mul0 +   this.elem5 * mul1 +   this.elem9 * mul2 +   this.elem13 * mul3,
            this.elem2 * mul0 +   this.elem6 * mul1 +   this.elem10 * mul2 +  this.elem14 * mul3,
            this.elem3 * mul0 +   this.elem7 * mul1 +   this.elem11 * mul2 +  this.elem15 * mul3,   //X column

            this.elem0 * mul4 +   this.elem4 * mul5 +   this.elem8 * mul6 +   this.elem12 * mul7,
            this.elem1 * mul4 +   this.elem5 * mul5 +   this.elem9 * mul6 +   this.elem13 * mul7,
            this.elem2 * mul4 +   this.elem6 * mul5 +   this.elem10 * mul6 +  this.elem14 * mul7,
            this.elem3 * mul4 +   this.elem7 * mul5 +   this.elem11 * mul6 +  this.elem15 * mul7,   //Y column

            this.elem0 * mul8 +   this.elem4 * mul9 +   this.elem8 * mul10 +  this.elem12 * mul11,
            this.elem1 * mul8 +   this.elem5 * mul9 +   this.elem9 * mul10 +  this.elem13 * mul11,
            this.elem2 * mul8 +   this.elem6 * mul9 +   this.elem10 * mul10 + this.elem14 * mul11,
            this.elem3 * mul8 +   this.elem7 * mul9 +   this.elem11 * mul10 + this.elem15 * mul11,  //Z column

            this.elem0 * mul12 +  this.elem4 * mul13 +  this.elem8 * mul14 +  this.elem12 * mul15,
            this.elem1 * mul12 +  this.elem5 * mul13 +  this.elem9 * mul14 +  this.elem13 * mul15,
            this.elem2 * mul12 +  this.elem6 * mul13 +  this.elem10 * mul14 + this.elem14 * mul15,
            this.elem3 * mul12 +  this.elem7 * mul13 +  this.elem11 * mul14 + this.elem15 * mul15  //W column            
        );
        return this;
    }

    /**
     * Multiplies this matrix in place by a scaling matrix.
     *
     * @param sx scale factor placed on the diagonal of the X column
     * @param sy scale factor placed on the diagonal of the Y column
     * @param sz scale factor placed on the diagonal of the Z column
     * @return this matrix
     */
    public Matrix4f scale(final float sx, final float sy, final float sz) {
        return multiply(
            sx, 0.0f, 0.0f, 0.0f,   //X column
            0.0f, sy, 0.0f, 0.0f,   //Y column
            0.0f, 0.0f, sz, 0.0f,   //Z column
            0.0f, 0.0f, 0.0f, 1.0f  //W column
        );
    }

    /**
     * Multiplies this matrix in place by a translation matrix whose offset is stored in
     * the W column.
     *
     * @param tx offset along X
     * @param ty offset along Y
     * @param tz offset along Z
     * @return this matrix
     */
    public Matrix4f translate(final float tx, final float ty, final float tz) {
        return multiply(
            1.0f, 0.0f, 0.0f, 0.0f, //X column
            0.0f, 1.0f, 0.0f, 0.0f, //Y column
            0.0f, 0.0f, 1.0f, 0.0f, //Z column
            tx,    ty,    tz, 1.0f  //W column
        );
    }

    /**
     * Multiplies this matrix in place by a rotation of {@code theta} degrees around the
     * axis {@code (x, y, z)}.
     *
     * @param theta rotation angle in degrees
     * @param x X component of the axis
     * @param y Y component of the axis
     * @param z Z component of the axis
     * @return this matrix
     * @throws IllegalArgumentException if the squared length of the axis is not within
     *         {@code EPSILON} of 1, which includes any axis with a NaN component;
     *         this matrix is left unchanged in that case
     */
    public Matrix4f rotate(final float theta, final float x, final float y, final float z) {
        // Written as !(… < EPSILON) so that a NaN axis is rejected too: every comparison
        // involving NaN is false, so the former ">= EPSILON" test let NaN through.
        if (!(Math.abs(x * x + y * y + z * z - 1.0f) < EPSILON)) {
            throw new IllegalArgumentException("(x, y, z) is not a unit vector: x = " + x + ", y = " + y + ", z = " + z);
        }
        float thetaRad = (float)Math.toRadians(theta);
        float cosTheta = (float)Math.cos(thetaRad);
        float sinTheta = (float)Math.sin(thetaRad);
        float cosThetaRes = 1.0f - cosTheta;
        return multiply(
            cosTheta + x * x * cosThetaRes,     y * x * cosThetaRes + z * sinTheta, z * x * cosThetaRes - y * sinTheta, 0.0f,   //X column
            x * y * cosThetaRes - z * sinTheta, cosTheta + y * y * cosThetaRes,     z * y * cosThetaRes + x * sinTheta, 0.0f,   //Y column
            x * z * cosThetaRes + y * sinTheta, y * z * cosThetaRes - x * sinTheta, cosTheta + z * z * cosThetaRes,     0.0f,   //Z column
            0.0f,                               0.0f,                               0.0f,                               1.0f    //W column
        );
    }

    /**
     * Multiplies this matrix in place by a perspective projection matrix built from the
     * bounds of the near clipping rectangle and the near/far distances. The element layout
     * is the one documented for OpenGL's {@code glFrustum}. The inputs are not validated:
     * {@code right == left}, {@code top == bottom} or {@code near == far} leads to a division
     * by zero and therefore to infinite or NaN elements.
     *
     * @param left left bound on the near plane
     * @param right right bound on the near plane
     * @param bottom bottom bound on the near plane
     * @param top top bound on the near plane
     * @param near distance to the near plane
     * @param far distance to the far plane
     * @return this matrix
     */
    public Matrix4f frustum(final float left, final float right, final float bottom, final float top, final float near, final float far) {
        return multiply(
            2 * near / (right - left),          0.0f,                               0.0f,                           0.0f,   //X column
            0.0f,                               2 * near / (top - bottom),          0.0f,                           0.0f,   //Y column
            (right + left) / (right - left),    (top + bottom) / (top - bottom),    (near + far) / (near - far),    -1.0f,  //Z column
            0.0f,                               0.0f,                               2 * near * far / (near - far),  0.0f    //W column
        );
    }

    /**
     * Multiplies this matrix in place by a symmetric perspective projection. The frustum
     * bounds are derived from a vertical field of view and an aspect ratio and passed to
     * {@link #frustum}.
     *
     * @param fovy full vertical field of view in degrees
     * @param aspect factor applied to the vertical bounds to obtain the horizontal bounds
     * @param near distance to the near plane
     * @param far distance to the far plane
     * @return this matrix
     */
    public Matrix4f perspective(final float fovy, final float aspect, final float near, final float far) {
        float y2 = near * (float)Math.tan(Math.toRadians(fovy * 0.5f));
        float y1 = -y2;
        float x1 = y1 * aspect;
        float x2 = y2 * aspect;
        return frustum(x1, x2, y1, y2, near, far);
    }

    /**
     * Creates a new 16-float buffer, writes the elements into it in order and flips it so
     * it is ready to be read. Use {@link #writeToFloatBuffer(FloatBuffer)} to reuse a buffer.
     *
     * @return the newly created, flipped buffer
     */
    public FloatBuffer asFloatBuffer() {
        FloatBuffer floatBuffer = BufferUtils.createFloatBuffer(LENGTH)
                .put(elem0).put(elem1).put(elem2).put(elem3)
                .put(elem4).put(elem5).put(elem6).put(elem7)
                .put(elem8).put(elem9).put(elem10).put(elem11)
                .put(elem12).put(elem13).put(elem14).put(elem15);
        floatBuffer.flip();
        return floatBuffer;
    }

    /**
     * Clears {@code floatBuffer}, writes the 16 elements into it in order and flips it so
     * it is ready to be read. Nothing is allocated.
     *
     * @param floatBuffer destination buffer; needs room for 16 floats
     * @return the same {@code floatBuffer}
     */
    public FloatBuffer writeToFloatBuffer(final FloatBuffer floatBuffer) {
        floatBuffer.clear();
        floatBuffer.put(elem0).put(elem1).put(elem2).put(elem3)
                .put(elem4).put(elem5).put(elem6).put(elem7)
                .put(elem8).put(elem9).put(elem10).put(elem11)
                .put(elem12).put(elem13).put(elem14).put(elem15);
        floatBuffer.flip();
        return floatBuffer;
    }

    /** Assigns all 16 fields from the given values, column after column. */
    private void set(final float elem0, final float elem1, final float elem2, final float elem3,
            final float elem4, final float elem5, final float elem6, final float elem7,
            final float elem8, final float elem9, final float elem10, final float elem11, 
            final float elem12, final float elem13, final float elem14, final float elem15) {
        this.elem0 = elem0;
        this.elem1 = elem1;
        this.elem2 = elem2;
        this.elem3 = elem3;
        this.elem4 = elem4;
        this.elem5 = elem5;
        this.elem6 = elem6;
        this.elem7 = elem7;
        this.elem8 = elem8;
        this.elem9 = elem9;
        this.elem10 = elem10;
        this.elem11 = elem11;
        this.elem12 = elem12;
        this.elem13 = elem13;
        this.elem14 = elem14;
        this.elem15 = elem15;
    }

    /** Returns the 16 elements in storage order, comma-separated inside square brackets. */
    @Override
    public String toString() {
        return "[" + 
                elem0 + ", "  + elem1 + ", "  + elem2 + ", "  + elem3 + ", " +
                elem4 + ", "  + elem5 + ", "  + elem6 + ", "  + elem7 + ", " +
                elem8 + ", "  + elem9 + ", "  + elem10 + ", " + elem11 + ", " +
                elem12 + ", " + elem13 + ", " + elem14 + ", " + elem15 + "]";
    }
}

最佳答案

Java 中的数组本身就是对象!因此它们在堆上创建，并受 GC 的影响——而 GC 是最大的性能 killer 之一(因为它会让应用程序停顿，直到 GC 完成)。分配得越少越好!此外，正如下面的引用所述，它们还有额外的内存开销:

Each Java object has a header that contains information important for the JVM. The most important is a reference to the object's class (one machine word), and there are some flags used by the garbage collector and to manage synchronization (since every object can be synchronized on) which takes up another machine word (using partial words would be bad for performance). So that's 2 words, which is 8 bytes on 32 bit systems, and 16 bytes on 64 bit. Arrays additionally need an int field for the array length, which is another 4 bytes, possibly 8 on 64 bit systems.

来源:https://softwareengineering.stackexchange.com/questions/162546/why-the-overhead-when-allocating-objects-arrays-in-java

PS:使用 -XX:+DoEscapeAnalysis 运行您的应用程序——这可能会进一步减少分配数量。

关于java - 就内存使用而言，使用 float[] 数组还是 16 个 float 哪个更好？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/21194962/

java - 就内存使用而言，使用 float[] 数组还是 16 个 float 哪个更好？

上一篇：java - 数组的选择排序方法

下一篇：java - 尝试使用异步将图像上传到 Android 网络服务器？