python - 如何迭代行索引并从Python数据集中找到具有最大行列式的组?

标签 python pandas numpy matrix combinations

我想从数据集中选出 7 个行索引,使得由这 7 行组成的矩阵 Matrix 的 |Matrix.T*Matrix| 行列式最大。Matrix.T*Matrix 应该是一个 6 x 6 矩阵(因为我的列数是 6)。

数据集:

Serial_no,A,B,C,D,E,F
1,0.379,-0.588,-1.69,-0.0135,0.083,-0.0297
2,-0.144,0.278,0.354,-0.000672,-0.0228,0.014
3,0.295,-0.157,-1.63,-0.00451,0.0778,-0.00969
4,0.371,-0.623,-4.98,-0.000253,0.0872,-0.0109
5,0.369,-3.11,-8.3,-0.0000105,0.0871,-0.0327
6,0.369,-0.899,-7.19,-0.0000177,0.0872,-0.0109
7,0.383,-1.04,-2.76,-0.00418,0.089,-0.033
8,0.369,-1.04,-8.3,-0.00000263,0.0871,-0.0109
9,-0.124,0.421,0.679,0.00246,-0.0216,0.0133
10,0.37,2.15,-17.1,0.000244,0.0871,0.0109
11,0.369,5.61,-14.9,0.0000352,0.0872,0.0327
12,0.369,1.45,-11.6,-0.000000963,0.0872,0.0109
13,0.369,3.53,-9.41,-0.00000186,0.0872,0.0327
14,0.369,6.44,-17.2,0.000513,0.0872,0.0327
15,-0.11,-2.57,4.11,-0.000127,-0.0209,-0.0131
16,-0.11,-2.76,4.43,-0.000606,-0.0211,-0.0132
17,0.37,0.761,-6.09,0.0000571,0.0871,0.0109
18,0.3678,1.45,-3.88,0.00209,0.0865,0.0325
19,0.381,-2.46,-19.4,-0.00274,0.0874,-0.0111
20,0.369,4.36,-11.6,-0.000003,0.0872,0.0327
21,-0.111,-1.74,2.79,0.000000903,-0.0209,-0.0131
22,-0.111,-1.91,3.05,-0.000000953,-0.0209,-0.0131
23,0.368,2.28,-6.09,0.000164,0.0871,0.0327
24,-0.11,-0.913,1.46,-0.0000412,-0.0209,-0.0131
25,-0.111,-1.08,1.73,-0.0000101,-0.0209,-0.0131
26,-0.144,-0.278,0.354,0.000672,-0.0228,-0.014
27,0.344,-0.344,-2.76,-0.00202,0.0877,-0.0107
28,0.369,3.11,-8.3,0.0000105,0.0871,0.0327
29,0.383,1.04,-2.76,0.00418,0.089,0.033
30,-0.124,-0.421,0.679,-0.00246,-0.0216,-0.0133
import pandas as pd
import numpy as np

# Load the dataset with pandas and use the Serial_no column as the row index.
df = pd.read_csv('Dataset.csv').set_index('Serial_no')
# Positional selection of every row and every column of df.
X = df.iloc[:, :]

我尝试了下面的代码,但没有得到所选矩阵的行索引值:

# Number of consecutive rows taken for each candidate sub-matrix.
N = 7


def my_det(df, i):
    """Return det(M.T @ M), where M is the N consecutive rows of df from position i."""
    window = df.iloc[i:i + N]
    gram = window.T @ window
    return np.linalg.det(gram)

# Determinant for every window of N consecutive rows. The "+ 1" makes the
# range include the last full window, which starts at position len(df) - N.
all_det = [my_det(df, i) for i in range(len(df) - N + 1)]

# Start position of the best window: a 0-based offset into df, not a
# Serial_no label.
print(np.argmax(all_det))
# reported output: 7 (obtained before the final window was included)

print(np.max(all_det))

数据集:

(原帖此处为数据集的截图,图片未随文本保留;数据见上文表格。)

最佳答案

我忍不住要试一试。下面是使用拉普拉斯展开和柯西-比奈公式的解决方案,以及一个用于验证结果的暴力解法。

示例运行:

 [55.67740601  0.61391687]

此输出表明,在您的 30x6 示例中,暴力解法和智能解法给出了相同的答案(断言通过),其中暴力解法耗时 55.7 秒,而智能解法只用了 0.6 秒。

代码:

import numpy as np
import pandas as pd
import itertools as it
import io

def brute_force(A, k=7):
    """Return the k row positions of A whose sub-matrix M maximises det(M.T @ M).

    Every k-row combination is evaluated, so the number of determinants
    computed is C(len(A), k). Intended as a reference implementation for
    checking faster methods.

    Args:
        A: 2-D array-like of shape (n_rows, n_cols); converted with
            ``np.asarray`` so nested lists and DataFrames are accepted too.
        k: Number of rows to select. Defaults to 7.

    Returns:
        A list of k ascending 0-based row positions. If several
        combinations tie, the first one in ``itertools.combinations``
        order is returned.

    Raises:
        ValueError: If A has fewer than k rows (``max`` of an empty sequence).
    """
    A = np.asarray(A)

    def gram_det(rows):
        # rows is a list, so A[rows] selects those rows by position.
        sub = A[rows]
        return np.linalg.det(sub.T @ sub)

    return max(map(list, it.combinations(range(len(A)), k)), key=gram_det)

def smart(A):
    """Find the 7 rows of A whose sub-matrix M maximises det(M.T @ M).

    Instead of computing one determinant per 7-row subset, the minors of
    A's leading columns are built level by level for all row subsets of
    size 1..6 (Laplace expansion along the newest column). The 6x6 minors
    are then squared and summed over the seven 6-row subsets of every
    7-row subset, which by the Cauchy-Binet formula gives det(M.T @ M).

    Args:
        A: 2-D NumPy array. Only columns 0..5 are read. The code assumes
            there are at least 7 rows; this is not checked.

    Returns:
        1-D integer array with the 7 selected 0-based row positions, in the
        same ascending order that ``brute_force`` returns (the script
        compares the two element-wise).

    Note:
        Intermediate arrays have one column per row subset, i.e.
        C(N, j+1) columns at level j.
    """
    N = len(A)
    # Table of binomial coefficients: row j is the j-fold cumulative sum of
    # 0..N, so bcs[j, n] == C(n + j, j + 1).
    bcs = np.zeros((7,N+1),int)
    # dets[j-1] holds the jxj minors (columns 0..j-1 of A) for j=1..6, one
    # value per j-row subset; the 1x1 minors are simply column 0.
    dets = [A[:,0]]
    bcs[0] = np.arange(N+1)
    # combs[j] encodes the row subsets of size j+1 (sizes 1..7), one subset
    # per column. Level 0 is a single row holding 0..N.
    combs = [bcs[None,0]]
    for j in range(1,7):
        # extend bcs, combs
        bcs[j] = bcs[j-1].cumsum()
        # bcs[j,-j-1] is bcs[j, N-j] == C(N, j+1): the number of
        # (j+1)-row subsets.
        combs.append(np.zeros((j+1,bcs[j,-j-1]),int))
        # Row 0: put a 1 at each block boundary, then cumsum, so columns
        # bcs[j,i]..bcs[j,i+1]-1 all carry the value i.
        combs[-1][0,bcs[j,1:-j-1]] = 1
        combs[-1][0] = combs[-1][0].cumsum()
        # Remaining rows of block i are the first bcs[j-1,i+1] columns of
        # the previous level.
        for i in range(N-j):
            combs[-1][1:,bcs[j,i]:bcs[j,i+1]] = combs[-2][:,:bcs[j-1,i+1]]
        # Entries are stored shifted: adding np.arange(j,-1,-1) to the rows
        # (as done below) yields the actual row positions.
        # use Laplace's formula to compute j+1xj+1 minors
        # the tricky bit is getting the indexing right...
        # NOTE(review): idxs[r, c] appears to be the column, at the previous
        # level, of the j-row subset obtained by dropping the r-th stored
        # element of subset c -- confirm before changing this arithmetic.
        idxs = np.zeros((j+1,bcs[j,-j-1]),int)
        idxs[1:] = bcs[np.arange(j)[::-1,None],combs[-1][:-1]+1].cumsum(0)
        idxs[-2::-1] += bcs[np.arange(j)[:,None],combs[-1][-1:0:-1]].cumsum(0)
        if j<6:
            # Alternating-sign sum over the dropped row of
            # A[row, j] * (minor of the remaining rows), one result per subset.
            dets.append(np.einsum('ji,ji->i',(A[combs[-1]+np.arange(j,-1,-1)[:,None],j])*(-1)**np.arange(j+1)[:,None],dets[-1][idxs]))
        else:
            # use Cauchy-Binet to calculate the M^T M determinants
            # Squaring removes the sign of each 6x6 minor.
            d62 = np.square(dets[-1])
            # For each 7-row subset, sum the squared minors of its seven
            # 6-row subsets.
            d7 = d62[idxs].sum(0)
            # find index of max, translate back to subset
            # Reversing the rows and adding 0..6 undoes the shifted storage.
            return combs[-1][::-1,d7.argmax()]+np.arange(7)

# Inline copy of the 30-row dataset (Serial_no plus columns A-F) so the
# example runs without an external CSV file.
# The literal starts with an empty line; pandas.read_csv skips blank lines
# by default, so the header is still read from the Serial_no line.
a = """
Serial_no,A,B,C,D,E,F
1,0.379,-0.588,-1.69,-0.0135,0.083,-0.0297
2,-0.144,0.278,0.354,-0.000672,-0.0228,0.014
3,0.295,-0.157,-1.63,-0.00451,0.0778,-0.00969
4,0.371,-0.623,-4.98,-0.000253,0.0872,-0.0109
5,0.369,-3.11,-8.3,-0.0000105,0.0871,-0.0327
6,0.369,-0.899,-7.19,-0.0000177,0.0872,-0.0109
7,0.383,-1.04,-2.76,-0.00418,0.089,-0.033
8,0.369,-1.04,-8.3,-0.00000263,0.0871,-0.0109
9,-0.124,0.421,0.679,0.00246,-0.0216,0.0133
10,0.37,2.15,-17.1,0.000244,0.0871,0.0109
11,0.369,5.61,-14.9,0.0000352,0.0872,0.0327
12,0.369,1.45,-11.6,-0.000000963,0.0872,0.0109
13,0.369,3.53,-9.41,-0.00000186,0.0872,0.0327
14,0.369,6.44,-17.2,0.000513,0.0872,0.0327
15,-0.11,-2.57,4.11,-0.000127,-0.0209,-0.0131
16,-0.11,-2.76,4.43,-0.000606,-0.0211,-0.0132
17,0.37,0.761,-6.09,0.0000571,0.0871,0.0109
18,0.3678,1.45,-3.88,0.00209,0.0865,0.0325
19,0.381,-2.46,-19.4,-0.00274,0.0874,-0.0111
20,0.369,4.36,-11.6,-0.000003,0.0872,0.0327
21,-0.111,-1.74,2.79,0.000000903,-0.0209,-0.0131
22,-0.111,-1.91,3.05,-0.000000953,-0.0209,-0.0131
23,0.368,2.28,-6.09,0.000164,0.0871,0.0327
24,-0.11,-0.913,1.46,-0.0000412,-0.0209,-0.0131
25,-0.111,-1.08,1.73,-0.0000101,-0.0209,-0.0131
26,-0.144,-0.278,0.354,0.000672,-0.0228,-0.014
27,0.344,-0.344,-2.76,-0.00202,0.0877,-0.0107
28,0.369,3.11,-8.3,0.0000105,0.0871,0.0327
29,0.383,1.04,-2.76,0.00418,0.089,0.033
30,-0.124,-0.421,0.679,-0.00246,-0.0216,-0.0133
"""

# Parse the literal as CSV; index_col=0 makes the first column (Serial_no)
# the row index, leaving the six columns A-F as data.
df = pd.read_csv(io.StringIO(a),index_col=0)

from time import perf_counter as pc

# T collects three time stamps: start, after brute_force, after smart.
T = [pc()]
xb = brute_force(df.to_numpy())
T += [pc()]
xs = smart(df.to_numpy())
T += [pc()]

# Both methods must select the same row positions.
assert (xs == xb).all()
# Elapsed seconds: [brute_force, smart].
print(np.diff(T))

关于python - 如何迭代行索引并从Python数据集中找到具有最大行列式的组?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58143294/

相关文章:

python - 如何在不编写类的情况下访问 Tkinter 中按钮的父级?

python - TensorFlow - 在 session 中显示所有变量

python - 如何将带有索引列表的字典映射到新变量

python - 可能的全局变量问题?

python - 如何使用 Pandas 中每天变化的固定引用进行计算?

python - 如何使用 Python 只读取 CSV 文件的标题列?

python - 如何从 pandas 数据集中创建网格大小不均匀的热图?

python - 根据其他数组的阈值创建新的 2d numpy 数组

Python重采样实现如Matlab的Signal Toolbox的重采样函数

python - Cython/Numba 编译函数可以改进 numpy.max(numpy.abs(a-b)) 吗?