python - numpy的columnstack/hstack功能不一致

标签 python arrays numpy


相同类型、相同大小的 numpy 数组无法通过 np.hstack、np.column_stack 或 np.concatenate(axis=1) 按列堆叠在一起。


我不明白 numpy 数组的哪些属性会导致 numpy.hstack、numpy.column_stack 或 numpy.concatenate(axis=1) 不能正常工作。我的真实程序在按列堆叠时遇到了问题——它只会按行追加。numpy 数组是否有某些属性会导致这种情况?它不会抛出错误,只是没有执行“正确”或“正常”的行为。


# Minimal example: two 2x2 object arrays holding numeric strings.
a = np.array([['1', '2'], ['3', '4']], dtype=object)
b = np.array([['5', '6'], ['7', '8']], dtype=object)
# np.hstack accepts ONE sequence of arrays, so the pair is passed as a tuple;
# passing the arrays as two positional arguments raises TypeError.
np.hstack((a, b))
# Expected result of the call above: the columns of b appended to those of a.
np.array([['1', '2', '5', '6'], ['3', '4', '7', '8']], dtype=object)



First array:
[['29.8989', '0'] ['29.8659', '-8.54805e-005'] ['29.902', '-0.00015875']
 ..., ['908.791', '-0.015765'] ['908.073', '-0.0154842'] []]

Second array (to be added on in columns):
[['29.8989', '26.8556'] ['29.8659', '26.7969'] ['29.902', '29.0183'] ...,
 ['908.791', '943.621'] ['908.073', '940.529'] []]

What should be the two arrays side by side or in columns:
[['29.8989', '0'] ['29.8659', '-8.54805e-005'] ['29.902', '-0.00015875']
 ..., ['908.791', '943.621'] ['908.073', '940.529'] []]



import numpy as np

def contiguous_regions(condition):
    """Find the contiguous runs of True values in a 1-D boolean sequence.

    Parameters
    ----------
    condition : array_like of bool
        One-dimensional sequence of flags. Lists are accepted as well as
        arrays because the input is passed through ``np.asarray``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_runs, 2)``. Each row holds the start
        index of a run and the index one past its last element, so
        ``condition[start:stop]`` is entirely True. The shape is ``(0, 2)``
        when there are no runs, including for empty input.
    """
    condition = np.asarray(condition)
    if condition.size == 0:
        # Nothing to index into; condition[0] below would raise IndexError.
        return np.empty((0, 2), dtype=np.intp)

    # Positions where the value changes between neighbours, shifted by one
    # so that each index points at the first element AFTER the change.
    d = np.diff(condition)
    idx, = d.nonzero()
    idx += 1

    if condition[0]:
        # A run that is already open at the start begins at index 0.
        idx = np.r_[0, idx]
    if condition[-1]:
        # A run that is still open at the end stops at the array length.
        idx = np.r_[idx, condition.size]

    # Boundaries now alternate start, stop, start, stop, ...
    return idx.reshape(-1, 2)

def is_number(s):
    """Return True when *s* can be converted to floating-point data.

    *s* may be a single value (for example the string ``'1.5'``) or a
    sequence of values (for example one row of tokens from a text file);
    a sequence qualifies only if every item converts. An empty sequence
    converts without error and therefore returns True.

    NOTE(review): the ``try:`` header and the conversion call were missing
    from this copy of the snippet. The conversion used here is a
    reconstruction -- confirm it against the original post.
    """
    try:
        # Conversion is attempted only for its side effect of raising on
        # non-numeric input; the resulting array is discarded.
        np.asarray(s, dtype=float)
    except (ValueError, TypeError):
        return False
    return True

# Demonstration: split a table of strings into its runs of all-numeric rows
# and stack those runs side by side.
total_array = np.array([['1', '2'], ['3', '4'], ['strings','here'], ['5', '6'], ['7', '8']], dtype=object)
# One flag per row. The map() result is materialised with list() because on
# Python 3 map() is lazy and np.array() would wrap the iterator in a 0-d
# object array instead of building a 1-D array of flags.
where_number = np.array(list(map(is_number, total_array)))
contig_ixs = contiguous_regions(where_number)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(contig_ixs)
# One 2-D slice of the table per numeric run.
t = tuple(total_array[s[0]:s[1]] for s in contig_ixs)
print(t)
print(np.hstack(t))



import numpy as np

def retrieve_XY(file_path):
    """Extract the longest contiguous block(s) of numeric rows from a text file.

    Each line is split on whitespace after commas are replaced by spaces,
    and blank lines are dropped. Rows are flagged with ``is_number`` and the
    longest contiguous run of flagged rows is selected; if several runs share
    the maximum length, they are joined side by side as extra columns.
    Diagnostic output is printed along the way.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array holding the selected data transposed, i.e. one row per
        column of the file data. An empty 1-D array is returned when no
        numeric run can be found.

    Raises
    ------
    ValueError
        Presumably raised by the float conversion when rows inside a
        selected run have differing numbers of columns -- TODO confirm.
    """
    # XY data is read in from a file in text format; the context manager
    # closes the handle even if reading fails.
    with open(file_path) as handle:
        raw_lines = handle.readlines()

    # Turn each line into a list of tokens, splitting on whitespace and
    # treating commas as separators.
    file_data = [line.rstrip('\n').replace(',', ' ').split() for line in raw_lines]

    # Remove empty lists (blank lines). The rows stay in a plain Python list
    # because rows of different lengths cannot form a regular 2-D array.
    file_data = [row for row in file_data if row]

    # Each row is checked to make sure that all of its items are numbers.
    where_num = np.array([is_number(row) for row in file_data], dtype=bool)

    try:
        # The data is searched for contiguous chains of numeric rows.
        contig = contiguous_regions(where_num)
        # Data lengths (number of rows) for each set of data in the file
        data_lengths = contig[:, 1] - contig[:, 0]
        # Maximum number of contiguous rows; np.amax raises ValueError when
        # there are no runs at all.
        maxs = np.amax(data_lengths)
        # Indices of the longest runs. If there are two equally long runs,
        # both indices are kept.
        longest_contig_idx = np.where(data_lengths == maxs)
    except (ValueError, IndexError):
        # IndexError covers a run finder that indexes into empty input.
        print('Problem finding contiguous data')
        return np.array([])

    # Starting and stopping indices of the longest contiguous data
    ss = contig[longest_contig_idx]

    # Each selected run becomes a real 2-D float array. Concatenating along
    # axis 1 needs 2-D inputs; 1-D object arrays of lists have no axis 1.
    longest_data_chains = tuple(
        np.array(file_data[start:stop], dtype=float) for start, stop in ss
    )
    print("First array:")
    print(longest_data_chains[0])
    if len(longest_data_chains) > 1:
        # Only shown when a second run of equal length exists.
        print("Second array (to be added on in columns):")
        print(longest_data_chains[1])

    # If there are multiple sets of data of the same length, they are added
    # in columns.
    column_stacked_data_chain = np.concatenate(longest_data_chains, axis=1)

    print("What should be the two arrays side by side or in columns:")
    print(column_stacked_data_chain)

    # Transpose so that each column of the file becomes one row of the result.
    xy = column_stacked_data_chain.T.copy()
    return xy

def contiguous_regions(condition):
    """Finds contiguous True regions of the boolean array "condition".

    Parameters
    ----------
    condition : array_like of bool
        One-dimensional flags; converted with ``np.asarray`` so plain lists
        work too.

    Returns
    -------
    numpy.ndarray
        A 2D integer array where the first column is the start index of each
        region and the second column is the end index (one past the last
        True element). Has shape ``(0, 2)`` if there are no True regions,
        which includes empty input.
    """
    condition = np.asarray(condition)

    if condition.size == 0:
        # Empty input has no regions; indexing condition[0] would fail.
        return np.empty((0, 2), dtype=np.intp)

    # Find the indices of changes in "condition"
    d = np.diff(condition)
    idx, = d.nonzero()

    # We need to start things after the change in "condition". Therefore,
    # we'll shift the index by 1 to the right.
    idx += 1

    if condition[0]:
        # If the start of condition is True prepend a 0
        idx = np.r_[0, idx]

    if condition[-1]:
        # If the end of condition is True, append the length of the array
        idx = np.r_[idx, condition.size]

    # Reshape the result into two columns (start, stop)
    return idx.reshape(-1, 2)

def is_number(s):
    """Tell whether *s* can be converted to floating-point data.

    Accepts either a single value or a sequence of values (such as the list
    of tokens from one line of a file). Returns True only if every item can
    be converted to float; an empty sequence converts without error and so
    also returns True.

    NOTE(review): this copy of the snippet had lost its ``try:`` header and
    the conversion call. The conversion below is a reconstruction --
    confirm it against the original post.
    """
    try:
        # Only the success or failure of the conversion matters here.
        np.asarray(s, dtype=float)
    except (ValueError, TypeError):
        return False
    return True

更新:我在 @hpaulj 的帮助下解决了这个问题。显然,仅仅让两种情况下的数据结构都类似于 np.array([['1','2'],['3','4']]) 是不够的,因为我实际使用的数据 dtype=object,并且列表中含有一些字符串。因此,numpy 将其视为一维数组,而不是所需的二维数组。

解决此问题的方法是对 readlines 函数得到的每个列表调用 map(float, data)。


import numpy as np

def retrieve_XY(file_path):
    """Read a text file and return its longest block(s) of numeric rows.

    Lines are split on whitespace with commas treated as separators, and
    blank lines are discarded. Rows are flagged with ``is_number`` and the
    longest contiguous run of flagged rows is converted to floats. Runs that
    tie for the maximum length are stacked side by side as extra columns.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array containing the selected data transposed (one row per
        data column). An empty 1-D array is returned, after printing a
        message, when no numeric run is found.

    Raises
    ------
    ValueError
        Presumably raised by the float conversion when rows inside a
        selected run have differing numbers of columns -- TODO confirm.
    """
    # XY data is read in from a file in text format; "with" guarantees the
    # file is closed.
    with open(file_path) as handle:
        raw_lines = handle.readlines()

    # The list of lines is made into a list of token lists, splitting by
    # whitespace and treating commas as separators.
    file_data = [line.rstrip('\n').replace(',', ' ').split() for line in raw_lines]

    # Remove empty lists. Rows are kept as a plain list rather than an array
    # so that rows of unequal length do not need a ragged object array.
    xy_rows = [row for row in file_data if row]

    # Each row is checked to make sure that all of its items are numbers.
    where_num = np.array([is_number(row) for row in xy_rows], dtype=bool)

    try:
        # The data is searched for contiguous chains of numeric rows.
        contig = contiguous_regions(where_num)
        # Data lengths
        data_lengths = contig[:, 1] - contig[:, 0]
        # All maximums in contiguous data; np.amax raises ValueError when
        # there are no runs.
        maxs = np.amax(data_lengths)
        longest_contig_idx = np.where(data_lengths == maxs)
    except (ValueError, IndexError):
        # IndexError covers a run finder that indexes into empty input.
        print('Problem finding contiguous data')
        return np.array([])

    # Starting and stopping indices of the contiguous data are stored
    ss = contig[longest_contig_idx]

    print(ss)
    # Every value in each selected run is cast to float, giving one 2-D
    # array per run.
    longest_data_chains = [
        np.array(xy_rows[start:stop], dtype=float) for start, stop in ss
    ]

    # If there are multiple sets of data of the same length, they are added
    # in columns.
    column_stacked_data_chain = np.hstack(longest_data_chains)

    # Transpose so that each data column becomes one row of the result.
    xy = column_stacked_data_chain.T.copy()
    return xy

def contiguous_regions(condition):
    """Finds contiguous True regions of the boolean array "condition".

    Parameters
    ----------
    condition : array_like of bool
        One-dimensional flags. The value is passed through ``np.asarray``,
        so lists are accepted in addition to arrays.

    Returns
    -------
    numpy.ndarray
        A 2D integer array with one row per region: the first column is the
        start index and the second column is the end index (exclusive).
        The shape is ``(0, 2)`` when no region exists, e.g. for empty input.
    """
    condition = np.asarray(condition)

    if condition.size == 0:
        # Guard: the first/last element checks below need at least one item.
        return np.empty((0, 2), dtype=np.intp)

    # Find the indices of changes in "condition"
    d = np.diff(condition)
    idx, = d.nonzero()

    # We need to start things after the change in "condition". Therefore,
    # we'll shift the index by 1 to the right.
    idx += 1

    if condition[0]:
        # If the start of condition is True prepend a 0
        idx = np.r_[0, idx]

    if condition[-1]:
        # If the end of condition is True, append the length of the array
        idx = np.r_[idx, condition.size]

    # Reshape the result into two columns
    return idx.reshape(-1, 2)

def is_number(s):
    """Report whether *s* is convertible to floating-point data.

    *s* can be one value or a sequence of values (e.g. the tokens of one
    file line); for a sequence, every item must be convertible. An empty
    sequence converts without error, so it returns True.

    NOTE(review): the ``try:`` header and the conversion call were absent
    from this copy of the snippet. The conversion below is a reconstruction
    -- confirm it against the original post.
    """
    try:
        # The converted array is not needed; only whether conversion raises.
        np.asarray(s, dtype=float)
    except (ValueError, TypeError):
        return False
    return True




>>> a = np.array([[1, 2], [3, 4]])
>>> b = np.array([[1, 2], [3, 4], []])
>>> a.shape
(2L, 2L)
>>> a.dtype
dtype('int32')
>>> b.shape
(3L,)
>>> b.dtype
dtype('O')

由于末尾的空列表,numpy 创建的不是 2D 数组,而是 1D 数组,其中每个元素都是一个长度为 2 的列表对象。

关于python - numpy的columnstack/hstack功能不一致,我们在Stack Overflow上找到一个类似的问题:


python - 为 3D numpy 数组中的每个元素传递多个条件的更有效方法

python - 如何将 2D numpy 数组的 2D numpy 数组重构为 4D float 组?

python - 当输入来自CGI时,如何在OpenCV中读取解析图像

python - 将行从一个文件复制到另一个文件

python - 如何获得与 Matlab 的 'special' (mldivide) 运算符使用 numpy/scipy 返回的欠定线性系统相同的 `A\b` 解?


javascript - 使用 lodash 按值对对象进行排序

python - 为什么类在未启动时会得到 "called"? - Python

python defaultdict : 0 vs. int and [] vs list

javascript - 对键和值数组的 Jquery 对象(Ajax 响应)