python - 煎饼排序中最短翻转序列的计数

标签 python algorithm sorting optimization multiprocessing

现在,在 pancake sorting 中找到最短的翻转序列是一个 NP-hard 问题,但我想找到它们中的每一个,并对它们进行计数。

也就是说，对每个排列，我想找到所有能把它恢复成恒等排列、且长度不超过最短序列长度的前缀反转序列。

这是我到目前为止所得到的:

#!/bin/env python3
# coding: utf-8

from math import factorial
import itertools

from multiprocessing import cpu_count, Manager, Pool

import numpy
import scipy.sparse

def flip(x, value):
    """Return *value* with its length-*x* prefix reversed (a pancake flip)."""
    prefix, suffix = value[:x], value[x:]
    return tuple(reversed(prefix)) + tuple(suffix)

def rank(perm):
    """Lexicographic rank of *perm* among all permutations of its items.

    Classic Lehmer-code ranking: at each position, count how many later
    elements are smaller and weight the count by the factorial of the
    remaining length.
    """
    n = len(perm)
    weight = factorial(n)
    total = 0
    for pos, pivot in enumerate(perm):
        weight //= n - pos
        smaller = sum(1 for later in perm[pos:] if later < pivot)
        total += smaller * weight
    return total

def unrank(i, items):
    """Return the permutation of *items* whose lexicographic rank is i mod n!.

    Inverse of rank(): repeatedly divide the rank by the factorial of the
    remaining pool size and pop the selected element.
    """
    pool = list(items)
    n = len(pool)
    weight = factorial(n)
    code = i % weight
    result = []
    while pool:
        weight //= n
        index, code = divmod(code, weight)
        result.append(pool.pop(index))
        n -= 1
    return tuple(result)

def get_colex_row(r, n, _fact):
    """Build one row of the flip-adjacency matrix on demand.

    row[0, column] == i + 1 means a prefix reversal of length i + 1 turns
    the permutation of co-lex rank *r* into the one of co-lex rank
    *column*.  *_fact* is the precomputed table [1!, 2!, ..., n!].

    Rows are generated lazily (and stored sparse) so the full n! x n!
    matrix never has to be materialised at once.
    """
    row = scipy.sparse.dok_matrix((
        1, _fact[n - 1]), dtype=numpy.int8)
    perm = unrank(r, [i for i in range(n)])
    for i in range(n):
        # A length-(i+1) flip only alters the low-order digits of the
        # co-lex rank: keep the high part (r - r % _fact[i]) and re-rank
        # the reversed prefix via the slice perm[:-i-2:-1].
        # NOTE(review): slice convention assumed from rank()/unrank()
        # symmetry -- confirm against get_matrix() validation.
        column = r - r % _fact[i] + rank(perm[:-i - 2:-1])
        row[0, column] = i + 1
    return row

def get_colex_matrix(n):
    """Assemble the full sparse n! x n! flip-adjacency matrix.

    m[r, c] == k means a prefix reversal of length k maps the permutation
    of co-lex rank r to the one of rank c.  Rows come from
    get_colex_row(); the matrix stays sparse (dok) to limit memory.
    """
    fact = [factorial(i) for i in range(1, n + 1)]
    m = scipy.sparse.dok_matrix(
            (fact[n - 1], fact[n - 1]), dtype=numpy.int8)
    # (removed `items`, an unused local in the original)
    for r in range(fact[n - 1]):
        m[r] = get_colex_row(r, n, fact)
    return m

def get_distance(n, items):
    """Breadth-first flood of the pancake graph: flip distances only.

    Returns a dict mapping each permutation (stored as
    unrank(rank, items)[::-1]) to the minimum number of prefix reversals
    needed to reach it from the BFS source.
    """
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}  # every permutation
    m = get_colex_matrix(n)
    # Source: permutation of highest co-lex rank, reversed, at distance 0.
    distance = {unrank(nfact - 1, items)[::-1] : 0}
    new_distance = {nfact - 1}  # BFS frontier kept as co-lex ranks
    d = 0
    while distance.keys() != stack:
        new_new_distance = set()
        d += 1  # all nodes discovered this round are at distance d
        for visiting in new_distance:
            for i in range(2, n + 1):
                # Position of value i in the row = rank reached by a
                # length-i flip.  NOTE(review): relies on
                # m[visiting].tolist() yielding a flat indexable row --
                # confirm against the installed scipy version.
                key_index = m[visiting].tolist().index(i)
                key = unrank(key_index, items)[::-1]
                if key not in distance:
                    distance[key] = d
                    new_new_distance.add(key_index)
        new_distance = new_new_distance
    return distance

def get_paths_serial(items):
    """BFS that collects, for every permutation, ALL shortest flip paths.

    Returns {permutation (reversed-rank tuple): set of tuples}; each
    tuple lists the flip lengths of one shortest prefix-reversal
    sequence from the BFS source to that permutation.
    """
    n = len(items)
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}  # every permutation
    m = get_colex_matrix(n)
    # Source node: highest co-lex rank, reversed; its only path is ().
    distance = {unrank(nfact - 1, items)[::-1]: {()}}
    new_distance = {nfact - 1}  # BFS frontier kept as co-lex ranks
    while distance.keys() != stack:
        new_new_distance = set()
        for visiting_index in new_distance:
            for i in range(2, n + 1):
                # Rank reached from visiting_index by a length-i flip.
                key_index = m[visiting_index].tolist().index(i)
                key = unrank(key_index, items)[::-1]
                visiting = unrank(visiting_index, items)[::-1]
                paths = distance[visiting]

                # All stored paths for one node share a length, so a
                # single sample suffices for the comparisons below.
                prev_sample = next(iter(paths))

                if key not in distance:
                    distance[key] = {path + (i,) for path in paths}
                    new_new_distance.add(key_index)
                else:
                    curr_sample = next(iter(distance[key]))
                    if len(prev_sample) + 1 < len(curr_sample):
                        print("Shouldn't happen!")
                        distance[key] = {path + (i,) for path in paths}
                    elif len(prev_sample) + 1 == len(curr_sample):
                        # Another shortest route to key: merge path sets.
                        distance[key] |= {path + (i,) for path in paths}
                    else:
                        # not relevant
                        pass
        new_distance = new_new_distance
    return distance

def _worker(ns, index):
    """Expand one frontier node (given by co-lex rank) for the parallel BFS.

    *ns* is a multiprocessing.Manager Namespace carrying n, fact, items
    and the distance dict (permutation -> set of shortest flip paths) as
    of the start of this round.

    Returns (newly discovered ranks, {permutation: path sets to merge}).
    """
    row = get_colex_row(index, ns.n, ns.fact).toarray().tolist()[0]
    visiting = unrank(index, ns.items)[::-1]
    paths = ns.distance[visiting]
    prev_sample = next(iter(paths))  # all stored paths share one length
    out = {}
    my_new_distance = set()
    for i in range(2, ns.n + 1):
        key_index = row.index(i)  # rank reached by a length-i flip
        key = unrank(key_index, ns.items)[::-1]
        if key not in ns.distance:
            out[key] = {path + (i,) for path in paths}
            my_new_distance.add(key_index)
        else:
            curr_sample = next(iter(ns.distance[key]))
            if len(prev_sample) + 1 < len(curr_sample):
                print("Shouldn't happen!")
                out[key] = {path + (i,) for path in paths}
            elif len(prev_sample) + 1 == len(curr_sample):
                # Bug fix: `out` starts empty every call, so the original
                # `out[key].update(...)` raised KeyError the first time a
                # known equal-length node was reached; create on demand.
                out.setdefault(key, set()).update(
                    path + (i,) for path in paths)
    return my_new_distance, out

def get_paths_parallel(items):
    """Parallel variant of get_paths_serial using a process pool.

    NOTE(review): each _worker call ships the Manager namespace proxy --
    including the ever-growing `distance` dict -- to a worker process,
    which is a plausible cause of this being slower than the serial
    version (see the accepted answer below).
    """
    n = len(items)
    fact = [factorial(i) for i in range(1, n + 1)]
    # Source node: highest co-lex rank, reversed; its only path is ().
    distance = {unrank(fact[n - 1] - 1, items)[::-1]: {()}}
    stack = {unrank(i, items) for i in range(fact[n - 1])}  # all perms
    already_visited = set()
    visiting = {fact[n - 1] - 1}  # frontier of co-lex ranks
    mgr = Manager()

    namespace = mgr.Namespace()

    # State shared (via proxy) with every worker.
    namespace.fact = fact
    namespace.distance = distance
    namespace.items = items
    namespace.n = n

    with Pool(2 * cpu_count()) as pool:
        while distance.keys() != stack:
            # One BFS level per starmap round.
            result = pool.starmap(_worker, ((namespace, job)
                    for job in visiting))

            visiting = set()
            for next_to_visit, visited in result:
                visiting |= next_to_visit
                for k, v in visited.items():
                    # Merge per-worker partial path sets into the master.
                    if k in distance:
                        distance[k] |= v
                    else:
                        distance[k] = v
            visiting -= already_visited
            already_visited |= visiting
            namespace.distance = distance  # republish merged state
    return distance

def colex(value, other):
    """True if *value* sorts strictly after *other* in co-lex order.

    Positions are compared from the last one down to index 1; index 0 is
    deliberately never examined (for permutations of the same items it
    is implied by the rest).
    """
    for pos in reversed(range(1, len(value))):
        if value[pos] == other[pos]:
            continue
        return value[pos] > other[pos]
    return False

def ordered_by(order_cmp):
    """Convert a cmp=-style predicate into a key= class for sorted().

    *order_cmp(a, b)* must return True when a sorts strictly after b.
    Returns None when order_cmp is None, so the result can be passed
    straight through as key=None.
    """
    if order_cmp is None:
        return None

    class K(object):
        """Wrapper whose ordering delegates to order_cmp."""
        def __init__(self, obj):
            self.value = obj

        def __gt__(self, other):
            # Bug fix: the original `assert "Not the same length"` was a
            # constant truthy string and could never fire; assert the
            # intended condition instead.
            assert len(self.value) == len(other.value), "Not the same length"
            return order_cmp(self.value, other.value)
    return K

def get_ordered(n, order):
    """All permutations of 1..n, sorted by the cmp-style *order* predicate
    (lexicographic when order is None)."""
    perms = itertools.permutations(range(1, n + 1))
    sort_key = ordered_by(order)
    return sorted(perms, key=sort_key)


def get_matrix(n, order=None):
    """Dense flip-adjacency matrix over permutations of 1..n.

    m[i, j] == x means that flipping a prefix of length x turns
    permutation number i (in the given ordering) into permutation
    number j.  Used only to validate get_colex_matrix.
    """
    stack = get_ordered(n, order)
    # O(1) position lookup instead of an O(n!) list.index per edge.
    position = {perm: idx for idx, perm in enumerate(stack)}
    m = numpy.zeros((len(stack), len(stack)), numpy.int8)
    for i, s in enumerate(stack):
        for x in range(1, n + 1):
            m[i, position[flip(x, s)]] = x
    return m

我不确定我做错了什么,但是 get_paths_parallel 运行速度比 get_paths_serial 慢,请帮忙!

我真的应该(并且可能很快)更好地记录我的代码。

所以我暂时补充几句:

它使用共词典序（co-lexicographic order）对排列进行排序，并据此在邻接矩阵中查找索引；矩阵中存储的是转换排列所需翻转的长度，即 A(i, j) = k 表示对秩为 i 的排列做长度为 k 的前缀反转会得到秩为 j 的排列。为了节省内存，我不存储整个矩阵，而是按需生成行，并通过排除已访问过的行来限制访问；出于同样的原因我也使用了 scipy.sparse.dok_matrix。

除此之外，它只是简单地对图做洪泛式（flood-fill / BFS）遍历，直到到达所有排列。

有一些函数没有使用以上所有或任何考虑因素,如 get_matrix,但仅用于验证其他函数,如 get_colex_matrix 是否按预期工作.

我正在以有点复杂的方式创建 key 函数,但这只是因为我在确定 co-lex 之前尝试过其他排序。

最佳答案

使用multiprocessing.Manager 在进程之间共享数据会使它们变慢。

解决方案是将所需数据复制到每个进程的内存空间(将它们作为参数传递)或为它们使用全局变量。

同时使用 scipy.sparse.dok_matrix 是矫枉过正,dict 就可以了。

我会抓取我找到的有关该主题的文献,稍后再链接。

关于python - 煎饼排序中最短翻转序列的计数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48233306/

相关文章:

Python - 获取windows文件夹ACL权限

python - pycharm 看不到 python3.7 解释器

python - 查找、重新格式化和替换字符串中的多个日期

c++ - 为什么用 Eigen 和 OpenCV 计算的 SVD 左奇异 vector 有不同的符号

algorithm - Quick Sort when pivot 的时间复杂度(split list into 90% :10%) (always smallest element for even depth)

python - 检查模块是否在 Jupyter 中运行

Android:高质量的图像大小调整/缩放

java - 将两行连接成一行(在 Java 中)

javascript - 排序颜色/颜色值

java - 为什么在 Collections 中使用比较器而不是 equals()?