python - 如何根据字符串列表创建解密字典以按字符串加密

标签 python algorithm dictionary encryption cryptography

我有一些意见 inputs = ["and", "dick", "jane", "puff", "spot", "yertle"] 比我有由输入中的单词组成的句子 encrypted_sentence ="bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn" 但字母以某种模式发生了变化。例如“a”='x'; “n”='s'; “d”=“b”...... 字母表中的每个字母都被替换为字母表中的某个不同字母。我需要创建字典,其中新的“字母”将是,旧字母将是。像这样 decrypt_dict={ "x":'a', "s":'n', "b":'d', "q":'a', "t":'t'} 等等...如果输入中的唯一字母数量与句子中的唯一字母数量不同,打印“ERROR”因为不可能从输入创建句子 如果有些字母没有用到,没关系,跳过它们即可。没有必要用字母表中的每个字符创建字典,我只需要编码中使用的字符

这是我到目前为止的代码(我使用了另一个测试字符串):


import itertools

def deduplicate(lst):
    elems = set()
    rest = []
    for el in lst:
        if el not in elems:
            elems.add(el)
            rest.append(el)
    return rest


def deduplicate_lists(lst_of_lists, differenet_lists=False):
    lst_of_lists.sort()
    if differenet_lists:
        return [lst for lst, _ in itertools.groupby(lst_of_lists)]
    else:
        return [*[lst for lst, _ in itertools.groupby(lst_of_lists)]]


def try_it_faster(lists_by_len_of_words, decrypt_dict):
    for key, val in lists_by_len_of_words.items():
        # print(ke, val)
        if len(val) == 1:
            for index, char in enumerate(val[0]):
                # print(char, index_for_word)
                decrypt_dict.update({char: key[index]})
    return decrypt_dict


def decrypt(string_to_decrypt, decrypt_dict):
    returner = None
    try:
        returner = "".join(decrypt_dict[i] for i in string_to_decrypt)
    except KeyError:
        pass
    return returner


def c_just_multiple_val_dict(dict, help_dict=None):
    copy_dict = dict.copy()
    for keys, values in copy_dict.items():
        # print(keys, values)
        if len(values) == 1:
            dict.pop(keys)

    if help_dict is not None:
        copy_dict = dict.copy()
        for word in copy_dict.keys():
            if all([char in help_dict.values() for char in word]):
                dict.pop(word)
    return dict


def move_items(lst):
    return lst[0:] + lst[0]


# timed
number = 6
inputs = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
          'tie']  # input words default: "and", "dick", "jane", "puff", "spot", "yertle"
decrypt_dict = {' ': ' '}
encryption_as_string = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww"  # default: bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn
encryption_key = encryption_as_string.split()  # create list from encrypted string
lists_by_len_of_words = {inp: deduplicate([a for a in encryption_key if len(a) == len(inp)]) for inp in
                         inputs}  # dict key=input word, deduplicated value=words from encryption_key with same length as input word
# timed

# print start values
print(f"1. {inputs}")
print(f"2. {encryption_key}")
print(f"3. {lists_by_len_of_words}")
print(f"4. {decrypt_dict}")

decrypt_dict = try_it_faster(lists_by_len_of_words,
                             decrypt_dict)  # if len of input words is unique, this will make it faster
secondary_decrypt_dict = decrypt_dict.copy()
print(f"4. After try_it_faster(): {decrypt_dict}")

# same_len_word_list = [word for word in lists_by_len_of_words.keys()]
inputs_sorted = sorted(inputs, key=len)  # input sorted
inputs_as_string = " ".join(inputs_sorted)  # sorted input to string

encryption_key_deduplicated_sorted = sorted(deduplicate(encryption_key),
                                            key=len)  # encryption_key sorted and deduplicated
encryption_key_as_string = " ".join(encryption_key_deduplicated_sorted)  # sorted encryption_key to string

# print sorted and deduplicated start values
print(f"1. Sorted input words as string: {inputs_as_string}")
print(f"2. Sorted and deduplicated encrypted words as list: {encryption_key_deduplicated_sorted}")
print(f"2. Sorted and deduplicated encrypted words as string: {encryption_key_as_string}")

mover_check = len(encryption_key_deduplicated_sorted)#*len(encryption_key_deduplicated_sorted)
mover = 0
# while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string:
while decrypt(encryption_key_as_string, secondary_decrypt_dict) != inputs_as_string and mover < mover_check:
    print(f"Before clearing:{encryption_key_deduplicated_sorted}")
    for index_of_word, input_word in enumerate(inputs_sorted):
        print(index_of_word, input_word,
              decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict))
        if decrypt(encryption_key_deduplicated_sorted[index_of_word], secondary_decrypt_dict) == input_word:
            for index_of_char, char in enumerate(encryption_key_deduplicated_sorted[index_of_word]):
                # print(index_of_char, char)
                decrypt_dict[char] = input_word[index_of_char]
            print(f"pop out {inputs_sorted[index_of_word]} a {encryption_key_deduplicated_sorted[index_of_word]}")
            inputs_sorted.pop(index_of_word)
            encryption_key_deduplicated_sorted.pop(index_of_word)

    print(f"After clearing:{encryption_key_deduplicated_sorted}")
    print(f"Work wi      :{inputs_sorted}")
    secondary_decrypt_dict.clear()
    secondary_decrypt_dict.update(decrypt_dict)
    # secondary_decrypt_dict.update(decrypt_dict)

    # print(encryption_key_deduplicated_sorted)
    for index_for_word, encryption_word in enumerate(encryption_key_deduplicated_sorted):
        print(index_for_word, encryption_word)
        for index_for_char, letter in enumerate(encryption_word):
            # print(letter)
            if secondary_decrypt_dict.get(letter) is None:
                print(f"secondary_dict nor contain:{letter}")
                if decrypt_dict.
                try:
                    print("".join(char for char in inputs_sorted[index_for_word])[index_for_char])
                    secondary_decrypt_dict.update(
                        {letter: "".join(char for char in inputs_sorted[index_for_word])[index_for_char]})
                    setter = False
                    # print(secondary_decrypt_dict)
                except IndexError:
                    setter = True
            else:
                # pass
                setter = False
                print(f"Contains:{letter}")
    encryption_key_deduplicated_sorted = encryption_key_deduplicated_sorted[1:] + encryption_key_deduplicated_sorted[:1]
    if setter:
        sorted(encryption_key_deduplicated_sorted, key=len)
    print(decrypt(encryption_key_as_string, secondary_decrypt_dict))
    mover += 1

print(secondary_decrypt_dict)


print(decrypt_dict)
final_dict = {**secondary_decrypt_dict, **decrypt_dict}

print(final_dict)
str = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww"
if decrypt(str, final_dict) is None:
    ret_str = ""
    for char in encryption_key_as_string:
        if char != " ":
            ret_str.__add__(char)
        else:
            ret_str.__add__(" ")
    print(ret_str)
else:
    print(decrypt(str, final_dict))


最佳答案

回溯方法可用于查找字母映射的组合,从而在逐个长度的基础上生成匹配的单词。

为了优化这种“试错”组合遍历,根据加密单词与相同长度的明文单词的配对,为每个加密字母分配了它可以映射到的潜在明文字母。

knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted  = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"

letterMap = dict()
for word in encrypted.split():
    for kw in knownWords:                 # matching words 
        if len(kw)!=len(word): continue   # of same length
        for k,e in zip(kw,word):                  
             letterMap.setdefault(e,set()).add(k) # letter to letter options

def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)

from collections import deque
finalMap = None
mapQ     = deque([[dict(),set(letterMap),set()]]) # mapping,remaining,usedClear
while mapQ and not finalMap:
    mapping,remaining,used = mapQ.pop()           # DFS mapping to complete
    letter = remaining.pop()                      # pick a letter to map
    for clearLetter in letterMap[letter]-used:        # select clear letters
        newMap    = {**mapping,letter:clearLetter}    # enhance mapping
        if remaining:                                 # more letters to map
            mapQ.append([newMap, remaining.copy(), used | {clearLetter}]) 
            continue
        if all(decrypt(w,newMap) in knownWords for w in encrypted.split()):
            finalMap = newMap                         # found mapping that works
            break 

最终结果:

print(decrypt(encrypted, finalMap))
'dick and jane and puff and spot and yertle'

print(finalMap)
{'y': 'u', 'p': 'y', 'q': 'p', 't': 't', 'j': 'i', 'm': 'f', 'e': 'r',
 'f': 'l', 'g': 'k', 'h': 'j', 'a': 'o', 'v': 'c', 'n': 'e', 'x': 'a',
 'r': 's', 'b': 'd', 's': 'n'}

其他测试用例:

knownWords = ['all', 'and', 'apple', 'ass', 'gibble', 'horror', 'hound', 'love',
      'tie']
encrypted  = "cvvli euynm dxggli euzzuz cnm bxi lupi cll cww"

...

decrypt(encrypted, finalMap)
'apple hound gibble horror and tie love all ass'

decrypt("cvvli euynm acqyg dxggli euzzuz cnm bxi lupi cll cww",finalMap)
'apple hound aaqub gibble horror and tie love all ass'

请注意,我使用了严格条件来退出搜索循环,其中所有加密单词都在已知单词列表中找到。这可以通过跟踪“最佳匹配”并让循环耗尽mapQ队列来缓解。定义“最佳匹配”的含义取决于您。可以用单词数、不同单词数、字符覆盖率等来衡量。

[编辑]更有效的方法...

我尝试了上面的“最佳匹配”方法,发现处理时间会变得非常长。因此,我基于整个单词的映射而不是逐个字母的搜索制定了一个新的(递归)解决方案。这可以提供更好的结果,并且部分匹配的管理起来也更简单:

def decrypt(w,mapping): return "".join(mapping.get(c,c) for c in w)

def findMapping(knownWords,encrypted):
    if not encrypted: return {} # no more encrypted words, stop recursion
    if isinstance(encrypted,str): encrypted = list(set(encrypted.split()))
    bestMap   = findMapping(knownWords,encrypted[1:]) # track best
    bestMatch = sum(decrypt(w,bestMap) in knownWords for w in encrypted)
    eWord     = encrypted[0] # map first encrypted word
    for kWord in knownWords:
        if len(eWord) != len(kWord): continue # with word of same length
        mapping = {e:k for e,k in zip(eWord,kWord)}  # letter to letter
        if decrypt(eWord,mapping) != kWord: continue # one to one only
        unused   = str.maketrans('','',kWord)  # removed used letters
        subWords = [*filter(None,(kw.translate(unused) for kw in knownWords))]
        unmapped = str.maketrans('','',eWord)  # remove mapped letters
        subCrypt = [*filter(None,(ew.translate(unmapped) for ew in encrypted))]
        mapping.update(findMapping(subWords,subCrypt)) # Recurse subwords
        match = sum(decrypt(w,mapping) in knownWords for w in encrypted)
        if match>bestMatch: bestMap,bestMatch = mapping,match  # best match
    return bestMap # return mapping with best match (number of mapped words)
                    
  • 此解决方案选择一个加密单词,并尝试将其映射到相同长度的已知单词。
  • 这会建立一个字母映射(必须是一对一的)。
  • 在进入下一级递归之前,此映射中的字母将从加密单词和已知单词中删除。
  • 因此,下一级递归只需执行有限的映射,不会与第一个单词的字母映射重叠。
  • 对相同长度的每个已知单词执行此操作,并通过递归处理所有加密单词,将生成从字母到字母角度兼容的单词到单词的映射。
  • 跟踪匹配单词数量最多的映射并将其返回给调用者。
  • 为了支持不完全匹配,解决方案首先通过跳过第一个加密单词进行递归。这为选择最佳匹配建立了基线。

输出:

knownWords = ["and", "dick", "jane", "puff", "spot", "yertle"]
encrypted  = "bjvg xsb hxsn xsb qymm xsb rqat xsb pnetfn"
mapping    = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'dick and jane and puff and spot and yertle'
print(mapping)
{'r': 's', 'q': 'p', 'a': 'o', 't': 't', 'y': 'u', 'm': 'f', 'b': 'd',
 'j': 'i', 'v': 'c', 'g': 'k', 'p': 'y', 'n': 'e', 'e': 'r', 'f': 'l',
 'h': 'j', 'x': 'a', 's': 'n'}


knownWords = "apple hound gibble horror and tie love all ass".split()
encrypted  = "cvvli euynm acqyg dxggli euzzuz cnm bxi lupi upilucnm"
mapping    = findMapping(knownWords,encrypted)
print(decrypt(encrypted,mapping))
'apple hound aaqub gibble horror and tie love oveloand'
print(mapping)
{'b': 't', 'x': 'i', 'i': 'e', 'l': 'l', 'u': 'o', 'p': 'v', 'c': 'a',
 'v': 'p', 'd': 'g', 'g': 'b', 'e': 'h', 'y': 'u', 'n': 'n', 'm': 'd',
 'z': 'r'}

关于python - 如何根据字符串列表创建解密字典以按字符串加密,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/66581219/

相关文章:

python - 子进程返回退出状态 1

arrays - 在 n*3 维的二维数组中找到元素的最小总和,使得从一行中选择的索引不应在相邻行中选择

创建一个复杂的循环

python - 如何使用列表快速更新大字典?

java - 我们应该遍历 map 吗?为什么?

javascript - 在 Flask 中嵌入 Bokeh 图

python - 用于 OpenGL 第一人称射击游戏 View 的顶点/片段着色器?

python - 如何用 Pandas 替换每一列中的目录路径?

ruby - 加速 Ruby 中最大的质因数程序

c++ - 插入 map 时多次调用析构函数