python - 使用 Python 对 Elasticsearch JSON 对象中的合约桥牌(Contract Bridge)得分数据进行计算

标签 python elasticsearch

数据在这里:

{'took': 0, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, 'hits': {'total': 16, 'max_score': 1.0, 'hits': [{'_index': 'matchpoints', '_type': 'score', '_id': '6PKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '1', 'nsp': '4', 'ewp': '11', 'contract': '3NT', 'by': 'N', 'tricks': '11', 'nsscore': '460', 'ewscore ': '0'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '7_KYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '2', 'nsp': '3', 'ewp': '10', 'contract': '3C', 'by': 'E', 'tricks': '10', 'nsscore': '-130', 'ewscore ': '130'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '6fKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '1', 'nsp': '5', 'ewp': '12', 'contract': '3NT', 'by': 'S', 'tricks': '10', 'nsscore': '400', 'ewscore ': '0'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '8_KYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '2', 'nsp': '7', 'ewp': '14', 'contract': '3C', 'by': 'E', 'tricks': '10', 'nsscore': '-130', 'ewscore ': '130'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '9PKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '2', 'nsp': '8', 'ewp': '15', 'contract': '3C', 'by': 'E', 'tricks': '11', 'nsscore': '-150', 'ewscore ': '150'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '5fKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '1', 'nsp': '1', 'ewp': '16', 'contract': '3NT', 'by': 'N', 'tricks': '10', 'nsscore': '430', 'ewscore ': '0'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '6vKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '1', 'nsp': '6', 'ewp': '13', 'contract': '4S', 'by': 'S', 'tricks': '11', 'nsscore': '480', 'ewscore ': '0'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '6_KYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '1', 'nsp': '7', 'ewp': '14', 'contract': '3NT', 'by': 'S', 'tricks': '8', 'nsscore': '-50', 'ewscore ': 
'50'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '7fKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '2', 'nsp': '1', 'ewp': '16', 'contract': '6S', 'by': 'N', 'tricks': '12', 'nsscore': '1430', 'ewscore ': '0'}}, {'_index': 'matchpoints', '_type': 'score', '_id': '7vKYGGgBjpp4O0gQgUu5', '_score': 1.0, '_source': {'board_number': '2', 'nsp': '2', 'ewp': '9', 'contract': '3C', 'by': 'E', 'tricks': '10', 'nsscore': '-130', 'ewscore ': '130'}}]}}

Python 代码(已结合最近的修改)如下所示。作为我的中间尝试,没有尝试遍历不同的牌局(board)。此数据仅由一次 match_all(匹配全部)查询生成。

@application.route('/', methods=['GET', 'POST'])
def index():
    """Run a match_all search, score each board and print the results.

    NOTE(review): ``application`` and ``es`` are defined outside this
    snippet - presumably a Flask app and an Elasticsearch client; confirm.
    """
    # match_all places no filter on the documents that are searched.
    # NOTE(review): the sample response reports 'total': 16 but carries only
    # 10 hits - presumably the default result size; confirm.
    search = {"query": {"match_all": {}}}
    resp = es.search(index="matchpoints", doc_type="score", body = search)
    # extract_rows() yields one list of rows per board number.
    rows = extract_rows(resp)
    for board in rows:
        scores = score_board(board)
        # report() prints every row itself and returns None ...
        report(scores)
        # ... so this second call prints every row again, and print() then
        # writes the returned None. This is where the duplicated lines and
        # the "None" after each board in the output come from.
        print(report(scores))
    return 'ok'

def extract_rows(resp):
    """Yield the hit sources of a search response, one list per board.

    Args:
        resp: Search response shaped like ``{"hits": {"hits": [...]}}`` where
            every hit has a ``"_source"`` dict with a ``"board_number"``
            value convertible with ``int()``.

    Yields:
        A list of ``"_source"`` dicts for each board, in ascending numeric
        board order. Rows of one board keep their order from the response.
    """
    def board_of(source):
        # Board numbers arrive as strings; compare them numerically.
        return int(source['board_number'])

    # groupby() only merges adjacent items, so order by board number first.
    sources = sorted(
        (hit["_source"] for hit in resp["hits"]["hits"]),
        key=board_of,
    )
    for _, members in itertools.groupby(sources, key=board_of):
        yield list(members)

def compute_mp(scores, score):
    """Return the match points earned by one pair on a board.

    Args:
        scores: Mapping of score -> number of pairs that achieved it.
        score: The score of the pair being evaluated.

    Returns:
        Two points for every pair with a lower score plus one point for
        every other pair with the same score.
    """
    beaten = 0
    tied = 0
    for other, pairs in scores.items():
        if score > other:
            beaten += pairs
        elif score == other:
            tied += pairs
    # The pair's own result is counted among the ties; take it out again.
    return 2 * beaten + tied - 1

def score_board(tables):
    """Build the match point results for every pair on one board.

    Args:
        tables: Sized collection of row dicts for a single board. Each row
            provides "board_number", "nsp", "ewp" and an "nsscore" value
            convertible with ``int()``.

    Returns:
        A list with one dict per table, in input order, holding the keys
        "board", "nsp", "ewp", "ns_mp_score" and "ew_mp_score".
    """
    # The N-S and E-W results at one table always add up to this value.
    top = (len(tables) - 1) * 2
    # How many N-S pairs achieved each score.
    ns_totals = [int(table["nsscore"]) for table in tables]
    ns_scores = collections.Counter(ns_totals)

    results = []
    for table, ns_total in zip(tables, ns_totals):
        ns_mp = compute_mp(ns_scores, ns_total)
        results.append({
            "board": table["board_number"],
            "nsp": table["nsp"],
            "ewp": table["ewp"],
            "ns_mp_score": ns_mp,
            "ew_mp_score": top - ns_mp,
        })
    return results

# Replace this function with one that adds the rows to
# the new search index
def report(scores):
    """Write each score row to standard output, one row per line.

    Args:
        scores: Iterable of rows; each row is printed as-is.

    Returns:
        None. The rows are only printed, nothing is handed back.
    """
    for entry in scores:
        print(entry)

和以前一样,它会生成正确的字典,其中的评分是正确的,但结果重复且行数过多。另外,输出中有两处 None,我不知道它们是从哪里来的:

{'board': '1', 'nsp': '4', 'ewp': '11', 'ns_mp_score': 6, 'ew_mp_score': 2}
{'board': '1', 'nsp': '5', 'ewp': '12', 'ns_mp_score': 2, 'ew_mp_score': 6}
{'board': '1', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '1', 'nsp': '6', 'ewp': '13', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '1', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 0, 'ew_mp_score': 8}
{'board': '1', 'nsp': '4', 'ewp': '11', 'ns_mp_score': 6, 'ew_mp_score': 2}
{'board': '1', 'nsp': '5', 'ewp': '12', 'ns_mp_score': 2, 'ew_mp_score': 6}
{'board': '1', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '1', 'nsp': '6', 'ewp': '13', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '1', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 0, 'ew_mp_score': 8}
None
{'board': '2', 'nsp': '3', 'ewp': '10', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '8', 'ewp': '15', 'ns_mp_score': 0, 'ew_mp_score': 8}
{'board': '2', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '2', 'nsp': '2', 'ewp': '9', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '3', 'ewp': '10', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '8', 'ewp': '15', 'ns_mp_score': 0, 'ew_mp_score': 8}
{'board': '2', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '2', 'nsp': '2', 'ewp': '9', 'ns_mp_score': 4, 'ew_mp_score': 4}
None

评分是正确的,但同样存在多个相同配对结果重复的案例。

最佳答案

此代码将计算分数。代码相当简单。

这里没有通过遍历输入字典来计算每对的分数,而是把南北方的得分存储在 collections.Counter 中,用它记录取得每个得分的对数。这使得计算每对的比赛分(match point)变得更容易——我们只需将低于当前得分的对数加倍,再加上得分相同的对数,然后减去 1 以排除当前这一对自身。

import collections                                                                                                               
import itertools                                                                                                                                                                                                                                    


def extract_rows(resp):
    """Group the rows of a search response by board number.

    Args:
        resp: Search response shaped like ``{"hits": {"hits": [...]}}`` where
            every hit has a ``"_source"`` dict with a ``"board_number"``
            value convertible with ``int()``.

    Yields:
        One list of ``"_source"`` dicts per board, boards in ascending
        numeric order, rows within a board in response order.
    """
    def board_number(entry):
        # Stored as a string; convert so that board 10 sorts after board 2.
        return int(entry['board_number'])

    entries = sorted(
        (hit["_source"] for hit in resp["hits"]["hits"]),
        key=board_number,
    )
    # groupby() needs its input ordered by the grouping key.
    for _, same_board in itertools.groupby(entries, key=board_number):
        yield list(same_board)


def compute_mp(scores, score):
    """Compute the match point score for a pair.

    Args:
        scores: Mapping of score -> number of pairs that achieved it.
        score: The score of the pair being evaluated.

    Returns:
        Twice the number of pairs with a lower score, plus the number of
        other pairs with an equal score.
    """
    lower = 0
    equal = 0
    for candidate, count in scores.items():
        if score > candidate:
            lower += count
        elif score == candidate:
            equal += count
    # The pair's own score always compares equal to itself - discount it.
    return lower * 2 + (equal - 1)


def score_board(tables):
    """Build the match point results for every pair that played one board.

    Args:
        tables: Sized, re-iterable collection of row dicts for a single
            board. Each row provides "board_number", "nsp", "ewp" and an
            "nsscore" value convertible with ``int()``.

    Returns:
        A list with one dict per table, in input order, holding the keys
        "board", "nsp", "ewp", "ns_mp_score" and "ew_mp_score". An empty
        ``tables`` gives an empty list.
    """
    scores = []

    # Store the scores for each N-S partnership.
    ns_scores = collections.Counter(int(table["nsscore"]) for table in tables)
    # Every pair is compared with each of the other tables and each
    # comparison is worth 2 points, shared between the N-S and the E-W pair
    # at a table. The two results at one table therefore always add up to
    # 2 * (number of tables - 1), also when several N-S pairs tie for the
    # best score, so this value must not be reduced for ties.
    top = 2 * (len(tables) - 1)
    # Build the output for each pair.
    for table in tables:
        output = {
            "board": table["board_number"],
            "nsp": table["nsp"],
            "ewp": table["ewp"],
        }
        ns_score = int(table["nsscore"])
        ns_mp_score = compute_mp(ns_scores, ns_score)
        output["ns_mp_score"] = ns_mp_score
        # E-W receive whatever N-S did not win at this table.
        ew_mp_score = top - ns_mp_score
        output["ew_mp_score"] = ew_mp_score
        scores.append(output)
    return scores

# Replace this function with one that adds the rows to
# the new search index
def report(scores):
    """Print every score row on its own line.

    Args:
        scores: Iterable of rows; each row is printed as-is.

    Returns:
        None. Callers should not print the result of this function.
    """
    for record in scores:
        print(record)

运行代码:

# extract_rows() yields one list of rows per board and score_board() turns
# each of those lists into per-pair result dicts; flatten them into a single
# list and print it exactly once.
scores = [score for board in extract_rows(resp) for score in score_board(board)]
report(scores)

产生这个输出:

{'board': '1', 'nsp': '4', 'ewp': '11', 'ns_mp_score': 6, 'ew_mp_score': 2}
{'board': '1', 'nsp': '5', 'ewp': '12', 'ns_mp_score': 2, 'ew_mp_score': 6}
{'board': '1', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '1', 'nsp': '6', 'ewp': '13', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '1', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 0, 'ew_mp_score': 8}
{'board': '2', 'nsp': '3', 'ewp': '10', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '7', 'ewp': '14', 'ns_mp_score': 4, 'ew_mp_score': 4}
{'board': '2', 'nsp': '8', 'ewp': '15', 'ns_mp_score': 0, 'ew_mp_score': 8}
{'board': '2', 'nsp': '1', 'ewp': '16', 'ns_mp_score': 8, 'ew_mp_score': 0}
{'board': '2', 'nsp': '2', 'ewp': '9', 'ns_mp_score': 4, 'ew_mp_score': 4}

关于python - 使用 Python 对 Elasticsearch JSON 对象中的合约桥牌(Contract Bridge)得分数据进行计算,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53916503/

相关文章:

elasticsearch - 使用Solr或Elasticsearch通过大型OR查询子句进行搜索/检索

ruby-on-rails - AWS elasticsearch 服务抛出 Faraday::ConnectionFailed: 无法打开到 https:80 的 TCP 连接(getaddrinfo: 名称或服务未知)

elasticsearch - Elasticsearch:分数得分,具有multi_match(启用模糊性)并在同一查询中过滤,仅基于multi_match部分

python - 如何在 Python 中习惯性地对嵌套字典键进行排序

python - 什么是卡住的 Python 模块?

python - Pandas 读取混合日期格式的 Excel

java - 使用 JSON 模式和 ElasticSearch Java API 添加类型映射

elasticsearch - 从 ElasticSearch 中删除数据

python - 如何解决websocket ping超时?

python - 如何获取 xlsx 文件的边框使用 xlrd/