# HIS_Sur_Data_Deal/app/processors/dynamic_router.py

import re


# Label for unmatched test content. It is not referenced by the functions in
# this part of the module; presumably used by importers — verify before removing.
UNMATCHED_HEADER = "未匹配检测内容"
# Sheet name append_routed_report writes to when there are unassigned items
# and no sheet received any routed result.
UNASSIGNED_SHEET_NAME = "未归属检测内容"

# Sheet name (a test's "test_check_name") -> keywords. _choose_candidate looks
# each keyword up as a plain, case-sensitive substring of the `reason` string;
# a sheet whose list has at least one hit gets a fixed score bonus when an item
# matches more than one sheet.
#
# NOTE(review): several keywords are substrings of others. Within one list this
# is merely redundant ("病毒"/"冠状病毒", "BNP"/"Pro-BNP", "甲状腺"/"促甲状腺"/
# "抗甲状腺"). Across lists it means one reason can hit two sheets at once:
# "降钙素" (各类肿瘤标志物) is contained in "降钙素原" (感染指标), and "CYP"
# (基因检测指标) is contained in "CYP2C9" (用药指导). Such ties are then broken
# by the remaining score components in _choose_candidate — confirm this is
# the intended outcome.
CATEGORY_REASON_KEYWORDS = {
    "血细胞": ["血细胞", "血常规"],
    "凝血": ["凝血"],
    "肝功": ["肝功", "肾功", "电解质", "葡萄糖", "心肌酶"],
    "各类肿瘤标志物": ["肿瘤", "标志物", "癌胚", "甲状旁腺", "降钙素", "鳞状细胞"],
    "七抗": ["七抗", "自身抗体"],
    "传染指标": ["传染"],
    "血气分析+生化分析": ["血气"],
    "感染指标": ["感染", "新冠", "冠状病毒", "结核", "细菌", "病毒", "HPV", "C反应蛋白", "降钙素原"],
    "基因检测指标": ["基因", "CYP"],
    "心衰系列": ["心衰", "B型", "BNP", "Pro-BNP", "钠尿肽", "肌钙蛋白"],
    "普通指标": ["血型", "隐血", "卡式"],
    "免疫系列": ["甲功", "甲状腺", "促甲状腺", "抗甲状腺"],
    "特殊指标": ["细胞因子", "白介素", "血管内皮"],
    "内分泌代谢系列": ["内分泌", "代谢", "儿茶酚胺", "ANCA"],
    "用药指导": ["用药", "VKORC", "CYP2C9", "ALDH2", "ApoE", "SLCO"],
}


def match_re(value, pattern):
    """Report whether ``pattern`` matches at the start of ``value``.

    Both arguments are coerced with ``str``. Any falsy ``value`` (``None``,
    ``""``, ``0`` ...) is treated as the empty string. Matching is done with
    ``re.match``, so the pattern is anchored at the beginning of the text only;
    it does not have to consume the whole text.

    Args:
        value: Text to test; coerced to ``str`` (falsy values become ``""``).
        pattern: Regular expression; coerced to ``str``.

    Returns:
        ``True`` if ``re.match`` finds a match, ``False`` otherwise.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
    """
    text = str(value) if value else ""
    # Match objects are always truthy, so bool() mirrors an ``is not None`` check.
    return bool(re.match(str(pattern), text))


def clean_result(value):
    """Normalise a raw result value to a string.

    ``None``, the empty string and a lone ``"."`` are treated as "no result"
    and are reported as the literal string ``"None"``. Every other value is
    returned as ``str(value)`` unchanged (no stripping is performed).

    Args:
        value: Raw result value of any type.

    Returns:
        ``"None"`` for a blank marker, otherwise ``str(value)``.
    """
    blank_markers = (None, "", ".")
    return "None" if value in blank_markers else str(value)


def detail_value(row):
    """Return the first populated result field of ``row`` as a cleaned string.

    The fields ``result_str``, ``result_ref``, ``result_txt`` and ``result1``
    are checked in that order. The first one whose value is neither ``None``
    nor ``""`` is passed through ``clean_result`` and returned.

    Args:
        row: Mapping-like object supporting ``.get(key, default)``.

    Returns:
        The cleaned value of the first populated field, or ``""`` when none of
        the four fields holds a value.
    """
    field_order = ("result_str", "result_ref", "result_txt", "result1")
    # Lazy pipeline: fields are read in order and reading stops at the first hit.
    raw_values = (row.get(field, "") for field in field_order)
    first_filled = next((raw for raw in raw_values if raw not in (None, "")), None)
    # None cannot be a genuine hit (it is filtered out above), so it safely
    # doubles as the "nothing found" sentinel.
    if first_filled is None:
        return ""
    return clean_result(first_filled)


def route_detail_rows(detail_rows, all_tests, reason=""):
    """Distribute detail rows over the sheets described by ``all_tests``.

    The work is done in two passes. The first pass collects, for every detail
    row, the sheets whose patterns match it (via ``_match_candidates``) and
    counts how many rows each sheet matched. The second pass picks one sheet
    per row with ``_choose_candidate`` (using ``reason`` and those counts) and
    stores the row's value under that sheet.

    Args:
        detail_rows: Iterable of mapping-like rows; it is consumed once. Each
            row is read through ``.get`` (``"rpt_itemname"`` and the result
            columns).
        all_tests: Test definitions. Each entry must provide
            ``"test_check_name"``; entries that end up selected must also
            provide ``"test_result_col_name"``. It is iterated more than once.
        reason: Text forwarded to ``_choose_candidate`` to break ties between
            several matching sheets.

    Returns:
        A tuple ``(sheet_results, unassigned_items)``:

        * ``sheet_results`` maps sheet name -> ``{result_name: value}``, where
          the value is read from the row column named by the chosen test's
          ``"test_result_col_name"`` and passed through ``clean_result``. If
          several rows land on the same sheet and result name, the later row
          overwrites the earlier one.
        * ``unassigned_items`` lists rows that matched no sheet, each
          formatted as ``"<item name>：<detail_value(row)>"``.
    """
    # When several tests share a name, the last definition wins for the lookup.
    test_lookup = {}
    for definition in all_tests:
        test_lookup[definition["test_check_name"]] = definition

    # Pass 1: match every row, then tally how often each sheet was a candidate.
    matched_rows = [(row, _match_candidates(row, all_tests)) for row in detail_rows]
    hits_per_sheet = {}
    for _, options in matched_rows:
        for option in options:
            sheet = option["sheet_name"]
            hits_per_sheet[sheet] = hits_per_sheet.get(sheet, 0) + 1

    # Pass 2: commit each row to exactly one sheet, or record it as unassigned.
    routed = {}
    leftovers = []
    for row, options in matched_rows:
        item_name = row.get("rpt_itemname", "")
        if options:
            picked = _choose_candidate(options, reason, hits_per_sheet)
            sheet = picked["sheet_name"]
            value_column = test_lookup[sheet]["test_result_col_name"]
            cleaned = clean_result(row.get(value_column, ""))
            routed.setdefault(sheet, {})[picked["result_name"]] = cleaned
        else:
            leftovers.append(f"{item_name}：{detail_value(row)}")

    return routed, leftovers


def _match_candidates(detail_row, all_tests):
    """Collect every test whose patterns match the row's item name.

    For each test, the entries of ``test["test_check_list_all"]`` are scanned
    in order. An entry is either a single pattern string or an iterable of
    patterns. The first entry with any pattern accepted by ``match_re`` for
    the row's ``"rpt_itemname"`` yields one candidate, after which the scan
    moves on to the next test. A test therefore contributes at most one
    candidate.

    Args:
        detail_row: Mapping-like row; only ``"rpt_itemname"`` is read.
        all_tests: Iterable of test definitions providing
            ``"test_check_name"``, ``"test_check_list"`` and
            ``"test_check_list_all"``. The result name is taken from
            ``test_check_list`` at the position of the matching entry.

    Returns:
        A list of dicts with the keys ``"sheet_name"``, ``"result_name"``,
        ``"test_index"`` and ``"item_index"``, ordered like ``all_tests``.
    """
    item_name = detail_row.get("rpt_itemname", "")

    def group_matches(patterns):
        # A bare string is one pattern; anything else is iterated as a group.
        group = [patterns] if isinstance(patterns, str) else patterns
        return any(match_re(item_name, pattern) for pattern in group)

    matches = []
    for test_pos, test in enumerate(all_tests):
        result_names = test["test_check_list"]
        pattern_groups = test["test_check_list_all"]
        # Position of the first matching entry, or None if the test does not match.
        first_hit = next(
            (pos for pos, patterns in enumerate(pattern_groups) if group_matches(patterns)),
            None,
        )
        if first_hit is None:
            continue
        matches.append(
            {
                "sheet_name": test["test_check_name"],
                "result_name": result_names[first_hit],
                "test_index": test_pos,
                "item_index": first_hit,
            }
        )

    return matches


def _choose_candidate(candidates, reason, candidate_counts):
    """Pick the best candidate sheet for one detail row.

    A single candidate is returned as is. Otherwise each candidate is ranked
    by a tuple compared left to right:

    1. ``100`` if any keyword registered for the sheet in
       ``CATEGORY_REASON_KEYWORDS`` occurs in ``reason``, else ``0``;
    2. the sheet's entry in ``candidate_counts`` (``0`` when absent);
    3. the negated ``test_index`` (earlier tests win);
    4. the negated ``item_index`` (earlier items win).

    Args:
        candidates: Non-empty sequence of candidate dicts as produced by
            ``_match_candidates``.
        reason: Text searched for category keywords; falsy values count as
            the empty string.
        candidate_counts: Mapping of sheet name -> number of matching rows.

    Returns:
        The candidate with the highest rank.

    Raises:
        ValueError: If ``candidates`` is empty.
    """
    if len(candidates) == 1:
        return candidates[0]

    reason_text = str(reason or "")

    def rank(option):
        sheet = option["sheet_name"]
        reason_hit = False
        for keyword in CATEGORY_REASON_KEYWORDS.get(sheet, []):
            # Empty keywords are skipped: "" would be "in" every string.
            if keyword and keyword in reason_text:
                reason_hit = True
                break
        return (
            100 if reason_hit else 0,
            candidate_counts.get(sheet, 0),
            -option["test_index"],
            -option["item_index"],
        )

    return max(candidates, key=rank)


def append_routed_report(
    add_content_to_excel,
    result_save_path,
    all_tests,
    excel_head,
    excel_basic,
    sheet_results,
    unassigned_items,
    show_not_match,
):
    """Write one row per routed sheet through ``add_content_to_excel``.

    For every test in ``all_tests`` whose ``"test_check_name"`` is a key of
    ``sheet_results``, a row is built from ``excel_head + excel_basic``
    followed by one cell per name in ``test["test_check_list"]`` (the routed
    value, or ``"Not_Find"`` when that name has no value). With
    ``show_not_match`` set, ``unassigned_items`` are appended to each of those
    rows.

    If there are unassigned items and ``sheet_results`` is empty, a single row
    ``excel_head + excel_basic + unassigned_items`` is written to the sheet
    named by ``UNASSIGNED_SHEET_NAME`` instead, regardless of
    ``show_not_match``.

    Args:
        add_content_to_excel: Callable invoked as
            ``add_content_to_excel(result_save_path, sheet_name, row)``.
        result_save_path: Passed through to ``add_content_to_excel``.
        all_tests: Iterable of test definitions; determines the write order.
        excel_head: Leading cells of every row.
        excel_basic: Cells following ``excel_head`` in every row.
        sheet_results: Mapping of sheet name -> ``{result_name: value}``.
        unassigned_items: Cells describing rows that matched no sheet.
        show_not_match: Whether to append ``unassigned_items`` to sheet rows.

    Returns:
        None. The input lists are not modified; every call to
        ``add_content_to_excel`` receives a freshly built row.
    """
    for test in all_tests:
        name = test["test_check_name"]
        if name in sheet_results:
            values = sheet_results[name]
            line = excel_head + excel_basic
            line = line + [values.get(column, "Not_Find") for column in test["test_check_list"]]
            if show_not_match:
                line += unassigned_items
            add_content_to_excel(result_save_path, name, line)

    # The fallback sheet is only used when nothing at all could be routed.
    if not unassigned_items or sheet_results:
        return
    add_content_to_excel(result_save_path, UNASSIGNED_SHEET_NAME, excel_head + excel_basic + unassigned_items)