Tighten lab item routing and dedupe rows
This commit is contained in:
@@ -4,6 +4,24 @@ import re
|
||||
# Label strings for lab items that could not be routed.
# NOTE(review): presumably a column header and a worksheet name respectively;
# their use sites are not visible here -- confirm against the report writer.
UNMATCHED_HEADER = "未匹配检测内容"
UNASSIGNED_SHEET_NAME = "未归属检测内容"


# Maps a sheet name to the keywords that, when found as a substring of the
# free-text "reason", make that sheet the preferred target for an item whose
# name matches more than one sheet.
# NOTE(review): some keywords are substrings of keywords on other sheets
# (e.g. "降钙素" vs "降钙素原", "CYP" vs "CYP2C9"), so one reason text can favour
# several sheets at once; the remaining tie-breakers then decide.
CATEGORY_REASON_KEYWORDS = {
    "血细胞": ["血细胞", "血常规"],
    "凝血": ["凝血"],
    "肝功": ["肝功", "肾功", "电解质", "葡萄糖", "心肌酶"],
    "各类肿瘤标志物": [
        "肿瘤",
        "标志物",
        "癌胚",
        "甲状旁腺",
        "降钙素",
        "鳞状细胞",
    ],
    "七抗": ["七抗", "自身抗体"],
    "传染指标": ["传染"],
    "血气分析+生化分析": ["血气"],
    "感染指标": [
        "感染",
        "新冠",
        "冠状病毒",
        "结核",
        "细菌",
        "病毒",
        "HPV",
        "C反应蛋白",
        "降钙素原",
    ],
    "基因检测指标": ["基因", "CYP"],
    "心衰系列": [
        "心衰",
        "B型",
        "BNP",
        "Pro-BNP",
        "钠尿肽",
        "肌钙蛋白",
    ],
    "普通指标": ["血型", "隐血", "卡式"],
    "免疫系列": ["甲功", "甲状腺", "促甲状腺", "抗甲状腺"],
    "特殊指标": ["细胞因子", "白介素", "血管内皮"],
    "内分泌代谢系列": ["内分泌", "代谢", "儿茶酚胺", "ANCA"],
    "用药指导": [
        "用药",
        "VKORC",
        "CYP2C9",
        "ALDH2",
        "ApoE",
        "SLCO",
    ],
}
|
||||
|
||||
|
||||
def match_re(value, pattern):
    """Return True when ``pattern`` matches at the start of ``value``.

    Both arguments are converted with ``str`` first. Any falsy ``value``
    (``None``, ``""``, ``0`` ...) is treated as the empty string.
    """
    text = str(value) if value else ""
    found = re.match(str(pattern), text)
    return found is not None
|
||||
@@ -23,37 +41,75 @@ def detail_value(row):
|
||||
return ""
|
||||
|
||||
|
||||
def route_detail_rows(detail_rows, all_tests, reason=""):
    """Route each lab detail row to at most one result sheet.

    Args:
        detail_rows: Iterable of dict-like rows; ``"rpt_itemname"`` is read
            for the item name, and the per-test result column for the value.
        all_tests: Test definitions. Each must provide ``"test_check_name"``
            and ``"test_result_col_name"``; it is iterated more than once,
            so it must be re-iterable (e.g. a list).
        reason: Free text passed to ``_choose_candidate`` to break ties when
            a row matches several sheets. Defaults to ``""``.

    Returns:
        A ``(sheet_results, unassigned_items)`` tuple. ``sheet_results`` maps
        sheet name -> {result name -> cleaned value}. ``unassigned_items`` is
        a list of ``"item_name:value"`` strings for rows matching no test.
    """
    sheet_results = {}
    unassigned_items = []
    tests_by_name = {test["test_check_name"]: test for test in all_tests}
    candidates_by_row = []
    candidate_counts = {}

    # Pass 1: collect every row's candidates and count candidates per sheet,
    # so the chooser can see the totals for the whole report.
    for detail_row in detail_rows:
        candidates = _match_candidates(detail_row, all_tests)
        candidates_by_row.append((detail_row, candidates))
        for candidate in candidates:
            sheet_name = candidate["sheet_name"]
            candidate_counts[sheet_name] = candidate_counts.get(sheet_name, 0) + 1

    # Pass 2: write each row to exactly one sheet, or record it as unassigned.
    for detail_row, candidates in candidates_by_row:
        if not candidates:
            item_name = detail_row.get("rpt_itemname", "")
            unassigned_items.append(f"{item_name}:{detail_value(detail_row)}")
            continue

        candidate = _choose_candidate(candidates, reason, candidate_counts)
        test = tests_by_name[candidate["sheet_name"]]
        value = clean_result(detail_row.get(test["test_result_col_name"], ""))
        # A later row with the same sheet/result name overwrites an earlier one.
        sheet_results.setdefault(candidate["sheet_name"], {})[candidate["result_name"]] = value

    return sheet_results, unassigned_items
|
||||
|
||||
|
||||
def _match_candidates(detail_row, all_tests):
    """List the sheet entries whose patterns match the row's item name.

    For every test in ``all_tests`` the pattern groups in
    ``"test_check_list_all"`` are scanned in order; a group may be a single
    pattern string or an iterable of patterns. Only the first matching group
    of each test is recorded, so a test contributes at most one candidate.

    Returns:
        A list of dicts with keys ``"sheet_name"``, ``"result_name"``,
        ``"test_index"`` and ``"item_index"``, in ``all_tests`` order.
    """
    name = detail_row.get("rpt_itemname", "")
    found = []

    for test_pos, test in enumerate(all_tests):
        result_names = test["test_check_list"]
        pattern_groups = test["test_check_list_all"]

        for group_pos, group in enumerate(pattern_groups):
            # Normalise a bare pattern string into a one-element group.
            patterns = [group] if isinstance(group, str) else group
            if not any(match_re(name, pattern) for pattern in patterns):
                continue

            found.append(
                {
                    "sheet_name": test["test_check_name"],
                    "result_name": result_names[group_pos],
                    "test_index": test_pos,
                    "item_index": group_pos,
                }
            )
            # One candidate per test is enough; move on to the next test.
            break

    return found
|
||||
|
||||
|
||||
def _choose_candidate(candidates, reason, candidate_counts):
    """Pick the single best candidate for a row.

    A lone candidate is returned as-is. Otherwise candidates are ranked by,
    in order of priority:

    1. whether any keyword registered for the candidate's sheet in
       ``CATEGORY_REASON_KEYWORDS`` occurs in ``reason``;
    2. how many candidates that sheet collected (``candidate_counts``);
    3. the lower ``test_index``, then the lower ``item_index``.

    Candidates that rank exactly equal resolve to the earliest one in
    ``candidates``.
    """
    if len(candidates) == 1:
        return candidates[0]

    reason_text = str(reason) if reason else ""

    def rank(entry):
        name = entry["sheet_name"]
        keyword_hit = False
        for keyword in CATEGORY_REASON_KEYWORDS.get(name, []):
            # Empty keywords are skipped: "" is a substring of everything.
            if keyword and keyword in reason_text:
                keyword_hit = True
                break
        return (
            100 if keyword_hit else 0,
            candidate_counts.get(name, 0),
            -entry["test_index"],
            -entry["item_index"],
        )

    return max(candidates, key=rank)
|
||||
|
||||
|
||||
def append_routed_report(
|
||||
add_content_to_excel,
|
||||
result_save_path,
|
||||
|
||||
Reference in New Issue
Block a user