Tighten lab item routing and dedupe rows
This commit is contained in:
@@ -464,6 +464,7 @@ def _postprocess_workbook(
|
|||||||
workbook = load_workbook(path)
|
workbook = load_workbook(path)
|
||||||
try:
|
try:
|
||||||
_normalize_unmatched_columns(workbook)
|
_normalize_unmatched_columns(workbook)
|
||||||
|
_deduplicate_regular_rows(workbook)
|
||||||
summary_records = _collect_summary_records(workbook)
|
summary_records = _collect_summary_records(workbook)
|
||||||
_remove_not_found_rows(workbook)
|
_remove_not_found_rows(workbook)
|
||||||
_remove_empty_unmatched_columns(workbook)
|
_remove_empty_unmatched_columns(workbook)
|
||||||
@@ -555,6 +556,25 @@ def _remove_not_found_rows(workbook) -> None:
|
|||||||
sheet.delete_rows(row_index, 1)
|
sheet.delete_rows(row_index, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def _deduplicate_regular_rows(workbook) -> None:
    """Delete repeated data rows from every non-summary worksheet.

    Two rows count as duplicates when the ``_cell_text`` rendering of every
    cell, from column 1 through ``sheet.max_column``, is identical.  Rows are
    visited from the bottom up so that deleting a row never shifts a row that
    is still waiting to be visited; consequently the bottom-most copy of each
    duplicate group is the one that survives.

    The sheet named ``SUMMARY_SHEET_NAME`` and sheets with fewer than three
    rows are left untouched.  Row 1 is never examined (presumably the header
    row -- confirm against the sheet layout).
    """
    for sheet in workbook.worksheets:
        if sheet.title == SUMMARY_SHEET_NAME:
            continue
        if sheet.max_row < 3:
            # With at most one row after row 1 there is nothing to compare.
            continue

        fingerprints = set()
        for row_index in reversed(range(2, sheet.max_row + 1)):
            fingerprint = tuple(
                _cell_text(sheet.cell(row_index, col).value)
                for col in range(1, sheet.max_column + 1)
            )
            if not any(fingerprint):
                # Rows whose cells all render as empty text are ignored.
                continue
            if fingerprint not in fingerprints:
                fingerprints.add(fingerprint)
                continue
            sheet.delete_rows(row_index, 1)
|
||||||
|
|
||||||
|
|
||||||
def _collect_summary_records(workbook) -> list[dict[str, object]]:
|
def _collect_summary_records(workbook) -> list[dict[str, object]]:
|
||||||
records: list[dict[str, object]] = []
|
records: list[dict[str, object]] = []
|
||||||
for sheet in workbook.worksheets:
|
for sheet in workbook.worksheets:
|
||||||
|
|||||||
@@ -271,7 +271,7 @@ for pat_no in pat_no_col:
|
|||||||
with open(row_file_path, "r", encoding='utf-8-sig') as row_file:
|
with open(row_file_path, "r", encoding='utf-8-sig') as row_file:
|
||||||
detail_rows = list(csv.DictReader(row_file))
|
detail_rows = list(csv.DictReader(row_file))
|
||||||
|
|
||||||
sheet_results, unassigned_items = route_detail_rows(detail_rows, ALL_tests)
|
sheet_results, unassigned_items = route_detail_rows(detail_rows, ALL_tests, row_1.get("req_reason", ""))
|
||||||
append_routed_report(
|
append_routed_report(
|
||||||
add_content_to_excel,
|
add_content_to_excel,
|
||||||
result_save_pth,
|
result_save_pth,
|
||||||
@@ -286,4 +286,3 @@ for pat_no in pat_no_col:
|
|||||||
# 每处理一个患者数据,保存相关信息
|
# 每处理一个患者数据,保存相关信息
|
||||||
save_excel()
|
save_excel()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -236,7 +236,7 @@ for pat_no in os.listdir(file_dir): # 遍历 file_dir 下的所有文件和文
|
|||||||
with open(row_file_path, "r", encoding="utf-8-sig") as row_file:
|
with open(row_file_path, "r", encoding="utf-8-sig") as row_file:
|
||||||
detail_rows = list(csv.DictReader(row_file))
|
detail_rows = list(csv.DictReader(row_file))
|
||||||
|
|
||||||
sheet_results, unassigned_items = route_detail_rows(detail_rows, ALL_tests)
|
sheet_results, unassigned_items = route_detail_rows(detail_rows, ALL_tests, row_1.get("req_reason", ""))
|
||||||
append_routed_report(
|
append_routed_report(
|
||||||
add_content_to_excel,
|
add_content_to_excel,
|
||||||
result_save_pth,
|
result_save_pth,
|
||||||
|
|||||||
@@ -4,6 +4,24 @@ import re
|
|||||||
UNMATCHED_HEADER = "未匹配检测内容"
|
UNMATCHED_HEADER = "未匹配检测内容"
|
||||||
UNASSIGNED_SHEET_NAME = "未归属检测内容"
|
UNASSIGNED_SHEET_NAME = "未归属检测内容"
|
||||||
|
|
||||||
|
# Maps a category / sheet name to the substrings that, when found in a
# report's request reason, favour routing an ambiguous item to that sheet.
# Matching is plain substring containment.
# NOTE(review): some keywords are substrings of keywords in other categories
# ("降钙素" vs "降钙素原", "CYP" vs "CYP2C9"), so one reason text can favour
# two categories at once -- confirm this is intended.
CATEGORY_REASON_KEYWORDS = {
    "血细胞": [
        "血细胞",
        "血常规",
    ],
    "凝血": [
        "凝血",
    ],
    "肝功": [
        "肝功",
        "肾功",
        "电解质",
        "葡萄糖",
        "心肌酶",
    ],
    "各类肿瘤标志物": [
        "肿瘤",
        "标志物",
        "癌胚",
        "甲状旁腺",
        "降钙素",
        "鳞状细胞",
    ],
    "七抗": [
        "七抗",
        "自身抗体",
    ],
    "传染指标": [
        "传染",
    ],
    "血气分析+生化分析": [
        "血气",
    ],
    "感染指标": [
        "感染",
        "新冠",
        "冠状病毒",
        "结核",
        "细菌",
        "病毒",
        "HPV",
        "C反应蛋白",
        "降钙素原",
    ],
    "基因检测指标": [
        "基因",
        "CYP",
    ],
    "心衰系列": [
        "心衰",
        "B型",
        "BNP",
        "Pro-BNP",
        "钠尿肽",
        "肌钙蛋白",
    ],
    "普通指标": [
        "血型",
        "隐血",
        "卡式",
    ],
    "免疫系列": [
        "甲功",
        "甲状腺",
        "促甲状腺",
        "抗甲状腺",
    ],
    "特殊指标": [
        "细胞因子",
        "白介素",
        "血管内皮",
    ],
    "内分泌代谢系列": [
        "内分泌",
        "代谢",
        "儿茶酚胺",
        "ANCA",
    ],
    "用药指导": [
        "用药",
        "VKORC",
        "CYP2C9",
        "ALDH2",
        "ApoE",
        "SLCO",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
def match_re(value, pattern):
    """Tell whether ``pattern`` matches at the beginning of ``value``.

    Both arguments are coerced to text first; a falsy ``value`` (``None``,
    ``0``, ``""`` ...) is treated as the empty string.  Matching uses
    ``re.match``, so the pattern is anchored at the start of the text only.
    """
    regex = str(pattern)
    text = str(value or "")
    # A match object is always truthy, so bool() mirrors an ``is not None`` test.
    return bool(re.match(regex, text))
|
||||||
@@ -23,37 +41,75 @@ def detail_value(row):
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def route_detail_rows(detail_rows, all_tests, reason=""):
    """Route each detail row of a report to exactly one result sheet.

    The work is done in two passes.  The first pass collects, for every row,
    all tests whose patterns match the row's ``rpt_itemname`` and counts how
    many candidates point at each sheet across the whole report.  The second
    pass picks one candidate per row via ``_choose_candidate`` (which uses
    ``reason`` and the per-sheet counts) and stores the cleaned result value.

    Args:
        detail_rows: Iterable of dict-like rows (``.get`` is used on them).
        all_tests: Sequence of test definitions; each is indexed by position,
            so it must support ``all_tests[i]``.
        reason: Request reason text of the report, forwarded to
            ``_choose_candidate`` to break ties between candidate sheets.

    Returns:
        A ``(sheet_results, unassigned_items)`` tuple.  ``sheet_results`` maps
        sheet name -> {result name: cleaned value}.  ``unassigned_items`` is a
        list of ``"<item name>:<detail value>"`` strings for rows that matched
        no test.
    """
    sheet_results = {}
    unassigned_items = []
    candidates_by_row = []
    candidate_counts = {}

    # Pass 1: gather candidates and the per-sheet density used for tie-breaks.
    for detail_row in detail_rows:
        candidates = _match_candidates(detail_row, all_tests)
        candidates_by_row.append((detail_row, candidates))
        for candidate in candidates:
            sheet_name = candidate["sheet_name"]
            candidate_counts[sheet_name] = candidate_counts.get(sheet_name, 0) + 1

    # Pass 2: commit each row to a single sheet.
    for detail_row, candidates in candidates_by_row:
        if not candidates:
            item_name = detail_row.get("rpt_itemname", "")
            unassigned_items.append(f"{item_name}:{detail_value(detail_row)}")
            continue

        candidate = _choose_candidate(candidates, reason, candidate_counts)
        # Resolve the test by the index recorded on the candidate rather than
        # by sheet name: a name-keyed lookup would silently pick the last test
        # if two tests ever shared the same ``test_check_name``.
        test = all_tests[candidate["test_index"]]
        sheet_results.setdefault(candidate["sheet_name"], {})[candidate["result_name"]] = clean_result(
            detail_row.get(test["test_result_col_name"], "")
        )

    return sheet_results, unassigned_items
|
||||||
|
|
||||||
|
|
||||||
|
def _match_candidates(detail_row, all_tests):
    """List every test that recognises the row's item name.

    For each test, the entries of ``test_check_list_all`` are scanned in
    order; an entry may be a single pattern string or a collection of
    patterns.  The first entry with any pattern matching ``rpt_itemname``
    (via ``match_re``) yields one candidate for that test, and the scan moves
    on to the next test -- so a test contributes at most one candidate.

    Returns:
        A list of dicts with keys ``sheet_name``, ``result_name``,
        ``test_index`` and ``item_index``, in ``all_tests`` order.
    """
    name = detail_row.get("rpt_itemname", "")
    found = []

    for test_index, test in enumerate(all_tests):
        result_names = test["test_check_list"]
        pattern_groups = test["test_check_list_all"]

        for item_index, group in enumerate(pattern_groups):
            # A bare string stands for a one-pattern group.
            patterns = [group] if isinstance(group, str) else group
            if not any(match_re(name, pattern) for pattern in patterns):
                continue
            found.append(
                {
                    "sheet_name": test["test_check_name"],
                    "result_name": result_names[item_index],
                    "test_index": test_index,
                    "item_index": item_index,
                }
            )
            break

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def _choose_candidate(candidates, reason, candidate_counts):
|
||||||
|
if len(candidates) == 1:
|
||||||
|
return candidates[0]
|
||||||
|
|
||||||
|
reason = str(reason or "")
|
||||||
|
|
||||||
|
def score(candidate):
|
||||||
|
sheet_name = candidate["sheet_name"]
|
||||||
|
keywords = CATEGORY_REASON_KEYWORDS.get(sheet_name, [])
|
||||||
|
reason_score = 100 if any(keyword and keyword in reason for keyword in keywords) else 0
|
||||||
|
density_score = candidate_counts.get(sheet_name, 0)
|
||||||
|
return (reason_score, density_score, -candidate["test_index"], -candidate["item_index"])
|
||||||
|
|
||||||
|
return max(candidates, key=score)
|
||||||
|
|
||||||
|
|
||||||
def append_routed_report(
|
def append_routed_report(
|
||||||
add_content_to_excel,
|
add_content_to_excel,
|
||||||
result_save_path,
|
result_save_path,
|
||||||
|
|||||||
@@ -79,6 +79,22 @@ def build_fixture(root):
|
|||||||
"req_reason": "未知组合检测",
|
"req_reason": "未知组合检测",
|
||||||
"sampled_dt": "2026-01-01 08:10:00",
|
"sampled_dt": "2026-01-01 08:10:00",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"rptunitid": "161",
|
||||||
|
"rechkdt": "2026-01-01 10:15:00",
|
||||||
|
"reportid": "20260101-161-4",
|
||||||
|
"reporttype": "10",
|
||||||
|
"req_reason": "甲功七项(化学发光法)[复]",
|
||||||
|
"sampled_dt": "2026-01-01 08:15:00",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"rptunitid": "161",
|
||||||
|
"rechkdt": "2026-01-01 10:15:00",
|
||||||
|
"reportid": "20260101-161-4",
|
||||||
|
"reporttype": "10",
|
||||||
|
"req_reason": "甲功七项(化学发光法)[复]",
|
||||||
|
"sampled_dt": "2026-01-01 08:15:00",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
for row in reports:
|
for row in reports:
|
||||||
for header in REPORT_HEADERS:
|
for header in REPORT_HEADERS:
|
||||||
@@ -90,6 +106,12 @@ def build_fixture(root):
|
|||||||
[{"reportid": reports[0]["reportid"], "rpt_itemname": "谷草转氨酶", "result_str": "21"}],
|
[{"reportid": reports[0]["reportid"], "rpt_itemname": "谷草转氨酶", "result_str": "21"}],
|
||||||
[{"reportid": reports[1]["reportid"], "rpt_itemname": "B型前脑尿钠肽", "result_str": "57.20"}],
|
[{"reportid": reports[1]["reportid"], "rpt_itemname": "B型前脑尿钠肽", "result_str": "57.20"}],
|
||||||
[{"reportid": reports[2]["reportid"], "rpt_itemname": "神秘检测项目", "result_str": "42"}],
|
[{"reportid": reports[2]["reportid"], "rpt_itemname": "神秘检测项目", "result_str": "42"}],
|
||||||
|
[
|
||||||
|
{"reportid": reports[3]["reportid"], "rpt_itemname": "甲状腺球蛋白", "result_str": "6.02"},
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"reportid": reports[4]["reportid"], "rpt_itemname": "甲状腺球蛋白", "result_str": "6.02"},
|
||||||
|
],
|
||||||
]
|
]
|
||||||
detail_dir = root / "Tests_Detail_List" / patient_id
|
detail_dir = root / "Tests_Detail_List" / patient_id
|
||||||
for report, rows in zip(reports, details):
|
for report, rows in zip(reports, details):
|
||||||
@@ -114,10 +136,13 @@ def assert_dynamic_routing(workbook_path):
|
|||||||
try:
|
try:
|
||||||
liver_rows = rows_for(workbook["肝功"])
|
liver_rows = rows_for(workbook["肝功"])
|
||||||
heart_rows = rows_for(workbook["心衰系列"])
|
heart_rows = rows_for(workbook["心衰系列"])
|
||||||
|
immune_rows = rows_for(workbook["免疫系列"])
|
||||||
summary_text = "\n".join("\t".join(row) for row in rows_for(workbook["未检测到内容汇总"]))
|
summary_text = "\n".join("\t".join(row) for row in rows_for(workbook["未检测到内容汇总"]))
|
||||||
|
|
||||||
liver_data = [row for row in liver_rows[1:] if row and row[0]]
|
liver_data = [row for row in liver_rows[1:] if row and row[0]]
|
||||||
heart_data = [row for row in heart_rows[1:] if row and row[0]]
|
heart_data = [row for row in heart_rows[1:] if row and row[0]]
|
||||||
|
tumor_data = [row for row in rows_for(workbook["各类肿瘤标志物"])[1:] if row and row[0]]
|
||||||
|
immune_data = [row for row in immune_rows[1:] if row and row[0]]
|
||||||
|
|
||||||
assert len(liver_data) == 1, liver_data
|
assert len(liver_data) == 1, liver_data
|
||||||
assert liver_data[0][3] == "肝功十项[复]_电解质五项[复]_肾功三项[复]"
|
assert liver_data[0][3] == "肝功十项[复]_电解质五项[复]_肾功三项[复]"
|
||||||
@@ -132,6 +157,9 @@ def assert_dynamic_routing(workbook_path):
|
|||||||
assert "神秘检测项目" in summary_text
|
assert "神秘检测项目" in summary_text
|
||||||
assert "谷草转氨酶" not in summary_text
|
assert "谷草转氨酶" not in summary_text
|
||||||
assert "B型前脑尿钠肽" not in summary_text
|
assert "B型前脑尿钠肽" not in summary_text
|
||||||
|
assert len(tumor_data) == 0, tumor_data
|
||||||
|
assert len(immune_data) == 1, immune_data
|
||||||
|
assert immune_data[0][3] == "甲功七项(化学发光法)[复]"
|
||||||
finally:
|
finally:
|
||||||
workbook.close()
|
workbook.close()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user