Coverage for app \ processing \ entity_extractor.py: 100%
46 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-24 13:18 +0530
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-24 13:18 +0530
1from typing import Dict, List
3import torch
4from gliner import GLiNER
6from app.config import settings
7from app.utils.logger import get_logger
# Process-wide cache slot for the GLiNER model. It starts empty and is
# filled in by _get_model() the first time a model is requested.
_model: GLiNER | None = None

# Module-level logger named after this module.
logger = get_logger(__name__)
def _get_device() -> str:
    """Pick the device string the NER model should be moved to.

    Returns:
        ``settings.NER_DEVICE`` when it is set to a truthy value;
        otherwise ``"cuda"`` if ``torch.cuda.is_available()`` reports
        True, and ``"cpu"`` if it does not.
    """
    configured = settings.NER_DEVICE
    if configured:
        # An explicit setting always wins over auto-detection.
        return configured
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
def _get_model() -> GLiNER:
    """Return the cached GLiNER model, loading it on the first call.

    The first call resolves the device via ``_get_device()``, logs the
    model name and device, loads ``settings.NER_MODEL_NAME`` with
    ``GLiNER.from_pretrained`` and moves it to that device. The result is
    stored in the module-level ``_model`` and returned directly on every
    later call.
    """
    global _model

    # Fast path: the model has already been loaded.
    if _model is not None:
        return _model

    target_device = _get_device()
    log_fields = {
        "model_name": settings.NER_MODEL_NAME,
        "device": target_device,
    }
    logger.info("Loading NER model", extra=log_fields)

    loaded = GLiNER.from_pretrained(settings.NER_MODEL_NAME)
    _model = loaded.to(target_device)
    return _model
40def _empty_result() -> Dict[str, List[str]]:
41 return {
42 "drugs": [],
43 "conditions": [],
44 "biomarkers": [],
45 "symptoms": [],
46 }
def extract_medical_entities(text: str) -> Dict[str, List[str]]:
    """Run GLiNER over ``text`` and group the entities found by category.

    Args:
        text: The text to analyse. Anything falsy or not a ``str`` skips
            inference entirely.

    Returns:
        A dict with the keys ``"drugs"``, ``"conditions"``,
        ``"biomarkers"`` and ``"symptoms"``. Each list holds the
        lower-cased, stripped entity texts for that category, without
        duplicates, in first-seen order. All lists are empty when the
        input is rejected or when ``predict_entities`` raises.

    Note:
        Only the ``predict_entities`` call is wrapped in ``try``; the
        model is obtained before it, so an exception raised by
        ``_get_model()`` propagates to the caller.
    """
    if not (text and isinstance(text, str)):
        logger.debug("NER skipped: invalid or empty text")
        return _empty_result()

    ner_model = _get_model()
    wanted_labels = settings.NER_LABELS

    try:
        found = ner_model.predict_entities(
            text,
            wanted_labels,
            threshold=settings.NER_CONFIDENCE_THRESHOLD,
        )
    except Exception:
        # Best effort: log the traceback and hand back an empty result.
        logger.exception("NER inference failed")
        return _empty_result()

    # Entity label reported by the model -> key in the result mapping.
    label_to_bucket = (
        ("drug", "drugs"),
        ("medical condition", "conditions"),
        ("biomarker", "biomarkers"),
        ("symptom", "symptoms"),
    )

    results = _empty_result()

    for entity in found:
        mention = entity.get("text", "").lower().strip()
        tag = entity.get("label")

        if not mention:
            continue

        for known_label, bucket_key in label_to_bucket:
            if tag == known_label:
                bucket = results[bucket_key]
                # Keep only the first occurrence of each normalised text.
                if mention not in bucket:
                    bucket.append(mention)
                break
        # Entities carrying any other label are ignored.

    counts = {category: len(items) for category, items in results.items()}
    logger.debug("NER extraction complete", extra=counts)

    return results