Coverage for app \ processing \ entity_extractor.py: 100%

46 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-24 13:18 +0530

1from typing import Dict, List 

2 

3import torch 

4from gliner import GLiNER 

5 

6from app.config import settings 

7from app.utils.logger import get_logger 

8 

# Module-level logger, named after this module.
logger = get_logger(__name__)

# Module-level cache for the GLiNER model. It stays None until the first
# call to _get_model() loads the model and stores it here.
_model: GLiNER | None = None

12 

13 

def _get_device() -> str:
    """Return the device string the NER model should be placed on.

    A truthy ``settings.NER_DEVICE`` takes precedence. Otherwise the
    result is ``"cuda"`` when ``torch.cuda.is_available()`` reports True
    and ``"cpu"`` when it does not.
    """
    configured = settings.NER_DEVICE
    if configured:
        return configured

    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

18 

19 

def _get_model() -> GLiNER:
    """Return the shared GLiNER model, loading it on first use.

    The first call loads ``settings.NER_MODEL_NAME`` via
    ``GLiNER.from_pretrained``, moves it to the device chosen by
    ``_get_device()``, and stores it in the module-level ``_model``.
    Later calls return that stored instance without reloading.
    """
    global _model

    # Fast path: the model has already been loaded by an earlier call.
    if _model is not None:
        return _model

    target_device = _get_device()
    log_context = {
        "model_name": settings.NER_MODEL_NAME,
        "device": target_device,
    }
    logger.info("Loading NER model", extra=log_context)

    loaded = GLiNER.from_pretrained(settings.NER_MODEL_NAME)
    # Assign only after .to() succeeds, so a failed load leaves the cache empty.
    _model = loaded.to(target_device)
    return _model

38 

39 

40def _empty_result() -> Dict[str, List[str]]: 

41 return { 

42 "drugs": [], 

43 "conditions": [], 

44 "biomarkers": [], 

45 "symptoms": [], 

46 } 

47 

48 

def extract_medical_entities(text: str) -> Dict[str, List[str]]:
    """Extract medical entities from ``text`` using the cached GLiNER model.

    Args:
        text: Free text to analyse. Non-string or empty input is skipped.

    Returns:
        A mapping with the keys ``"drugs"``, ``"conditions"``,
        ``"biomarkers"`` and ``"symptoms"``. Each value is a list of
        lower-cased, whitespace-stripped entity strings, de-duplicated and
        kept in order of first appearance. All lists are empty when the
        input is skipped or when inference raises.

    Note:
        Exceptions raised by ``model.predict_entities`` are logged and
        swallowed. Errors raised while loading the model in
        ``_get_model()`` are not caught here and propagate to the caller.
    """
    # Check the type first so the truthiness of a non-string object
    # (which may itself raise, e.g. for array-like inputs) is never evaluated.
    if not isinstance(text, str) or not text:
        logger.debug("NER skipped: invalid or empty text")
        return _empty_result()

    model = _get_model()

    try:
        entities = model.predict_entities(
            text,
            settings.NER_LABELS,
            threshold=settings.NER_CONFIDENCE_THRESHOLD,
        )
    except Exception:
        logger.exception("NER inference failed")
        return _empty_result()

    # Model label -> result key. Labels not listed here are ignored.
    label_to_bucket = {
        "drug": "drugs",
        "medical condition": "conditions",
        "biomarker": "biomarkers",
        "symptom": "symptoms",
    }

    results = _empty_result()
    # One set per bucket gives O(1) duplicate checks, while the lists in
    # ``results`` keep first-seen order.
    seen = {bucket: set() for bucket in results}

    for ent in entities:
        raw_value = ent.get("text")
        label = ent.get("label")

        # Skip malformed entities (missing/None/non-string fields) instead
        # of raising outside the guarded inference call.
        if not isinstance(raw_value, str) or not isinstance(label, str):
            continue

        value = raw_value.lower().strip()
        if not value:
            continue

        bucket = label_to_bucket.get(label)
        if bucket is None or value in seen[bucket]:
            continue

        seen[bucket].add(value)
        results[bucket].append(value)

    logger.debug(
        "NER extraction complete",
        extra={
            "drugs": len(results["drugs"]),
            "conditions": len(results["conditions"]),
            "biomarkers": len(results["biomarkers"]),
            "symptoms": len(results["symptoms"]),
        },
    )

    return results