Coverage for app \ rag \ fact_checker.py: 93%

56 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-24 13:18 +0530

1""" 

2Simple fact checker for Graph-RAG claims. 

3 

4- Verifies claims using the provided `context` (patient profile + papers). 

5- Uses deterministic checks (string matching against patient medications/conditions 

6 and paper title/text previews). No network calls, no mutation. 

7- Returns augmented claims: { type, statement, verified: bool, sources: [ ... ] } 

8""" 

9 

10from typing import List, Dict 

11import re 

12from app.rag.claim_extractor import extract_claims 

13 

14 

15def _normalize(text: str) -> str: 

16 return (text or "").lower() 

17 

18 

19def _match_any(token_list: List[str], text: str) -> bool: 

20 t = _normalize(text) 

21 for tok in token_list: 

22 if tok and tok.lower() in t: 

23 return True 

24 return False 

25 

26 

27def _patient_med_names(context: Dict) -> List[str]: 

28 meds = context.get("patient", {}).get("medications", []) or [] 

29 return [m.get("name", "").lower() for m in meds if m.get("name")] 

30 

31 

32def _patient_conditions(context: Dict) -> List[str]: 

33 conds = context.get("patient", {}).get("conditions", []) or [] 

34 return [c.get("name", "").lower() for c in conds if c.get("name")] 

35 

36 

37def _papers_evidence(context: Dict, claim_text: str) -> List[Dict]: 

38 hits = [] 

39 for p in context.get("papers", []) or []: 

40 title = p.get("title", "") or "" 

41 preview = (p.get("text_preview") or "")[:1000] 

42 combined = f"{title}\n{preview}".lower() 

43 if claim_text.lower() in combined or any(word in combined for word in re.findall(r"\w+", claim_text.lower())[:5]): 

44 hits.append( 

45 { 

46 "type": "paper", 

47 "pmid": p.get("pmid"), 

48 "title": p.get("title"), 

49 "snippet": (preview[:300] + "...") if preview else "", 

50 } 

51 ) 

52 return hits 

53 

54 

def verify_claim(claim: Dict[str, str], context: Dict) -> Dict:
    """Verify a single claim against the supplied context.

    Args:
        claim: ``{"type": ..., "statement": ...}``; ``type`` defaults to
            ``"general"`` and a missing or ``None`` statement is treated as
            ``""``.
        context: Dict holding the patient profile and candidate papers.

    Returns:
        ``{"type", "statement", "verified", "sources"}``. ``verified`` is
        ``True`` when the statement mentions a patient medication, a patient
        condition, or matches at least one paper; ``sources`` lists every
        match found. A blank statement is always returned unverified with no
        sources.
    """
    statement = claim.get("statement") or ""
    claim_type = claim.get("type", "general")

    # A blank statement carries nothing to check. Without this guard the
    # paper lookup would treat "" as contained in every paper and mark the
    # claim verified.
    if not statement.strip():
        return {
            "type": claim_type,
            "statement": statement,
            "verified": False,
            "sources": [],
        }

    sources = []
    verified = False

    # Check KG: patient meds
    meds = _patient_med_names(context)
    if _match_any(meds, statement):
        sources.append({"type": "kg", "detail": "patient_medication_match", "medications": meds})
        verified = True

    # Check KG: patient conditions
    conds = _patient_conditions(context)
    if _match_any(conds, statement):
        sources.append({"type": "kg", "detail": "patient_condition_match", "conditions": conds})
        verified = True

    # Check papers (Qdrant results included in context)
    paper_hits = _papers_evidence(context, statement)
    if paper_hits:
        sources.extend(paper_hits)
        verified = True

    return {
        "type": claim_type,
        "statement": statement,
        "verified": verified,
        "sources": sources,
    }

88 

89 

def verify_claims(claims: List[Dict[str, str]], context: Dict) -> List[Dict]:
    """Verify a list of extracted claims.

    Args:
        claims: Claim dicts (``{"type": ..., "statement": ...}``). A plain
            string is also accepted and is first run through
            ``extract_claims``.
        context: Dict holding the patient profile and candidate papers.

    Returns:
        One result dict per claim, in input order. Verification is
        best-effort: a claim whose check raises is reported as unverified
        with no sources instead of aborting the batch. Empty or ``None``
        input yields ``[]``.
    """
    if not claims:
        return []

    # If claims look like raw text rather than structured, run extractor
    if isinstance(claims, str):
        # Guard against an extractor that returns None for unparseable text.
        claims = extract_claims(claims) or []

    verified = []
    for c in claims:
        try:
            verified.append(verify_claim(c, context))
        except Exception:
            # The failure may come from the claim itself being malformed
            # (e.g. a bare string instead of a dict), so the fallback must
            # not assume `c` supports `.get()`.
            if isinstance(c, dict):
                claim_type = c.get("type", "general")
                statement = c.get("statement", "")
            else:
                claim_type = "general"
                statement = c if isinstance(c, str) else ""
            verified.append(
                {
                    "type": claim_type,
                    "statement": statement,
                    "verified": False,
                    "sources": [],
                }
            )
    return verified