Coverage for app\rag\fact_checker.py

1"""

2Simple fact checker for Graph-RAG claims.

4- Verifies claims using the provided `context` (patient profile + papers).

5- Uses deterministic checks (string matching against patient medications/conditions

6 and paper title/text previews). No network calls, no mutation.

7- Returns augmented claims: { type, statement, verified: bool, sources: [ ... ] }

8"""

10from typing import List, Dict

11import re

12from app.rag.claim_extractor import extract_claims

15def _normalize(text: str) -> str:

16 return (text or "").lower()

def _match_any(token_list: List[str], text: str) -> bool:
    """Report whether any non-empty token occurs as a substring of *text*.

    The comparison is case-insensitive: the text is passed through
    ``_normalize`` and each token is lower-cased before the containment
    check. Falsy tokens (such as ``""``) are skipped and never match.
    """
    haystack = _normalize(text)
    return any(needle.lower() in haystack for needle in token_list if needle)

27def _patient_med_names(context: Dict) -> List[str]:

28 meds = context.get("patient", {}).get("medications", []) or []

29 return [m.get("name", "").lower() for m in meds if m.get("name")]

32def _patient_conditions(context: Dict) -> List[str]:

33 conds = context.get("patient", {}).get("conditions", []) or []

34 return [c.get("name", "").lower() for c in conds if c.get("name")]

37def _papers_evidence(context: Dict, claim_text: str) -> List[Dict]:

38 hits = []

39 for p in context.get("papers", []) or []:

40 title = p.get("title", "") or ""

41 preview = (p.get("text_preview") or "")[:1000]

42 combined = f"{title}\n{preview}".lower()

43 if claim_text.lower() in combined or any(word in combined for word in re.findall(r"\w+", claim_text.lower())[:5]):

44 hits.append(

45 {

46 "type": "paper",

47 "pmid": p.get("pmid"),

48 "title": p.get("title"),

49 "snippet": (preview[:300] + "...") if preview else "",

50 }

51 )

52 return hits

def verify_claim(claim: Dict[str, str], context: Dict) -> Dict:
    """Verify a single claim dictionary against the supplied context.

    The claim is marked verified when its statement matches any of:
      * a patient medication name (``_patient_med_names`` / ``_match_any``),
      * a patient condition name (``_patient_conditions`` / ``_match_any``),
      * a paper in the context (``_papers_evidence``).

    Args:
        claim: ``{"type": ..., "statement": ...}``; ``type`` defaults to
            ``"general"`` and ``statement`` to ``""`` when absent.
        context: Patient profile and papers used as evidence.

    Returns:
        ``{"type", "statement", "verified": bool, "sources": list}``, where
        ``sources`` holds one entry per kind of evidence that matched.
    """
    statement = claim.get("statement", "")

    # A blank or missing statement carries nothing to verify. Without this
    # guard an empty string is trivially "found" in every paper text and the
    # claim would be reported as verified.
    if not statement or (isinstance(statement, str) and not statement.strip()):
        return {
            "type": claim.get("type", "general"),
            "statement": statement,
            "verified": False,
            "sources": [],
        }

    sources = []
    verified = False

    # Check KG: patient meds
    meds = _patient_med_names(context)
    if _match_any(meds, statement):
        sources.append({"type": "kg", "detail": "patient_medication_match", "medications": meds})
        verified = True

    # Check KG: patient conditions
    conds = _patient_conditions(context)
    if _match_any(conds, statement):
        sources.append({"type": "kg", "detail": "patient_condition_match", "conditions": conds})
        verified = True

    # Check papers (Qdrant results included in context)
    paper_hits = _papers_evidence(context, statement)
    if paper_hits:
        sources.extend(paper_hits)
        verified = True

    return {
        "type": claim.get("type", "general"),
        "statement": statement,
        "verified": verified,
        "sources": sources,
    }

def verify_claims(claims: List[Dict[str, str]], context: Dict) -> List[Dict]:
    """Verify a list of extracted claims.

    Args:
        claims: Claim dicts (``{"type", "statement"}``). A plain string is
            also accepted and is first passed through ``extract_claims``.
        context: Patient profile and papers forwarded to ``verify_claim``.

    Returns:
        One result dict per claim, in input order. A claim whose verification
        raises is reported as unverified with no sources instead of aborting
        the whole batch. Returns ``[]`` for empty or ``None`` input.
    """
    if not claims:
        return []

    # If claims look like raw text rather than structured, run extractor
    if isinstance(claims, str):
        claims = extract_claims(claims)

    verified = []
    for c in claims:
        try:
            verified.append(verify_claim(c, context))
        except Exception:
            # Deliberate best-effort: one bad claim must not sink the batch.
            # The fallback must itself be safe for malformed (non-dict)
            # claims, otherwise `c.get` would raise from inside the handler.
            if isinstance(c, dict):
                claim_type = c.get("type", "general")
                statement = c.get("statement", "")
            else:
                claim_type = "general"
                statement = c if isinstance(c, str) else ""
            verified.append(
                {
                    "type": claim_type,
                    "statement": statement,
                    "verified": False,
                    "sources": [],
                }
            )
    return verified

Coverage for app \ rag \ fact_checker.py: 93%

56 statements