Coverage for app \ schema \ schema_builder.py: 100%

8 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-24 13:18 +0530

1from typing import Any, Dict, List, Optional 

2from datetime import datetime, UTC 

3 

# Version tag that build_payload writes into every payload under the "schema_version" key.
4SCHEMA_VERSION = "1.0" 

5 

6 

7def _empty_entity_block() -> Dict[str, List[str]]: 

8 """Standard empty entity structure.""" 

9 return { 

10 "drugs": [], 

11 "conditions": [], 

12 "biomarkers": [], 

13 "symptoms": [], 

14 } 

15 

16 

def build_payload(
    *,
    text: str,
    pmid: str,
    title: str,
    journal: str,
    year: int,
    authors: List[str],
    section: str,
    chunk_index: int,
    api_query: str,
    entities: Optional[Dict[str, List[str]]] = None,
    kg_node_ids: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Any]:
    """
    Assemble the versioned, standardized payload dict for Qdrant storage.

    All arguments are keyword-only. ``pmid`` is coerced with ``str()`` and
    ``year`` / ``chunk_index`` with ``int()``; every other value is stored
    exactly as passed (the ``authors``, ``entities`` and ``kg_node_ids``
    objects are referenced, not copied).

    When ``entities`` or ``kg_node_ids`` is ``None`` it is replaced by a new
    empty entity block. ``relations`` always starts as an empty list, and the
    ``study_type`` / ``confidence_level`` placeholders are always ``None``.

    Returns:
        A dict carrying schema metadata (including a timezone-aware UTC
        ISO-8601 ``retrieved_at`` timestamp), document metadata, the chunk
        text, and the NLP / knowledge-graph fields.
    """
    # Resolve the optional blocks up front; each default is a separate object.
    if entities is None:
        entities = _empty_entity_block()
    if kg_node_ids is None:
        kg_node_ids = _empty_entity_block()

    schema_meta: Dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "source": "pubmed_api",
        "retrieved_at": datetime.now(UTC).isoformat(),
    }

    document_meta: Dict[str, Any] = {
        "pmid": str(pmid),
        "title": title,
        "journal": journal,
        "year": int(year),
        "authors": authors,
        "section": section,
        "chunk_index": int(chunk_index),
        "api_query": api_query,
    }

    content: Dict[str, Any] = {"text": text}

    nlp_kg: Dict[str, Any] = {
        "entities": entities,
        "relations": [],
        "kg_node_ids": kg_node_ids,
    }

    # Reserved for future ML enrichment; intentionally unset here.
    future_ml: Dict[str, Any] = {
        "study_type": None,
        "confidence_level": None,
    }

    # Merge order fixes the key order of the resulting payload.
    return {**schema_meta, **document_meta, **content, **nlp_kg, **future_ml}