Coverage for app \ schema \ schema_builder.py: 100%
8 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-24 13:18 +0530
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-24 13:18 +0530
1from typing import Any, Dict, List, Optional
2from datetime import datetime, UTC
# Version tag written into every payload under the "schema_version" key
# by build_payload().
SCHEMA_VERSION = "1.0"
7def _empty_entity_block() -> Dict[str, List[str]]:
8 """Standard empty entity structure."""
9 return {
10 "drugs": [],
11 "conditions": [],
12 "biomarkers": [],
13 "symptoms": [],
14 }
def build_payload(
    *,
    text: str,
    pmid: str,
    title: str,
    journal: str,
    year: int,
    authors: List[str],
    section: str,
    chunk_index: int,
    api_query: str,
    entities: Optional[Dict[str, List[str]]] = None,
    kg_node_ids: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Any]:
    """
    Assemble the standardized, schema-versioned payload dict for Qdrant storage.

    All arguments are keyword-only. ``pmid`` is coerced with ``str()`` and
    ``year`` / ``chunk_index`` with ``int()``; every other document field is
    stored exactly as passed in.

    When ``entities`` or ``kg_node_ids`` is ``None`` it is replaced by a fresh
    block from ``_empty_entity_block()``; a provided mapping is stored as-is
    (not copied).

    The returned dict also carries ``schema_version`` (``SCHEMA_VERSION``),
    a fixed ``source`` of ``"pubmed_api"``, ``retrieved_at`` (the current UTC
    time as an ISO-8601 string), an empty ``relations`` list, and the
    placeholder fields ``study_type`` and ``confidence_level`` set to ``None``.
    """
    # ---- schema metadata ----
    schema_fields: Dict[str, Any] = {
        "schema_version": SCHEMA_VERSION,
        "source": "pubmed_api",
        "retrieved_at": datetime.now(UTC).isoformat(),
    }

    # ---- document metadata ----
    document_fields: Dict[str, Any] = {
        "pmid": str(pmid),
        "title": title,
        "journal": journal,
        "year": int(year),
        "authors": authors,
        "section": section,
        "chunk_index": int(chunk_index),
        "api_query": api_query,
    }

    # ---- NLP / KG ----
    # Only None triggers the fallback; an explicitly passed empty dict is kept.
    if entities is None:
        entity_block = _empty_entity_block()
    else:
        entity_block = entities

    if kg_node_ids is None:
        node_id_block = _empty_entity_block()
    else:
        node_id_block = kg_node_ids

    # Merge the groups in a fixed order so the key order of the result is stable.
    return {
        **schema_fields,
        **document_fields,
        # ---- content ----
        "text": text,
        "entities": entity_block,
        "relations": [],
        "kg_node_ids": node_id_block,
        # ---- future ML fields ----
        "study_type": None,
        "confidence_level": None,
    }