knowledge: stamp model + backend on facts for audit
Adds two nullable TEXT columns to entity_facts —
`created_by_model` (LLM identifier) and `created_by_backend`
("local" / "hybrid" / "manual" / NULL) — so the curator can audit
which configurations produce good fact-keeping and which produce
noise.
photo_insights already carries model_version + backend, and
entity_facts.source_insight_id links to it, but:
- source_insight_id is set post-loop, so chat-continuation and
regenerated-insight facts lose the link.
- JOINing per read is more friction than embedding provenance on
the row itself.
- Manual facts (POST /knowledge/facts) have no insight at all and
need their own "manual" provenance marker.
Threading: execute_tool grows `model` + `backend` params, passed
from the three call sites (agentic insight loop, chat single-turn,
chat stream) using the loop-time values already in scope:
`chat_backend.primary_model()` + `backend_label` in the insight loop,
`model_used` + `effective_backend` in the two chat paths.
tool_store_fact stamps the new fact accordingly; manual create_fact
stamps backend="manual" and leaves the model NULL.
Legacy rows leave both NULL — pre-tracking data can't be
back-filled reliably from training_messages without burning compute.
Indexes are partial (WHERE <column> IS NOT NULL) so legacy NULL rows
don't bloat them, and "show me all facts from model X" stays fast.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,4 @@
|
|||||||
|
DROP INDEX IF EXISTS idx_entity_facts_created_by_backend;
|
||||||
|
DROP INDEX IF EXISTS idx_entity_facts_created_by_model;
|
||||||
|
ALTER TABLE entity_facts DROP COLUMN created_by_backend;
|
||||||
|
ALTER TABLE entity_facts DROP COLUMN created_by_model;
|
||||||
30
migrations/2026-05-10-000300_entity_facts_provenance/up.sql
Normal file
30
migrations/2026-05-10-000300_entity_facts_provenance/up.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Track which model + backend generated each fact so the curator
|
||||||
|
-- can audit which configurations produce trustworthy knowledge.
|
||||||
|
--
|
||||||
|
-- photo_insights already carries `model_version` + `backend`, and
|
||||||
|
-- entity_facts.source_insight_id links to it — but:
|
||||||
|
-- 1. source_insight_id is only set after an insight is stored
|
||||||
|
-- (post-loop), so chat-continuation facts and facts whose insight
|
||||||
|
-- was regenerated lose the link.
|
||||||
|
-- 2. JOINing for every read is more friction than just embedding the
|
||||||
|
-- provenance on the fact row itself.
|
||||||
|
-- 3. Manual facts (POST /knowledge/facts) have no insight at all and
|
||||||
|
-- need to record "manual" as their provenance.
|
||||||
|
--
|
||||||
|
-- Two nullable TEXT columns are enough for the audit use case: model
|
||||||
|
-- (e.g. "qwen2.5:7b", "anthropic/claude-sonnet-4") and backend
|
||||||
|
-- ("local", "hybrid", "manual"). Pre-existing rows leave both NULL —
|
||||||
|
-- legacy facts predate this tracking and can't be back-filled
|
||||||
|
-- reliably from training_messages without burning compute.
|
||||||
|
|
||||||
|
ALTER TABLE entity_facts ADD COLUMN created_by_model TEXT;
|
||||||
|
ALTER TABLE entity_facts ADD COLUMN created_by_backend TEXT;
|
||||||
|
|
||||||
|
-- Indexes are cheap and useful for "show me all facts from model X"
|
||||||
|
-- audit queries — partial so the legacy NULL rows don't bloat them.
|
||||||
|
CREATE INDEX idx_entity_facts_created_by_model
|
||||||
|
ON entity_facts(created_by_model)
|
||||||
|
WHERE created_by_model IS NOT NULL;
|
||||||
|
CREATE INDEX idx_entity_facts_created_by_backend
|
||||||
|
ON entity_facts(created_by_backend)
|
||||||
|
WHERE created_by_backend IS NOT NULL;
|
||||||
@@ -497,6 +497,8 @@ impl InsightChatService {
|
|||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
|
&model_used,
|
||||||
|
&effective_backend,
|
||||||
&loop_cx,
|
&loop_cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -870,6 +872,8 @@ impl InsightChatService {
|
|||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
|
&model_used,
|
||||||
|
&effective_backend,
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -1059,6 +1063,8 @@ impl InsightChatService {
|
|||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
|
&model_used,
|
||||||
|
&effective_backend,
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -1210,6 +1216,10 @@ impl InsightChatService {
|
|||||||
normalized: &str,
|
normalized: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
active_persona: &str,
|
active_persona: &str,
|
||||||
|
// Provenance — stamped onto any store_fact tool call made
|
||||||
|
// during this loop. Mirrors the non-streaming chat path.
|
||||||
|
model_used: &str,
|
||||||
|
effective_backend: &str,
|
||||||
max_iterations: usize,
|
max_iterations: usize,
|
||||||
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
||||||
) -> Result<AgenticLoopOutcome> {
|
) -> Result<AgenticLoopOutcome> {
|
||||||
@@ -1290,6 +1300,8 @@ impl InsightChatService {
|
|||||||
normalized,
|
normalized,
|
||||||
user_id,
|
user_id,
|
||||||
active_persona,
|
active_persona,
|
||||||
|
model_used,
|
||||||
|
effective_backend,
|
||||||
&cx,
|
&cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|||||||
@@ -1554,6 +1554,13 @@ Return ONLY the summary, nothing else."#,
|
|||||||
file_path: &str,
|
file_path: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
persona_id: &str,
|
persona_id: &str,
|
||||||
|
// Provenance — written into entity_facts.created_by_* when
|
||||||
|
// the loop calls store_fact. The caller knows the actual
|
||||||
|
// chat-runtime model and backend (which may differ from
|
||||||
|
// ollama.primary_model in hybrid mode where chat lives on
|
||||||
|
// OpenRouter while Ollama still handles vision).
|
||||||
|
model: &str,
|
||||||
|
backend: &str,
|
||||||
cx: &opentelemetry::Context,
|
cx: &opentelemetry::Context,
|
||||||
) -> String {
|
) -> String {
|
||||||
let result = match tool_name {
|
let result = match tool_name {
|
||||||
@@ -1574,8 +1581,10 @@ Return ONLY the summary, nothing else."#,
|
|||||||
}
|
}
|
||||||
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await,
|
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await,
|
||||||
"store_fact" => {
|
"store_fact" => {
|
||||||
self.tool_store_fact(arguments, file_path, user_id, persona_id, cx)
|
self.tool_store_fact(
|
||||||
.await
|
arguments, file_path, user_id, persona_id, model, backend, cx,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
"get_current_datetime" => Self::tool_get_current_datetime(),
|
"get_current_datetime" => Self::tool_get_current_datetime(),
|
||||||
unknown => format!("Unknown tool: {}", unknown),
|
unknown => format!("Unknown tool: {}", unknown),
|
||||||
@@ -2632,6 +2641,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
file_path: &str,
|
file_path: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
persona_id: &str,
|
persona_id: &str,
|
||||||
|
model: &str,
|
||||||
|
backend: &str,
|
||||||
cx: &opentelemetry::Context,
|
cx: &opentelemetry::Context,
|
||||||
) -> String {
|
) -> String {
|
||||||
use crate::database::models::{InsertEntityFact, InsertEntityPhotoLink};
|
use crate::database::models::{InsertEntityFact, InsertEntityPhotoLink};
|
||||||
@@ -2700,6 +2711,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
valid_from,
|
valid_from,
|
||||||
valid_until: None,
|
valid_until: None,
|
||||||
superseded_by: None,
|
superseded_by: None,
|
||||||
|
created_by_model: Some(model.to_string()),
|
||||||
|
created_by_backend: Some(backend.to_string()),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut kdao = self
|
let mut kdao = self
|
||||||
@@ -3730,6 +3743,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
&file_path,
|
&file_path,
|
||||||
user_id,
|
user_id,
|
||||||
&persona_id,
|
&persona_id,
|
||||||
|
chat_backend.primary_model(),
|
||||||
|
&backend_label,
|
||||||
&loop_cx,
|
&loop_cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|||||||
@@ -1470,6 +1470,8 @@ mod tests {
|
|||||||
valid_from: None,
|
valid_from: None,
|
||||||
valid_until: None,
|
valid_until: None,
|
||||||
superseded_by: None,
|
superseded_by: None,
|
||||||
|
created_by_model: None,
|
||||||
|
created_by_backend: None,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1691,6 +1693,8 @@ mod tests {
|
|||||||
valid_from: None,
|
valid_from: None,
|
||||||
valid_until: None,
|
valid_until: None,
|
||||||
superseded_by: None,
|
superseded_by: None,
|
||||||
|
created_by_model: None,
|
||||||
|
created_by_backend: None,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
@@ -1923,6 +1927,8 @@ mod tests {
|
|||||||
valid_from: None,
|
valid_from: None,
|
||||||
valid_until: None,
|
valid_until: None,
|
||||||
superseded_by: None,
|
superseded_by: None,
|
||||||
|
created_by_model: None,
|
||||||
|
created_by_backend: None,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|||||||
@@ -262,6 +262,12 @@ pub struct InsertEntityFact {
|
|||||||
/// the supersede endpoint; status flips to 'superseded' in the
|
/// the supersede endpoint; status flips to 'superseded' in the
|
||||||
/// same transaction. See migration 2026-05-10-000200.
|
/// same transaction. See migration 2026-05-10-000200.
|
||||||
pub superseded_by: Option<i32>,
|
pub superseded_by: Option<i32>,
|
||||||
|
/// Provenance for model audit — see migration 2026-05-10-000300.
|
||||||
|
/// `created_by_model` is the LLM identifier (e.g. "qwen2.5:7b",
|
||||||
|
/// "anthropic/claude-sonnet-4") or NULL for legacy / manual rows.
|
||||||
|
/// `created_by_backend` is "local" / "hybrid" / "manual" / NULL.
|
||||||
|
pub created_by_model: Option<String>,
|
||||||
|
pub created_by_backend: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||||
@@ -281,6 +287,8 @@ pub struct EntityFact {
|
|||||||
pub valid_from: Option<i64>,
|
pub valid_from: Option<i64>,
|
||||||
pub valid_until: Option<i64>,
|
pub valid_until: Option<i64>,
|
||||||
pub superseded_by: Option<i32>,
|
pub superseded_by: Option<i32>,
|
||||||
|
pub created_by_model: Option<String>,
|
||||||
|
pub created_by_backend: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Insertable)]
|
#[derive(Insertable)]
|
||||||
|
|||||||
@@ -62,6 +62,8 @@ diesel::table! {
|
|||||||
valid_from -> Nullable<BigInt>,
|
valid_from -> Nullable<BigInt>,
|
||||||
valid_until -> Nullable<BigInt>,
|
valid_until -> Nullable<BigInt>,
|
||||||
superseded_by -> Nullable<Integer>,
|
superseded_by -> Nullable<Integer>,
|
||||||
|
created_by_model -> Nullable<Text>,
|
||||||
|
created_by_backend -> Nullable<Text>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,10 @@ pub struct FactDetail {
|
|||||||
/// supersession, migration 2026-05-10-000200). Only set when
|
/// supersession, migration 2026-05-10-000200). Only set when
|
||||||
/// status == 'superseded'.
|
/// status == 'superseded'.
|
||||||
pub superseded_by: Option<i32>,
|
pub superseded_by: Option<i32>,
|
||||||
|
/// Provenance — see migration 2026-05-10-000300. NULL on legacy
|
||||||
|
/// rows. `created_by_backend` is "local" / "hybrid" / "manual".
|
||||||
|
pub created_by_model: Option<String>,
|
||||||
|
pub created_by_backend: Option<String>,
|
||||||
/// Set when another active fact has the same subject+predicate,
|
/// Set when another active fact has the same subject+predicate,
|
||||||
/// a different object, AND their valid-time intervals overlap.
|
/// a different object, AND their valid-time intervals overlap.
|
||||||
/// Detected at read time by the get_entity handler grouping
|
/// Detected at read time by the get_entity handler grouping
|
||||||
@@ -432,6 +436,8 @@ async fn get_entity<D: KnowledgeDao + 'static>(
|
|||||||
valid_from: f.valid_from,
|
valid_from: f.valid_from,
|
||||||
valid_until: f.valid_until,
|
valid_until: f.valid_until,
|
||||||
superseded_by: f.superseded_by,
|
superseded_by: f.superseded_by,
|
||||||
|
created_by_model: f.created_by_model,
|
||||||
|
created_by_backend: f.created_by_backend,
|
||||||
in_conflict: false,
|
in_conflict: false,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -768,6 +774,11 @@ async fn create_fact<D: KnowledgeDao + 'static>(
|
|||||||
valid_from: body.valid_from,
|
valid_from: body.valid_from,
|
||||||
valid_until: body.valid_until,
|
valid_until: body.valid_until,
|
||||||
superseded_by: None,
|
superseded_by: None,
|
||||||
|
// Manual creation via curation UI — provenance recorded as
|
||||||
|
// "manual" with no model, distinguishing user-entered facts
|
||||||
|
// from agent-generated ones in the audit view.
|
||||||
|
created_by_model: None,
|
||||||
|
created_by_backend: Some("manual".to_string()),
|
||||||
};
|
};
|
||||||
|
|
||||||
match dao.upsert_fact(&cx, insert) {
|
match dao.upsert_fact(&cx, insert) {
|
||||||
|
|||||||
Reference in New Issue
Block a user