feature/knowledge-curation #91

Merged
cameron merged 19 commits from feature/knowledge-curation into master 2026-05-12 15:40:57 +00:00
8 changed files with 148 additions and 122 deletions
Showing only changes of commit 6dca0c027d - Show all commits

View File

@@ -885,10 +885,7 @@ pub async fn chat_history_handler(
.flatten() .flatten()
.unwrap_or_else(|| app_state.primary_library()); .unwrap_or_else(|| app_state.primary_library());
match app_state match app_state.insight_chat.load_history(library.id, &query.path) {
.insight_chat
.load_history(library.id, &query.path)
{
Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse { Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
messages: view messages: view
.messages .messages

View File

@@ -185,9 +185,9 @@ async fn main() -> anyhow::Result<()> {
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> = let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
Arc::new(Mutex::new(Box::new(SqliteFaceDao::new()))); Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new( let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new(Mutex::new(
Mutex::new(Box::new(image_api::database::SqlitePersonaDao::new())), Box::new(image_api::database::SqlitePersonaDao::new()),
); ));
// Pass the full library set so `resolve_full_path` probes every root, // Pass the full library set so `resolve_full_path` probes every root,
// even when --library restricts the walk. A rel_path shared across // even when --library restricts the walk. A rel_path shared across

View File

@@ -204,7 +204,11 @@ impl InsightDao for SqliteInsightDao {
lib_id: i32, lib_id: i32,
path: &str, path: &str,
) -> Result<Option<PhotoInsight>, DbError> { ) -> Result<Option<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_current_insight_for_library", |_span| { trace_db_call(
context,
"query",
"get_current_insight_for_library",
|_span| {
use schema::photo_insights::dsl::*; use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao"); let mut connection = self.connection.lock().expect("Unable to get InsightDao");
@@ -216,7 +220,8 @@ impl InsightDao for SqliteInsightDao {
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.optional() .optional()
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|_| anyhow::anyhow!("Query error"))
}) },
)
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|_| DbError::new(DbErrorKind::QueryError))
} }

View File

@@ -691,7 +691,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// (user_id, persona_id); All = union across the user's // (user_id, persona_id); All = union across the user's
// personas (mirror PersonaFilter::All read semantics). // personas (mirror PersonaFilter::All read semantics).
let fact_count_join = match persona { let fact_count_join = match persona {
PersonaFilter::Single { user_id: _, persona_id: _ } => { PersonaFilter::Single {
user_id: _,
persona_id: _,
} => {
"LEFT JOIN (\ "LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \ SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \ FROM entity_facts \
@@ -712,9 +715,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
let order_by = match sort { let order_by = match sort {
EntitySort::UpdatedDesc => "e.updated_at DESC", EntitySort::UpdatedDesc => "e.updated_at DESC",
EntitySort::NameAsc => "lower(e.name) ASC", EntitySort::NameAsc => "lower(e.name) ASC",
EntitySort::FactCountDesc => { EntitySort::FactCountDesc => "COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC",
"COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC"
}
}; };
let select_sql = format!( let select_sql = format!(
@@ -728,9 +729,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
LIMIT ? OFFSET ?" LIMIT ? OFFSET ?"
); );
let count_sql = format!( let count_sql = format!("SELECT COUNT(*) AS total FROM entities e {where_clause}");
"SELECT COUNT(*) AS total FROM entities e {where_clause}"
);
// ── Total count ───────────────────────────────────────── // ── Total count ─────────────────────────────────────────
#[derive(diesel::QueryableByName)] #[derive(diesel::QueryableByName)]
@@ -776,7 +775,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// Persona binds first (they're earlier in the SQL — inside // Persona binds first (they're earlier in the SQL — inside
// the subquery LEFT JOIN). // the subquery LEFT JOIN).
match persona { match persona {
PersonaFilter::Single { user_id, persona_id } => { PersonaFilter::Single {
user_id,
persona_id,
} => {
q = q q = q
.bind::<Integer, _>(*user_id) .bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone()); .bind::<Text, _>(persona_id.clone());
@@ -970,10 +972,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// Biggest clusters first; tie-break on the strongest // Biggest clusters first; tie-break on the strongest
// pair so the most-obvious dupes surface at the top. // pair so the most-obvious dupes surface at the top.
result.sort_by(|a, b| { result.sort_by(|a, b| {
b.entities b.entities.len().cmp(&a.entities.len()).then_with(|| {
.len()
.cmp(&a.entities.len())
.then_with(|| {
b.max_cosine b.max_cosine
.partial_cmp(&a.max_cosine) .partial_cmp(&a.max_cosine)
.unwrap_or(std::cmp::Ordering::Equal) .unwrap_or(std::cmp::Ordering::Equal)
@@ -1286,7 +1285,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.filter(status.ne("rejected")) .filter(status.ne("rejected"))
.filter(user_id.eq(persona.user_id())) .filter(user_id.eq(persona.user_id()))
.into_boxed(); .into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona { if let PersonaFilter::Single {
persona_id: pid, ..
} = persona
{
q = q.filter(persona_id.eq(pid.clone())); q = q.filter(persona_id.eq(pid.clone()));
} }
q.load::<EntityFact>(conn.deref_mut()) q.load::<EntityFact>(conn.deref_mut())
@@ -1326,7 +1328,11 @@ impl KnowledgeDao for SqliteKnowledgeDao {
query = query.filter(predicate.eq(pred)); query = query.filter(predicate.eq(pred));
count_query = count_query.filter(predicate.eq(pred)); count_query = count_query.filter(predicate.eq(pred));
} }
if let PersonaFilter::Single { persona_id: ref pid, .. } = filter.persona { if let PersonaFilter::Single {
persona_id: ref pid,
..
} = filter.persona
{
query = query.filter(persona_id.eq(pid.clone())); query = query.filter(persona_id.eq(pid.clone()));
count_query = count_query.filter(persona_id.eq(pid.clone())); count_query = count_query.filter(persona_id.eq(pid.clone()));
} }
@@ -1499,8 +1505,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
None => (None, None), None => (None, None),
}; };
conn.transaction::<Option<EntityFact>, diesel::result::Error, _>( conn.transaction::<Option<EntityFact>, diesel::result::Error, _>(|conn| {
|conn| {
// Pull the new fact's valid_from so we can close // Pull the new fact's valid_from so we can close
// the old fact's interval at the same point. // the old fact's interval at the same point.
let new_fact: Option<EntityFact> = entity_facts let new_fact: Option<EntityFact> = entity_facts
@@ -1543,8 +1548,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.filter(id.eq(old_id)) .filter(id.eq(old_id))
.first::<EntityFact>(conn) .first::<EntityFact>(conn)
.optional() .optional()
}, })
)
.map_err(|e| anyhow::anyhow!("Supersede error: {}", e)) .map_err(|e| anyhow::anyhow!("Supersede error: {}", e))
}) })
.map_err(|e| { .map_err(|e| {
@@ -1722,7 +1726,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.filter(ef::created_at.gt(since)) .filter(ef::created_at.gt(since))
.filter(ef::user_id.eq(persona.user_id())) .filter(ef::user_id.eq(persona.user_id()))
.into_boxed(); .into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona { if let PersonaFilter::Single {
persona_id: pid, ..
} = persona
{
facts_q = facts_q.filter(ef::persona_id.eq(pid.clone())); facts_q = facts_q.filter(ef::persona_id.eq(pid.clone()));
} }
let recent_facts = facts_q let recent_facts = facts_q
@@ -1880,7 +1887,14 @@ mod tests {
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin"); let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "located_in", "Vermont", alice, "default"); add_fact(
&mut dao,
entity.id,
"located_in",
"Vermont",
alice,
"default",
);
add_fact(&mut dao, entity.id, "color", "red", bob, "default"); add_fact(&mut dao, entity.id, "color", "red", bob, "default");
let alice_view = dao let alice_view = dao
@@ -1987,8 +2001,22 @@ mod tests {
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin"); let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "p_alice_default", "x", alice, "default"); add_fact(
add_fact(&mut dao, entity.id, "p_alice_journal", "y", alice, "journal"); &mut dao,
entity.id,
"p_alice_default",
"x",
alice,
"default",
);
add_fact(
&mut dao,
entity.id,
"p_alice_journal",
"y",
alice,
"journal",
);
add_fact(&mut dao, entity.id, "p_bob_journal", "z", bob, "journal"); add_fact(&mut dao, entity.id, "p_bob_journal", "z", bob, "journal");
// Delete alice's journal persona — CASCADE should remove only // Delete alice's journal persona — CASCADE should remove only
@@ -2167,7 +2195,9 @@ mod tests {
let old = add_fact(&mut dao, cameron.id, "lives_in", "NYC", alice, "default"); let old = add_fact(&mut dao, cameron.id, "lives_in", "NYC", alice, "default");
let new = add_fact(&mut dao, cameron.id, "lives_in", "SF", alice, "default"); let new = add_fact(&mut dao, cameron.id, "lives_in", "SF", alice, "default");
dao.supersede_fact(&cx, old.id, new.id, None).unwrap().unwrap(); dao.supersede_fact(&cx, old.id, new.id, None)
.unwrap()
.unwrap();
dao.delete_fact(&cx, new.id).unwrap(); dao.delete_fact(&cx, new.id).unwrap();
let rehydrated = dao let rehydrated = dao

View File

@@ -2992,9 +2992,12 @@ mod tests {
status: "detected".into(), status: "detected".into(),
model_version: "buffalo_l".into(), model_version: "buffalo_l".into(),
}; };
dao.store_detection(&ctx(), mk_row("a1", Some(alice.id))).unwrap(); dao.store_detection(&ctx(), mk_row("a1", Some(alice.id)))
dao.store_detection(&ctx(), mk_row("a2", Some(alice.id))).unwrap(); .unwrap();
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id))).unwrap(); dao.store_detection(&ctx(), mk_row("a2", Some(alice.id)))
.unwrap();
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id)))
.unwrap();
dao.store_detection(&ctx(), mk_row("u1", None)).unwrap(); dao.store_detection(&ctx(), mk_row("u1", None)).unwrap();
// person_id=alice returns only alice's two faces — ignoring the // person_id=alice returns only alice's two faces — ignoring the
@@ -3004,9 +3007,11 @@ mod tests {
.list_embeddings(&ctx(), None, true, Some(alice.id), 100, 0) .list_embeddings(&ctx(), None, true, Some(alice.id), 100, 0)
.unwrap(); .unwrap();
assert_eq!(alice_rows.len(), 2); assert_eq!(alice_rows.len(), 2);
assert!(alice_rows assert!(
alice_rows
.iter() .iter()
.all(|(r, _)| r.person_id == Some(alice.id))); .all(|(r, _)| r.person_id == Some(alice.id))
);
// unassigned=true with no person_id behaves as before. // unassigned=true with no person_id behaves as before.
let unassigned_rows = dao let unassigned_rows = dao

View File

@@ -384,13 +384,9 @@ where
.route(web::delete().to(delete_fact::<D>)), .route(web::delete().to(delete_fact::<D>)),
) )
.service( .service(
web::resource("/facts/{id}/supersede") web::resource("/facts/{id}/supersede").route(web::post().to(supersede_fact::<D>)),
.route(web::post().to(supersede_fact::<D>)),
)
.service(
web::resource("/facts/{id}/restore")
.route(web::post().to(restore_fact::<D>)),
) )
.service(web::resource("/facts/{id}/restore").route(web::post().to(restore_fact::<D>)))
.service(web::resource("/recent").route(web::get().to(get_recent::<D>))) .service(web::resource("/recent").route(web::get().to(get_recent::<D>)))
.service( .service(
web::resource("/consolidation-proposals") web::resource("/consolidation-proposals")
@@ -546,10 +542,7 @@ async fn get_entity<D: KnowledgeDao + 'static>(
// either bound treats that side as unbounded — a fact with no // either bound treats that side as unbounded — a fact with no
// valid-time data still flags against any time period (worst case // valid-time data still flags against any time period (worst case
// for legacy data; user adds dates to suppress). // for legacy data; user adds dates to suppress).
fn intervals_overlap( fn intervals_overlap(a: (Option<i64>, Option<i64>), b: (Option<i64>, Option<i64>)) -> bool {
a: (Option<i64>, Option<i64>),
b: (Option<i64>, Option<i64>),
) -> bool {
let a_lo = a.0.unwrap_or(i64::MIN); let a_lo = a.0.unwrap_or(i64::MIN);
let a_hi = a.1.unwrap_or(i64::MAX); let a_hi = a.1.unwrap_or(i64::MAX);
let b_lo = b.0.unwrap_or(i64::MIN); let b_lo = b.0.unwrap_or(i64::MIN);
@@ -574,8 +567,7 @@ async fn get_entity<D: KnowledgeDao + 'static>(
} }
for (a_pos, &i) in indices.iter().enumerate() { for (a_pos, &i) in indices.iter().enumerate() {
for &j in &indices[a_pos + 1..] { for &j in &indices[a_pos + 1..] {
let same_object = facts[i].object_entity_id let same_object = facts[i].object_entity_id == facts[j].object_entity_id
== facts[j].object_entity_id
&& facts[i].object_value == facts[j].object_value; && facts[i].object_value == facts[j].object_value;
if same_object { if same_object {
continue; continue;
@@ -806,8 +798,7 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
preamble, no labels, no quotes."; preamble, no labels, no quotes.";
let prompt = format!( let prompt = format!(
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:", "Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
source.name, source.entity_type, source_desc, source.name, source.entity_type, source_desc, target.name, target.entity_type, target_desc,
target.name, target.entity_type, target_desc,
); );
let ollama = app_state.ollama.clone(); let ollama = app_state.ollama.clone();
@@ -843,16 +834,12 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
s = stripped.trim_start().to_string(); s = stripped.trim_start().to_string();
} }
// Wrapping quotes // Wrapping quotes
s = s s = s.trim_matches(|c| c == '"' || c == '\'').to_string();
.trim_matches(|c| c == '"' || c == '\'')
.to_string();
// Inline emphasis: drop standalone `**` / `*` / `__` / // Inline emphasis: drop standalone `**` / `*` / `__` /
// `_` markers without trying to parse markdown — just // `_` markers without trying to parse markdown — just
// remove the punctuation. Rare enough that this naive // remove the punctuation. Rare enough that this naive
// replace is fine. // replace is fine.
s = s s = s.replace("**", "").replace("__", "");
.replace("**", "")
.replace("__", "");
s s
} }
Err(e) => { Err(e) => {
@@ -965,7 +952,10 @@ async fn create_fact<D: KnowledgeDao + 'static>(
// pin a specific persona for writes via X-Persona-Id. // pin a specific persona for writes via X-Persona-Id.
let persona = resolve_persona_filter(&req, &claims, &persona_dao); let persona = resolve_persona_filter(&req, &claims, &persona_dao);
let (user_id, persona_id) = match &persona { let (user_id, persona_id) = match &persona {
PersonaFilter::Single { user_id, persona_id } => (*user_id, persona_id.clone()), PersonaFilter::Single {
user_id,
persona_id,
} => (*user_id, persona_id.clone()),
PersonaFilter::All { user_id } => (*user_id, "default".to_string()), PersonaFilter::All { user_id } => (*user_id, "default".to_string()),
}; };
@@ -1113,8 +1103,9 @@ async fn supersede_fact<D: KnowledgeDao + 'static>(
// the PATCH path. // the PATCH path.
match dao.supersede_fact(&cx, old_id, body.by_fact_id, Some(("manual", "manual"))) { match dao.supersede_fact(&cx, old_id, body.by_fact_id, Some(("manual", "manual"))) {
Ok(Some(fact)) => HttpResponse::Ok().json(fact), Ok(Some(fact)) => HttpResponse::Ok().json(fact),
Ok(None) => HttpResponse::NotFound() Ok(None) => {
.json(serde_json::json!({"error": "Old or new fact not found"})), HttpResponse::NotFound().json(serde_json::json!({"error": "Old or new fact not found"}))
}
Err(e) => { Err(e) => {
log::error!("supersede_fact error: {:?}", e); log::error!("supersede_fact error: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"})) HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
@@ -1132,8 +1123,7 @@ async fn restore_fact<D: KnowledgeDao + 'static>(
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao"); let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.revert_supersession(&cx, fact_id, Some(("manual", "manual"))) { match dao.revert_supersession(&cx, fact_id, Some(("manual", "manual"))) {
Ok(Some(fact)) => HttpResponse::Ok().json(fact), Ok(Some(fact)) => HttpResponse::Ok().json(fact),
Ok(None) => HttpResponse::NotFound() Ok(None) => HttpResponse::NotFound().json(serde_json::json!({"error": "Fact not found"})),
.json(serde_json::json!({"error": "Fact not found"})),
Err(e) => { Err(e) => {
log::error!("restore_fact error: {:?}", e); log::error!("restore_fact error: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"})) HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))

View File

@@ -241,8 +241,7 @@ async fn update_persona(
// identity. Mirrors the same guard delete_persona enforces below. // identity. Mirrors the same guard delete_persona enforces below.
match dao.get_persona(&cx, uid, &pid) { match dao.get_persona(&cx, uid, &pid) {
Ok(Some(p)) if p.is_built_in => { Ok(Some(p)) if p.is_built_in => {
let editing_identity = let editing_identity = body.name.is_some() || body.system_prompt.is_some();
body.name.is_some() || body.system_prompt.is_some();
if editing_identity { if editing_identity {
return HttpResponse::Conflict().json(serde_json::json!({ return HttpResponse::Conflict().json(serde_json::json!({
"error": "Cannot edit name or systemPrompt of a built-in persona" "error": "Cannot edit name or systemPrompt of a built-in persona"

View File

@@ -207,9 +207,9 @@ impl Default for AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> = let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new( let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(Mutex::new(
Mutex::new(Box::new(crate::database::SqlitePersonaDao::new())), Box::new(crate::database::SqlitePersonaDao::new()),
); ));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> = let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))); Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
@@ -356,9 +356,9 @@ impl AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> = let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new( let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(Mutex::new(
Mutex::new(Box::new(crate::database::SqlitePersonaDao::new())), Box::new(crate::database::SqlitePersonaDao::new()),
); ));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> = let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))); Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));