feature/knowledge-curation #91

Merged
cameron merged 19 commits from feature/knowledge-curation into master 2026-05-12 15:40:57 +00:00
8 changed files with 148 additions and 122 deletions
Showing only changes of commit 6dca0c027d - Show all commits

View File

@@ -885,10 +885,7 @@ pub async fn chat_history_handler(
.flatten()
.unwrap_or_else(|| app_state.primary_library());
match app_state
.insight_chat
.load_history(library.id, &query.path)
{
match app_state.insight_chat.load_history(library.id, &query.path) {
Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
messages: view
.messages

View File

@@ -185,9 +185,9 @@ async fn main() -> anyhow::Result<()> {
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new(
Mutex::new(Box::new(image_api::database::SqlitePersonaDao::new())),
);
let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new(Mutex::new(
Box::new(image_api::database::SqlitePersonaDao::new()),
));
// Pass the full library set so `resolve_full_path` probes every root,
// even when --library restricts the walk. A rel_path shared across

View File

@@ -204,19 +204,24 @@ impl InsightDao for SqliteInsightDao {
lib_id: i32,
path: &str,
) -> Result<Option<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_current_insight_for_library", |_span| {
use schema::photo_insights::dsl::*;
trace_db_call(
context,
"query",
"get_current_insight_for_library",
|_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.filter(library_id.eq(lib_id))
.filter(rel_path.eq(path))
.filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
})
photo_insights
.filter(library_id.eq(lib_id))
.filter(rel_path.eq(path))
.filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
},
)
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}

View File

@@ -691,7 +691,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// (user_id, persona_id); All = union across the user's
// personas (mirror PersonaFilter::All read semantics).
let fact_count_join = match persona {
PersonaFilter::Single { user_id: _, persona_id: _ } => {
PersonaFilter::Single {
user_id: _,
persona_id: _,
} => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
@@ -712,9 +715,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
let order_by = match sort {
EntitySort::UpdatedDesc => "e.updated_at DESC",
EntitySort::NameAsc => "lower(e.name) ASC",
EntitySort::FactCountDesc => {
"COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC"
}
EntitySort::FactCountDesc => "COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC",
};
let select_sql = format!(
@@ -728,9 +729,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
LIMIT ? OFFSET ?"
);
let count_sql = format!(
"SELECT COUNT(*) AS total FROM entities e {where_clause}"
);
let count_sql = format!("SELECT COUNT(*) AS total FROM entities e {where_clause}");
// ── Total count ─────────────────────────────────────────
#[derive(diesel::QueryableByName)]
@@ -776,7 +775,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// Persona binds first (they're earlier in the SQL — inside
// the subquery LEFT JOIN).
match persona {
PersonaFilter::Single { user_id, persona_id } => {
PersonaFilter::Single {
user_id,
persona_id,
} => {
q = q
.bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone());
@@ -970,14 +972,11 @@ impl KnowledgeDao for SqliteKnowledgeDao {
// Biggest clusters first; tie-break on the strongest
// pair so the most-obvious dupes surface at the top.
result.sort_by(|a, b| {
b.entities
.len()
.cmp(&a.entities.len())
.then_with(|| {
b.max_cosine
.partial_cmp(&a.max_cosine)
.unwrap_or(std::cmp::Ordering::Equal)
})
b.entities.len().cmp(&a.entities.len()).then_with(|| {
b.max_cosine
.partial_cmp(&a.max_cosine)
.unwrap_or(std::cmp::Ordering::Equal)
})
});
result.truncate(max_groups);
Ok(result)
@@ -1286,7 +1285,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.filter(status.ne("rejected"))
.filter(user_id.eq(persona.user_id()))
.into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona {
if let PersonaFilter::Single {
persona_id: pid, ..
} = persona
{
q = q.filter(persona_id.eq(pid.clone()));
}
q.load::<EntityFact>(conn.deref_mut())
@@ -1326,7 +1328,11 @@ impl KnowledgeDao for SqliteKnowledgeDao {
query = query.filter(predicate.eq(pred));
count_query = count_query.filter(predicate.eq(pred));
}
if let PersonaFilter::Single { persona_id: ref pid, .. } = filter.persona {
if let PersonaFilter::Single {
persona_id: ref pid,
..
} = filter.persona
{
query = query.filter(persona_id.eq(pid.clone()));
count_query = count_query.filter(persona_id.eq(pid.clone()));
}
@@ -1499,52 +1505,50 @@ impl KnowledgeDao for SqliteKnowledgeDao {
None => (None, None),
};
conn.transaction::<Option<EntityFact>, diesel::result::Error, _>(
|conn| {
// Pull the new fact's valid_from so we can close
// the old fact's interval at the same point.
let new_fact: Option<EntityFact> = entity_facts
.filter(id.eq(new_id))
.first::<EntityFact>(conn)
.optional()?;
let Some(new_fact) = new_fact else {
return Ok(None);
};
conn.transaction::<Option<EntityFact>, diesel::result::Error, _>(|conn| {
// Pull the new fact's valid_from so we can close
// the old fact's interval at the same point.
let new_fact: Option<EntityFact> = entity_facts
.filter(id.eq(new_id))
.first::<EntityFact>(conn)
.optional()?;
let Some(new_fact) = new_fact else {
return Ok(None);
};
// Verify the old fact exists before touching it —
// returning None lets the handler 404 cleanly.
let old_fact: Option<EntityFact> = entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()?;
if old_fact.is_none() {
return Ok(None);
}
// Verify the old fact exists before touching it —
// returning None lets the handler 404 cleanly.
let old_fact: Option<EntityFact> = entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()?;
if old_fact.is_none() {
return Ok(None);
}
// Only stamp valid_until if the user hasn't
// already set it — respecting hand-curated bounds.
let target_valid_until = old_fact
.as_ref()
.and_then(|f| f.valid_until)
.or(new_fact.valid_from);
// Only stamp valid_until if the user hasn't
// already set it — respecting hand-curated bounds.
let target_valid_until = old_fact
.as_ref()
.and_then(|f| f.valid_until)
.or(new_fact.valid_from);
diesel::update(entity_facts.filter(id.eq(old_id)))
.set((
status.eq("superseded"),
superseded_by.eq(Some(new_id)),
valid_until.eq(target_valid_until),
last_modified_by_model.eq(audit_model.clone()),
last_modified_by_backend.eq(audit_backend.clone()),
last_modified_at.eq(Some(now)),
))
.execute(conn)?;
diesel::update(entity_facts.filter(id.eq(old_id)))
.set((
status.eq("superseded"),
superseded_by.eq(Some(new_id)),
valid_until.eq(target_valid_until),
last_modified_by_model.eq(audit_model.clone()),
last_modified_by_backend.eq(audit_backend.clone()),
last_modified_at.eq(Some(now)),
))
.execute(conn)?;
entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()
},
)
entity_facts
.filter(id.eq(old_id))
.first::<EntityFact>(conn)
.optional()
})
.map_err(|e| anyhow::anyhow!("Supersede error: {}", e))
})
.map_err(|e| {
@@ -1722,7 +1726,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.filter(ef::created_at.gt(since))
.filter(ef::user_id.eq(persona.user_id()))
.into_boxed();
if let PersonaFilter::Single { persona_id: pid, .. } = persona {
if let PersonaFilter::Single {
persona_id: pid, ..
} = persona
{
facts_q = facts_q.filter(ef::persona_id.eq(pid.clone()));
}
let recent_facts = facts_q
@@ -1880,7 +1887,14 @@ mod tests {
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "located_in", "Vermont", alice, "default");
add_fact(
&mut dao,
entity.id,
"located_in",
"Vermont",
alice,
"default",
);
add_fact(&mut dao, entity.id, "color", "red", bob, "default");
let alice_view = dao
@@ -1987,8 +2001,22 @@ mod tests {
let mut dao = SqliteKnowledgeDao::from_connection(conn.clone());
let entity = make_entity(&mut dao, "Cabin");
add_fact(&mut dao, entity.id, "p_alice_default", "x", alice, "default");
add_fact(&mut dao, entity.id, "p_alice_journal", "y", alice, "journal");
add_fact(
&mut dao,
entity.id,
"p_alice_default",
"x",
alice,
"default",
);
add_fact(
&mut dao,
entity.id,
"p_alice_journal",
"y",
alice,
"journal",
);
add_fact(&mut dao, entity.id, "p_bob_journal", "z", bob, "journal");
// Delete alice's journal persona — CASCADE should remove only
@@ -2167,7 +2195,9 @@ mod tests {
let old = add_fact(&mut dao, cameron.id, "lives_in", "NYC", alice, "default");
let new = add_fact(&mut dao, cameron.id, "lives_in", "SF", alice, "default");
dao.supersede_fact(&cx, old.id, new.id, None).unwrap().unwrap();
dao.supersede_fact(&cx, old.id, new.id, None)
.unwrap()
.unwrap();
dao.delete_fact(&cx, new.id).unwrap();
let rehydrated = dao

View File

@@ -2992,9 +2992,12 @@ mod tests {
status: "detected".into(),
model_version: "buffalo_l".into(),
};
dao.store_detection(&ctx(), mk_row("a1", Some(alice.id))).unwrap();
dao.store_detection(&ctx(), mk_row("a2", Some(alice.id))).unwrap();
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id))).unwrap();
dao.store_detection(&ctx(), mk_row("a1", Some(alice.id)))
.unwrap();
dao.store_detection(&ctx(), mk_row("a2", Some(alice.id)))
.unwrap();
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id)))
.unwrap();
dao.store_detection(&ctx(), mk_row("u1", None)).unwrap();
// person_id=alice returns only alice's two faces — ignoring the
@@ -3004,9 +3007,11 @@ mod tests {
.list_embeddings(&ctx(), None, true, Some(alice.id), 100, 0)
.unwrap();
assert_eq!(alice_rows.len(), 2);
assert!(alice_rows
.iter()
.all(|(r, _)| r.person_id == Some(alice.id)));
assert!(
alice_rows
.iter()
.all(|(r, _)| r.person_id == Some(alice.id))
);
// unassigned=true with no person_id behaves as before.
let unassigned_rows = dao

View File

@@ -384,13 +384,9 @@ where
.route(web::delete().to(delete_fact::<D>)),
)
.service(
web::resource("/facts/{id}/supersede")
.route(web::post().to(supersede_fact::<D>)),
)
.service(
web::resource("/facts/{id}/restore")
.route(web::post().to(restore_fact::<D>)),
web::resource("/facts/{id}/supersede").route(web::post().to(supersede_fact::<D>)),
)
.service(web::resource("/facts/{id}/restore").route(web::post().to(restore_fact::<D>)))
.service(web::resource("/recent").route(web::get().to(get_recent::<D>)))
.service(
web::resource("/consolidation-proposals")
@@ -546,10 +542,7 @@ async fn get_entity<D: KnowledgeDao + 'static>(
// either bound treats that side as unbounded — a fact with no
// valid-time data still flags against any time period (worst case
// for legacy data; user adds dates to suppress).
fn intervals_overlap(
a: (Option<i64>, Option<i64>),
b: (Option<i64>, Option<i64>),
) -> bool {
fn intervals_overlap(a: (Option<i64>, Option<i64>), b: (Option<i64>, Option<i64>)) -> bool {
let a_lo = a.0.unwrap_or(i64::MIN);
let a_hi = a.1.unwrap_or(i64::MAX);
let b_lo = b.0.unwrap_or(i64::MIN);
@@ -574,8 +567,7 @@ async fn get_entity<D: KnowledgeDao + 'static>(
}
for (a_pos, &i) in indices.iter().enumerate() {
for &j in &indices[a_pos + 1..] {
let same_object = facts[i].object_entity_id
== facts[j].object_entity_id
let same_object = facts[i].object_entity_id == facts[j].object_entity_id
&& facts[i].object_value == facts[j].object_value;
if same_object {
continue;
@@ -806,8 +798,7 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
preamble, no labels, no quotes.";
let prompt = format!(
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
source.name, source.entity_type, source_desc,
target.name, target.entity_type, target_desc,
source.name, source.entity_type, source_desc, target.name, target.entity_type, target_desc,
);
let ollama = app_state.ollama.clone();
@@ -843,16 +834,12 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
s = stripped.trim_start().to_string();
}
// Wrapping quotes
s = s
.trim_matches(|c| c == '"' || c == '\'')
.to_string();
s = s.trim_matches(|c| c == '"' || c == '\'').to_string();
// Inline emphasis: drop the doubled `**` / `__` markers
// without trying to parse markdown — just remove the
// punctuation. Single `*` / `_` characters are left as-is by
// this replace. Rare enough that this naive replace is fine.
s = s
.replace("**", "")
.replace("__", "");
s = s.replace("**", "").replace("__", "");
s
}
Err(e) => {
@@ -965,7 +952,10 @@ async fn create_fact<D: KnowledgeDao + 'static>(
// pin a specific persona for writes via X-Persona-Id.
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
let (user_id, persona_id) = match &persona {
PersonaFilter::Single { user_id, persona_id } => (*user_id, persona_id.clone()),
PersonaFilter::Single {
user_id,
persona_id,
} => (*user_id, persona_id.clone()),
PersonaFilter::All { user_id } => (*user_id, "default".to_string()),
};
@@ -1113,8 +1103,9 @@ async fn supersede_fact<D: KnowledgeDao + 'static>(
// the PATCH path.
match dao.supersede_fact(&cx, old_id, body.by_fact_id, Some(("manual", "manual"))) {
Ok(Some(fact)) => HttpResponse::Ok().json(fact),
Ok(None) => HttpResponse::NotFound()
.json(serde_json::json!({"error": "Old or new fact not found"})),
Ok(None) => {
HttpResponse::NotFound().json(serde_json::json!({"error": "Old or new fact not found"}))
}
Err(e) => {
log::error!("supersede_fact error: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
@@ -1132,8 +1123,7 @@ async fn restore_fact<D: KnowledgeDao + 'static>(
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.revert_supersession(&cx, fact_id, Some(("manual", "manual"))) {
Ok(Some(fact)) => HttpResponse::Ok().json(fact),
Ok(None) => HttpResponse::NotFound()
.json(serde_json::json!({"error": "Fact not found"})),
Ok(None) => HttpResponse::NotFound().json(serde_json::json!({"error": "Fact not found"})),
Err(e) => {
log::error!("restore_fact error: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))

View File

@@ -241,8 +241,7 @@ async fn update_persona(
// identity. Mirrors the same guard delete_persona enforces below.
match dao.get_persona(&cx, uid, &pid) {
Ok(Some(p)) if p.is_built_in => {
let editing_identity =
body.name.is_some() || body.system_prompt.is_some();
let editing_identity = body.name.is_some() || body.system_prompt.is_some();
if editing_identity {
return HttpResponse::Conflict().json(serde_json::json!({
"error": "Cannot edit name or systemPrompt of a built-in persona"

View File

@@ -207,9 +207,9 @@ impl Default for AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(
Mutex::new(Box::new(crate::database::SqlitePersonaDao::new())),
);
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(Mutex::new(
Box::new(crate::database::SqlitePersonaDao::new()),
));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
@@ -356,9 +356,9 @@ impl AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(
Mutex::new(Box::new(crate::database::SqlitePersonaDao::new())),
);
let persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>> = Arc::new(Mutex::new(
Box::new(crate::database::SqlitePersonaDao::new()),
));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));