003-knowledge-memory #55
@@ -1845,6 +1845,41 @@ Return ONLY the summary, nothing else."#,
|
|||||||
description
|
description
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Pre-flight similarity check — surface near-duplicates to the model
|
||||||
|
// before it commits to a new entity. Uses the first name token as the
|
||||||
|
// search term so "Sarah" matches when storing "Sarah Johnson" and vice
|
||||||
|
// versa. Exact-name matches are excluded (upsert_entity deduplicates
|
||||||
|
// those already). Results are appended to the tool response so the
|
||||||
|
// model can choose to use an existing entity's ID instead.
|
||||||
|
let similar_entities: Vec<String> = {
|
||||||
|
use crate::database::{EntityFilter, KnowledgeDao};
|
||||||
|
use crate::database::knowledge_dao::normalize_entity_type;
|
||||||
|
let normalised_type = normalize_entity_type(&entity_type);
|
||||||
|
let first_token = name
|
||||||
|
.split_whitespace()
|
||||||
|
.next()
|
||||||
|
.unwrap_or(&name)
|
||||||
|
.to_string();
|
||||||
|
let filter = EntityFilter {
|
||||||
|
entity_type: None, // search all types, filter client-side to avoid case issues
|
||||||
|
status: Some("active".to_string()),
|
||||||
|
search: Some(first_token),
|
||||||
|
limit: 10,
|
||||||
|
offset: 0,
|
||||||
|
};
|
||||||
|
let mut kdao = self.knowledge_dao.lock().expect("Unable to lock KnowledgeDao");
|
||||||
|
kdao.list_entities(cx, filter)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.0
|
||||||
|
.into_iter()
|
||||||
|
.filter(|e| {
|
||||||
|
normalize_entity_type(&e.entity_type) == normalised_type
|
||||||
|
&& e.name.to_lowercase() != name.to_lowercase()
|
||||||
|
})
|
||||||
|
.map(|e| format!(" ID:{} | {} | {}", e.id, e.name, e.description))
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
// Generate embedding for name + description (best-effort)
|
// Generate embedding for name + description (best-effort)
|
||||||
let embed_text = format!("{} {}", name, description);
|
let embed_text = format!("{} {}", name, description);
|
||||||
let embedding: Option<Vec<u8>> = match ollama.generate_embedding(&embed_text).await {
|
let embedding: Option<Vec<u8>> = match ollama.generate_embedding(&embed_text).await {
|
||||||
@@ -1875,10 +1910,22 @@ Return ONLY the summary, nothing else."#,
|
|||||||
.lock()
|
.lock()
|
||||||
.expect("Unable to lock KnowledgeDao");
|
.expect("Unable to lock KnowledgeDao");
|
||||||
match kdao.upsert_entity(cx, insert) {
|
match kdao.upsert_entity(cx, insert) {
|
||||||
Ok(entity) => format!(
|
Ok(entity) => {
|
||||||
|
let mut response = format!(
|
||||||
"Entity stored: ID:{} | {} | {} | confidence:{:.2}",
|
"Entity stored: ID:{} | {} | {} | confidence:{:.2}",
|
||||||
entity.id, entity.entity_type, entity.name, entity.confidence
|
entity.id, entity.entity_type, entity.name, entity.confidence
|
||||||
),
|
);
|
||||||
|
if !similar_entities.is_empty() {
|
||||||
|
response.push_str(
|
||||||
|
"\nSimilar existing entities found — verify this is not a duplicate:\n",
|
||||||
|
);
|
||||||
|
response.push_str(&similar_entities.join("\n"));
|
||||||
|
response.push_str(
|
||||||
|
"\nIf one of these is the same entity, use their existing ID in store_fact instead of the newly created one.",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
response
|
||||||
|
}
|
||||||
Err(e) => format!("Error storing entity: {:?}", e),
|
Err(e) => format!("Error storing entity: {:?}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,6 +10,25 @@ use crate::database::schema;
|
|||||||
use crate::database::{DbError, DbErrorKind, connect};
|
use crate::database::{DbError, DbErrorKind, connect};
|
||||||
use crate::otel::trace_db_call;
|
use crate::otel::trace_db_call;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Entity type normalisation
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Canonicalise a model-supplied `entity_type` to a consistent lowercase form.
///
/// Weak models frequently vary capitalisation ("Person" vs "person") or use
/// synonym types ("location" vs "place"), and may pad the value with stray
/// whitespace. Normalising here prevents duplicate entities that differ only
/// by type spelling.
///
/// The input is trimmed and lowercased, then known synonyms are collapsed
/// onto one of the canonical types `person`, `place`, `event` or `thing`.
/// Any other value is returned trimmed and lowercased.
pub(crate) fn normalize_entity_type(raw: &str) -> String {
    // Trim before lowercasing so that " Person " and "person" collapse to the
    // same key instead of falling through to the catch-all arm.
    let cleaned = raw.trim().to_lowercase();
    let canonical = match cleaned.as_str() {
        "person" | "people" | "human" | "individual" | "contact" => "person",
        "place" | "location" | "venue" | "site" | "area" | "landmark" => "place",
        "event" | "occasion" | "activity" | "celebration" => "event",
        "thing" | "object" | "item" | "product" => "thing",
        // Unknown types pass through, apart from the trim + lowercase above.
        other => other,
    };
    canonical.to_string()
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Filter / patch types
|
// Filter / patch types
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -250,13 +269,22 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
|||||||
|
|
||||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||||
|
|
||||||
// Case-insensitive lookup by name + entity_type
|
// Normalise type before lookup and insert so that model variations
|
||||||
|
// ("Person" / "person", "location" / "place") collapse to one row.
|
||||||
|
let entity = InsertEntity {
|
||||||
|
entity_type: normalize_entity_type(&entity.entity_type),
|
||||||
|
..entity
|
||||||
|
};
|
||||||
|
|
||||||
|
// Case-insensitive lookup by name + entity_type.
|
||||||
|
// Use lower() on both sides so existing dirty rows ("Person") still match.
|
||||||
let name_lower = entity.name.to_lowercase();
|
let name_lower = entity.name.to_lowercase();
|
||||||
|
let type_lower = entity.entity_type.to_lowercase();
|
||||||
let existing: Option<Entity> = entities
|
let existing: Option<Entity> = entities
|
||||||
.filter(diesel::dsl::sql::<diesel::sql_types::Bool>(&format!(
|
.filter(diesel::dsl::sql::<diesel::sql_types::Bool>(&format!(
|
||||||
"lower(name) = '{}' AND entity_type = '{}'",
|
"lower(name) = '{}' AND lower(entity_type) = '{}'",
|
||||||
name_lower.replace('\'', "''"),
|
name_lower.replace('\'', "''"),
|
||||||
entity.entity_type.replace('\'', "''")
|
type_lower.replace('\'', "''")
|
||||||
)))
|
)))
|
||||||
.first::<Entity>(conn.deref_mut())
|
.first::<Entity>(conn.deref_mut())
|
||||||
.optional()
|
.optional()
|
||||||
|
|||||||
Reference in New Issue
Block a user