fix: reduce duplicate entities from weak model inconsistency
Adds normalize_entity_type() which lowercases and canonicalises synonyms (location→place, human→person, etc.) before every upsert. The SQL lookup now uses lower(entity_type) on both sides so existing dirty rows (Person, Location) correctly deduplicate against normalised writes without a migration. Adds a pre-flight similarity check in tool_store_entity: before upserting, searches active entities of the same type using the first name token. Any non-exact matches are appended to the tool response so the agentic loop can choose to reuse an existing entity ID rather than create a duplicate. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,25 @@ use crate::database::schema;
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Entity type normalisation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Canonicalise a model-supplied `entity_type` to a consistent lowercase form.
///
/// Weak models frequently vary capitalisation ("Person" vs "person"), pad the
/// value with stray whitespace (" person"), or use synonym types ("location"
/// vs "place"). Normalising here prevents duplicate entities that differ only
/// by type spelling.
///
/// The input is trimmed and lowercased, then known synonyms are mapped onto
/// one of the canonical types `person`, `place`, `event` or `thing`. Any other
/// type is returned trimmed and lowercased, otherwise unchanged.
pub(crate) fn normalize_entity_type(raw: &str) -> String {
    // Trim before matching: a padded value such as " Person" would otherwise
    // miss every synonym arm and be stored as a distinct type.
    let lowered = raw.trim().to_lowercase();
    let canonical = match lowered.as_str() {
        "person" | "people" | "human" | "individual" | "contact" => "person",
        "place" | "location" | "venue" | "site" | "area" | "landmark" => "place",
        "event" | "occasion" | "activity" | "celebration" => "event",
        "thing" | "object" | "item" | "product" => "thing",
        // Unknown types are kept, only cleaned up.
        other => other,
    };
    canonical.to_owned()
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Filter / patch types
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -250,13 +269,22 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
||||
|
||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||
|
||||
// Case-insensitive lookup by name + entity_type
|
||||
// Normalise type before lookup and insert so that model variations
|
||||
// ("Person" / "person", "location" / "place") collapse to one row.
|
||||
let entity = InsertEntity {
|
||||
entity_type: normalize_entity_type(&entity.entity_type),
|
||||
..entity
|
||||
};
|
||||
|
||||
// Case-insensitive lookup by name + entity_type.
|
||||
// Use lower() on both sides so existing dirty rows ("Person") still match.
|
||||
let name_lower = entity.name.to_lowercase();
|
||||
let type_lower = entity.entity_type.to_lowercase();
|
||||
let existing: Option<Entity> = entities
|
||||
.filter(diesel::dsl::sql::<diesel::sql_types::Bool>(&format!(
|
||||
"lower(name) = '{}' AND entity_type = '{}'",
|
||||
"lower(name) = '{}' AND lower(entity_type) = '{}'",
|
||||
name_lower.replace('\'', "''"),
|
||||
entity.entity_type.replace('\'', "''")
|
||||
type_lower.replace('\'', "''")
|
||||
)))
|
||||
.first::<Entity>(conn.deref_mut())
|
||||
.optional()
|
||||
|
||||
Reference in New Issue
Block a user