diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 4f15ef4..a6a50f1 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -28,6 +28,11 @@ use crate::otel::global_tracer;
 use crate::tags::TagDao;
 use crate::utils::{earliest_fs_time, normalize_path};
 
+/// Max location records rendered by `tool_get_location_history`. The DAO
+/// query is range-bounded, not limited, so the tool caps the rendered list
+/// and labels the truncation via `found_header`.
+const LOCATION_HISTORY_DISPLAY_LIMIT: usize = 20;
+
 /// Parse a "Title: ...\n\n<body>" response into (title, body).
 /// Falls back to the first sentence as the title if the model didn't
 /// follow the format.
@@ -218,15 +223,11 @@ impl InsightGenerator {
     /// be called once per chat turn / generation. `has_vision` is
     /// supplied by the caller because it depends on the model selected
     /// for this turn, not on persistent state.
-    pub fn current_gate_opts(&self, has_vision: bool) -> ToolGateOpts {
-        self.current_gate_opts_for_persona(has_vision, None)
-    }
-
-    /// Same as `current_gate_opts` but resolves the per-persona
-    /// `allow_agent_corrections` flag too. Pass `Some((user_id,
-    /// persona_id))` when generating in a persona context (every chat
-    /// turn does); pass `None` for callers that don't have one yet
-    /// (cold paths, populate_knowledge bin), which defaults the gate
+    ///
+    /// Also resolves the per-persona `allow_agent_corrections` flag.
+    /// Pass `Some((user_id, persona_id))` when generating in a persona
+    /// context (every chat turn and agentic generation does); pass
+    /// `None` for callers that don't have one, which defaults the gate
     /// to closed — the conservative posture.
     pub fn current_gate_opts_for_persona(
         &self,
@@ -277,6 +278,22 @@ impl InsightGenerator {
         }
     }
 
+    /// Resolve the stored system prompt for `(user_id, persona_id)` from
+    /// the persona store. Returns `None` when the persona row doesn't
+    /// exist or its prompt is blank — callers fall back to their own
+    /// default. Used for server-side persona resolution: a request that
+    /// carries `persona_id` but no explicit `system_prompt` should speak
+    /// in the persona's stored voice, not the neutral default.
+    pub(crate) fn persona_system_prompt(&self, user_id: i32, persona_id: &str) -> Option<String> {
+        let cx = opentelemetry::Context::new();
+        let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao");
+        pdao.get_persona(&cx, user_id, persona_id)
+            .ok()
+            .flatten()
+            .map(|p| p.system_prompt)
+            .filter(|s| !s.trim().is_empty())
+    }
+
     /// Resolve `rel_path` against the configured libraries, returning the
     /// first root under which the file exists. Insights may be generated
     /// for any library — the generator itself doesn't know which — so we
@@ -1673,6 +1690,10 @@ Return ONLY the summary, nothing else."#,
                 self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx)
                     .await
             }
+            "recall_facts_for_entity" => {
+                self.tool_recall_facts_for_entity(arguments, user_id, persona_id, cx)
+                    .await
+            }
             "store_entity" => self.tool_store_entity(arguments, cx).await,
             "store_fact" => {
                 self.tool_store_fact(
@@ -2145,8 +2166,8 @@ Return ONLY the summary, nothing else."#,
                     })
                     .collect();
                 format!(
-                    "Found {} messages:\n{}",
-                    messages.len(),
+                    "{}\n{}",
+                    Self::found_header(messages.len(), formatted.len(), "messages"),
                     formatted.join("\n")
                 )
             }
@@ -2171,7 +2192,8 @@ Return ONLY the summary, nothing else."#,
         let days_radius = args
             .get("days_radius")
             .and_then(|v| v.as_i64())
-            .unwrap_or(7);
+            .unwrap_or(7)
+            .clamp(1, 30);
         let limit = args
             .get("limit")
             .and_then(|v| v.as_i64())
@@ -2250,7 +2272,8 @@ Return ONLY the summary, nothing else."#,
         let days_radius = args
             .get("days_radius")
             .and_then(|v| v.as_i64())
-            .unwrap_or(14);
+            .unwrap_or(14)
+            .clamp(1, 30);
 
         let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
             Ok(d) => d,
@@ -2279,7 +2302,7 @@ Return ONLY the summary, nothing else."#,
             Some(locs) if !locs.is_empty() => {
                 let formatted: Vec<String> = locs
                     .iter()
-                    .take(20)
+                    .take(LOCATION_HISTORY_DISPLAY_LIMIT)
                     .map(|loc| {
                         let dt = DateTime::from_timestamp(loc.timestamp, 0)
                             .map(|dt| {
@@ -2305,8 +2328,8 @@ Return ONLY the summary, nothing else."#,
                     })
                     .collect();
                 format!(
-                    "Found {} location records:\n{}",
-                    locs.len(),
+                    "{}\n{}",
+                    Self::found_header(locs.len(), formatted.len(), "location records"),
                     formatted.join("\n")
                 )
             }
@@ -2315,6 +2338,20 @@ Return ONLY the summary, nothing else."#,
         }
     }
 
+    /// Render a `Found N <noun>:` tool-result header, annotating when only
+    /// the first `shown` of `total` items are listed below it. Without the
+    /// annotation the model believes it saw everything ("Found 312
+    /// messages:" followed by 60 lines) and reasons from a silently
+    /// truncated list. `summarize_tool_result` keeps parsing the leading
+    /// count either way.
+    fn found_header(total: usize, shown: usize, noun: &str) -> String {
+        if shown < total {
+            format!("Found {} {} (showing first {}):", total, noun, shown)
+        } else {
+            format!("Found {} {}:", total, noun)
+        }
+    }
+
     /// Tool: get_file_tags — fetch tags for a file path
     async fn tool_get_file_tags(
         &self,
@@ -2669,6 +2706,102 @@ Return ONLY the summary, nothing else."#,
         }
     }
 
+    /// Tool: recall_facts_for_entity — retrieve facts for one entity by id.
+    /// Persona scoping and the reviewed-only-facts filter mirror
+    /// `tool_recall_facts_for_photo` exactly: reads are always Single
+    /// (user_id, persona_id), and strict-mode personas see only
+    /// human-reviewed facts.
+    async fn tool_recall_facts_for_entity(
+        &self,
+        args: &serde_json::Value,
+        user_id: i32,
+        persona_id: &str,
+        cx: &opentelemetry::Context,
+    ) -> String {
+        use crate::database::PersonaFilter;
+        let persona_filter = PersonaFilter::Single {
+            user_id,
+            persona_id: persona_id.to_string(),
+        };
+        let entity_id = match args.get("entity_id").and_then(|v| v.as_i64()) {
+            Some(id) => id as i32,
+            None => return "Error: missing required parameter 'entity_id'".to_string(),
+        };
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_i64())
+            .unwrap_or(50)
+            .clamp(1, 100) as usize;
+
+        log::info!(
+            "tool_recall_facts_for_entity: entity_id={}, limit={}",
+            entity_id,
+            limit
+        );
+
+        // Resolve the persona's reviewed-only-mode flag once — identical
+        // fallback semantics to recall_facts_for_photo (missing persona
+        // row → permissive active+reviewed default).
+        let reviewed_only = {
+            let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao");
+            pdao.get_persona(cx, user_id, persona_id)
+                .ok()
+                .flatten()
+                .map(|p| p.reviewed_only_facts)
+                .unwrap_or(false)
+        };
+
+        let mut kdao = self
+            .knowledge_dao
+            .lock()
+            .expect("Unable to lock KnowledgeDao");
+
+        let entity = match kdao.get_entity_by_id(cx, entity_id) {
+            Ok(Some(e)) => e,
+            Ok(None) => return format!("No entity found with ID {}.", entity_id),
+            Err(e) => return format!("Error fetching entity: {:?}", e),
+        };
+
+        let mut output_lines = vec![format!("Entity: {} ({})", entity.name, entity.entity_type)];
+        match kdao.get_facts_for_entity(cx, entity_id, &persona_filter) {
+            Ok(facts) => {
+                // Default scope: active + reviewed. Strict mode trims to
+                // reviewed only — same allow rule as recall_facts_for_photo.
+                let allow = |s: &str| -> bool {
+                    if reviewed_only {
+                        s == "reviewed"
+                    } else {
+                        s == "active" || s == "reviewed"
+                    }
+                };
+                for f in facts.iter().filter(|f| allow(&f.status)).take(limit) {
+                    let obj = if let Some(ref v) = f.object_value {
+                        v.clone()
+                    } else if let Some(oid) = f.object_entity_id {
+                        kdao.get_entity_by_id(cx, oid)
+                            .ok()
+                            .flatten()
+                            .map(|e| format!("{} (entity ID: {})", e.name, e.id))
+                            .unwrap_or_else(|| format!("entity:{}", oid))
+                    } else {
+                        "(unknown)".to_string()
+                    };
+                    output_lines.push(format!("  - {} {}", f.predicate, obj));
+                }
+            }
+            Err(e) => return format!("Error fetching facts: {:?}", e),
+        }
+
+        if output_lines.len() == 1 {
+            format!(
+                "No active knowledge facts found for entity {} (ID: {}).",
+                entity.name, entity_id
+            )
+        } else {
+            format!("Knowledge for this entity:\n{}", output_lines.join("\n"))
+        }
+    }
+
     /// Tool: store_entity — upsert an entity into the knowledge memory.
     /// Embeddings go through the configured local backend (`LLM_BACKEND`),
     /// independent of the per-request chat backend in the caller.
@@ -3063,7 +3196,7 @@ Return ONLY the summary, nothing else."#,
     /// Build the list of tool definitions for the agentic loop, gated by
     /// `opts`. Always-on tools: `search_messages`, `get_sms_messages`,
     /// `get_file_tags`, `reverse_geocode`, `get_current_datetime`, the
-    /// four knowledge-memory tools. Conditional: `describe_photo` (vision
+    /// five knowledge-memory tools. Conditional: `describe_photo` (vision
     /// model), `get_personal_place_at` (Apollo configured), `search_rag`
     /// (daily_summaries populated), `get_calendar_events` (calendar
     /// populated), `get_location_history` (location history populated).
@@ -3280,6 +3413,22 @@ Return ONLY the summary, nothing else."#,
             }),
         ));
 
+        tools.push(Tool::function(
+            "recall_facts_for_entity",
+            "Retrieve all stored facts about one specific entity by its ID. Use to follow up on an entity \
+             surfaced by `recall_entities` (or referenced by another fact's object) that is NOT linked to the \
+             current photo — `recall_facts_for_photo` only covers photo-linked entities. \
+             Example: `{entity_id: 7}` — everything known about entity 7.",
+            serde_json::json!({
+                "type": "object",
+                "required": ["entity_id"],
+                "properties": {
+                    "entity_id": { "type": "integer", "description": "ID of the entity to fetch facts for (from recall_entities, store_entity, or a fact's object reference)." },
+                    "limit":     { "type": "integer", "description": "Max facts to return (default 50, max 100)." }
+                }
+            }),
+        ));
+
         tools.push(Tool::function(
             "store_entity",
             "Upsert a person / place / event / thing into the knowledge memory. Returns the entity id (use it as \
@@ -3543,7 +3692,7 @@ Return ONLY the summary, nothing else."#,
                     format!("{} personal place(s)", n)
                 }
             }
-            "recall_entities" | "recall_facts_for_photo" => {
+            "recall_entities" | "recall_facts_for_photo" | "recall_facts_for_entity" => {
                 let n = raw.lines().skip(1).filter(|l| !l.trim().is_empty()).count();
                 let kind = if tool_name == "recall_entities" {
                     "entities"
@@ -4033,8 +4182,11 @@ Return ONLY the summary, nothing else."#,
 
         // 10. Define tools. describe_photo offered only when the chat model
         //     sees images directly (images_inline); in hybrid mode the visual
-        //     description is already inlined as text.
-        let gate_opts = self.current_gate_opts(backend.images_inline);
+        //     description is already inlined as text. Persona-aware so the
+        //     persona's allow_agent_corrections gate opens here exactly like
+        //     it does for chat turns (insight_chat does the same).
+        let gate_opts =
+            self.current_gate_opts_for_persona(backend.images_inline, Some((user_id, &persona_id)));
         let tools = Self::build_tool_definitions(gate_opts);
 
         // 11. Build initial messages. images_inline → attach base64 to the
@@ -4145,7 +4297,10 @@ Return ONLY the summary, nothing else."#,
             .set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
         loop_cx.span().set_status(Status::Ok);
 
-        // 13. Parse title from the model's inline response.
+        // 13. Strip any leaked <think>…</think> reasoning block (thinking
+        // models emit it ahead of the answer; the raw transcript in
+        // training_messages keeps it), then parse the title.
+        final_content = crate::ai::llm_client::strip_think_blocks(&final_content);
         let (title, body) = parse_title_body(&final_content);
         final_content = body;
 
@@ -4341,6 +4496,7 @@ mod tests {
         assert!(names.contains(&"get_current_datetime"));
         assert!(names.contains(&"recall_entities"));
         assert!(names.contains(&"recall_facts_for_photo"));
+        assert!(names.contains(&"recall_facts_for_entity"));
         assert!(names.contains(&"store_entity"));
         assert!(names.contains(&"store_fact"));
 
@@ -4550,6 +4706,37 @@ mod tests {
         );
     }
 
+    #[test]
+    fn found_header_labels_truncation_honestly() {
+        // Truncated: total exceeds what's listed below the header.
+        assert_eq!(
+            InsightGenerator::found_header(312, 60, "messages"),
+            "Found 312 messages (showing first 60):"
+        );
+        // Not truncated: plain header, no annotation noise.
+        assert_eq!(
+            InsightGenerator::found_header(7, 7, "messages"),
+            "Found 7 messages:"
+        );
+        assert_eq!(
+            InsightGenerator::found_header(40, 20, "location records"),
+            "Found 40 location records (showing first 20):"
+        );
+    }
+
+    #[test]
+    fn summarize_parses_truncated_found_header() {
+        // The "(showing first K)" annotation must not break the few-shot
+        // trajectory summarizer's "Found N" count parsing.
+        assert_eq!(
+            InsightGenerator::summarize_tool_result(
+                "get_sms_messages",
+                "Found 312 messages (showing first 60):\n[2023-08-15 10:00] Sarah: hi"
+            ),
+            "312 messages"
+        );
+    }
+
     fn make_search_hit(
         id: i64,
         contact: &str,
@@ -4869,11 +5056,9 @@ mod tests {
         // Replicate the resolve_backend local-client construction
         // (lines ~3686-3695 of this file).
         let mut lc = base.clone();
-        if let Some(ref m) = overrides_model {
-            if !is_hybrid {
-                lc.primary_model = m.clone();
-                lc.set_vision_model(m.clone());
-            }
+        if !is_hybrid && let Some(ref m) = overrides_model {
+            lc.primary_model = m.clone();
+            lc.set_vision_model(m.clone());
         }
 
         // In hybrid mode the local client must keep its configured slots.