feat(ai): search_messages tool + RAG reranker
Adds a search_messages tool that hits the Django FTS5/semantic/hybrid endpoint for keyword-quality text search over message bodies, and an LLM-based reranker inside tool_search_rag (gated by SEARCH_RAG_RERANK, default on). Reranker pulls ~3x candidates from the vector index, asks the chat model to rank by relevance, and falls back to vector order on parse failure. The reranker shares the active chat turn's OllamaClient so num_ctx and sampling match — otherwise Ollama unloads/reloads the model on every rerank call. (Unverified end-to-end; caught by inspection, awaiting e2e confirmation.) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -250,6 +250,45 @@ impl SmsApiClient {
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Search message bodies via the Django side's FTS5 / semantic / hybrid
|
||||
/// endpoint. `mode` selects the ranking strategy:
|
||||
/// - "fts5" keyword-only, supports phrase / prefix / boolean / NEAR
|
||||
/// - "semantic" embedding similarity
|
||||
/// - "hybrid" both merged via reciprocal rank fusion (recommended)
|
||||
pub async fn search_messages(
|
||||
&self,
|
||||
query: &str,
|
||||
mode: &str,
|
||||
limit: usize,
|
||||
) -> Result<Vec<SmsSearchHit>> {
|
||||
let url = format!(
|
||||
"{}/api/messages/search/?q={}&mode={}&limit={}",
|
||||
self.base_url,
|
||||
urlencoding::encode(query),
|
||||
urlencoding::encode(mode),
|
||||
limit
|
||||
);
|
||||
|
||||
let mut request = self.client.get(&url);
|
||||
if let Some(token) = &self.token {
|
||||
request = request.header("Authorization", format!("Bearer {}", token));
|
||||
}
|
||||
|
||||
let response = request.send().await?;
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
return Err(anyhow::anyhow!(
|
||||
"SMS search request failed: {} - {}",
|
||||
status,
|
||||
body
|
||||
));
|
||||
}
|
||||
|
||||
let data: SmsSearchResponse = response.json().await?;
|
||||
Ok(data.results)
|
||||
}
|
||||
|
||||
pub async fn summarize_context(
|
||||
&self,
|
||||
messages: &[SmsMessage],
|
||||
@@ -314,3 +353,28 @@ struct SmsApiMessage {
|
||||
#[serde(rename = "type")]
|
||||
type_: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct SmsSearchHit {
|
||||
#[allow(dead_code)]
|
||||
pub message_id: i64,
|
||||
pub contact_name: String,
|
||||
#[allow(dead_code)]
|
||||
pub contact_address: String,
|
||||
pub body: String,
|
||||
pub date: i64,
|
||||
/// Message direction code: 1 = received, 2 = sent.
|
||||
#[serde(rename = "type")]
|
||||
pub type_: i32,
|
||||
/// Present for semantic / hybrid modes; absent for fts5.
|
||||
#[serde(default)]
|
||||
pub similarity_score: Option<f32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SmsSearchResponse {
|
||||
results: Vec<SmsSearchHit>,
|
||||
#[allow(dead_code)]
|
||||
#[serde(default)]
|
||||
search_method: String,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user