insights: push sms search filters server-side, render snippets, expand fts5 docs
- Refactor search_messages_with_contact -> search_messages(query, &SmsSearchParams) exposing date_from / date_to / offset / is_mms / has_media; drop the over-fetch + client-side date post-filter that could silently drop in-window hits past position 100.
- Surface SMS-API's <mark>-wrapped snippet for MMS messages that only matched via message_parts_fts (attachment text / filename) - pre-snippet, those rendered as a blank body preview to the LLM.
- Expose is_mms / has_media on the search_messages tool schema; expand the FTS5 syntax docs with worked examples for phrase / prefix / boolean / NEAR / grouping so the model picks the right operator.
- Unit tests for format_search_hits (body fallback, snippet preferred, MMS attachment-only regression, empty-snippet fallback) and strip_mark_tags.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+49
-10
@@ -257,30 +257,45 @@ impl SmsApiClient {
|
||||
}
|
||||
|
||||
/// Search message bodies via the Django side's FTS5 / semantic / hybrid
|
||||
/// endpoint. `mode` selects the ranking strategy:
|
||||
/// endpoint. `params.mode` selects the ranking strategy:
|
||||
/// - "fts5" keyword-only, supports phrase / prefix / boolean / NEAR
|
||||
/// - "semantic" embedding similarity
|
||||
/// - "hybrid" both merged via reciprocal rank fusion (recommended)
|
||||
///
|
||||
/// The SMS-API endpoint accepts `contact_id` natively; date filtering is
|
||||
/// the caller's responsibility (post-filter on the returned rows).
|
||||
pub async fn search_messages_with_contact(
|
||||
/// All of `contact_id`, `date_from` / `date_to` (unix seconds), `is_mms`,
|
||||
/// `has_media`, and `offset` are pushed to SMS-API server-side so the
|
||||
/// filtered+paginated result set is exact rather than a client-side
|
||||
/// over-fetch.
|
||||
pub async fn search_messages(
|
||||
&self,
|
||||
query: &str,
|
||||
mode: &str,
|
||||
limit: usize,
|
||||
contact_id: Option<i64>,
|
||||
params: &SmsSearchParams<'_>,
|
||||
) -> Result<Vec<SmsSearchHit>> {
|
||||
let mut url = format!(
|
||||
"{}/api/messages/search/?q={}&mode={}&limit={}",
|
||||
self.base_url,
|
||||
urlencoding::encode(query),
|
||||
urlencoding::encode(mode),
|
||||
limit
|
||||
urlencoding::encode(params.mode),
|
||||
params.limit,
|
||||
);
|
||||
if let Some(cid) = contact_id {
|
||||
if let Some(cid) = params.contact_id {
|
||||
url.push_str(&format!("&contact_id={}", cid));
|
||||
}
|
||||
if let Some(off) = params.offset {
|
||||
url.push_str(&format!("&offset={}", off));
|
||||
}
|
||||
if let Some(from) = params.date_from {
|
||||
url.push_str(&format!("&date_from={}", from));
|
||||
}
|
||||
if let Some(to) = params.date_to {
|
||||
url.push_str(&format!("&date_to={}", to));
|
||||
}
|
||||
if let Some(is_mms) = params.is_mms {
|
||||
url.push_str(&format!("&is_mms={}", is_mms));
|
||||
}
|
||||
if let Some(has_media) = params.has_media {
|
||||
url.push_str(&format!("&has_media={}", has_media));
|
||||
}
|
||||
|
||||
let mut request = self.client.get(&url);
|
||||
if let Some(token) = &self.token {
|
||||
@@ -383,6 +398,30 @@ pub struct SmsSearchHit {
|
||||
/// Present for semantic / hybrid modes; absent for fts5.
|
||||
#[serde(default)]
|
||||
pub similarity_score: Option<f32>,
|
||||
/// SMS-API-generated excerpt around the match, wrapped in `<mark>` tags.
|
||||
/// For MMS messages that only matched via attachment text / filename
|
||||
/// (empty `body`), the snippet is the only meaningful preview.
|
||||
#[serde(default)]
|
||||
pub snippet: Option<String>,
|
||||
}
|
||||
|
||||
/// Optional filter / paging knobs for [`SmsApiClient::search_messages`].
|
||||
/// All fields except `mode` and `limit` map 1:1 to the same-named SMS-API
|
||||
/// query params (added in the 2026-05 search-enhancements release).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SmsSearchParams<'a> {
|
||||
/// Ranking strategy, URL-encoded into the `mode` query param:
/// "fts5", "semantic", or "hybrid".
pub mode: &'a str,
|
||||
/// Always sent as the `limit` query param (presumably the maximum number
/// of hits returned — confirm against the SMS-API docs).
pub limit: usize,
|
||||
/// When `Some`, appended as the `contact_id` query param; omitted when `None`.
pub contact_id: Option<i64>,
|
||||
/// Unix-seconds inclusive lower bound on `date`.
|
||||
pub date_from: Option<i64>,
|
||||
/// Unix-seconds inclusive upper bound on `date`.
|
||||
pub date_to: Option<i64>,
|
||||
/// `Some(true)` = MMS only, `Some(false)` = SMS only, `None` = both.
|
||||
pub is_mms: Option<bool>,
|
||||
/// `Some(true)` = only messages with image/video/audio attachments.
|
||||
pub has_media: Option<bool>,
|
||||
/// When `Some`, appended as the `offset` query param for paging; omitted
/// when `None`.
pub offset: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
||||
Reference in New Issue
Block a user