Merge pull request 'feature/insights' (#46) from feature/insights into master

Reviewed-on: #46
This commit was merged in pull request #46.
2026-01-15 01:07:57 +00:00
49 changed files with 8249 additions and 174 deletions

1
.gitignore vendored

@@ -12,3 +12,4 @@ database/target
.idea/dataSources.local.xml
# Editor-based HTTP Client requests
.idea/httpRequests/
/.claude/settings.local.json


@@ -250,8 +250,31 @@ Optional:
WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval
WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval
OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds)
# AI Insights Configuration
OLLAMA_PRIMARY_URL=http://desktop:11434 # Primary Ollama server (e.g., desktop)
OLLAMA_FALLBACK_URL=http://server:11434 # Fallback Ollama server (optional, always-on)
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b # Model for primary server (default: nemotron-3-nano:30b)
OLLAMA_FALLBACK_MODEL=llama3.2:3b # Model for fallback server (optional, uses primary if not set)
SMS_API_URL=http://localhost:8000 # SMS message API endpoint (default: localhost:8000)
SMS_API_TOKEN=your-api-token # SMS API authentication token (optional)
```
**AI Insights Fallback Behavior:**
- The primary server is tried first with its configured model (5-second connection timeout); see the sketch below
- On connection failure, the client automatically falls back to the secondary server and its model (if configured)
- If `OLLAMA_FALLBACK_MODEL` is not set, the fallback server uses the same model as the primary
- The total request timeout is 120 seconds to accommodate slow LLM inference
- Logs indicate which server and model were used (info level) and any failover attempts (warn level)
- Backwards compatible: `OLLAMA_URL` and `OLLAMA_MODEL` are still supported as fallbacks
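A minimal sketch of that failover behavior from the caller's point of view (the URLs and model names are the example values above, not requirements):
```rust
use crate::ai::OllamaClient;

async fn example_generate() -> anyhow::Result<()> {
    // Mirrors the example configuration above; the fallback pair is optional.
    let ollama = OllamaClient::new(
        "http://desktop:11434".to_string(),      // OLLAMA_PRIMARY_URL
        Some("http://server:11434".to_string()), // OLLAMA_FALLBACK_URL
        "nemotron-3-nano:30b".to_string(),       // OLLAMA_PRIMARY_MODEL
        Some("llama3.2:3b".to_string()),         // OLLAMA_FALLBACK_MODEL
    );

    // generate() tries the primary server first (5-second connect timeout) and,
    // on a connection failure, retries against the fallback server and model.
    let text = ollama.generate("Say hello.", None).await?;
    log::info!("Ollama replied: {}", text);
    Ok(())
}
```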
**Model Discovery:**
The `OllamaClient` provides methods to query available models:
- `OllamaClient::list_models(url)` - Returns a list of all models available on a server
- `OllamaClient::is_model_available(url, model_name)` - Checks whether a specific model exists on that server
This allows runtime verification of model availability before generating insights.
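For example, a caller could verify that a model exists on a server before requesting an insight (a minimal sketch; both calls are `async` and return `anyhow::Result`):
```rust
use crate::ai::OllamaClient;

async fn verify_model(url: &str, model: &str) -> anyhow::Result<()> {
    // Backed by GET /api/tags on the Ollama server; results are cached for 15 minutes.
    let models = OllamaClient::list_models(url).await?;
    log::info!("{} models available on {}", models.len(), url);

    if !OllamaClient::is_model_available(url, model).await? {
        anyhow::bail!("model {} is not available on {}", model, url);
    }
    Ok(())
}
```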
## Dependencies of Note
- **actix-web**: HTTP framework

671
Cargo.lock generated

@@ -340,6 +340,19 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"getrandom 0.3.3",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -646,9 +659,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.35"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "590f9024a68a8c40351881787f1934dc11afd69090f5edb6831464694d836ea3"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -694,7 +707,7 @@ dependencies = [
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
"windows-link 0.1.3",
]
[[package]]
@@ -753,19 +766,6 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "console"
version = "0.15.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys 0.59.0",
]
[[package]]
name = "convert_case"
version = "0.4.0"
@@ -783,6 +783,16 @@ dependencies = [
"version_check",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
@@ -851,6 +861,29 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.3",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "darling"
version = "0.20.11"
@@ -942,19 +975,6 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "dialoguer"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de"
dependencies = [
"console",
"shell-words",
"tempfile",
"thiserror 1.0.69",
"zeroize",
]
[[package]]
name = "diesel"
version = "2.2.12"
@@ -1040,18 +1060,33 @@ dependencies = [
"syn",
]
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "ego-tree"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@@ -1137,9 +1172,9 @@ dependencies = [
[[package]]
name = "find-msvc-tools"
version = "0.1.0"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e178e4fba8a2726903f6ba98a6d221e76f9c12c650d5dc0e6afdc50677b49650"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "flate2"
@@ -1163,6 +1198,21 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "foreign-types"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
dependencies = [
"foreign-types-shared",
]
[[package]]
name = "foreign-types-shared"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@@ -1172,6 +1222,16 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.31"
@@ -1267,6 +1327,15 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -1277,6 +1346,15 @@ dependencies = [
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -1384,6 +1462,20 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "http"
version = "0.2.12"
@@ -1469,6 +1561,22 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
"http 1.3.1",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]]
name = "hyper-timeout"
version = "0.5.2"
@@ -1482,6 +1590,22 @@ dependencies = [
"tower-service",
]
[[package]]
name = "hyper-tls"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
"hyper",
"hyper-util",
"native-tls",
"tokio",
"tokio-native-tls",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.16"
@@ -1501,9 +1625,11 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"socket2 0.6.0",
"system-configuration",
"tokio",
"tower-service",
"tracing",
"windows-registry",
]
[[package]]
@@ -1530,6 +1656,15 @@ dependencies = [
"cc",
]
[[package]]
name = "ical"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7cab7543a8b7729a19e2c04309f902861293dcdae6558dfbeb634454d279f6"
dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "icu_collections"
version = "2.0.0"
@@ -1662,7 +1797,7 @@ dependencies = [
[[package]]
name = "image-api"
version = "0.4.1"
version = "0.5.0"
dependencies = [
"actix",
"actix-cors",
@@ -1673,20 +1808,22 @@ dependencies = [
"actix-web",
"actix-web-prom",
"anyhow",
"base64",
"bcrypt",
"chrono",
"clap",
"dialoguer",
"diesel",
"diesel_migrations",
"dotenv",
"env_logger",
"futures",
"ical",
"image",
"infer",
"jsonwebtoken",
"kamadak-exif",
"lazy_static",
"libsqlite3-sys",
"log",
"opentelemetry",
"opentelemetry-appender-log",
@@ -1698,11 +1835,15 @@ dependencies = [
"rand 0.8.5",
"rayon",
"regex",
"reqwest",
"scraper",
"serde",
"serde_json",
"tempfile",
"tokio",
"urlencoding",
"walkdir",
"zerocopy",
]
[[package]]
@@ -1915,6 +2056,7 @@ version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
@@ -1973,6 +2115,26 @@ dependencies = [
"imgref",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf 0.11.3",
"phf_codegen 0.11.3",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "maybe-rayon"
version = "0.1.1"
@@ -2070,6 +2232,23 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13d2233c9842d08cfe13f9eac96e207ca6a2ea10b80259ebe8ad0268be27d2af"
[[package]]
name = "native-tls"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
dependencies = [
"libc",
"log",
"openssl",
"openssl-probe",
"openssl-sys",
"schannel",
"security-framework",
"security-framework-sys",
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
@@ -2181,6 +2360,50 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
[[package]]
name = "openssl"
version = "0.10.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
dependencies = [
"bitflags",
"cfg-if",
"foreign-types",
"libc",
"once_cell",
"openssl-macros",
"openssl-sys",
]
[[package]]
name = "openssl-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "openssl-probe"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
[[package]]
name = "openssl-sys"
version = "0.9.111"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "opentelemetry"
version = "0.31.0"
@@ -2347,6 +2570,96 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared 0.10.0",
]
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared 0.11.3",
"rand 0.8.5",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher 0.3.11",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher 1.0.1",
]
[[package]]
name = "pin-project"
version = "1.1.10"
@@ -2437,6 +2750,12 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro2"
version = "1.0.101"
@@ -2744,23 +3063,31 @@ checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
dependencies = [
"base64",
"bytes",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"h2 0.4.12",
"http 1.3.1",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-tls",
"hyper-util",
"js-sys",
"log",
"mime",
"native-tls",
"percent-encoding",
"pin-project-lite",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-native-tls",
"tower",
"tower-http",
"tower-service",
@@ -2818,6 +3145,39 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "rustls"
version = "0.23.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f"
dependencies = [
"once_cell",
"rustls-pki-types",
"rustls-webpki",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-pki-types"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282"
dependencies = [
"zeroize",
]
[[package]]
name = "rustls-webpki"
version = "0.103.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.22"
@@ -2839,12 +3199,79 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
dependencies = [
"ahash",
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"once_cell",
"selectors",
"tendril",
]
[[package]]
name = "security-framework"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags",
"core-foundation",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "selectors"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
dependencies = [
"bitflags",
"cssparser",
"derive_more 0.99.20",
"fxhash",
"log",
"new_debug_unreachable",
"phf 0.10.1",
"phf_codegen 0.10.0",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "semver"
version = "1.0.26"
@@ -2922,6 +3349,15 @@ dependencies = [
"serde",
]
[[package]]
name = "servo_arc"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
@@ -2933,12 +3369,6 @@ dependencies = [
"digest",
]
[[package]]
name = "shell-words"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
[[package]]
name = "shlex"
version = "1.3.0"
@@ -2981,6 +3411,18 @@ dependencies = [
"time",
]
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "siphasher"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "slab"
version = "0.4.11"
@@ -3034,6 +3476,31 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29fdc163db75f7b5ffa3daf0c5a7136fb0d4b2f35523cd1769da05e034159feb"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]]
name = "strsim"
version = "0.11.1"
@@ -3077,6 +3544,27 @@ dependencies = [
"syn",
]
[[package]]
name = "system-configuration"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
"bitflags",
"core-foundation",
"system-configuration-sys",
]
[[package]]
name = "system-configuration-sys"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "system-deps"
version = "6.2.2"
@@ -3109,6 +3597,17 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -3205,9 +3704,41 @@ dependencies = [
"signal-hook-registry",
"slab",
"socket2 0.6.0",
"tokio-macros",
"windows-sys 0.59.0",
]
[[package]]
name = "tokio-macros"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-native-tls"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
dependencies = [
"native-tls",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
dependencies = [
"rustls",
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.17"
@@ -3363,9 +3894,9 @@ dependencies = [
[[package]]
name = "tower-http"
version = "0.6.6"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"bitflags",
"bytes",
@@ -3477,6 +4008,18 @@ dependencies = [
"serde",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8_iter"
version = "1.0.4"
@@ -3688,7 +4231,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-link 0.1.3",
"windows-result",
"windows-strings",
]
@@ -3721,13 +4264,30 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-registry"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
dependencies = [
"windows-link 0.1.3",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-result"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
dependencies = [
"windows-link",
"windows-link 0.1.3",
]
[[package]]
@@ -3736,7 +4296,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
dependencies = [
"windows-link",
"windows-link 0.1.3",
]
[[package]]
@@ -3766,6 +4326,15 @@ dependencies = [
"windows-targets 0.53.3",
]
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link 0.2.1",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
@@ -3788,7 +4357,7 @@ version = "0.53.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
dependencies = [
"windows-link",
"windows-link 0.1.3",
"windows_aarch64_gnullvm 0.53.0",
"windows_aarch64_msvc 0.53.0",
"windows_i686_gnu 0.53.0",


@@ -1,6 +1,6 @@
[package]
name = "image-api"
version = "0.4.1"
version = "0.5.0"
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
edition = "2024"
@@ -13,7 +13,7 @@ lto = "thin"
actix = "0.13.1"
actix-web = "4"
actix-rt = "2.6"
tokio = { version = "1.42.0", features = ["default", "process", "sync"] }
tokio = { version = "1.42.0", features = ["default", "process", "sync", "macros", "rt-multi-thread"] }
actix-files = "0.6"
actix-cors = "0.7"
actix-multipart = "0.7.2"
@@ -23,10 +23,10 @@ jsonwebtoken = "9.3.0"
serde = "1"
serde_json = "1"
diesel = { version = "2.2.10", features = ["sqlite"] }
libsqlite3-sys = { version = "0.35", features = ["bundled"] }
diesel_migrations = "2.2.0"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
dialoguer = "0.11"
dotenv = "0.15"
bcrypt = "0.17.1"
image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] }
@@ -49,3 +49,9 @@ opentelemetry-appender-log = "0.31.0"
tempfile = "3.20.0"
regex = "1.11.1"
exif = { package = "kamadak-exif", version = "0.6.1" }
reqwest = { version = "0.12", features = ["json"] }
urlencoding = "2.1"
zerocopy = "0.8"
ical = "0.11"
scraper = "0.20"
base64 = "0.22"


@@ -9,6 +9,9 @@ Upon first run it will generate thumbnails for all images and videos at `BASE_PA
- Video streaming with HLS
- Tag-based organization
- Memories API for browsing photos by date
- **AI-Powered Photo Insights** - Generate contextual insights from photos using LLMs
- **RAG-based Context Retrieval** - Semantic search over daily conversation summaries
- **Automatic Daily Summaries** - LLM-generated summaries of daily conversations with embeddings
## Environment
There are a handful of required environment variables to have the API run.
@@ -26,3 +29,38 @@ You must have `ffmpeg` installed for streaming video and generating video thumbn
- `WATCH_QUICK_INTERVAL_SECONDS` (optional) is the interval in seconds for quick file scans [default: 60]
- `WATCH_FULL_INTERVAL_SECONDS` (optional) is the interval in seconds for full file scans [default: 3600]
### AI Insights Configuration (Optional)
The following environment variables configure AI-powered photo insights and daily conversation summaries:
#### Ollama Configuration
- `OLLAMA_PRIMARY_URL` - Primary Ollama server URL [default: `http://localhost:11434`]
- Example: `http://desktop:11434` (your main/powerful server)
- `OLLAMA_FALLBACK_URL` - Fallback Ollama server URL (optional)
- Example: `http://server:11434` (always-on backup server)
- `OLLAMA_PRIMARY_MODEL` - Model to use on primary server [default: `nemotron-3-nano:30b`]
- Example: `nemotron-3-nano:30b`, `llama3.2:3b`, etc.
- `OLLAMA_FALLBACK_MODEL` - Model to use on fallback server (optional)
- If not set, the fallback server uses `OLLAMA_PRIMARY_MODEL`
**Legacy Variables** (still supported; resolution order is sketched below):
- `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` is not set
- `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` is not set
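A minimal sketch of that resolution order (the actual wiring lives elsewhere in the crate and is not shown in this diff; defaults match the values listed above):
```rust
use std::env;
use crate::ai::OllamaClient;

fn ollama_from_env() -> OllamaClient {
    // New variables take precedence, then the legacy ones, then the defaults.
    let primary_url = env::var("OLLAMA_PRIMARY_URL")
        .or_else(|_| env::var("OLLAMA_URL"))
        .unwrap_or_else(|_| "http://localhost:11434".to_string());
    let primary_model = env::var("OLLAMA_PRIMARY_MODEL")
        .or_else(|_| env::var("OLLAMA_MODEL"))
        .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());

    OllamaClient::new(
        primary_url,
        env::var("OLLAMA_FALLBACK_URL").ok(),
        primary_model,
        env::var("OLLAMA_FALLBACK_MODEL").ok(),
    )
}
```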
#### SMS API Configuration
- `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`]
- Used to fetch conversation data for context in insights
- `SMS_API_TOKEN` - Authentication token for SMS API (optional)
#### Fallback Behavior
- The primary server is tried first with a 5-second connection timeout
- On failure, the client automatically falls back to the secondary server (if configured)
- The total request timeout is 120 seconds to accommodate LLM inference
- Logs indicate which server/model was used and any failover attempts
#### Daily Summary Generation
Daily conversation summaries are generated automatically on server startup. Configure the following in `src/main.rs` (see the sketch below):
- Date range for summary generation
- Contacts to process
- Model version used for embeddings: `nomic-embed-text:v1.5`
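A sketch of that startup call (the contact name and how `ollama`, `sms_client`, and `summary_dao` are constructed are illustrative; this excerpt does not show the actual wiring in `src/main.rs`):
```rust
use std::sync::{Arc, Mutex};
use chrono::NaiveDate;
use crate::ai::{generate_daily_summaries, OllamaClient, SmsApiClient};
use crate::database::DailySummaryDao;

async fn run_daily_summaries(
    ollama: &OllamaClient,
    sms_client: &SmsApiClient,
    summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
) -> anyhow::Result<()> {
    // Passing None for both dates uses the built-in default range
    // (July 1 - September 30, 2024).
    generate_daily_summaries(
        "Melissa", // contact to process (illustrative)
        Some(NaiveDate::from_ymd_opt(2024, 7, 1).unwrap()),
        Some(NaiveDate::from_ymd_opt(2024, 9, 30).unwrap()),
        ollama,
        sms_client,
        summary_dao,
    )
    .await
}
```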


@@ -0,0 +1,3 @@
-- Rollback AI insights table
DROP INDEX IF EXISTS idx_photo_insights_path;
DROP TABLE IF EXISTS photo_insights;


@@ -0,0 +1,11 @@
-- AI-generated insights for individual photos
CREATE TABLE IF NOT EXISTS photo_insights (
id INTEGER PRIMARY KEY NOT NULL,
file_path TEXT NOT NULL UNIQUE, -- Full path to the photo
title TEXT NOT NULL, -- "At the beach with Sarah"
summary TEXT NOT NULL, -- 2-3 sentence description
generated_at BIGINT NOT NULL,
model_version TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_photo_insights_path ON photo_insights(file_path);


@@ -0,0 +1 @@
DROP TABLE daily_conversation_summaries;


@@ -0,0 +1,19 @@
-- Daily conversation summaries for improved RAG quality
-- Each row = one day's conversation with a contact, summarized by LLM and embedded
CREATE TABLE daily_conversation_summaries (
id INTEGER PRIMARY KEY NOT NULL,
date TEXT NOT NULL, -- ISO date "2024-08-15"
contact TEXT NOT NULL, -- Contact name
summary TEXT NOT NULL, -- LLM-generated 3-5 sentence summary
message_count INTEGER NOT NULL, -- Number of messages in this day
embedding BLOB NOT NULL, -- 768-dim vector of the summary
created_at BIGINT NOT NULL, -- When this summary was generated
model_version TEXT NOT NULL, -- "nomic-embed-text:v1.5"
UNIQUE(date, contact)
);
-- Indexes for efficient querying
CREATE INDEX idx_daily_summaries_date ON daily_conversation_summaries(date);
CREATE INDEX idx_daily_summaries_contact ON daily_conversation_summaries(contact);
CREATE INDEX idx_daily_summaries_date_contact ON daily_conversation_summaries(date, contact);


@@ -0,0 +1 @@
DROP TABLE IF EXISTS calendar_events;


@@ -0,0 +1,20 @@
CREATE TABLE calendar_events (
id INTEGER PRIMARY KEY NOT NULL,
event_uid TEXT,
summary TEXT NOT NULL,
description TEXT,
location TEXT,
start_time BIGINT NOT NULL,
end_time BIGINT NOT NULL,
all_day BOOLEAN NOT NULL DEFAULT 0,
organizer TEXT,
attendees TEXT,
embedding BLOB,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(event_uid, start_time)
);
CREATE INDEX idx_calendar_start_time ON calendar_events(start_time);
CREATE INDEX idx_calendar_end_time ON calendar_events(end_time);
CREATE INDEX idx_calendar_time_range ON calendar_events(start_time, end_time);


@@ -0,0 +1 @@
DROP TABLE IF EXISTS location_history;


@@ -0,0 +1,19 @@
CREATE TABLE location_history (
id INTEGER PRIMARY KEY NOT NULL,
timestamp BIGINT NOT NULL,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
accuracy INTEGER,
activity TEXT,
activity_confidence INTEGER,
place_name TEXT,
place_category TEXT,
embedding BLOB,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(timestamp, latitude, longitude)
);
CREATE INDEX idx_location_timestamp ON location_history(timestamp);
CREATE INDEX idx_location_coords ON location_history(latitude, longitude);
CREATE INDEX idx_location_activity ON location_history(activity);


@@ -0,0 +1 @@
DROP TABLE IF EXISTS search_history;


@@ -0,0 +1,13 @@
CREATE TABLE search_history (
id INTEGER PRIMARY KEY NOT NULL,
timestamp BIGINT NOT NULL,
query TEXT NOT NULL,
search_engine TEXT,
embedding BLOB NOT NULL,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(timestamp, query)
);
CREATE INDEX idx_search_timestamp ON search_history(timestamp);
CREATE INDEX idx_search_query ON search_history(query);

403
src/ai/daily_summary_job.rs Normal file

@@ -0,0 +1,403 @@
use anyhow::Result;
use chrono::{NaiveDate, Utc};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::time::sleep;
use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
use crate::database::{DailySummaryDao, InsertDailySummary};
use crate::otel::global_tracer;
/// Strip boilerplate prefixes and common phrases from summaries before embedding.
/// This improves embedding diversity by removing structural similarity.
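/// For example, an LLM response like `"**Summary:** Melissa and I discussed the beach trip."`
/// is reduced to `"the beach trip."` before embedding, so vectors capture day-specific
/// content rather than shared boilerplate.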
pub fn strip_summary_boilerplate(summary: &str) -> String {
let mut text = summary.trim().to_string();
// Remove markdown headers
while text.starts_with('#') {
if let Some(pos) = text.find('\n') {
text = text[pos..].trim_start().to_string();
} else {
// Single line with just headers, try to extract content after #s
text = text.trim_start_matches('#').trim().to_string();
break;
}
}
// Remove "Summary:" prefix variations (with optional markdown bold)
let prefixes = [
"**Summary:**",
"**Summary**:",
"*Summary:*",
"Summary:",
"**summary:**",
"summary:",
];
for prefix in prefixes {
if text.to_lowercase().starts_with(&prefix.to_lowercase()) {
text = text[prefix.len()..].trim_start().to_string();
break;
}
}
// Remove common opening phrases that add no semantic value
let opening_phrases = [
"Today, Melissa and I discussed",
"Today, Amanda and I discussed",
"Today Melissa and I discussed",
"Today Amanda and I discussed",
"Melissa and I discussed",
"Amanda and I discussed",
"Today, I discussed",
"Today I discussed",
"The conversation covered",
"This conversation covered",
"In this conversation,",
"During this conversation,",
];
for phrase in opening_phrases {
if text.to_lowercase().starts_with(&phrase.to_lowercase()) {
text = text[phrase.len()..].trim_start().to_string();
// Remove leading punctuation/articles after stripping phrase
text = text
.trim_start_matches([',', ':', '-'])
.trim_start()
.to_string();
break;
}
}
// Remove any remaining leading markdown bold markers
if text.starts_with("**")
&& let Some(end) = text[2..].find("**")
{
// Keep the content between ** but remove the markers
let bold_content = &text[2..2 + end];
text = format!("{}{}", bold_content, &text[4 + end..]);
}
text.trim().to_string()
}
/// Generate and embed daily conversation summaries for a date range
/// Default: August 2024 ±30 days (July 1 - September 30, 2024)
pub async fn generate_daily_summaries(
contact: &str,
start_date: Option<NaiveDate>,
end_date: Option<NaiveDate>,
ollama: &OllamaClient,
sms_client: &SmsApiClient,
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
) -> Result<()> {
let tracer = global_tracer();
// Get current context (empty in background task) and start span with it
let current_cx = opentelemetry::Context::current();
let mut span = tracer.start_with_context("ai.daily_summary.generate_batch", &current_cx);
span.set_attribute(KeyValue::new("contact", contact.to_string()));
// Create context with this span for child operations
let parent_cx = current_cx.with_span(span);
// Default to August 2024 ±30 days
let start = start_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 7, 1).unwrap());
let end = end_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 9, 30).unwrap());
parent_cx
.span()
.set_attribute(KeyValue::new("start_date", start.to_string()));
parent_cx
.span()
.set_attribute(KeyValue::new("end_date", end.to_string()));
parent_cx.span().set_attribute(KeyValue::new(
"date_range_days",
(end - start).num_days() + 1,
));
log::info!("========================================");
log::info!("Starting daily summary generation for {}", contact);
log::info!(
"Date range: {} to {} ({} days)",
start,
end,
(end - start).num_days() + 1
);
log::info!("========================================");
// Fetch all messages for the contact in the date range
log::info!("Fetching messages for date range...");
let _start_timestamp = start.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let _end_timestamp = end.and_hms_opt(23, 59, 59).unwrap().and_utc().timestamp();
let all_messages = sms_client.fetch_all_messages_for_contact(contact).await?;
// Filter to date range and group by date
let mut messages_by_date: HashMap<NaiveDate, Vec<SmsMessage>> = HashMap::new();
for msg in all_messages {
let msg_dt = chrono::DateTime::from_timestamp(msg.timestamp, 0);
if let Some(dt) = msg_dt {
let date = dt.date_naive();
if date >= start && date <= end {
messages_by_date.entry(date).or_default().push(msg);
}
}
}
log::info!(
"Grouped messages into {} days with activity",
messages_by_date.len()
);
if messages_by_date.is_empty() {
log::warn!("No messages found in date range");
return Ok(());
}
// Sort dates for ordered processing
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
dates.sort();
let total_days = dates.len();
let mut processed = 0;
let mut skipped = 0;
let mut failed = 0;
log::info!("Processing {} days with messages...", total_days);
for (idx, date) in dates.iter().enumerate() {
let messages = messages_by_date.get(date).unwrap();
let date_str = date.format("%Y-%m-%d").to_string();
// Check if summary already exists
{
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let otel_context = opentelemetry::Context::new();
if dao
.summary_exists(&otel_context, &date_str, contact)
.unwrap_or(false)
{
skipped += 1;
if idx % 10 == 0 {
log::info!(
"Progress: {}/{} ({} processed, {} skipped)",
idx + 1,
total_days,
processed,
skipped
);
}
continue;
}
}
// Generate summary for this day
match generate_and_store_daily_summary(
&parent_cx,
date,
contact,
messages,
ollama,
summary_dao.clone(),
)
.await
{
Ok(_) => {
processed += 1;
log::info!(
"✓ {}/{}: {} ({} messages)",
idx + 1,
total_days,
date_str,
messages.len()
);
}
Err(e) => {
failed += 1;
log::error!("✗ Failed to process {}: {:?}", date_str, e);
}
}
// Rate limiting: sleep 500ms between summaries
if idx < total_days - 1 {
sleep(std::time::Duration::from_millis(500)).await;
}
// Progress logging every 10 days
if idx % 10 == 0 && idx > 0 {
log::info!(
"Progress: {}/{} ({} processed, {} skipped, {} failed)",
idx + 1,
total_days,
processed,
skipped,
failed
);
}
}
log::info!("========================================");
log::info!("Daily summary generation complete!");
log::info!(
"Processed: {}, Skipped: {}, Failed: {}",
processed,
skipped,
failed
);
log::info!("========================================");
// Record final metrics in span
parent_cx
.span()
.set_attribute(KeyValue::new("days_processed", processed as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("days_skipped", skipped as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("days_failed", failed as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("total_days", total_days as i64));
if failed > 0 {
parent_cx
.span()
.set_status(Status::error(format!("{} days failed to process", failed)));
} else {
parent_cx.span().set_status(Status::Ok);
}
Ok(())
}
/// Generate and store a single day's summary
async fn generate_and_store_daily_summary(
parent_cx: &opentelemetry::Context,
date: &NaiveDate,
contact: &str,
messages: &[SmsMessage],
ollama: &OllamaClient,
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
) -> Result<()> {
let tracer = global_tracer();
let mut span = tracer.start_with_context("ai.daily_summary.generate_single", parent_cx);
span.set_attribute(KeyValue::new("date", date.to_string()));
span.set_attribute(KeyValue::new("contact", contact.to_string()));
span.set_attribute(KeyValue::new("message_count", messages.len() as i64));
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200) // Limit to 200 messages per day to avoid token overflow
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let weekday = date.format("%A");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
contact,
contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
// Generate summary with LLM
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
log::debug!(
"Generated summary for {}: {}",
date,
summary.chars().take(100).collect::<String>()
);
span.set_attribute(KeyValue::new("summary_length", summary.len() as i64));
// Strip boilerplate before embedding to improve vector diversity
let stripped_summary = strip_summary_boilerplate(&summary);
log::debug!(
"Stripped summary for embedding: {}",
stripped_summary.chars().take(100).collect::<String>()
);
// Embed the stripped summary (store original summary in DB)
let embedding = ollama.generate_embedding(&stripped_summary).await?;
span.set_attribute(KeyValue::new(
"embedding_dimensions",
embedding.len() as i64,
));
// Store in database
let insert = InsertDailySummary {
date: date.format("%Y-%m-%d").to_string(),
contact: contact.to_string(),
summary: summary.trim().to_string(),
message_count: messages.len() as i32,
embedding,
created_at: Utc::now().timestamp(),
// model_version: "nomic-embed-text:v1.5".to_string(),
model_version: "mxbai-embed-large:335m".to_string(),
};
// Create context from current span for DB operation
let child_cx = opentelemetry::Context::current_with_span(span);
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let result = dao
.store_summary(&child_cx, insert)
.map_err(|e| anyhow::anyhow!("Failed to store summary: {:?}", e));
match &result {
Ok(_) => child_cx.span().set_status(Status::Ok),
Err(e) => child_cx.span().set_status(Status::error(e.to_string())),
}
result?;
Ok(())
}

263
src/ai/handlers.rs Normal file

@@ -0,0 +1,263 @@
use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::InsightDao;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::utils::normalize_path;
#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
pub file_path: String,
#[serde(default)]
pub model: Option<String>,
#[serde(default)]
pub system_prompt: Option<String>,
#[serde(default)]
pub num_ctx: Option<i32>,
}
#[derive(Debug, Deserialize)]
pub struct GetPhotoInsightQuery {
pub path: String,
}
#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
pub id: i32,
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}
#[derive(Debug, Serialize)]
pub struct AvailableModelsResponse {
pub primary: ServerModels,
#[serde(skip_serializing_if = "Option::is_none")]
pub fallback: Option<ServerModels>,
}
#[derive(Debug, Serialize)]
pub struct ServerModels {
pub url: String,
pub models: Vec<ModelCapabilities>,
pub default_model: String,
}
/// POST /insights/generate - Generate insight for a specific photo
#[post("/insights/generate")]
pub async fn generate_insight_handler(
http_request: HttpRequest,
_claims: Claims,
request: web::Json<GeneratePhotoInsightRequest>,
insight_generator: web::Data<InsightGenerator>,
) -> impl Responder {
let parent_context = extract_context_from_request(&http_request);
let tracer = global_tracer();
let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
let normalized_path = normalize_path(&request.file_path);
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
if let Some(ref model) = request.model {
span.set_attribute(KeyValue::new("model", model.clone()));
}
if let Some(ref prompt) = request.system_prompt {
span.set_attribute(KeyValue::new("has_custom_prompt", true));
span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
}
if let Some(ctx) = request.num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
}
log::info!(
"Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
normalized_path,
request.model,
request.system_prompt.is_some(),
request.num_ctx
);
// Generate insight with optional custom model, system prompt, and context size
let result = insight_generator
.generate_insight_for_photo_with_config(
&normalized_path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
)
.await;
match result {
Ok(()) => {
span.set_status(Status::Ok);
HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Insight generated successfully"
}))
}
Err(e) => {
log::error!("Failed to generate insight: {:?}", e);
span.set_status(Status::error(e.to_string()));
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to generate insight: {:?}", e)
}))
}
}
}
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
#[get("/insights")]
pub async fn get_insight_handler(
_claims: Claims,
query: web::Query<GetPhotoInsightQuery>,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
let normalized_path = normalize_path(&query.path);
log::debug!("Fetching insight for {}", normalized_path);
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.get_insight(&otel_context, &normalized_path) {
Ok(Some(insight)) => {
let response = PhotoInsightResponse {
id: insight.id,
file_path: insight.file_path,
title: insight.title,
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
};
HttpResponse::Ok().json(response)
}
Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
"error": "Insight not found"
})),
Err(e) => {
log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to fetch insight: {:?}", e)
}))
}
}
}
/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
#[delete("/insights")]
pub async fn delete_insight_handler(
_claims: Claims,
query: web::Query<GetPhotoInsightQuery>,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
let normalized_path = normalize_path(&query.path);
log::info!("Deleting insight for {}", normalized_path);
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.delete_insight(&otel_context, &normalized_path) {
Ok(()) => HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Insight deleted successfully"
})),
Err(e) => {
log::error!("Failed to delete insight: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to delete insight: {:?}", e)
}))
}
}
}
/// GET /insights/all - Get all insights
#[get("/insights/all")]
pub async fn get_all_insights_handler(
_claims: Claims,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
log::debug!("Fetching all insights");
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.get_all_insights(&otel_context) {
Ok(insights) => {
let responses: Vec<PhotoInsightResponse> = insights
.into_iter()
.map(|insight| PhotoInsightResponse {
id: insight.id,
file_path: insight.file_path,
title: insight.title,
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
})
.collect();
HttpResponse::Ok().json(responses)
}
Err(e) => {
log::error!("Failed to fetch all insights: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to fetch insights: {:?}", e)
}))
}
}
}
/// GET /insights/models - List available models from both servers with capabilities
#[get("/insights/models")]
pub async fn get_available_models_handler(
_claims: Claims,
app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
log::debug!("Fetching available models with capabilities");
let ollama_client = &app_state.ollama;
// Fetch models with capabilities from primary server
let primary_models =
match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
Ok(models) => models,
Err(e) => {
log::warn!("Failed to fetch models from primary server: {:?}", e);
vec![]
}
};
let primary = ServerModels {
url: ollama_client.primary_url.clone(),
models: primary_models,
default_model: ollama_client.primary_model.clone(),
};
// Fetch models with capabilities from fallback server if configured
let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
match OllamaClient::list_models_with_capabilities(fallback_url).await {
Ok(models) => Some(ServerModels {
url: fallback_url.clone(),
models,
default_model: ollama_client
.fallback_model
.clone()
.unwrap_or_else(|| ollama_client.primary_model.clone()),
}),
Err(e) => {
log::warn!("Failed to fetch models from fallback server: {:?}", e);
None
}
}
} else {
None
};
let response = AvailableModelsResponse { primary, fallback };
HttpResponse::Ok().json(response)
}
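// These handlers are registered from src/main.rs, which is not part of this file.
// A minimal actix-web wiring might look like the following sketch, where
// `insight_generator`, `insight_dao`, and `app_state` are placeholder bindings:
//
//   App::new()
//       .app_data(web::Data::new(insight_generator))
//       .app_data(web::Data::new(std::sync::Mutex::new(insight_dao)))
//       .app_data(web::Data::new(app_state))
//       .service(generate_insight_handler)
//       .service(get_insight_handler)
//       .service(delete_insight_handler)
//       .service(get_all_insights_handler)
//       .service(get_available_models_handler)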

1306
src/ai/insight_generator.rs Normal file

File diff suppressed because it is too large.

16
src/ai/mod.rs Normal file

@@ -0,0 +1,16 @@
pub mod daily_summary_job;
pub mod handlers;
pub mod insight_generator;
pub mod ollama;
pub mod sms_client;
// strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
#[allow(unused_imports)]
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
pub use handlers::{
delete_insight_handler, generate_insight_handler, get_all_insights_handler,
get_available_models_handler, get_insight_handler,
};
pub use insight_generator::InsightGenerator;
pub use ollama::{ModelCapabilities, OllamaClient};
pub use sms_client::{SmsApiClient, SmsMessage};

735
src/ai/ollama.rs Normal file

@@ -0,0 +1,735 @@
use anyhow::Result;
use chrono::NaiveDate;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
// Cache duration: 15 minutes
const CACHE_DURATION_SECS: u64 = 15 * 60;
// Cached entry with timestamp
#[derive(Clone)]
struct CachedEntry<T> {
data: T,
cached_at: Instant,
}
impl<T> CachedEntry<T> {
fn new(data: T) -> Self {
Self {
data,
cached_at: Instant::now(),
}
}
fn is_expired(&self) -> bool {
self.cached_at.elapsed().as_secs() > CACHE_DURATION_SECS
}
}
// Global cache for model lists and capabilities
lazy_static::lazy_static! {
static ref MODEL_LIST_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<String>>>>> =
Arc::new(Mutex::new(HashMap::new()));
static ref MODEL_CAPABILITIES_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<ModelCapabilities>>>>> =
Arc::new(Mutex::new(HashMap::new()));
}
#[derive(Clone)]
pub struct OllamaClient {
client: Client,
pub primary_url: String,
pub fallback_url: Option<String>,
pub primary_model: String,
pub fallback_model: Option<String>,
num_ctx: Option<i32>,
}
impl OllamaClient {
pub fn new(
primary_url: String,
fallback_url: Option<String>,
primary_model: String,
fallback_model: Option<String>,
) -> Self {
Self {
client: Client::builder()
.connect_timeout(Duration::from_secs(5)) // Quick connection timeout
.timeout(Duration::from_secs(120)) // Total request timeout for generation
.build()
.unwrap_or_else(|_| Client::new()),
primary_url,
fallback_url,
primary_model,
fallback_model,
num_ctx: None,
}
}
pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
self.num_ctx = num_ctx;
}
/// List available models on an Ollama server (cached for 15 minutes)
pub async fn list_models(url: &str) -> Result<Vec<String>> {
// Check cache first
{
let cache = MODEL_LIST_CACHE.lock().unwrap();
if let Some(entry) = cache.get(url)
&& !entry.is_expired()
{
log::debug!("Returning cached model list for {}", url);
return Ok(entry.data.clone());
}
}
log::debug!("Fetching fresh model list from {}", url);
let client = Client::builder()
.connect_timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.build()?;
let response = client.get(format!("{}/api/tags", url)).send().await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!("Failed to list models from {}", url));
}
let tags_response: OllamaTagsResponse = response.json().await?;
let models: Vec<String> = tags_response.models.into_iter().map(|m| m.name).collect();
// Store in cache
{
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
cache.insert(url.to_string(), CachedEntry::new(models.clone()));
}
Ok(models)
}
/// Check if a model is available on a server
pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
let models = Self::list_models(url).await?;
Ok(models.iter().any(|m| m == model_name))
}
/// Clear the model list cache for a specific URL or all URLs
pub fn clear_model_cache(url: Option<&str>) {
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
if let Some(url) = url {
cache.remove(url);
log::debug!("Cleared model list cache for {}", url);
} else {
cache.clear();
log::debug!("Cleared all model list cache entries");
}
}
/// Clear the model capabilities cache for a specific URL or all URLs
pub fn clear_capabilities_cache(url: Option<&str>) {
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
if let Some(url) = url {
cache.remove(url);
log::debug!("Cleared model capabilities cache for {}", url);
} else {
cache.clear();
log::debug!("Cleared all model capabilities cache entries");
}
}
/// Check if a model has vision capabilities using the /api/show endpoint
pub async fn check_model_capabilities(
url: &str,
model_name: &str,
) -> Result<ModelCapabilities> {
let client = Client::builder()
.connect_timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.build()?;
#[derive(Serialize)]
struct ShowRequest {
model: String,
}
let response = client
.post(format!("{}/api/show", url))
.json(&ShowRequest {
model: model_name.to_string(),
})
.send()
.await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to get model details for {} from {}",
model_name,
url
));
}
let show_response: OllamaShowResponse = response.json().await?;
// Check if "vision" is in the capabilities array
let has_vision = show_response.capabilities.iter().any(|cap| cap == "vision");
Ok(ModelCapabilities {
name: model_name.to_string(),
has_vision,
})
}
/// List all models with their capabilities from a server (cached for 15 minutes)
pub async fn list_models_with_capabilities(url: &str) -> Result<Vec<ModelCapabilities>> {
// Check cache first
{
let cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
if let Some(entry) = cache.get(url)
&& !entry.is_expired()
{
log::debug!("Returning cached model capabilities for {}", url);
return Ok(entry.data.clone());
}
}
log::debug!("Fetching fresh model capabilities from {}", url);
let models = Self::list_models(url).await?;
let mut capabilities = Vec::new();
for model_name in models {
match Self::check_model_capabilities(url, &model_name).await {
Ok(cap) => capabilities.push(cap),
Err(e) => {
log::warn!("Failed to get capabilities for model {}: {}", model_name, e);
// Fallback: assume no vision if we can't check
capabilities.push(ModelCapabilities {
name: model_name,
has_vision: false,
});
}
}
}
// Store in cache
{
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
cache.insert(url.to_string(), CachedEntry::new(capabilities.clone()));
}
Ok(capabilities)
}
/// Extract final answer from thinking model output
/// Handles <think>...</think> tags and takes everything after
fn extract_final_answer(&self, response: &str) -> String {
let response = response.trim();
// Look for </think> tag and take everything after it
if let Some(pos) = response.find("</think>") {
let answer = response[pos + 8..].trim();
if !answer.is_empty() {
return answer.to_string();
}
}
// Fallback: return the whole response trimmed
response.to_string()
}
async fn try_generate(
&self,
url: &str,
model: &str,
prompt: &str,
system: Option<&str>,
images: Option<Vec<String>>,
) -> Result<String> {
let request = OllamaRequest {
model: model.to_string(),
prompt: prompt.to_string(),
stream: false,
system: system.map(|s| s.to_string()),
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: ctx }),
images,
};
let response = self
.client
.post(format!("{}/api/generate", url))
.json(&request)
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
return Err(anyhow::anyhow!(
"Ollama request failed: {} - {}",
status,
error_body
));
}
let result: OllamaResponse = response.json().await?;
Ok(result.response)
}
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
self.generate_with_images(prompt, system, None).await
}
pub async fn generate_with_images(
&self,
prompt: &str,
system: Option<&str>,
images: Option<Vec<String>>,
) -> Result<String> {
log::debug!("=== Ollama Request ===");
log::debug!("Primary model: {}", self.primary_model);
if let Some(sys) = system {
log::debug!("System: {}", sys);
}
log::debug!("Prompt:\n{}", prompt);
if let Some(ref imgs) = images {
log::debug!("Images: {} image(s) included", imgs.len());
}
log::debug!("=====================");
// Try primary server first with primary model
log::info!(
"Attempting to generate with primary server: {} (model: {})",
self.primary_url,
self.primary_model
);
let primary_result = self
.try_generate(
&self.primary_url,
&self.primary_model,
prompt,
system,
images.clone(),
)
.await;
let raw_response = match primary_result {
Ok(response) => {
log::info!("Successfully generated response from primary server");
response
}
Err(e) => {
log::warn!("Primary server failed: {}", e);
// Try fallback server if available
if let Some(fallback_url) = &self.fallback_url {
// Use fallback model if specified, otherwise use primary model
let fallback_model =
self.fallback_model.as_ref().unwrap_or(&self.primary_model);
log::info!(
"Attempting to generate with fallback server: {} (model: {})",
fallback_url,
fallback_model
);
match self
.try_generate(fallback_url, fallback_model, prompt, system, images.clone())
.await
{
Ok(response) => {
log::info!("Successfully generated response from fallback server");
response
}
Err(fallback_e) => {
log::error!("Fallback server also failed: {}", fallback_e);
return Err(anyhow::anyhow!(
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
e,
fallback_e
));
}
}
} else {
log::error!("No fallback server configured");
return Err(e);
}
}
};
log::debug!("=== Ollama Response ===");
log::debug!("Raw response: {}", raw_response.trim());
log::debug!("=======================");
// Extract final answer from thinking model output
let cleaned = self.extract_final_answer(&raw_response);
log::debug!("=== Cleaned Response ===");
log::debug!("Final answer: {}", cleaned);
log::debug!("========================");
Ok(cleaned)
}
/// Generate a title for a single photo based on its context
pub async fn generate_photo_title(
&self,
date: NaiveDate,
location: Option<&str>,
contact: Option<&str>,
sms_summary: Option<&str>,
custom_system: Option<&str>,
image_base64: Option<String>,
) -> Result<String> {
let location_str = location.unwrap_or("Unknown location");
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = if image_base64.is_some() {
if let Some(contact_name) = contact {
format!(
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. If limited information is available, use a simple descriptive title based on what you see.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name
)
} else {
format!(
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
Date: {}
Location: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. If limited information is available, use a simple descriptive title based on what you see.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
}
} else if let Some(contact_name) = contact {
format!(
r#"Create a short title (maximum 8 words) about this moment:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Use specific details from the context above. The photo is from a folder for {}, so they are likely related to this moment. If no specific details are available, use a simple descriptive title.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name
)
} else {
format!(
r#"Create a short title (maximum 8 words) about this moment:
Date: {}
Location: {}
Messages: {}
Use specific details from the context above. If no specific details are available, use a simple descriptive title.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
};
let system = custom_system.unwrap_or("You are my long term memory assistant. Use only the information provided. Do not invent details.");
let images = image_base64.map(|img| vec![img]);
let title = self
.generate_with_images(&prompt, Some(system), images)
.await?;
Ok(title.trim().trim_matches('"').to_string())
}
/// Generate a summary for a single photo based on its context
pub async fn generate_photo_summary(
&self,
date: NaiveDate,
location: Option<&str>,
contact: Option<&str>,
sms_summary: Option<&str>,
custom_system: Option<&str>,
image_base64: Option<String>,
) -> Result<String> {
let location_str = location.unwrap_or("Unknown");
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = if image_base64.is_some() {
if let Some(contact_name) = contact {
format!(
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name,
contact_name
)
} else {
format!(
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
Date: {}
Location: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
}
} else if let Some(contact_name) = contact {
format!(
r#"Write a 1-3 paragraph description of this moment based on the available information:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name,
contact_name
)
} else {
format!(
r#"Write a 1-3 paragraph description of this moment based on the available information:
Date: {}
Location: {}
Messages: {}
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
};
let system = custom_system.unwrap_or("You are a memory refreshing assistant who is able to provide insights through analyzing past conversations. Use only the information provided. Do not invent details.");
let images = image_base64.map(|img| vec![img]);
self.generate_with_images(&prompt, Some(system), images)
.await
}
/// Generate an embedding vector for text using nomic-embed-text:v1.5
/// Returns a 768-dimensional vector as Vec<f32>
pub async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
let embeddings = self.generate_embeddings(&[text]).await?;
embeddings
.into_iter()
.next()
.ok_or_else(|| anyhow::anyhow!("No embedding returned"))
}
/// Generate embeddings for multiple texts in a single API call (batch mode)
/// Returns a vector of 768-dimensional vectors
/// This is much more efficient than calling generate_embedding multiple times
pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
let embedding_model = "nomic-embed-text:v1.5";
log::debug!("=== Ollama Batch Embedding Request ===");
log::debug!("Model: {}", embedding_model);
log::debug!("Batch size: {} texts", texts.len());
log::debug!("======================================");
// Try primary server first
log::debug!(
"Attempting to generate {} embeddings with primary server: {} (model: {})",
texts.len(),
self.primary_url,
embedding_model
);
let primary_result = self
.try_generate_embeddings(&self.primary_url, embedding_model, texts)
.await;
let embeddings = match primary_result {
Ok(embeddings) => {
log::debug!(
"Successfully generated {} embeddings from primary server",
embeddings.len()
);
embeddings
}
Err(e) => {
log::warn!("Primary server batch embedding failed: {}", e);
// Try fallback server if available
if let Some(fallback_url) = &self.fallback_url {
log::info!(
"Attempting to generate {} embeddings with fallback server: {} (model: {})",
texts.len(),
fallback_url,
embedding_model
);
match self
.try_generate_embeddings(fallback_url, embedding_model, texts)
.await
{
Ok(embeddings) => {
log::info!(
"Successfully generated {} embeddings from fallback server",
embeddings.len()
);
embeddings
}
Err(fallback_e) => {
log::error!(
"Fallback server batch embedding also failed: {}",
fallback_e
);
return Err(anyhow::anyhow!(
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
e,
fallback_e
));
}
}
} else {
log::error!("No fallback server configured");
return Err(e);
}
}
};
// Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5)
for (i, embedding) in embeddings.iter().enumerate() {
if embedding.len() != 768 {
log::warn!(
"Unexpected embedding dimensions for item {}: {} (expected 768)",
i,
embedding.len()
);
}
}
Ok(embeddings)
}
/// Internal helper to try generating embeddings for multiple texts from a specific server
async fn try_generate_embeddings(
&self,
url: &str,
model: &str,
texts: &[&str],
) -> Result<Vec<Vec<f32>>> {
let request = OllamaBatchEmbedRequest {
model: model.to_string(),
input: texts.iter().map(|s| s.to_string()).collect(),
};
let response = self
.client
.post(format!("{}/api/embed", url))
.json(&request)
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
return Err(anyhow::anyhow!(
"Ollama batch embedding request failed: {} - {}",
status,
error_body
));
}
let result: OllamaEmbedResponse = response.json().await?;
Ok(result.embeddings)
}
}
#[derive(Serialize)]
struct OllamaRequest {
model: String,
prompt: String,
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
system: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
options: Option<OllamaOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
images: Option<Vec<String>>,
}
#[derive(Serialize)]
struct OllamaOptions {
num_ctx: i32,
}
#[derive(Deserialize)]
struct OllamaResponse {
response: String,
}
#[derive(Deserialize)]
struct OllamaTagsResponse {
models: Vec<OllamaModel>,
}
#[derive(Deserialize)]
struct OllamaModel {
name: String,
}
#[derive(Deserialize)]
struct OllamaShowResponse {
#[serde(default)]
capabilities: Vec<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ModelCapabilities {
pub name: String,
pub has_vision: bool,
}
#[derive(Serialize)]
struct OllamaBatchEmbedRequest {
model: String,
input: Vec<String>,
}
#[derive(Deserialize)]
struct OllamaEmbedResponse {
embeddings: Vec<Vec<f32>>,
}
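// --- Editor's usage sketch (assumption, not part of the original change) ---
// Minimal illustration of the primary/fallback generation path and the
// model-discovery helpers above. The env var names mirror the project README;
// the function itself (`ollama_usage_sketch`) is hypothetical.
#[allow(dead_code)]
async fn ollama_usage_sketch() -> Result<()> {
    let primary_url = std::env::var("OLLAMA_PRIMARY_URL")
        .unwrap_or_else(|_| "http://localhost:11434".to_string());
    let fallback_url = std::env::var("OLLAMA_FALLBACK_URL").ok();
    let primary_model = std::env::var("OLLAMA_PRIMARY_MODEL")
        .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
    let fallback_model = std::env::var("OLLAMA_FALLBACK_MODEL").ok();

    // Optional pre-flight check: confirm the model exists on the primary server.
    if !OllamaClient::is_model_available(&primary_url, &primary_model).await? {
        log::warn!("Model {} not found on {}", primary_model, primary_url);
    }

    let client = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
    // On a primary connection failure, generate() retries against the fallback server.
    let answer = client
        .generate("Reply with a single word.", Some("You are terse."))
        .await?;
    log::info!("Ollama replied: {}", answer);
    Ok(())
}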

316
src/ai/sms_client.rs Normal file
View File

@@ -0,0 +1,316 @@
use anyhow::Result;
use reqwest::Client;
use serde::Deserialize;
use super::ollama::OllamaClient;
#[derive(Clone)]
pub struct SmsApiClient {
client: Client,
base_url: String,
token: Option<String>,
}
impl SmsApiClient {
pub fn new(base_url: String, token: Option<String>) -> Self {
Self {
client: Client::new(),
base_url,
token,
}
}
/// Fetch messages for a specific contact within ±2 days of the given timestamp
/// Falls back to all contacts if no messages found for the specific contact
/// Messages are sorted by proximity to the center timestamp
pub async fn fetch_messages_for_contact(
&self,
contact: Option<&str>,
center_timestamp: i64,
) -> Result<Vec<SmsMessage>> {
use chrono::Duration;
// Calculate ±2 days range around the center timestamp
let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
.ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;
let start_dt = center_dt - Duration::days(2);
let end_dt = center_dt + Duration::days(2);
let start_ts = start_dt.timestamp();
let end_ts = end_dt.timestamp();
// If contact specified, try fetching for that contact first
if let Some(contact_name) = contact {
log::info!(
"Fetching SMS for contact: {} (±2 days from {})",
contact_name,
center_dt.format("%Y-%m-%d %H:%M:%S")
);
let messages = self
.fetch_messages(start_ts, end_ts, Some(contact_name), Some(center_timestamp))
.await?;
if !messages.is_empty() {
log::info!(
"Found {} messages for contact {}",
messages.len(),
contact_name
);
return Ok(messages);
}
log::info!(
"No messages found for contact {}, falling back to all contacts",
contact_name
);
}
// Fallback to all contacts
log::info!(
"Fetching all SMS messages (±1 day from {})",
center_dt.format("%Y-%m-%d %H:%M:%S")
);
self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp))
.await
}
/// Fetch all messages for a specific contact across all time
/// Used for embedding generation - retrieves complete message history
/// Handles pagination automatically if the API returns a limited number of results
pub async fn fetch_all_messages_for_contact(&self, contact: &str) -> Result<Vec<SmsMessage>> {
let start_ts = chrono::DateTime::parse_from_rfc3339("2000-01-01T00:00:00Z")
.unwrap()
.timestamp();
let end_ts = chrono::Utc::now().timestamp();
log::info!("Fetching all historical messages for contact: {}", contact);
let mut all_messages = Vec::new();
let mut offset = 0;
let limit = 1000; // Fetch in batches of 1000
loop {
log::debug!(
"Fetching batch at offset {} for contact {}",
offset,
contact
);
let batch = self
.fetch_messages_paginated(start_ts, end_ts, Some(contact), None, limit, offset)
.await?;
let batch_size = batch.len();
all_messages.extend(batch);
log::debug!(
"Fetched {} messages (total so far: {})",
batch_size,
all_messages.len()
);
// If we got fewer messages than the limit, we've reached the end
if batch_size < limit {
break;
}
offset += limit;
}
log::info!(
"Fetched {} total messages for contact {}",
all_messages.len(),
contact
);
Ok(all_messages)
}
/// Internal method to fetch messages with pagination support
async fn fetch_messages_paginated(
&self,
start_ts: i64,
end_ts: i64,
contact: Option<&str>,
center_timestamp: Option<i64>,
limit: usize,
offset: usize,
) -> Result<Vec<SmsMessage>> {
let mut url = format!(
"{}/api/messages/by-date-range/?start_date={}&end_date={}&limit={}&offset={}",
self.base_url, start_ts, end_ts, limit, offset
);
if let Some(contact_name) = contact {
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
}
if let Some(ts) = center_timestamp {
url.push_str(&format!("&timestamp={}", ts));
}
log::debug!("Fetching SMS messages from: {}", url);
let mut request = self.client.get(&url);
if let Some(token) = &self.token {
request = request.header("Authorization", format!("Bearer {}", token));
}
let response = request.send().await?;
log::debug!("SMS API response status: {}", response.status());
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
log::error!("SMS API request failed: {} - {}", status, error_body);
return Err(anyhow::anyhow!(
"SMS API request failed: {} - {}",
status,
error_body
));
}
let data: SmsApiResponse = response.json().await?;
Ok(data
.messages
.into_iter()
.map(|m| SmsMessage {
contact: m.contact_name,
body: m.body,
timestamp: m.date,
is_sent: m.type_ == 2,
})
.collect())
}
/// Internal method to fetch messages with optional contact filter and timestamp sorting
async fn fetch_messages(
&self,
start_ts: i64,
end_ts: i64,
contact: Option<&str>,
center_timestamp: Option<i64>,
) -> Result<Vec<SmsMessage>> {
// Call Django endpoint
let mut url = format!(
"{}/api/messages/by-date-range/?start_date={}&end_date={}",
self.base_url, start_ts, end_ts
);
// Add contact filter if provided
if let Some(contact_name) = contact {
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
}
// Add timestamp for proximity sorting if provided
if let Some(ts) = center_timestamp {
url.push_str(&format!("&timestamp={}", ts));
}
log::debug!("Fetching SMS messages from: {}", url);
let mut request = self.client.get(&url);
// Add authorization header if token exists
if let Some(token) = &self.token {
request = request.header("Authorization", format!("Bearer {}", token));
}
let response = request.send().await?;
log::debug!("SMS API response status: {}", response.status());
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
log::error!("SMS API request failed: {} - {}", status, error_body);
return Err(anyhow::anyhow!(
"SMS API request failed: {} - {}",
status,
error_body
));
}
let data: SmsApiResponse = response.json().await?;
// Convert to internal format
Ok(data
.messages
.into_iter()
.map(|m| SmsMessage {
contact: m.contact_name,
body: m.body,
timestamp: m.date,
is_sent: m.type_ == 2, // type 2 = sent
})
.collect())
}
pub async fn summarize_context(
&self,
messages: &[SmsMessage],
ollama: &OllamaClient,
) -> Result<String> {
if messages.is_empty() {
return Ok(String::from("No messages on this day"));
}
// Create prompt for Ollama with sender/receiver distinction
let messages_text: String = messages
.iter()
.take(60) // Limit to avoid token overflow
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize these messages in up to 4-5 sentences. Focus on key topics, places, people mentioned, and the overall context of the conversations.
Messages:
{}
Summary:"#,
messages_text
);
ollama
.generate(
&prompt,
// Some("You are a summarizer for the purposes of jogging my memory and highlighting events and situations."),
Some("You are the keeper of memories, ingest the context and give me a casual summary of the moment."),
)
.await
}
}
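// --- Editor's usage sketch (assumption, not part of the original change) ---
// Shows the intended flow: pull messages around a photo's timestamp for a
// contact (falling back to all contacts), then summarize them via Ollama.
// Env var names mirror the README; the contact name, timestamp, and the
// function itself (`sms_context_sketch`) are hypothetical.
#[allow(dead_code)]
async fn sms_context_sketch(ollama: &OllamaClient) -> Result<String> {
    let api_url =
        std::env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
    let token = std::env::var("SMS_API_TOKEN").ok();
    let sms = SmsApiClient::new(api_url, token);

    // 2021-06-01T00:00:00Z; any photo timestamp works here.
    let center_ts: i64 = 1_622_505_600;
    let messages = sms
        .fetch_messages_for_contact(Some("Alice"), center_ts)
        .await?;
    sms.summarize_context(&messages, ollama).await
}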
#[derive(Debug, Clone)]
pub struct SmsMessage {
pub contact: String,
pub body: String,
pub timestamp: i64,
pub is_sent: bool,
}
#[derive(Deserialize)]
struct SmsApiResponse {
messages: Vec<SmsApiMessage>,
}
#[derive(Deserialize)]
struct SmsApiMessage {
contact_name: String,
body: String,
date: i64,
#[serde(rename = "type")]
type_: i32,
}

View File

@@ -0,0 +1,307 @@
use anyhow::Result;
use clap::Parser;
use diesel::prelude::*;
use diesel::sql_query;
use diesel::sqlite::SqliteConnection;
use std::env;
#[derive(Parser, Debug)]
#[command(author, version, about = "Diagnose embedding distribution and identify problematic summaries", long_about = None)]
struct Args {
/// Show detailed per-summary statistics
#[arg(short, long, default_value_t = false)]
verbose: bool,
/// Number of top "central" summaries to show (ones that match everything)
#[arg(short, long, default_value_t = 10)]
top: usize,
/// Test a specific query to see what matches
#[arg(short, long)]
query: Option<String>,
}
#[derive(QueryableByName, Debug)]
struct EmbeddingRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
date: String,
#[diesel(sql_type = diesel::sql_types::Text)]
contact: String,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
}
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>> {
if !bytes.len().is_multiple_of(4) {
return Err(anyhow::anyhow!("Invalid embedding byte length"));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
fn main() -> Result<()> {
dotenv::dotenv().ok();
let args = Args::parse();
let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string());
println!("Connecting to database: {}", database_url);
let mut conn = SqliteConnection::establish(&database_url)?;
// Load all embeddings
println!("\nLoading embeddings from daily_conversation_summaries...");
let rows: Vec<EmbeddingRow> = sql_query(
"SELECT id, date, contact, summary, embedding FROM daily_conversation_summaries ORDER BY date"
)
.load(&mut conn)?;
println!("Found {} summaries with embeddings\n", rows.len());
if rows.is_empty() {
println!("No summaries found!");
return Ok(());
}
// Parse all embeddings
let mut embeddings: Vec<(i32, String, String, String, Vec<f32>)> = Vec::new();
for row in &rows {
match deserialize_embedding(&row.embedding) {
Ok(emb) => {
embeddings.push((
row.id,
row.date.clone(),
row.contact.clone(),
row.summary.clone(),
emb,
));
}
Err(e) => {
println!(
"Warning: Failed to parse embedding for id {}: {}",
row.id, e
);
}
}
}
println!("Successfully parsed {} embeddings\n", embeddings.len());
// Compute embedding statistics
println!("========================================");
println!("EMBEDDING STATISTICS");
println!("========================================\n");
// Check embedding variance (are values clustered or spread out?)
let first_emb = &embeddings[0].4;
let dim = first_emb.len();
println!("Embedding dimensions: {}", dim);
// Calculate mean and std dev per dimension
let mut dim_means: Vec<f32> = vec![0.0; dim];
let mut dim_vars: Vec<f32> = vec![0.0; dim];
for (_, _, _, _, emb) in &embeddings {
for (i, &val) in emb.iter().enumerate() {
dim_means[i] += val;
}
}
for m in &mut dim_means {
*m /= embeddings.len() as f32;
}
for (_, _, _, _, emb) in &embeddings {
for (i, &val) in emb.iter().enumerate() {
let diff = val - dim_means[i];
dim_vars[i] += diff * diff;
}
}
for v in &mut dim_vars {
*v = (*v / embeddings.len() as f32).sqrt();
}
let avg_std_dev: f32 = dim_vars.iter().sum::<f32>() / dim as f32;
let min_std_dev: f32 = dim_vars.iter().cloned().fold(f32::INFINITY, f32::min);
let max_std_dev: f32 = dim_vars.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
println!("Per-dimension standard deviation:");
println!(" Average: {:.6}", avg_std_dev);
println!(" Min: {:.6}", min_std_dev);
println!(" Max: {:.6}", max_std_dev);
println!();
// Compute pairwise similarities
println!("Computing pairwise similarities (this may take a moment)...\n");
let mut all_similarities: Vec<f32> = Vec::new();
let mut per_embedding_avg: Vec<(usize, f32)> = Vec::new();
for i in 0..embeddings.len() {
let mut sum = 0.0;
let mut count = 0;
for j in 0..embeddings.len() {
if i != j {
let sim = cosine_similarity(&embeddings[i].4, &embeddings[j].4);
all_similarities.push(sim);
sum += sim;
count += 1;
}
}
per_embedding_avg.push((i, sum / count as f32));
}
// Sort similarities for percentile analysis
all_similarities.sort_by(|a, b| a.partial_cmp(b).unwrap());
let min_sim = all_similarities.first().copied().unwrap_or(0.0);
let max_sim = all_similarities.last().copied().unwrap_or(0.0);
let median_sim = all_similarities[all_similarities.len() / 2];
let p25 = all_similarities[all_similarities.len() / 4];
let p75 = all_similarities[3 * all_similarities.len() / 4];
let mean_sim: f32 = all_similarities.iter().sum::<f32>() / all_similarities.len() as f32;
println!("========================================");
println!("PAIRWISE SIMILARITY DISTRIBUTION");
println!("========================================\n");
println!("Total pairs analyzed: {}", all_similarities.len());
println!();
println!("Min similarity: {:.4}", min_sim);
println!("25th percentile: {:.4}", p25);
println!("Median similarity: {:.4}", median_sim);
println!("Mean similarity: {:.4}", mean_sim);
println!("75th percentile: {:.4}", p75);
println!("Max similarity: {:.4}", max_sim);
println!();
// Analyze distribution
let count_above_08 = all_similarities.iter().filter(|&&s| s > 0.8).count();
let count_above_07 = all_similarities.iter().filter(|&&s| s > 0.7).count();
let count_above_06 = all_similarities.iter().filter(|&&s| s > 0.6).count();
let count_above_05 = all_similarities.iter().filter(|&&s| s > 0.5).count();
let count_below_03 = all_similarities.iter().filter(|&&s| s < 0.3).count();
println!("Similarity distribution:");
println!(
" > 0.8: {} ({:.1}%)",
count_above_08,
100.0 * count_above_08 as f32 / all_similarities.len() as f32
);
println!(
" > 0.7: {} ({:.1}%)",
count_above_07,
100.0 * count_above_07 as f32 / all_similarities.len() as f32
);
println!(
" > 0.6: {} ({:.1}%)",
count_above_06,
100.0 * count_above_06 as f32 / all_similarities.len() as f32
);
println!(
" > 0.5: {} ({:.1}%)",
count_above_05,
100.0 * count_above_05 as f32 / all_similarities.len() as f32
);
println!(
" < 0.3: {} ({:.1}%)",
count_below_03,
100.0 * count_below_03 as f32 / all_similarities.len() as f32
);
println!();
// Identify "central" embeddings (high average similarity to all others)
per_embedding_avg.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
println!("========================================");
println!("TOP {} MOST 'CENTRAL' SUMMARIES", args.top);
println!("(These match everything with high similarity)");
println!("========================================\n");
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().take(args.top).enumerate() {
let (id, date, contact, summary, _) = &embeddings[*idx];
let preview: String = summary.chars().take(80).collect();
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
println!(" Date: {}, Contact: {}", date, contact);
println!(" Preview: {}...", preview.replace('\n', " "));
println!();
}
// Also show the least central (most unique)
println!("========================================");
println!("TOP {} MOST UNIQUE SUMMARIES", args.top);
println!("(These are most different from others)");
println!("========================================\n");
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().rev().take(args.top).enumerate() {
let (id, date, contact, summary, _) = &embeddings[*idx];
let preview: String = summary.chars().take(80).collect();
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
println!(" Date: {}, Contact: {}", date, contact);
println!(" Preview: {}...", preview.replace('\n', " "));
println!();
}
// Diagnosis
println!("========================================");
println!("DIAGNOSIS");
println!("========================================\n");
if mean_sim > 0.7 {
println!("⚠️ HIGH AVERAGE SIMILARITY ({:.4})", mean_sim);
println!(" All embeddings are very similar to each other.");
println!(" This explains why the same summaries always match.");
println!();
println!(" Possible causes:");
println!(
" 1. Summaries have similar structure/phrasing (e.g., all start with 'Summary:')"
);
println!(" 2. Embedding model isn't capturing semantic differences well");
println!(" 3. Daily conversations have similar topics (e.g., 'good morning', plans)");
println!();
println!(" Recommendations:");
println!(" 1. Try a different embedding model (mxbai-embed-large, bge-large)");
println!(" 2. Improve summary diversity by varying the prompt");
println!(" 3. Extract and embed only keywords/entities, not full summaries");
} else if mean_sim > 0.5 {
println!("⚡ MODERATE AVERAGE SIMILARITY ({:.4})", mean_sim);
println!(" Some clustering in embeddings, but some differentiation exists.");
println!();
println!(" The 'central' summaries above are likely dominating search results.");
println!(" Consider:");
println!(" 1. Filtering out summaries with very high centrality");
println!(" 2. Adding time-based weighting to prefer recent/relevant dates");
println!(" 3. Increasing the similarity threshold from 0.3 to 0.5");
} else {
println!("✅ GOOD EMBEDDING DIVERSITY ({:.4})", mean_sim);
println!(" Embeddings are well-differentiated.");
println!(" If same results keep appearing, the issue may be elsewhere.");
}
Ok(())
}
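// --- Editor's sketch (assumption, not part of the original change) ---
// Small sanity checks for the helpers above: identical vectors score 1.0,
// orthogonal vectors score 0.0, and little-endian byte round-tripping
// recovers the original floats. Test names are illustrative only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cosine_similarity_basics() {
        let a = vec![1.0_f32, 0.0, 0.0];
        let b = vec![0.0_f32, 1.0, 0.0];
        assert!((cosine_similarity(&a, &a) - 1.0).abs() < 1e-6);
        assert!(cosine_similarity(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn embedding_round_trip() {
        let original = vec![0.25_f32, -1.5, 3.0];
        let bytes: Vec<u8> = original.iter().flat_map(|f| f.to_le_bytes()).collect();
        let decoded = deserialize_embedding(&bytes).expect("valid byte length");
        assert_eq!(decoded, original);
    }
}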

166
src/bin/import_calendar.rs Normal file
View File

@@ -0,0 +1,166 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
use image_api::parsers::ical_parser::parse_ics_file;
use log::{error, info};
use std::sync::{Arc, Mutex};
// Import the trait to use its methods
use image_api::database::CalendarEventDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Calendar data", long_about = None)]
struct Args {
/// Path to the .ics calendar file
#[arg(short, long)]
path: String,
/// Generate embeddings for calendar events (slower but enables semantic search)
#[arg(long, default_value = "false")]
generate_embeddings: bool,
/// Skip events that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for embedding generation
#[arg(long, default_value = "128")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing calendar file: {}", args.path);
let events = parse_ics_file(&args.path).context("Failed to parse .ics file")?;
info!("Found {} calendar events", events.len());
let context = opentelemetry::Context::current();
let ollama = if args.generate_embeddings {
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
.or_else(|_| dotenv::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
Some(OllamaClient::new(
primary_url,
fallback_url,
primary_model,
fallback_model,
))
} else {
None
};
let inserted_count = Arc::new(Mutex::new(0));
let skipped_count = Arc::new(Mutex::new(0));
let error_count = Arc::new(Mutex::new(0));
// Process events sequentially (can't use rayon with the async embedding call)
for event in &events {
let mut dao_instance = SqliteCalendarEventDao::new();
// Check if event exists
if args.skip_existing
&& let Ok(exists) = dao_instance.event_exists(
&context,
event.event_uid.as_deref().unwrap_or(""),
event.start_time,
)
&& exists
{
*skipped_count.lock().unwrap() += 1;
continue;
}
// Generate embedding if requested (blocking call)
let embedding = if let Some(ref ollama_client) = ollama {
let text = format!(
"{} {} {}",
event.summary,
event.description.as_deref().unwrap_or(""),
event.location.as_deref().unwrap_or("")
);
match tokio::task::block_in_place(|| {
tokio::runtime::Handle::current()
.block_on(async { ollama_client.generate_embedding(&text).await })
}) {
Ok(emb) => Some(emb),
Err(e) => {
error!(
"Failed to generate embedding for event '{}': {}",
event.summary, e
);
None
}
}
} else {
None
};
// Insert into database
let insert_event = InsertCalendarEvent {
event_uid: event.event_uid.clone(),
summary: event.summary.clone(),
description: event.description.clone(),
location: event.location.clone(),
start_time: event.start_time,
end_time: event.end_time,
all_day: event.all_day,
organizer: event.organizer.clone(),
attendees: if event.attendees.is_empty() {
None
} else {
Some(serde_json::to_string(&event.attendees).unwrap_or_default())
},
embedding,
created_at: Utc::now().timestamp(),
source_file: Some(args.path.clone()),
};
match dao_instance.store_event(&context, insert_event) {
Ok(_) => {
*inserted_count.lock().unwrap() += 1;
if *inserted_count.lock().unwrap() % 100 == 0 {
info!("Imported {} events...", *inserted_count.lock().unwrap());
}
}
Err(e) => {
error!("Failed to store event '{}': {:?}", event.summary, e);
*error_count.lock().unwrap() += 1;
}
}
}
let final_inserted = *inserted_count.lock().unwrap();
let final_skipped = *skipped_count.lock().unwrap();
let final_errors = *error_count.lock().unwrap();
info!("\n=== Import Summary ===");
info!("Total events found: {}", events.len());
info!("Successfully inserted: {}", final_inserted);
info!("Skipped (already exist): {}", final_skipped);
info!("Errors: {}", final_errors);
if args.generate_embeddings {
info!("Embeddings were generated for semantic search");
} else {
info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
}
Ok(())
}

View File

@@ -0,0 +1,114 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::database::location_dao::{InsertLocationRecord, SqliteLocationHistoryDao};
use image_api::parsers::location_json_parser::parse_location_json;
use log::{error, info};
// Import the trait to use its methods
use image_api::database::LocationHistoryDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Location History data", long_about = None)]
struct Args {
/// Path to the Location History JSON file
#[arg(short, long)]
path: String,
/// Skip locations that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for database inserts
#[arg(long, default_value = "1000")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing location history file: {}", args.path);
let locations =
parse_location_json(&args.path).context("Failed to parse location history JSON")?;
info!("Found {} location records", locations.len());
let context = opentelemetry::Context::current();
let mut inserted_count = 0;
let mut skipped_count = 0;
let mut error_count = 0;
let mut dao_instance = SqliteLocationHistoryDao::new();
let created_at = Utc::now().timestamp();
// Process in batches using batch insert for massive speedup
for (batch_idx, chunk) in locations.chunks(args.batch_size).enumerate() {
info!(
"Processing batch {} ({} records)...",
batch_idx + 1,
chunk.len()
);
// Convert to InsertLocationRecord
let mut batch_inserts = Vec::with_capacity(chunk.len());
for location in chunk {
// Existence check when --skip-existing is set (makes the import much slower)
if args.skip_existing
&& let Ok(exists) = dao_instance.location_exists(
&context,
location.timestamp,
location.latitude,
location.longitude,
)
&& exists
{
skipped_count += 1;
continue;
}
batch_inserts.push(InsertLocationRecord {
timestamp: location.timestamp,
latitude: location.latitude,
longitude: location.longitude,
accuracy: location.accuracy,
activity: location.activity.clone(),
activity_confidence: location.activity_confidence,
place_name: None,
place_category: None,
embedding: None,
created_at,
source_file: Some(args.path.clone()),
});
}
// Batch insert entire chunk in single transaction
if !batch_inserts.is_empty() {
match dao_instance.store_locations_batch(&context, batch_inserts) {
Ok(count) => {
inserted_count += count;
info!(
"Imported {} locations (total: {})...",
count, inserted_count
);
}
Err(e) => {
error!("Failed to store batch: {:?}", e);
error_count += chunk.len();
}
}
}
}
info!("\n=== Import Summary ===");
info!("Total locations found: {}", locations.len());
info!("Successfully inserted: {}", inserted_count);
info!("Skipped (already exist): {}", skipped_count);
info!("Errors: {}", error_count);
Ok(())
}

View File

@@ -0,0 +1,152 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
use image_api::parsers::search_html_parser::parse_search_html;
use log::{error, info, warn};
// Import the trait to use its methods
use image_api::database::SearchHistoryDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Search History data", long_about = None)]
struct Args {
/// Path to the search history HTML file
#[arg(short, long)]
path: String,
/// Skip searches that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for embedding generation (max 128 recommended)
#[arg(long, default_value = "64")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing search history file: {}", args.path);
let searches = parse_search_html(&args.path).context("Failed to parse search history HTML")?;
info!("Found {} search records", searches.len());
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
.or_else(|_| dotenv::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
let context = opentelemetry::Context::current();
let mut inserted_count = 0;
let mut skipped_count = 0;
let mut error_count = 0;
let mut dao_instance = SqliteSearchHistoryDao::new();
let created_at = Utc::now().timestamp();
// Process searches in batches (embeddings are REQUIRED for searches)
for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
info!(
"Processing batch {} ({} searches)...",
batch_idx + 1,
chunk.len()
);
// Generate embeddings for this batch
let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
let embeddings_result = tokio::task::spawn({
let ollama_client = ollama.clone();
async move {
// Generate embeddings sequentially, one query at a time, for the batch
let mut embeddings = Vec::new();
for query in &queries {
match ollama_client.generate_embedding(query).await {
Ok(emb) => embeddings.push(Some(emb)),
Err(e) => {
warn!("Failed to generate embedding for query '{}': {}", query, e);
embeddings.push(None);
}
}
}
embeddings
}
})
.await
.context("Failed to generate embeddings for batch")?;
// Build batch of searches with embeddings
let mut batch_inserts = Vec::new();
for (search, embedding_opt) in chunk.iter().zip(embeddings_result.iter()) {
// Check if search exists (optional for speed)
if args.skip_existing
&& let Ok(exists) =
dao_instance.search_exists(&context, search.timestamp, &search.query)
&& exists
{
skipped_count += 1;
continue;
}
// Only insert if we have an embedding
if let Some(embedding) = embedding_opt {
batch_inserts.push(InsertSearchRecord {
timestamp: search.timestamp,
query: search.query.clone(),
search_engine: search.search_engine.clone(),
embedding: embedding.clone(),
created_at,
source_file: Some(args.path.clone()),
});
} else {
error!(
"Skipping search '{}' due to missing embedding",
search.query
);
error_count += 1;
}
}
// Batch insert entire chunk in single transaction
if !batch_inserts.is_empty() {
match dao_instance.store_searches_batch(&context, batch_inserts) {
Ok(count) => {
inserted_count += count;
info!("Imported {} searches (total: {})...", count, inserted_count);
}
Err(e) => {
error!("Failed to store batch: {:?}", e);
error_count += chunk.len();
}
}
}
// Rate limiting between batches
if batch_idx < searches.len() / args.batch_size {
info!("Waiting 500ms before next batch...");
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
}
info!("\n=== Import Summary ===");
info!("Total searches found: {}", searches.len());
info!("Successfully inserted: {}", inserted_count);
info!("Skipped (already exist): {}", skipped_count);
info!("Errors: {}", error_count);
info!("All imported searches have embeddings for semantic search");
Ok(())
}

View File

@@ -3,7 +3,6 @@ use std::sync::{Arc, Mutex};
use chrono::Utc;
use clap::Parser;
use opentelemetry;
use rayon::prelude::*;
use walkdir::WalkDir;
@@ -102,11 +101,11 @@ fn main() -> anyhow::Result<()> {
width: exif_data.width,
height: exif_data.height,
orientation: exif_data.orientation,
gps_latitude: exif_data.gps_latitude,
gps_longitude: exif_data.gps_longitude,
gps_altitude: exif_data.gps_altitude,
focal_length: exif_data.focal_length,
aperture: exif_data.aperture,
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
focal_length: exif_data.focal_length.map(|v| v as f32),
aperture: exif_data.aperture.map(|v| v as f32),
shutter_speed: exif_data.shutter_speed,
iso: exif_data.iso,
date_taken: exif_data.date_taken,

View File

@@ -0,0 +1,288 @@
use anyhow::Result;
use chrono::NaiveDate;
use clap::Parser;
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
use std::env;
use std::sync::{Arc, Mutex};
#[derive(Parser, Debug)]
#[command(author, version, about = "Test daily summary generation with different models and prompts", long_about = None)]
struct Args {
/// Contact name to generate summaries for
#[arg(short, long)]
contact: String,
/// Start date (YYYY-MM-DD)
#[arg(short, long)]
start: String,
/// End date (YYYY-MM-DD)
#[arg(short, long)]
end: String,
/// Optional: Override the model to use (e.g., "qwen2.5:32b", "llama3.1:30b")
#[arg(short, long)]
model: Option<String>,
/// Test mode: Generate but don't save to database (shows output only)
#[arg(short = 't', long, default_value_t = false)]
test_mode: bool,
/// Show message count and preview
#[arg(short, long, default_value_t = false)]
verbose: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
// Load .env file
dotenv::dotenv().ok();
// Initialize logging
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let args = Args::parse();
// Parse dates
let start_date = NaiveDate::parse_from_str(&args.start, "%Y-%m-%d")
.expect("Invalid start date format. Use YYYY-MM-DD");
let end_date = NaiveDate::parse_from_str(&args.end, "%Y-%m-%d")
.expect("Invalid end date format. Use YYYY-MM-DD");
println!("========================================");
println!("Daily Summary Generation Test Tool");
println!("========================================");
println!("Contact: {}", args.contact);
println!("Date range: {} to {}", start_date, end_date);
println!("Days: {}", (end_date - start_date).num_days() + 1);
if let Some(ref model) = args.model {
println!("Model: {}", model);
} else {
println!(
"Model: {} (from env)",
env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
);
}
if args.test_mode {
println!("⚠ TEST MODE: Results will NOT be saved to database");
}
println!("========================================");
println!();
// Initialize AI clients
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL")
.or_else(|_| env::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
// Use provided model or fallback to env
let model_to_use = args.model.clone().unwrap_or_else(|| {
env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
});
let ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url.clone(),
model_to_use.clone(),
Some(model_to_use), // Use same model for fallback
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
let sms_api_token = env::var("SMS_API_TOKEN").ok();
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
// Initialize DAO
let summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Fetch messages for contact
println!("Fetching messages for {}...", args.contact);
let all_messages = sms_client
.fetch_all_messages_for_contact(&args.contact)
.await?;
println!(
"Found {} total messages for {}",
all_messages.len(),
args.contact
);
println!();
// Filter to date range and group by date
let mut messages_by_date = std::collections::HashMap::new();
for msg in all_messages {
if let Some(dt) = chrono::DateTime::from_timestamp(msg.timestamp, 0) {
let date = dt.date_naive();
if date >= start_date && date <= end_date {
messages_by_date
.entry(date)
.or_insert_with(Vec::new)
.push(msg);
}
}
}
if messages_by_date.is_empty() {
println!("⚠ No messages found in date range");
return Ok(());
}
println!("Found {} days with messages", messages_by_date.len());
println!();
// Sort dates
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
dates.sort();
// Process each day
for (idx, date) in dates.iter().enumerate() {
let messages = messages_by_date.get(date).unwrap();
let date_str = date.format("%Y-%m-%d").to_string();
let weekday = date.format("%A");
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
println!(
"Day {}/{}: {} ({}) - {} messages",
idx + 1,
dates.len(),
date_str,
weekday,
messages.len()
);
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
if args.verbose {
println!("\nMessage preview:");
for (i, msg) in messages.iter().take(3).enumerate() {
let sender = if msg.is_sent { "Me" } else { &msg.contact };
let preview = msg.body.chars().take(60).collect::<String>();
println!(" {}. {}: {}...", i + 1, sender, preview);
}
if messages.len() > 3 {
println!(" ... and {} more", messages.len() - 3);
}
println!();
}
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200)
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
args.contact,
args.contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
println!("Generating summary...");
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
println!("\n📝 GENERATED SUMMARY:");
println!("─────────────────────────────────────────");
println!("{}", summary.trim());
println!("─────────────────────────────────────────");
if !args.test_mode {
println!("\nStripping boilerplate for embedding...");
let stripped = strip_summary_boilerplate(&summary);
println!(
"Stripped: {}...",
stripped.chars().take(80).collect::<String>()
);
println!("\nGenerating embedding...");
let embedding = ollama.generate_embedding(&stripped).await?;
println!("✓ Embedding generated ({} dimensions)", embedding.len());
println!("Saving to database...");
let insert = InsertDailySummary {
date: date_str.clone(),
contact: args.contact.clone(),
summary: summary.trim().to_string(),
message_count: messages.len() as i32,
embedding,
created_at: chrono::Utc::now().timestamp(),
// model_version: "nomic-embed-text:v1.5".to_string(),
model_version: "mxbai-embed-large:335m".to_string(),
};
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let context = opentelemetry::Context::new();
match dao.store_summary(&context, insert) {
Ok(_) => println!("✓ Saved to database"),
Err(e) => println!("✗ Database error: {:?}", e),
}
} else {
println!("\n⚠ TEST MODE: Not saved to database");
}
println!();
// Rate limiting between days
if idx < dates.len() - 1 {
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
}
println!("========================================");
println!("✓ Complete!");
println!("Processed {} days", dates.len());
println!("========================================");
Ok(())
}

View File

@@ -167,8 +167,10 @@ pub enum PhotoSize {
#[derive(Debug, Deserialize)]
pub struct ThumbnailRequest {
pub(crate) path: String,
#[allow(dead_code)] // Part of API contract, may be used in future
pub(crate) size: Option<PhotoSize>,
#[serde(default)]
#[allow(dead_code)] // Part of API contract, may be used in future
pub(crate) format: Option<ThumbnailFormat>,
}
@@ -298,17 +300,17 @@ impl From<ImageExif> for ExifMetadata {
},
gps: if has_gps {
Some(GpsCoordinates {
latitude: exif.gps_latitude,
longitude: exif.gps_longitude,
altitude: exif.gps_altitude,
latitude: exif.gps_latitude.map(|v| v as f64),
longitude: exif.gps_longitude.map(|v| v as f64),
altitude: exif.gps_altitude.map(|v| v as f64),
})
} else {
None
},
capture_settings: if has_capture_settings {
Some(CaptureSettings {
focal_length: exif.focal_length,
aperture: exif.aperture,
focal_length: exif.focal_length.map(|v| v as f64),
aperture: exif.aperture.map(|v| v as f64),
shutter_speed: exif.shutter_speed,
iso: exif.iso,
})

View File

@@ -0,0 +1,554 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a calendar event
#[derive(Serialize, Clone, Debug)]
pub struct CalendarEvent {
pub id: i32,
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Option<String>, // JSON string
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new calendar event
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct InsertCalendarEvent {
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Option<String>,
pub embedding: Option<Vec<f32>>, // 768-dim, optional
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait CalendarEventDao: Sync + Send {
/// Store calendar event with optional embedding
fn store_event(
&mut self,
context: &opentelemetry::Context,
event: InsertCalendarEvent,
) -> Result<CalendarEvent, DbError>;
/// Batch insert events (for import efficiency)
fn store_events_batch(
&mut self,
context: &opentelemetry::Context,
events: Vec<InsertCalendarEvent>,
) -> Result<usize, DbError>;
/// Find events in time range (PRIMARY query method)
fn find_events_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Find semantically similar events (SECONDARY - requires embeddings)
fn find_similar_events(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Hybrid: Time-filtered + semantic ranking
/// "Events during photo timestamp ±N days, ranked by similarity to context"
fn find_relevant_events_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Check if event exists (idempotency)
fn event_exists(
&mut self,
context: &opentelemetry::Context,
event_uid: &str,
start_time: i64,
) -> Result<bool, DbError>;
/// Get count of events
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteCalendarEventDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteCalendarEventDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteCalendarEventDao {
pub fn new() -> Self {
SqliteCalendarEventDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
#[derive(QueryableByName)]
struct CalendarEventWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
event_uid: Option<String>,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
description: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
location: Option<String>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
start_time: i64,
#[diesel(sql_type = diesel::sql_types::BigInt)]
end_time: i64,
#[diesel(sql_type = diesel::sql_types::Bool)]
all_day: bool,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
organizer: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
attendees: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
embedding: Option<Vec<u8>>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl CalendarEventWithVectorRow {
fn to_calendar_event(&self) -> CalendarEvent {
CalendarEvent {
id: self.id,
event_uid: self.event_uid.clone(),
summary: self.summary.clone(),
description: self.description.clone(),
location: self.location.clone(),
start_time: self.start_time,
end_time: self.end_time,
all_day: self.all_day,
organizer: self.organizer.clone(),
attendees: self.attendees.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl CalendarEventDao for SqliteCalendarEventDao {
fn store_event(
&mut self,
context: &opentelemetry::Context,
event: InsertCalendarEvent,
) -> Result<CalendarEvent, DbError> {
trace_db_call(context, "insert", "store_event", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
// Validate embedding dimensions if provided
if let Some(ref emb) = event.embedding
&& emb.len() != 768
{
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
emb.len()
));
}
let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));
// INSERT OR REPLACE to handle re-imports
diesel::sql_query(
"INSERT OR REPLACE INTO calendar_events
(event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.event_uid)
.bind::<diesel::sql_types::Text, _>(&event.summary)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.description)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.location)
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
.bind::<diesel::sql_types::Bool, _>(event.all_day)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.organizer)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.attendees)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(CalendarEvent {
id: row_id,
event_uid: event.event_uid,
summary: event.summary,
description: event.description,
location: event.location,
start_time: event.start_time,
end_time: event.end_time,
all_day: event.all_day,
organizer: event.organizer,
attendees: event.attendees,
created_at: event.created_at,
source_file: event.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_events_batch(
&mut self,
context: &opentelemetry::Context,
events: Vec<InsertCalendarEvent>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_events_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for event in events {
// Validate embedding if provided
if let Some(ref emb) = event.embedding
&& emb.len() != 768
{
log::warn!(
"Skipping event with invalid embedding dimensions: {}",
emb.len()
);
continue;
}
let embedding_bytes =
event.embedding.as_ref().map(|e| Self::serialize_vector(e));
diesel::sql_query(
"INSERT OR REPLACE INTO calendar_events
(event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.event_uid,
)
.bind::<diesel::sql_types::Text, _>(&event.summary)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.description,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.location,
)
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
.bind::<diesel::sql_types::Bool, _>(event.all_day)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.organizer,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.attendees,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
&embedding_bytes,
)
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
inserted += 1;
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_events_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_events_in_range", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, NULL as embedding, created_at, source_file
FROM calendar_events
WHERE start_time >= ?1 AND start_time <= ?2
ORDER BY start_time ASC"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_events(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_similar_events", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all events with embeddings
let results = diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file
FROM calendar_events
WHERE embedding IS NOT NULL"
)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Compute similarities
let mut scored_events: Vec<(f32, CalendarEvent)> = results
.into_iter()
.filter_map(|row| {
if let Some(ref emb_bytes) = row.embedding {
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
let similarity = Self::cosine_similarity(query_embedding, &emb);
Some((similarity, row.to_calendar_event()))
} else {
None
}
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Found {} similar calendar events", scored_events.len());
if !scored_events.is_empty() {
log::info!("Top similarity: {:.4}", scored_events[0].0);
}
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_relevant_events_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_relevant_events_hybrid", |_span| {
let window_seconds = time_window_days * 86400;
let start_ts = center_timestamp - window_seconds;
let end_ts = center_timestamp + window_seconds;
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
// Step 1: Time-based filter (fast, indexed)
let events_in_range = diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file
FROM calendar_events
WHERE start_time >= ?1 AND start_time <= ?2"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_emb.len()
));
}
let mut scored_events: Vec<(f32, CalendarEvent)> = events_in_range
.into_iter()
.map(|row| {
// Events with embeddings get semantic scoring
let similarity = if let Some(ref emb_bytes) = row.embedding {
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
Self::cosine_similarity(query_emb, &emb)
} else {
0.5 // Neutral score for deserialization errors
}
} else {
0.5 // Neutral score for events without embeddings
};
(similarity, row.to_calendar_event())
})
.collect();
// Sort by similarity descending
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Hybrid query: {} events in time range, ranked by similarity", scored_events.len());
if !scored_events.is_empty() {
log::info!("Top similarity: {:.4}", scored_events[0].0);
}
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
} else {
// No semantic ranking, just return time-sorted (limit applied)
log::info!("Time-only query: {} events in range", events_in_range.len());
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
}
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn event_exists(
&mut self,
context: &opentelemetry::Context,
event_uid: &str,
start_time: i64,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "event_exists", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM calendar_events WHERE event_uid = ?1 AND start_time = ?2"
)
.bind::<diesel::sql_types::Text, _>(event_uid)
.bind::<diesel::sql_types::BigInt, _>(start_time)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_event_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM calendar_events")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
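
A minimal calling sketch for the hybrid lookup above (illustrative only; the function name, window size, and limit are hypothetical, and it assumes `CalendarEventDao` is in scope and an `opentelemetry::Context` is available, e.g. via `Context::current()`):

```
// Hypothetical caller: fetch calendar context for a photo taken at `photo_ts`,
// ranked against a 768-dim context embedding produced elsewhere.
fn calendar_context_for_photo(
    dao: &mut SqliteCalendarEventDao,
    photo_ts: i64,
    context_embedding: &[f32],
) -> Result<Vec<CalendarEvent>, DbError> {
    let ctx = opentelemetry::Context::current();
    // ±3-day window around the photo, top 5 events by cosine similarity.
    dao.find_relevant_events_hybrid(&ctx, photo_ts, 3, Some(context_embedding), 5)
}
```

Events without embeddings still appear in the window with the neutral 0.5 score, so the caller does not need to special-case them.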

View File

@@ -0,0 +1,489 @@
use chrono::NaiveDate;
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a daily conversation summary
#[derive(Serialize, Clone, Debug)]
pub struct DailySummary {
pub id: i32,
pub date: String,
pub contact: String,
pub summary: String,
pub message_count: i32,
pub created_at: i64,
pub model_version: String,
}
/// Data for inserting a new daily summary
#[derive(Clone, Debug)]
pub struct InsertDailySummary {
pub date: String,
pub contact: String,
pub summary: String,
pub message_count: i32,
pub embedding: Vec<f32>,
pub created_at: i64,
pub model_version: String,
}
pub trait DailySummaryDao: Sync + Send {
/// Store a daily summary with its embedding
fn store_summary(
&mut self,
context: &opentelemetry::Context,
summary: InsertDailySummary,
) -> Result<DailySummary, DbError>;
/// Find semantically similar daily summaries using vector similarity
fn find_similar_summaries(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<DailySummary>, DbError>;
/// Find semantically similar daily summaries with time-based weighting
/// Combines cosine similarity with temporal proximity to target_date
/// Final score = similarity * sqrt(time_weight), where time_weight halves every 30 days of distance from target_date
fn find_similar_summaries_with_time_weight(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
target_date: &str,
limit: usize,
) -> Result<Vec<DailySummary>, DbError>;
/// Check if a summary exists for a given date and contact
fn summary_exists(
&mut self,
context: &opentelemetry::Context,
date: &str,
contact: &str,
) -> Result<bool, DbError>;
/// Get count of summaries for a contact
fn get_summary_count(
&mut self,
context: &opentelemetry::Context,
contact: &str,
) -> Result<i64, DbError>;
}
pub struct SqliteDailySummaryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteDailySummaryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteDailySummaryDao {
pub fn new() -> Self {
SqliteDailySummaryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
impl DailySummaryDao for SqliteDailySummaryDao {
fn store_summary(
&mut self,
context: &opentelemetry::Context,
summary: InsertDailySummary,
) -> Result<DailySummary, DbError> {
trace_db_call(context, "insert", "store_summary", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
// Validate embedding dimensions
if summary.embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
summary.embedding.len()
));
}
let embedding_bytes = Self::serialize_vector(&summary.embedding);
// INSERT OR REPLACE to handle updates if summary needs regeneration
diesel::sql_query(
"INSERT OR REPLACE INTO daily_conversation_summaries
(date, contact, summary, message_count, embedding, created_at, model_version)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
)
.bind::<diesel::sql_types::Text, _>(&summary.date)
.bind::<diesel::sql_types::Text, _>(&summary.contact)
.bind::<diesel::sql_types::Text, _>(&summary.summary)
.bind::<diesel::sql_types::Integer, _>(summary.message_count)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(summary.created_at)
.bind::<diesel::sql_types::Text, _>(&summary.model_version)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id as i32)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(DailySummary {
id: row_id,
date: summary.date,
contact: summary.contact,
summary: summary.summary,
message_count: summary.message_count,
created_at: summary.created_at,
model_version: summary.model_version,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_similar_summaries(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<DailySummary>, DbError> {
trace_db_call(context, "query", "find_similar_summaries", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all summaries with embeddings
let results = diesel::sql_query(
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
FROM daily_conversation_summaries"
)
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
log::info!("Loaded {} daily summaries for similarity comparison", results.len());
// Compute similarity for each summary
let mut scored_summaries: Vec<(f32, DailySummary)> = results
.into_iter()
.filter_map(|row| {
match Self::deserialize_vector(&row.embedding) {
Ok(embedding) => {
let similarity = Self::cosine_similarity(query_embedding, &embedding);
Some((
similarity,
DailySummary {
id: row.id,
date: row.date,
contact: row.contact,
summary: row.summary,
message_count: row.message_count,
created_at: row.created_at,
model_version: row.model_version,
},
))
}
Err(e) => {
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
None
}
}
})
.collect();
// Sort by similarity (highest first)
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
// Filter out poor matches (similarity < 0.3 is likely noise)
scored_summaries.retain(|(similarity, _)| *similarity >= 0.3);
// Log similarity distribution
if !scored_summaries.is_empty() {
let top_score = scored_summaries.first().map(|(s, _)| *s).unwrap_or(0.0);
let median_score = scored_summaries.get(scored_summaries.len() / 2).map(|(s, _)| *s).unwrap_or(0.0);
log::info!(
"Daily summary similarity - Top: {:.3}, Median: {:.3}, Count: {} (after 0.3 threshold)",
top_score,
median_score,
scored_summaries.len()
);
} else {
log::warn!("No daily summaries met the 0.3 similarity threshold");
}
// Take top N and log matches
let top_results: Vec<DailySummary> = scored_summaries
.into_iter()
.take(limit)
.map(|(similarity, summary)| {
log::info!(
"Summary match: similarity={:.3}, date={}, contact={}, summary=\"{}\"",
similarity,
summary.date,
summary.contact,
summary.summary.chars().take(100).collect::<String>()
);
summary
})
.collect();
Ok(top_results)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_summaries_with_time_weight(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
target_date: &str,
limit: usize,
) -> Result<Vec<DailySummary>, DbError> {
trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Parse target date
let target = NaiveDate::parse_from_str(target_date, "%Y-%m-%d")
.map_err(|e| anyhow::anyhow!("Invalid target date: {}", e))?;
// Load all summaries with embeddings
let results = diesel::sql_query(
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
FROM daily_conversation_summaries"
)
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
log::info!("Loaded {} daily summaries for time-weighted similarity (target: {})", results.len(), target_date);
// Compute time-weighted similarity for each summary
// Score = cosine_similarity * sqrt(time_weight)
// time_weight = 0.5^(days_distance / 30), i.e. exponential decay with a 30-day half-life
let mut scored_summaries: Vec<(f32, f32, i64, DailySummary)> = results
.into_iter()
.filter_map(|row| {
match Self::deserialize_vector(&row.embedding) {
Ok(embedding) => {
let similarity = Self::cosine_similarity(query_embedding, &embedding);
// Calculate time weight
let summary_date = NaiveDate::parse_from_str(&row.date, "%Y-%m-%d").ok()?;
let days_distance = (target - summary_date).num_days().abs();
// Exponential decay with 30-day half-life
// At 0 days: weight = 1.0
// At 30 days: weight = 0.5
// At 60 days: weight = 0.25
// At 365 days: weight ~= 0.0002
let time_weight = 0.5_f32.powf(days_distance as f32 / 30.0);
// Combined score - but ensure semantic similarity still matters
// We use sqrt to soften the time weight's impact
let combined_score = similarity * time_weight.sqrt();
Some((
combined_score,
similarity,
days_distance,
DailySummary {
id: row.id,
date: row.date,
contact: row.contact,
summary: row.summary,
message_count: row.message_count,
created_at: row.created_at,
model_version: row.model_version,
},
))
}
Err(e) => {
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
None
}
}
})
.collect();
// Sort by combined score (highest first)
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
// Filter out poor matches (base similarity < 0.5 - stricter than before since we have time weighting)
scored_summaries.retain(|(_, similarity, _, _)| *similarity >= 0.5);
// Log similarity distribution
if !scored_summaries.is_empty() {
let (top_combined, top_sim, top_days, _) = &scored_summaries[0];
log::info!(
"Time-weighted similarity - Top: combined={:.3} (sim={:.3}, days={}), Count: {} matches",
top_combined,
top_sim,
top_days,
scored_summaries.len()
);
} else {
log::warn!("No daily summaries met the 0.5 similarity threshold");
}
// Take top N and log matches
let top_results: Vec<DailySummary> = scored_summaries
.into_iter()
.take(limit)
.map(|(combined, similarity, days, summary)| {
log::info!(
"Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"",
combined,
similarity,
days,
summary.date,
summary.contact,
summary.summary.chars().take(80).collect::<String>()
);
summary
})
.collect();
Ok(top_results)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn summary_exists(
&mut self,
context: &opentelemetry::Context,
date: &str,
contact: &str,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "summary_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
let count = diesel::sql_query(
"SELECT COUNT(*) as count FROM daily_conversation_summaries
WHERE date = ?1 AND contact = ?2",
)
.bind::<diesel::sql_types::Text, _>(date)
.bind::<diesel::sql_types::Text, _>(contact)
.get_result::<CountResult>(conn.deref_mut())
.map(|r| r.count)
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))?;
Ok(count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_summary_count(
&mut self,
context: &opentelemetry::Context,
contact: &str,
) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_summary_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
diesel::sql_query(
"SELECT COUNT(*) as count FROM daily_conversation_summaries WHERE contact = ?1",
)
.bind::<diesel::sql_types::Text, _>(contact)
.get_result::<CountResult>(conn.deref_mut())
.map(|r| r.count)
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
// Helper structs for raw SQL queries
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::BigInt)]
id: i64,
}
#[derive(QueryableByName)]
struct DailySummaryWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
date: String,
#[diesel(sql_type = diesel::sql_types::Text)]
contact: String,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Integer)]
message_count: i32,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Text)]
model_version: String,
}
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
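
Restating the time-weighted scoring used above as a standalone sketch (example numbers only, not taken from real data):

```
// Weight halves every 30 days; sqrt() softens the temporal penalty so
// semantic similarity still dominates the final ranking.
fn time_weight(days_distance: i64) -> f32 {
    0.5_f32.powf(days_distance as f32 / 30.0)
}

fn combined_score(similarity: f32, days_distance: i64) -> f32 {
    similarity * time_weight(days_distance).sqrt()
}

// combined_score(0.8, 0)  == 0.80
// combined_score(0.8, 30) ~= 0.57
// combined_score(0.8, 60) == 0.40
```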

View File

@@ -0,0 +1,133 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{InsertPhotoInsight, PhotoInsight};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
pub trait InsightDao: Sync + Send {
fn store_insight(
&mut self,
context: &opentelemetry::Context,
insight: InsertPhotoInsight,
) -> Result<PhotoInsight, DbError>;
fn get_insight(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<Option<PhotoInsight>, DbError>;
fn delete_insight(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<(), DbError>;
fn get_all_insights(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<PhotoInsight>, DbError>;
}
pub struct SqliteInsightDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteInsightDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteInsightDao {
pub fn new() -> Self {
SqliteInsightDao {
connection: Arc::new(Mutex::new(connect())),
}
}
}
impl InsightDao for SqliteInsightDao {
fn store_insight(
&mut self,
context: &opentelemetry::Context,
insight: InsertPhotoInsight,
) -> Result<PhotoInsight, DbError> {
trace_db_call(context, "insert", "store_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
// Insert or replace on conflict (UNIQUE constraint on file_path)
diesel::replace_into(photo_insights)
.values(&insight)
.execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Insert error"))?;
// Retrieve the inserted record
photo_insights
.filter(file_path.eq(&insight.file_path))
.first::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn get_insight(
&mut self,
context: &opentelemetry::Context,
path: &str,
) -> Result<Option<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.filter(file_path.eq(path))
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn delete_insight(
&mut self,
context: &opentelemetry::Context,
path: &str,
) -> Result<(), DbError> {
trace_db_call(context, "delete", "delete_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
diesel::delete(photo_insights.filter(file_path.eq(path)))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_all_insights(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_all_insights", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
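
A minimal usage sketch for the insight cache above (illustrative only; the file path, timestamp, and model name are made up, and it assumes `InsightDao` and `InsertPhotoInsight` are in scope):

```
fn cache_insight_example(dao: &mut SqliteInsightDao) -> Result<(), DbError> {
    let ctx = opentelemetry::Context::current();
    // Store (or replace) the insight for this photo.
    dao.store_insight(
        &ctx,
        InsertPhotoInsight {
            file_path: "photos/2024/beach.jpg".to_string(),
            title: "Beach day".to_string(),
            summary: "Afternoon at the coast with friends.".to_string(),
            generated_at: 1_700_000_000,
            model_version: "example-model".to_string(),
        },
    )?;
    // A later regeneration pass can check the cache first.
    let cached = dao.get_insight(&ctx, "photos/2024/beach.jpg")?;
    assert!(cached.is_some());
    Ok(())
}
```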

View File

@@ -0,0 +1,528 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a location history record
#[derive(Serialize, Clone, Debug)]
pub struct LocationRecord {
pub id: i32,
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
pub place_name: Option<String>,
pub place_category: Option<String>,
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new location record
#[derive(Clone, Debug)]
pub struct InsertLocationRecord {
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
pub place_name: Option<String>,
pub place_category: Option<String>,
pub embedding: Option<Vec<f32>>, // 768-dim, optional (rarely used)
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait LocationHistoryDao: Sync + Send {
/// Store single location record
fn store_location(
&mut self,
context: &opentelemetry::Context,
location: InsertLocationRecord,
) -> Result<LocationRecord, DbError>;
/// Batch insert locations (Google Takeout has millions of points)
fn store_locations_batch(
&mut self,
context: &opentelemetry::Context,
locations: Vec<InsertLocationRecord>,
) -> Result<usize, DbError>;
/// Find nearest location to timestamp (PRIMARY query)
/// "Where was I at photo timestamp ±N minutes?"
fn find_nearest_location(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
max_time_diff_seconds: i64,
) -> Result<Option<LocationRecord>, DbError>;
/// Find locations in time range
fn find_locations_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<LocationRecord>, DbError>;
/// Find locations near GPS coordinates (for "photos near this place")
/// Uses approximate bounding box for performance
fn find_locations_near_point(
&mut self,
context: &opentelemetry::Context,
latitude: f64,
longitude: f64,
radius_km: f64,
) -> Result<Vec<LocationRecord>, DbError>;
/// Deduplicate: check if location exists
fn location_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
latitude: f64,
longitude: f64,
) -> Result<bool, DbError>;
/// Get count of location records
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteLocationHistoryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteLocationHistoryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteLocationHistoryDao {
pub fn new() -> Self {
SqliteLocationHistoryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
/// Haversine distance calculation (in kilometers)
/// Used for filtering locations by proximity to a point
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
const R: f64 = 6371.0; // Earth radius in km
let d_lat = (lat2 - lat1).to_radians();
let d_lon = (lon2 - lon1).to_radians();
let a = (d_lat / 2.0).sin().powi(2)
+ lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
R * c
}
/// Calculate approximate bounding box for spatial queries
/// Returns (min_lat, max_lat, min_lon, max_lon)
fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
const KM_PER_DEGREE_LAT: f64 = 111.0;
// Note: cos(lat) approaches 0 near the poles, which widens the box toward all
// longitudes; callers refine results with the Haversine post-filter.
let km_per_degree_lon = 111.0 * lat.to_radians().cos();
let delta_lat = radius_km / KM_PER_DEGREE_LAT;
let delta_lon = radius_km / km_per_degree_lon;
(
lat - delta_lat, // min_lat
lat + delta_lat, // max_lat
lon - delta_lon, // min_lon
lon + delta_lon, // max_lon
)
}
}
#[derive(QueryableByName)]
struct LocationRecordRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::BigInt)]
timestamp: i64,
#[diesel(sql_type = diesel::sql_types::Float)]
latitude: f32,
#[diesel(sql_type = diesel::sql_types::Float)]
longitude: f32,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
accuracy: Option<i32>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
activity: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
activity_confidence: Option<i32>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
place_name: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
place_category: Option<String>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl LocationRecordRow {
fn to_location_record(&self) -> LocationRecord {
LocationRecord {
id: self.id,
timestamp: self.timestamp,
latitude: self.latitude as f64,
longitude: self.longitude as f64,
accuracy: self.accuracy,
activity: self.activity.clone(),
activity_confidence: self.activity_confidence,
place_name: self.place_name.clone(),
place_category: self.place_category.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl LocationHistoryDao for SqliteLocationHistoryDao {
fn store_location(
&mut self,
context: &opentelemetry::Context,
location: InsertLocationRecord,
) -> Result<LocationRecord, DbError> {
trace_db_call(context, "insert", "store_location", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
// Validate embedding dimensions if provided (rare for location data)
if let Some(ref emb) = location.embedding
&& emb.len() != 768
{
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
emb.len()
));
}
let embedding_bytes = location
.embedding
.as_ref()
.map(|e| Self::serialize_vector(e));
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+lat+lon)
diesel::sql_query(
"INSERT OR IGNORE INTO location_history
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(&location.accuracy)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.activity)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.activity_confidence,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.place_name)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_category,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(LocationRecord {
id: row_id,
timestamp: location.timestamp,
latitude: location.latitude,
longitude: location.longitude,
accuracy: location.accuracy,
activity: location.activity,
activity_confidence: location.activity_confidence,
place_name: location.place_name,
place_category: location.place_category,
created_at: location.created_at,
source_file: location.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_locations_batch(
&mut self,
context: &opentelemetry::Context,
locations: Vec<InsertLocationRecord>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_locations_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for location in locations {
// Validate embedding if provided (rare)
if let Some(ref emb) = location.embedding
&& emb.len() != 768
{
log::warn!(
"Skipping location with invalid embedding dimensions: {}",
emb.len()
);
continue;
}
let embedding_bytes = location
.embedding
.as_ref()
.map(|e| Self::serialize_vector(e));
let rows_affected = diesel::sql_query(
"INSERT OR IGNORE INTO location_history
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.accuracy,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.activity,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.activity_confidence,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_name,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_category,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
&embedding_bytes,
)
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
if rows_affected > 0 {
inserted += 1;
}
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_nearest_location(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
max_time_diff_seconds: i64,
) -> Result<Option<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_nearest_location", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
let start_ts = timestamp - max_time_diff_seconds;
let end_ts = timestamp + max_time_diff_seconds;
// Find location closest to target timestamp within window
let results = diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY ABS(timestamp - ?3) ASC
LIMIT 1"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.load::<LocationRecordRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(results.into_iter().next().map(|r| r.to_location_record()))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_locations_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_locations_in_range", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY timestamp ASC"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<LocationRecordRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_locations_near_point(
&mut self,
context: &opentelemetry::Context,
latitude: f64,
longitude: f64,
radius_km: f64,
) -> Result<Vec<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_locations_near_point", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
// Use bounding box for initial filter (fast, indexed)
let (min_lat, max_lat, min_lon, max_lon) =
Self::bounding_box(latitude, longitude, radius_km);
let results = diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE latitude >= ?1 AND latitude <= ?2
AND longitude >= ?3 AND longitude <= ?4"
)
.bind::<diesel::sql_types::Float, _>(min_lat as f32)
.bind::<diesel::sql_types::Float, _>(max_lat as f32)
.bind::<diesel::sql_types::Float, _>(min_lon as f32)
.bind::<diesel::sql_types::Float, _>(max_lon as f32)
.load::<LocationRecordRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Refine with Haversine distance (in-memory, post-filter)
let filtered: Vec<LocationRecord> = results
.into_iter()
.map(|r| r.to_location_record())
.filter(|loc| {
let distance =
Self::haversine_distance(latitude, longitude, loc.latitude, loc.longitude);
distance <= radius_km
})
.collect();
log::info!(
"Found {} locations within {} km of ({}, {})",
filtered.len(),
radius_km,
latitude,
longitude
);
Ok(filtered)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn location_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
latitude: f64,
longitude: f64,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "location_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM location_history
WHERE timestamp = ?1 AND latitude = ?2 AND longitude = ?3",
)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.bind::<diesel::sql_types::Float, _>(latitude as f32)
.bind::<diesel::sql_types::Float, _>(longitude as f32)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_location_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM location_history")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
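
A minimal calling sketch for the nearest-location lookup (illustrative only; the function name and the 30-minute window are hypothetical, and it assumes `LocationHistoryDao` is in scope):

```
// Hypothetical caller: "where was I when this photo was taken?"
// Accepts the closest fix within ±30 minutes of the photo timestamp.
fn place_for_photo(
    dao: &mut SqliteLocationHistoryDao,
    photo_ts: i64,
) -> Result<Option<String>, DbError> {
    let ctx = opentelemetry::Context::current();
    let nearest = dao.find_nearest_location(&ctx, photo_ts, 30 * 60)?;
    Ok(nearest.and_then(|loc| loc.place_name))
}
```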

View File

@@ -9,8 +9,19 @@ use crate::database::models::{
};
use crate::otel::trace_db_call;
pub mod calendar_dao;
pub mod daily_summary_dao;
pub mod insights_dao;
pub mod location_dao;
pub mod models;
pub mod schema;
pub mod search_dao;
pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
pub use insights_dao::{InsightDao, SqliteInsightDao};
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
pub trait UserDao {
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
@@ -339,8 +350,13 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
// Try both normalized (forward slash) and Windows (backslash) paths
// since database may contain either format
let normalized = path.replace('\\', "/");
let windows_path = path.replace('/', "\\");
match image_exif
.filter(file_path.eq(path))
.filter(file_path.eq(&normalized).or(file_path.eq(&windows_path)))
.first::<ImageExif>(connection.deref_mut())
{
Ok(exif) => Ok(Some(exif)),
@@ -478,8 +494,8 @@ impl ExifDao for SqliteExifDao {
// GPS bounding box
if let Some((min_lat, max_lat, min_lon, max_lon)) = gps_bounds {
query = query
.filter(gps_latitude.between(min_lat, max_lat))
.filter(gps_longitude.between(min_lon, max_lon))
.filter(gps_latitude.between(min_lat as f32, max_lat as f32))
.filter(gps_longitude.between(min_lon as f32, max_lon as f32))
.filter(gps_latitude.is_not_null())
.filter(gps_longitude.is_not_null());
}
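
A small sketch of the dual-path lookup keys introduced in the first hunk above (the example path is hypothetical):

```
fn lookup_keys(path: &str) -> (String, String) {
    // The query matches either form, so rows stored with / or \ are both found.
    (path.replace('\\', "/"), path.replace('/', "\\"))
}

// lookup_keys(r"photos\2024\beach.jpg").0 == "photos/2024/beach.jpg"
```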

View File

@@ -1,4 +1,4 @@
use crate::database::schema::{favorites, image_exif, users};
use crate::database::schema::{favorites, image_exif, photo_insights, users};
use serde::Serialize;
#[derive(Insertable)]
@@ -40,11 +40,11 @@ pub struct InsertImageExif {
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f64>,
pub gps_longitude: Option<f64>,
pub gps_altitude: Option<f64>,
pub focal_length: Option<f64>,
pub aperture: Option<f64>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
@@ -62,14 +62,34 @@ pub struct ImageExif {
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f64>,
pub gps_longitude: Option<f64>,
pub gps_altitude: Option<f64>,
pub focal_length: Option<f64>,
pub aperture: Option<f64>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
}
#[derive(Insertable)]
#[diesel(table_name = photo_insights)]
pub struct InsertPhotoInsight {
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct PhotoInsight {
pub id: i32,
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}

View File

@@ -1,4 +1,37 @@
table! {
// @generated automatically by Diesel CLI.
diesel::table! {
calendar_events (id) {
id -> Integer,
event_uid -> Nullable<Text>,
summary -> Text,
description -> Nullable<Text>,
location -> Nullable<Text>,
start_time -> BigInt,
end_time -> BigInt,
all_day -> Bool,
organizer -> Nullable<Text>,
attendees -> Nullable<Text>,
embedding -> Nullable<Binary>,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
daily_conversation_summaries (id) {
id -> Integer,
date -> Text,
contact -> Text,
summary -> Text,
message_count -> Integer,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
favorites (id) {
id -> Integer,
userid -> Integer,
@@ -6,7 +39,7 @@ table! {
}
}
table! {
diesel::table! {
image_exif (id) {
id -> Integer,
file_path -> Text,
@@ -16,11 +49,11 @@ table! {
width -> Nullable<Integer>,
height -> Nullable<Integer>,
orientation -> Nullable<Integer>,
gps_latitude -> Nullable<Double>,
gps_longitude -> Nullable<Double>,
gps_altitude -> Nullable<Double>,
focal_length -> Nullable<Double>,
aperture -> Nullable<Double>,
gps_latitude -> Nullable<Float>,
gps_longitude -> Nullable<Float>,
gps_altitude -> Nullable<Float>,
focal_length -> Nullable<Float>,
aperture -> Nullable<Float>,
shutter_speed -> Nullable<Text>,
iso -> Nullable<Integer>,
date_taken -> Nullable<BigInt>,
@@ -29,7 +62,72 @@ table! {
}
}
table! {
diesel::table! {
knowledge_embeddings (id) {
id -> Integer,
keyword -> Text,
description -> Text,
category -> Nullable<Text>,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
location_history (id) {
id -> Integer,
timestamp -> BigInt,
latitude -> Float,
longitude -> Float,
accuracy -> Nullable<Integer>,
activity -> Nullable<Text>,
activity_confidence -> Nullable<Integer>,
place_name -> Nullable<Text>,
place_category -> Nullable<Text>,
embedding -> Nullable<Binary>,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
message_embeddings (id) {
id -> Integer,
contact -> Text,
body -> Text,
timestamp -> BigInt,
is_sent -> Bool,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
photo_insights (id) {
id -> Integer,
file_path -> Text,
title -> Text,
summary -> Text,
generated_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
search_history (id) {
id -> Integer,
timestamp -> BigInt,
query -> Text,
search_engine -> Nullable<Text>,
embedding -> Binary,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
tagged_photo (id) {
id -> Integer,
photo_name -> Text,
@@ -38,7 +136,7 @@ table! {
}
}
table! {
diesel::table! {
tags (id) {
id -> Integer,
name -> Text,
@@ -46,7 +144,7 @@ table! {
}
}
table! {
diesel::table! {
users (id) {
id -> Integer,
username -> Text,
@@ -54,6 +152,19 @@ table! {
}
}
joinable!(tagged_photo -> tags (tag_id));
diesel::joinable!(tagged_photo -> tags (tag_id));
allow_tables_to_appear_in_same_query!(favorites, image_exif, tagged_photo, tags, users,);
diesel::allow_tables_to_appear_in_same_query!(
calendar_events,
daily_conversation_summaries,
favorites,
image_exif,
knowledge_embeddings,
location_history,
message_embeddings,
photo_insights,
search_history,
tagged_photo,
tags,
users,
);
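
For reference, the blob layout the `embedding -> Binary` columns are read with elsewhere in this PR is 768 f32 values decoded via `f32::from_le_bytes`, i.e. 3072 bytes per vector. A small standalone round-trip sketch, independent of the DAO helpers:

```
fn roundtrip(embedding: &[f32; 768]) -> Vec<f32> {
    let bytes: Vec<u8> = embedding.iter().flat_map(|f| f.to_le_bytes()).collect();
    assert_eq!(bytes.len(), 768 * 4); // 3072 bytes, as stored in the Binary column
    bytes
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
        .collect()
}
```

On little-endian targets this matches the `zerocopy::IntoBytes` serialization used by the DAOs.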

516
src/database/search_dao.rs Normal file
View File

@@ -0,0 +1,516 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a search history record
#[derive(Serialize, Clone, Debug)]
pub struct SearchRecord {
pub id: i32,
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new search record
#[derive(Clone, Debug)]
pub struct InsertSearchRecord {
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
pub embedding: Vec<f32>, // 768-dim, REQUIRED
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait SearchHistoryDao: Sync + Send {
/// Store search with embedding
fn store_search(
&mut self,
context: &opentelemetry::Context,
search: InsertSearchRecord,
) -> Result<SearchRecord, DbError>;
/// Batch insert searches
fn store_searches_batch(
&mut self,
context: &opentelemetry::Context,
searches: Vec<InsertSearchRecord>,
) -> Result<usize, DbError>;
/// Find searches in time range (for temporal context)
fn find_searches_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<SearchRecord>, DbError>;
/// Find semantically similar searches (PRIMARY - embeddings shine here)
fn find_similar_searches(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<SearchRecord>, DbError>;
/// Hybrid: Time window + semantic ranking
fn find_relevant_searches_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<SearchRecord>, DbError>;
/// Deduplication check
fn search_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
query: &str,
) -> Result<bool, DbError>;
/// Get count of search records
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteSearchHistoryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteSearchHistoryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteSearchHistoryDao {
pub fn new() -> Self {
SqliteSearchHistoryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
#[derive(QueryableByName)]
struct SearchRecordWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::BigInt)]
timestamp: i64,
#[diesel(sql_type = diesel::sql_types::Text)]
query: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
search_engine: Option<String>,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl SearchRecordWithVectorRow {
fn to_search_record(&self) -> SearchRecord {
SearchRecord {
id: self.id,
timestamp: self.timestamp,
query: self.query.clone(),
search_engine: self.search_engine.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl SearchHistoryDao for SqliteSearchHistoryDao {
fn store_search(
&mut self,
context: &opentelemetry::Context,
search: InsertSearchRecord,
) -> Result<SearchRecord, DbError> {
trace_db_call(context, "insert", "store_search", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
// Validate embedding dimensions (REQUIRED for searches)
if search.embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
search.embedding.len()
));
}
let embedding_bytes = Self::serialize_vector(&search.embedding);
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+query)
diesel::sql_query(
"INSERT OR IGNORE INTO search_history
(timestamp, query, search_engine, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
)
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
.bind::<diesel::sql_types::Text, _>(&search.query)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.search_engine)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(SearchRecord {
id: row_id,
timestamp: search.timestamp,
query: search.query,
search_engine: search.search_engine,
created_at: search.created_at,
source_file: search.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_searches_batch(
&mut self,
context: &opentelemetry::Context,
searches: Vec<InsertSearchRecord>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_searches_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for search in searches {
// Validate embedding (REQUIRED)
if search.embedding.len() != 768 {
log::warn!(
"Skipping search with invalid embedding dimensions: {}",
search.embedding.len()
);
continue;
}
let embedding_bytes = Self::serialize_vector(&search.embedding);
let rows_affected = diesel::sql_query(
"INSERT OR IGNORE INTO search_history
(timestamp, query, search_engine, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
)
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
.bind::<diesel::sql_types::Text, _>(&search.query)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&search.search_engine,
)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&search.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
if rows_affected > 0 {
inserted += 1;
}
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_searches_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_searches_in_range", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY timestamp DESC",
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_searches(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_similar_searches", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all searches with embeddings
let results = diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history",
)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Compute similarities
let mut scored_searches: Vec<(f32, SearchRecord)> = results
.into_iter()
.filter_map(|row| {
if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
let similarity = Self::cosine_similarity(query_embedding, &emb);
Some((similarity, row.to_search_record()))
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_searches
.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Found {} similar searches", scored_searches.len());
if !scored_searches.is_empty() {
log::info!(
"Top similarity: {:.4} for query: '{}'",
scored_searches[0].0,
scored_searches[0].1.query
);
}
Ok(scored_searches
.into_iter()
.take(limit)
.map(|(_, search)| search)
.collect())
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_relevant_searches_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_relevant_searches_hybrid", |_span| {
let window_seconds = time_window_days * 86400;
let start_ts = center_timestamp - window_seconds;
let end_ts = center_timestamp + window_seconds;
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
// Step 1: Time-based filter (fast, indexed)
let searches_in_range = diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history
WHERE timestamp >= ?1 AND timestamp <= ?2",
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_emb.len()
));
}
let mut scored_searches: Vec<(f32, SearchRecord)> = searches_in_range
.into_iter()
.filter_map(|row| {
if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
let similarity = Self::cosine_similarity(query_emb, &emb);
Some((similarity, row.to_search_record()))
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_searches
.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!(
"Hybrid query: {} searches in time range, ranked by similarity",
scored_searches.len()
);
if !scored_searches.is_empty() {
log::info!(
"Top similarity: {:.4} for '{}'",
scored_searches[0].0,
scored_searches[0].1.query
);
}
Ok(scored_searches
.into_iter()
.take(limit)
.map(|(_, search)| search)
.collect())
} else {
// No semantic ranking, just return time-sorted (most recent first)
log::info!(
"Time-only query: {} searches in range",
searches_in_range.len()
);
Ok(searches_in_range
.into_iter()
.take(limit)
.map(|r| r.to_search_record())
.collect())
}
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn search_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
query: &str,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "search_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM search_history WHERE timestamp = ?1 AND query = ?2",
)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.bind::<diesel::sql_types::Text, _>(query)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_search_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM search_history")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
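
A hedged call-site sketch for the hybrid lookup implemented above; the `image_api::database` module path and the surrounding `dao` handle are assumptions, and any supplied embedding must be 768-dimensional.

```rust
use image_api::database::SearchHistoryDao; // module path assumed from crate layout

// Fetch up to 20 searches within +/- 7 days of a photo's timestamp,
// semantically re-ranked when a query embedding is supplied.
fn searches_around_photo(
    dao: &mut dyn SearchHistoryDao,
    photo_ts: i64,
    embedding: Option<&[f32]>, // must be 768 dimensions when present
) {
    let ctx = opentelemetry::Context::current();
    match dao.find_relevant_searches_hybrid(&ctx, photo_ts, 7, embedding, 20) {
        Ok(searches) => println!("{} related searches found", searches.len()),
        Err(_) => eprintln!("hybrid search lookup failed"),
    }
}
```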

View File

@@ -35,18 +35,21 @@ pub fn is_media_file(path: &Path) -> bool {
}
/// Check if a DirEntry is an image file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_image(entry: &DirEntry) -> bool {
is_image_file(&entry.path())
is_image_file(entry.path())
}
/// Check if a DirEntry is a video file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_video(entry: &DirEntry) -> bool {
is_video_file(&entry.path())
is_video_file(entry.path())
}
/// Check if a DirEntry is a media file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_media(entry: &DirEntry) -> bool {
is_media_file(&entry.path())
is_media_file(entry.path())
}
#[cfg(test)]

View File

@@ -1,3 +1,6 @@
use ::anyhow;
use actix::{Handler, Message};
use anyhow::{Context, anyhow};
use std::collections::HashSet;
use std::fmt::Debug;
use std::fs::read_dir;
@@ -5,10 +8,7 @@ use std::io;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use ::anyhow;
use actix::{Handler, Message};
use anyhow::{Context, anyhow};
use std::time::SystemTime;
use crate::data::{Claims, FilesRequest, FilterMode, MediaType, PhotosResponse, SortType};
use crate::database::ExifDao;
@@ -16,11 +16,13 @@ use crate::file_types;
use crate::geo::{gps_bounding_box, haversine_distance};
use crate::memories::extract_date_from_filename;
use crate::{AppState, create_thumbnails};
use actix_web::dev::ResourcePath;
use actix_web::web::Data;
use actix_web::{
HttpRequest, HttpResponse,
web::{self, Query},
};
use chrono::{DateTime, Utc};
use log::{debug, error, info, trace, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
@@ -50,6 +52,7 @@ fn apply_sorting_with_exif(
sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context,
base_path: &Path,
) -> Vec<String> {
match sort_type {
SortType::DateTakenAsc | SortType::DateTakenDesc => {
@@ -71,10 +74,24 @@ fn apply_sorting_with_exif(
.into_iter()
.map(|f| {
// Try EXIF date first
let date_taken = exif_map.get(&f.file_name).copied().or_else(|| {
// Fallback to filename extraction
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
});
let date_taken = exif_map
.get(&f.file_name)
.copied()
.or_else(|| {
// Fallback to filename extraction
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
})
.or_else(|| {
// Fallback to filesystem metadata creation date
let full_path = base_path.join(&f.file_name);
std::fs::metadata(full_path)
.and_then(|md| md.created().or(md.modified()))
.ok()
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time)
.timestamp()
})
});
FileWithMetadata {
file_name: f.file_name,
@@ -216,7 +233,8 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
if let (Some(photo_lat), Some(photo_lon)) =
(exif.gps_latitude, exif.gps_longitude)
{
let distance = haversine_distance(lat, lon, photo_lat, photo_lon);
let distance =
haversine_distance(lat, lon, photo_lat as f64, photo_lon as f64);
distance <= radius_km
} else {
false
@@ -317,8 +335,13 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
// Handle sorting - use helper function that supports EXIF date sorting
let sort_type = req.sort.unwrap_or(NameAsc);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result =
apply_sorting_with_exif(files, sort_type, &mut exif_dao_guard, &span_context);
let result = apply_sorting_with_exif(
files,
sort_type,
&mut exif_dao_guard,
&span_context,
app_state.base_path.as_ref(),
);
drop(exif_dao_guard);
result
})
@@ -383,7 +406,13 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
)
})
.map(|path: &PathBuf| {
let relative = path.strip_prefix(&app_state.base_path).unwrap();
let relative = path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| {
panic!(
"Unable to strip base path {} from file path {}",
&app_state.base_path.path(),
path.display()
)
});
relative.to_path_buf()
})
.map(|f| f.to_str().unwrap().to_string())
@@ -454,8 +483,13 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
// Handle sorting - use helper function that supports EXIF date sorting
let response_files = if let Some(sort_type) = req.sort {
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result =
apply_sorting_with_exif(photos, sort_type, &mut exif_dao_guard, &span_context);
let result = apply_sorting_with_exif(
photos,
sort_type,
&mut exif_dao_guard,
&span_context,
app_state.base_path.as_ref(),
);
drop(exif_dao_guard);
result
} else {
@@ -757,6 +791,7 @@ pub struct RealFileSystem {
}
impl RealFileSystem {
#[allow(dead_code)] // Used in main.rs binary and tests
pub(crate) fn new(base_path: String) -> RealFileSystem {
RealFileSystem { base_path }
}
@@ -862,7 +897,7 @@ mod tests {
struct MockExifDao;
impl crate::database::ExifDao for MockExifDao {
impl ExifDao for MockExifDao {
fn store_exif(
&mut self,
_context: &opentelemetry::Context,
@@ -1018,10 +1053,11 @@ mod tests {
let request: Query<FilesRequest> = Query::from_query("path=").unwrap();
// Create AppState with the same base_path as RealFileSystem
let test_state = AppState::test_state();
// Create a dedicated test directory to avoid interference from other files in system temp
let mut test_base = env::temp_dir();
test_base.push("image_api_test_list_photos");
fs::create_dir_all(&test_base).unwrap();
let test_base = PathBuf::from(test_state.base_path.clone());
let mut test_dir = test_base.clone();
test_dir.push("test-dir");
@@ -1031,17 +1067,6 @@ mod tests {
photo_path.push("photo.jpg");
File::create(&photo_path).unwrap();
// Create AppState with the same base_path as RealFileSystem
use actix::Actor;
let test_state = AppState::new(
std::sync::Arc::new(crate::video::actors::StreamActor {}.start()),
test_base.to_str().unwrap().to_string(),
test_base.join("thumbnails").to_str().unwrap().to_string(),
test_base.join("videos").to_str().unwrap().to_string(),
test_base.join("gifs").to_str().unwrap().to_string(),
Vec::new(),
);
let response: HttpResponse = list_photos(
claims,
TestRequest::default().to_http_request(),
@@ -1049,9 +1074,7 @@ mod tests {
Data::new(test_state),
Data::new(RealFileSystem::new(test_base.to_str().unwrap().to_string())),
Data::new(Mutex::new(SqliteTagDao::default())),
Data::new(Mutex::new(
Box::new(MockExifDao) as Box<dyn crate::database::ExifDao>
)),
Data::new(Mutex::new(Box::new(MockExifDao) as Box<dyn ExifDao>)),
)
.await;
let status = response.status();

View File

@@ -1,6 +1,7 @@
#[macro_use]
extern crate diesel;
pub mod ai;
pub mod auth;
pub mod cleanup;
pub mod data;
@@ -12,11 +13,13 @@ pub mod files;
pub mod geo;
pub mod memories;
pub mod otel;
pub mod parsers;
pub mod service;
pub mod state;
pub mod tags;
#[cfg(test)]
pub mod testhelpers;
pub mod utils;
pub mod video;
// Re-export commonly used types

View File

@@ -30,7 +30,9 @@ use actix_web::{
use chrono::Utc;
use diesel::sqlite::Sqlite;
use rayon::prelude::*;
use urlencoding::decode;
use crate::ai::InsightGenerator;
use crate::auth::login;
use crate::data::*;
use crate::database::models::InsertImageExif;
@@ -45,11 +47,11 @@ use crate::tags::*;
use crate::video::actors::{
ProcessMessage, ScanDirectoryMessage, create_playlist, generate_video_thumbnail,
};
use crate::video::generate_video_gifs;
use log::{debug, error, info, trace, warn};
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use opentelemetry::{KeyValue, global};
mod ai;
mod auth;
mod data;
mod database;
@@ -60,6 +62,7 @@ mod files;
mod geo;
mod state;
mod tags;
mod utils;
mod video;
mod memories;
@@ -223,8 +226,13 @@ async fn upload_image(
if let Some(content_type) = part.content_disposition() {
debug!("{:?}", content_type);
if let Some(filename) = content_type.get_filename() {
debug!("Name: {:?}", filename);
file_name = Some(filename.to_string());
debug!("Name (raw): {:?}", filename);
// Decode URL-encoded filename (e.g., "file%20name.jpg" -> "file name.jpg")
let decoded_filename = decode(filename)
.map(|s| s.to_string())
.unwrap_or_else(|_| filename.to_string());
debug!("Name (decoded): {:?}", decoded_filename);
file_name = Some(decoded_filename);
while let Some(Ok(data)) = part.next().await {
file_content.put(data);
@@ -241,6 +249,10 @@ async fn upload_image(
let path = file_path.unwrap_or_else(|| app_state.base_path.clone());
if !file_content.is_empty() {
if file_name.is_none() {
span.set_status(Status::error("No filename provided"));
return HttpResponse::BadRequest().body("No filename provided");
}
let full_path = PathBuf::from(&path).join(file_name.unwrap());
if let Some(full_path) = is_valid_full_path(
&app_state.base_path,
@@ -301,11 +313,11 @@ async fn upload_image(
width: exif_data.width,
height: exif_data.height,
orientation: exif_data.orientation,
gps_latitude: exif_data.gps_latitude,
gps_longitude: exif_data.gps_longitude,
gps_altitude: exif_data.gps_altitude,
focal_length: exif_data.focal_length,
aperture: exif_data.aperture,
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
focal_length: exif_data.focal_length.map(|v| v as f32),
aperture: exif_data.aperture.map(|v| v as f32),
shutter_speed: exif_data.shutter_speed,
iso: exif_data.iso,
date_taken: exif_data.date_taken,
@@ -715,7 +727,7 @@ fn main() -> std::io::Result<()> {
}
create_thumbnails();
generate_video_gifs().await;
// generate_video_gifs().await;
let app_data = Data::new(AppState::default());
@@ -739,11 +751,58 @@ fn main() -> std::io::Result<()> {
directory: app_state.base_path.clone(),
});
// Spawn background job to generate daily conversation summaries
{
use crate::ai::generate_daily_summaries;
use crate::database::{DailySummaryDao, SqliteDailySummaryDao};
use chrono::NaiveDate;
// Configure date range for summary generation
// Current range: October 1, 2015 through January 1, 2020
// To expand or narrow the range, change start_date and end_date below
let start_date = Some(NaiveDate::from_ymd_opt(2015, 10, 1).unwrap());
let end_date = Some(NaiveDate::from_ymd_opt(2020, 1, 1).unwrap());
let contacts_to_summarize = vec!["Domenique", "Zach", "Paul"]; // Add more contacts as needed
let ollama = app_state.ollama.clone();
let sms_client = app_state.sms_client.clone();
for contact in contacts_to_summarize {
let ollama_clone = ollama.clone();
let sms_client_clone = sms_client.clone();
let summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
let start = start_date;
let end = end_date;
tokio::spawn(async move {
log::info!("Starting daily summary generation for {}", contact);
if let Err(e) = generate_daily_summaries(
contact,
start,
end,
&ollama_clone,
&sms_client_clone,
summary_dao,
)
.await
{
log::error!("Daily summary generation failed for {}: {:?}", contact, e);
} else {
log::info!("Daily summary generation completed for {}", contact);
}
});
}
}
HttpServer::new(move || {
let user_dao = SqliteUserDao::new();
let favorites_dao = SqliteFavoriteDao::new();
let tag_dao = SqliteTagDao::default();
let exif_dao = SqliteExifDao::new();
let insight_dao = SqliteInsightDao::new();
let cors = Cors::default()
.allowed_origin_fn(|origin, _req_head| {
// Allow all origins in development, or check against CORS_ALLOWED_ORIGINS env var
@@ -795,6 +854,11 @@ fn main() -> std::io::Result<()> {
.service(delete_favorite)
.service(get_file_metadata)
.service(memories::list_memories)
.service(ai::generate_insight_handler)
.service(ai::get_insight_handler)
.service(ai::delete_insight_handler)
.service(ai::get_all_insights_handler)
.service(ai::get_available_models_handler)
.add_feature(add_tag_services::<_, SqliteTagDao>)
.app_data(app_data.clone())
.app_data::<Data<RealFileSystem>>(Data::new(RealFileSystem::new(
@@ -808,6 +872,10 @@ fn main() -> std::io::Result<()> {
.app_data::<Data<Mutex<Box<dyn ExifDao>>>>(Data::new(Mutex::new(Box::new(
exif_dao,
))))
.app_data::<Data<Mutex<Box<dyn InsightDao>>>>(Data::new(Mutex::new(Box::new(
insight_dao,
))))
.app_data::<Data<InsightGenerator>>(Data::new(app_data.insight_generator.clone()))
.wrap(prometheus.clone())
})
.bind(dotenv::var("BIND_URL").unwrap())?
@@ -1003,11 +1071,11 @@ fn process_new_files(
width: exif_data.width,
height: exif_data.height,
orientation: exif_data.orientation,
gps_latitude: exif_data.gps_latitude,
gps_longitude: exif_data.gps_longitude,
gps_altitude: exif_data.gps_altitude,
focal_length: exif_data.focal_length,
aperture: exif_data.aperture,
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
focal_length: exif_data.focal_length.map(|v| v as f32),
aperture: exif_data.aperture.map(|v| v as f32),
shutter_speed: exif_data.shutter_speed,
iso: exif_data.iso,
date_taken: exif_data.date_taken,

View File

@@ -66,7 +66,7 @@ impl PathExcluder {
// Directory-based exclusions
for excluded in &self.excluded_dirs {
if path.starts_with(excluded) {
debug!(
trace!(
"PathExcluder: excluded by dir: {:?} (rule: {:?})",
path, excluded
);
@@ -81,7 +81,7 @@ impl PathExcluder {
if let Some(comp_str) = component.as_os_str().to_str()
&& self.excluded_patterns.iter().any(|pat| pat == comp_str)
{
debug!(
trace!(
"PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})",
path, comp_str, self.excluded_patterns
);
@@ -204,16 +204,21 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
return Some(captures);
}
// 4. Timestamp format: 1401638400.jpeg
if let Some(captures) = regex::Regex::new(r"(\d{10}|\d{13})\.")
.ok()?
.captures(filename)
{
// 4. Timestamp format: 1401638400.jpeg, att_1422489664680106.jpeg, att_142248967186928.jpeg
// Matches timestamps with 10-16 digits (seconds, milliseconds, microseconds)
if let Some(captures) = regex::Regex::new(r"(\d{10,16})\.").ok()?.captures(filename) {
let timestamp_str = captures.get(1)?.as_str();
let len = timestamp_str.len();
// Millisecond timestamp (13 digits)
if timestamp_str.len() >= 13
&& let Some(date_time) = timestamp_str[0..13]
// Skip autogenerated filenames that start with "10000" (e.g., 1000004178.jpg)
// These are not timestamps but auto-generated file IDs
if timestamp_str.starts_with("10000") {
return None;
}
// Try milliseconds first (13 digits exactly)
if len == 13
&& let Some(date_time) = timestamp_str
.parse::<i64>()
.ok()
.and_then(DateTime::from_timestamp_millis)
@@ -222,8 +227,9 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
return Some(date_time);
}
// Second timestamp (10 digits)
if timestamp_str.len() >= 10
// For 14-16 digits, treat first 10 digits as seconds to avoid far future dates
// Examples: att_1422489664680106 (16 digits), att_142248967186928 (15 digits)
if (14..=16).contains(&len)
&& let Some(date_time) = timestamp_str[0..10]
.parse::<i64>()
.ok()
@@ -232,6 +238,28 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
{
return Some(date_time);
}
// Exactly 10 digits - seconds since epoch
if len == 10
&& let Some(date_time) = timestamp_str
.parse::<i64>()
.ok()
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
.map(|naive_dt| naive_dt.fixed_offset())
{
return Some(date_time);
}
// 11-12 digits: try as milliseconds (might be partial millisecond timestamp)
if (len == 11 || len == 12)
&& let Some(date_time) = timestamp_str
.parse::<i64>()
.ok()
.and_then(DateTime::from_timestamp_millis)
.map(|naive_dt| naive_dt.fixed_offset())
{
return Some(date_time);
}
}
None
@@ -752,6 +780,55 @@ mod tests {
assert_eq!(date_time.second(), 0);
}
#[test]
fn test_extract_date_from_filename_attachment_15_digits() {
// att_142248967186928.jpeg - 15 digits, should parse first 10 as seconds
// 1422489671 = 2015-01-29 00:01:11 UTC (parsed as a UTC fixed-offset datetime)
let filename = "att_142248967186928.jpeg";
let date_time = extract_date_from_filename(filename).unwrap();
// Verify year and month are correct (2015-01)
assert_eq!(date_time.year(), 2015);
assert_eq!(date_time.month(), 1);
// Assertion allows one day of tolerance around the expected UTC date
assert!(date_time.day() >= 28 && date_time.day() <= 29);
// Verify timestamp is within expected range (should be around 1422489671)
let timestamp = date_time.timestamp();
assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015
}
#[test]
fn test_extract_date_from_filename_attachment_16_digits() {
// att_1422489664680106.jpeg - 16 digits, should parse first 10 as seconds
// 1422489664 = 2015-01-29 00:01:04 UTC (parsed as a UTC fixed-offset datetime)
let filename = "att_1422489664680106.jpeg";
let date_time = extract_date_from_filename(filename).unwrap();
// Verify year and month are correct (2015-01)
assert_eq!(date_time.year(), 2015);
assert_eq!(date_time.month(), 1);
// Assertion allows one day of tolerance around the expected UTC date
assert!(date_time.day() >= 28 && date_time.day() <= 29);
// Verify timestamp is within expected range (should be around 1422489664)
let timestamp = date_time.timestamp();
assert!(timestamp >= 1422480000 && timestamp <= 1422576000); // Jan 28-29, 2015
}
#[test]
fn test_extract_date_from_filename_autogenerated_should_not_match() {
// Autogenerated filenames like 1000004178.jpg should NOT be parsed as timestamps
// These start with "10000" which would be Sept 2001 if parsed literally
let filename = "1000004178.jpg";
let date_time = extract_date_from_filename(filename);
assert!(
date_time.is_none(),
"Autogenerated filenames starting with 10000 should not be parsed as dates"
);
}
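
A quick standalone check of the digit-length handling these tests exercise, using only chrono: 13 digits are read as milliseconds, while the first 10 digits of a 15- or 16-digit attachment name are read as seconds.

```rust
use chrono::DateTime;

fn main() {
    // 13 digits: milliseconds since the epoch
    let ms = DateTime::from_timestamp_millis(1401638400000).unwrap();
    assert_eq!(ms.format("%Y-%m-%d").to_string(), "2014-06-01");

    // 15-16 digits (e.g. att_142248967186928): first 10 digits read as seconds
    let secs = DateTime::from_timestamp(1422489671, 0).unwrap();
    assert_eq!(secs.format("%Y-%m-%d").to_string(), "2015-01-29");

    // Names starting with "10000" are treated as auto-generated IDs, not timestamps,
    // and are skipped entirely by extract_date_from_filename.
}
```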
#[test]
fn test_memory_date_priority_filename() {
let temp_dir = tempdir().unwrap();

183
src/parsers/ical_parser.rs Normal file
View File

@@ -0,0 +1,183 @@
use anyhow::{Context, Result};
use chrono::NaiveDateTime;
use ical::parser::ical::component::IcalCalendar;
use ical::property::Property;
use std::fs::File;
use std::io::BufReader;
#[derive(Debug, Clone)]
pub struct ParsedCalendarEvent {
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Vec<String>,
}
pub fn parse_ics_file(path: &str) -> Result<Vec<ParsedCalendarEvent>> {
let file = File::open(path).context("Failed to open .ics file")?;
let reader = BufReader::new(file);
let parser = ical::IcalParser::new(reader);
let mut events = Vec::new();
for calendar_result in parser {
let calendar: IcalCalendar = calendar_result.context("Failed to parse calendar")?;
for event in calendar.events {
// Extract properties
let mut event_uid = None;
let mut summary = None;
let mut description = None;
let mut location = None;
let mut start_time = None;
let mut end_time = None;
let mut all_day = false;
let mut organizer = None;
let mut attendees = Vec::new();
for property in event.properties {
match property.name.as_str() {
"UID" => {
event_uid = property.value;
}
"SUMMARY" => {
summary = property.value;
}
"DESCRIPTION" => {
description = property.value;
}
"LOCATION" => {
location = property.value;
}
"DTSTART" => {
if let Some(ref value) = property.value {
start_time = parse_ical_datetime(value, &property)?;
// Check if it's an all-day event (no time component)
all_day = value.len() == 8; // YYYYMMDD format
}
}
"DTEND" => {
if let Some(ref value) = property.value {
end_time = parse_ical_datetime(value, &property)?;
}
}
"ORGANIZER" => {
organizer = extract_email_from_mailto(property.value.as_deref());
}
"ATTENDEE" => {
if let Some(email) = extract_email_from_mailto(property.value.as_deref()) {
attendees.push(email);
}
}
_ => {}
}
}
// Only include events with required fields
if let (Some(summary_text), Some(start), Some(end)) = (summary, start_time, end_time) {
events.push(ParsedCalendarEvent {
event_uid,
summary: summary_text,
description,
location,
start_time: start,
end_time: end,
all_day,
organizer,
attendees,
});
}
}
}
Ok(events)
}
fn parse_ical_datetime(value: &str, property: &Property) -> Result<Option<i64>> {
// Check for TZID parameter
let _tzid = property.params.as_ref().and_then(|params| {
params
.iter()
.find(|(key, _)| key == "TZID")
.and_then(|(_, values)| values.first())
.cloned()
});
// iCal datetime formats:
// - 20240815T140000Z (UTC)
// - 20240815T140000 (local/TZID)
// - 20240815 (all-day)
let cleaned = value.replace("Z", "").replace("T", "");
// All-day event (YYYYMMDD)
if cleaned.len() == 8 {
let dt = NaiveDateTime::parse_from_str(&format!("{}000000", cleaned), "%Y%m%d%H%M%S")
.context("Failed to parse all-day date")?;
return Ok(Some(dt.and_utc().timestamp()));
}
// DateTime event (YYYYMMDDTHHMMSS)
if cleaned.len() >= 14 {
let dt = NaiveDateTime::parse_from_str(&cleaned[..14], "%Y%m%d%H%M%S")
.context("Failed to parse datetime")?;
// If original had 'Z', it's UTC
let timestamp = if value.ends_with('Z') {
dt.and_utc().timestamp()
} else {
// Treat as UTC for simplicity (proper TZID handling is complex)
dt.and_utc().timestamp()
};
return Ok(Some(timestamp));
}
Ok(None)
}
fn extract_email_from_mailto(value: Option<&str>) -> Option<String> {
value.map(|v| {
// ORGANIZER and ATTENDEE often have format: mailto:user@example.com
if v.starts_with("mailto:") {
v.trim_start_matches("mailto:").to_string()
} else {
v.to_string()
}
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_ical_datetime() {
let prop = Property {
name: "DTSTART".to_string(),
params: None,
value: Some("20240815T140000Z".to_string()),
};
let timestamp = parse_ical_datetime("20240815T140000Z", &prop).unwrap();
assert!(timestamp.is_some());
}
#[test]
fn test_extract_email() {
assert_eq!(
extract_email_from_mailto(Some("mailto:user@example.com")),
Some("user@example.com".to_string())
);
assert_eq!(
extract_email_from_mailto(Some("user@example.com")),
Some("user@example.com".to_string())
);
}
}
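
A hedged usage sketch for the parser above; the `image_api::parsers` module path is assumed (the crate name follows the doc comment in src/utils.rs), and the minimal calendar below is only a plausible input, not a captured Takeout export.

```rust
use image_api::parsers::parse_ics_file; // crate/module path assumed

fn main() -> anyhow::Result<()> {
    // A minimal single-event calendar written to a temp file for the demo.
    let ics = [
        "BEGIN:VCALENDAR",
        "VERSION:2.0",
        "BEGIN:VEVENT",
        "UID:demo-1",
        "SUMMARY:Team lunch",
        "LOCATION:Cafe",
        "DTSTART:20240815T120000Z",
        "DTEND:20240815T130000Z",
        "END:VEVENT",
        "END:VCALENDAR",
    ]
    .join("\r\n");
    let path = std::env::temp_dir().join("insights-demo.ics");
    std::fs::write(&path, ics)?;

    let events = parse_ics_file(path.to_str().expect("utf-8 temp path"))?;
    assert_eq!(events.len(), 1);
    println!(
        "{} ({} -> {})",
        events[0].summary, events[0].start_time, events[0].end_time
    );
    Ok(())
}
```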

View File

@@ -0,0 +1,134 @@
use anyhow::{Context, Result};
use chrono::DateTime;
use serde::Deserialize;
use std::fs::File;
use std::io::BufReader;
#[derive(Debug, Clone)]
pub struct ParsedLocationRecord {
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
}
// Google Takeout Location History JSON structures
#[derive(Debug, Deserialize)]
struct LocationHistory {
locations: Vec<LocationPoint>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LocationPoint {
timestamp_ms: Option<String>, // Older format
timestamp: Option<String>, // Newer format (ISO8601)
latitude_e7: Option<i64>,
longitude_e7: Option<i64>,
accuracy: Option<i32>,
activity: Option<Vec<ActivityRecord>>,
}
#[derive(Debug, Deserialize)]
struct ActivityRecord {
activity: Vec<ActivityType>,
#[allow(dead_code)] // Part of JSON structure, may be used in future
timestamp_ms: Option<String>,
}
#[derive(Debug, Deserialize)]
struct ActivityType {
#[serde(rename = "type")]
activity_type: String,
confidence: i32,
}
pub fn parse_location_json(path: &str) -> Result<Vec<ParsedLocationRecord>> {
let file = File::open(path).context("Failed to open location JSON file")?;
let reader = BufReader::new(file);
let history: LocationHistory =
serde_json::from_reader(reader).context("Failed to parse location history JSON")?;
let mut records = Vec::new();
for point in history.locations {
// Parse timestamp (try both formats)
let timestamp = if let Some(ts_ms) = point.timestamp_ms {
// Milliseconds since epoch
ts_ms
.parse::<i64>()
.context("Failed to parse timestamp_ms")?
/ 1000
} else if let Some(ts_iso) = point.timestamp {
// ISO8601 format
DateTime::parse_from_rfc3339(&ts_iso)
.context("Failed to parse ISO8601 timestamp")?
.timestamp()
} else {
continue; // Skip points without timestamp
};
// Convert E7 format to decimal degrees
let latitude = point.latitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
let longitude = point.longitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
// Extract highest-confidence activity
let (activity, activity_confidence) = point
.activity
.as_ref()
.and_then(|activities| activities.first())
.and_then(|record| {
record
.activity
.iter()
.max_by_key(|a| a.confidence)
.map(|a| (a.activity_type.clone(), a.confidence))
})
.unzip();
if let (Some(lat), Some(lon)) = (latitude, longitude) {
records.push(ParsedLocationRecord {
timestamp,
latitude: lat,
longitude: lon,
accuracy: point.accuracy,
activity,
activity_confidence,
});
}
}
Ok(records)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_e7_conversion() {
let lat_e7 = 374228300_i64;
let lat = lat_e7 as f64 / 10_000_000.0;
assert!((lat - 37.42283).abs() < 0.00001);
}
#[test]
fn test_parse_sample_json() {
let json = r#"{
"locations": [
{
"latitudeE7": 374228300,
"longitudeE7": -1221086100,
"accuracy": 20,
"timestampMs": "1692115200000"
}
]
}"#;
let history: LocationHistory = serde_json::from_str(json).unwrap();
assert_eq!(history.locations.len(), 1);
}
}
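
A hedged usage sketch for the location parser, reusing the same older-format sample as the unit test above; the `image_api::parsers` module path is assumed.

```rust
use image_api::parsers::parse_location_json; // crate/module path assumed

fn main() -> anyhow::Result<()> {
    // Older Takeout format: timestampMs in milliseconds, coordinates in E7.
    let json = r#"{
        "locations": [
            { "latitudeE7": 374228300, "longitudeE7": -1221086100, "accuracy": 20, "timestampMs": "1692115200000" }
        ]
    }"#;
    let path = std::env::temp_dir().join("location-history-demo.json");
    std::fs::write(&path, json)?;

    let records = parse_location_json(path.to_str().expect("utf-8 temp path"))?;
    assert_eq!(records.len(), 1);
    assert!((records[0].latitude - 37.42283).abs() < 1e-5); // E7 converted to degrees
    assert_eq!(records[0].timestamp, 1_692_115_200); // milliseconds converted to seconds
    Ok(())
}
```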

7
src/parsers/mod.rs Normal file
View File

@@ -0,0 +1,7 @@
pub mod ical_parser;
pub mod location_json_parser;
pub mod search_html_parser;
pub use ical_parser::{ParsedCalendarEvent, parse_ics_file};
pub use location_json_parser::{ParsedLocationRecord, parse_location_json};
pub use search_html_parser::{ParsedSearchRecord, parse_search_html};

View File

@@ -0,0 +1,209 @@
use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDateTime, Utc};
use scraper::{Html, Selector};
use std::fs;
#[derive(Debug, Clone)]
pub struct ParsedSearchRecord {
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
}
pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
let html_content =
fs::read_to_string(path).context("Failed to read search history HTML file")?;
let document = Html::parse_document(&html_content);
let mut records = Vec::new();
// Try multiple selector strategies as Google Takeout format varies
// Strategy 1: Look for specific cell structure
if let Ok(cell_selector) = Selector::parse("div.content-cell") {
for cell in document.select(&cell_selector) {
if let Some(record) = parse_content_cell(&cell) {
records.push(record);
}
}
}
// Strategy 2: Look for outer-cell structure (older format)
if records.is_empty()
&& let Ok(outer_selector) = Selector::parse("div.outer-cell")
{
for cell in document.select(&outer_selector) {
if let Some(record) = parse_outer_cell(&cell) {
records.push(record);
}
}
}
// Strategy 3: Generic approach - look for links and timestamps
if records.is_empty()
&& let Ok(link_selector) = Selector::parse("a")
{
for link in document.select(&link_selector) {
if let Some(href) = link.value().attr("href") {
// Check if it's a search URL
if (href.contains("google.com/search?q=") || href.contains("search?q="))
&& let Some(query) = extract_query_from_url(href)
{
// Try to find nearby timestamp
let timestamp = find_nearby_timestamp(&link);
records.push(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
});
}
}
}
}
Ok(records)
}
fn parse_content_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
let link_selector = Selector::parse("a").ok()?;
let link = cell.select(&link_selector).next()?;
let href = link.value().attr("href")?;
let query = extract_query_from_url(href)?;
// Extract timestamp from cell text
let cell_text = cell.text().collect::<Vec<_>>().join(" ");
let timestamp = parse_timestamp_from_text(&cell_text);
Some(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
})
}
fn parse_outer_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
let link_selector = Selector::parse("a").ok()?;
let link = cell.select(&link_selector).next()?;
let href = link.value().attr("href")?;
let query = extract_query_from_url(href)?;
let cell_text = cell.text().collect::<Vec<_>>().join(" ");
let timestamp = parse_timestamp_from_text(&cell_text);
Some(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
})
}
fn extract_query_from_url(url: &str) -> Option<String> {
// Extract query parameter from URL
// Example: https://www.google.com/search?q=rust+programming
if let Some(query_start) = url.find("?q=").or_else(|| url.find("&q=")) {
let query_part = &url[query_start + 3..];
let query_end = query_part.find('&').unwrap_or(query_part.len());
let encoded_query = &query_part[..query_end];
// URL decode
urlencoding::decode(encoded_query)
.ok()
.map(|s| s.to_string())
} else {
None
}
}
fn find_nearby_timestamp(element: &scraper::ElementRef) -> Option<i64> {
// Look for timestamp in parent or sibling elements
if let Some(parent) = element.parent()
&& parent.value().as_element().is_some()
{
let parent_ref = scraper::ElementRef::wrap(parent)?;
let text = parent_ref.text().collect::<Vec<_>>().join(" ");
return parse_timestamp_from_text(&text);
}
None
}
fn parse_timestamp_from_text(text: &str) -> Option<i64> {
// Google Takeout timestamps often look like:
// "Aug 15, 2024, 2:34:56 PM PDT"
// "2024-08-15T14:34:56Z"
// Try ISO8601 first
if let Some(iso_match) = text
.split_whitespace()
.find(|s| s.contains('T') && s.contains('-'))
&& let Ok(dt) = DateTime::parse_from_rfc3339(iso_match)
{
return Some(dt.timestamp());
}
// Try common date patterns
let patterns = [
"%b %d, %Y, %I:%M:%S %p", // Aug 15, 2024, 2:34:56 PM
"%Y-%m-%d %H:%M:%S", // 2024-08-15 14:34:56
"%m/%d/%Y %H:%M:%S", // 08/15/2024 14:34:56
];
for pattern in patterns {
// Extract potential date string
if let Some(date_part) = extract_date_substring(text)
&& let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern)
{
return Some(dt.and_utc().timestamp());
}
}
None
}
fn extract_date_substring(text: &str) -> Option<String> {
// Try to extract date-like substring from text
// This is a heuristic approach for varied formats
// Look for patterns like "Aug 15, 2024, 2:34:56 PM"
if let Some(pos) = text.find(|c: char| c.is_numeric()) {
let rest = &text[pos..];
if let Some(end) =
rest.find(|c: char| !c.is_alphanumeric() && c != ':' && c != ',' && c != ' ')
{
Some(rest[..end].trim().to_string())
} else {
Some(rest.trim().to_string())
}
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_extract_query_from_url() {
let url = "https://www.google.com/search?q=rust+programming&oq=rust";
let query = extract_query_from_url(url);
assert_eq!(query, Some("rust+programming".to_string()));
}
#[test]
fn test_extract_query_with_encoding() {
let url = "https://www.google.com/search?q=hello%20world";
let query = extract_query_from_url(url);
assert_eq!(query, Some("hello world".to_string()));
}
#[test]
fn test_parse_iso_timestamp() {
let text = "Some text 2024-08-15T14:34:56Z more text";
let timestamp = parse_timestamp_from_text(text);
assert!(timestamp.is_some());
}
}
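
A hedged usage sketch for the search-history parser; the `image_api::parsers` module path and the minimal content-cell markup are assumptions.

```rust
use image_api::parsers::parse_search_html; // crate/module path assumed

fn main() -> anyhow::Result<()> {
    // Minimal stand-in for a Google Takeout "My Activity" page using the content-cell layout.
    let html = r#"<html><body>
        <div class="content-cell">
            Searched for <a href="https://www.google.com/search?q=rust+diesel+sqlite">rust diesel sqlite</a>
            Aug 15, 2024, 2:34:56 PM PDT
        </div>
    </body></html>"#;
    let path = std::env::temp_dir().join("search-history-demo.html");
    std::fs::write(&path, html)?;

    let records = parse_search_html(path.to_str().expect("utf-8 temp path"))?;
    assert_eq!(records.len(), 1);
    // Query is URL-decoded but '+' separators are preserved, matching the unit test above.
    assert_eq!(records[0].query, "rust+diesel+sqlite");
    Ok(())
}
```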

View File

@@ -1,6 +1,13 @@
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
use crate::database::{
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao,
SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao,
SqliteLocationHistoryDao, SqliteSearchHistoryDao,
};
use crate::video::actors::{PlaylistGenerator, StreamActor, VideoPlaylistManager};
use actix::{Actor, Addr};
use std::{env, sync::Arc};
use std::env;
use std::sync::{Arc, Mutex};
pub struct AppState {
pub stream_manager: Arc<Addr<StreamActor>>,
@@ -10,6 +17,9 @@ pub struct AppState {
pub video_path: String,
pub gif_path: String,
pub excluded_dirs: Vec<String>,
pub ollama: OllamaClient,
pub sms_client: SmsApiClient,
pub insight_generator: InsightGenerator,
}
impl AppState {
@@ -20,6 +30,9 @@ impl AppState {
video_path: String,
gif_path: String,
excluded_dirs: Vec<String>,
ollama: OllamaClient,
sms_client: SmsApiClient,
insight_generator: InsightGenerator,
) -> Self {
let playlist_generator = PlaylistGenerator::new();
let video_playlist_manager =
@@ -33,6 +46,9 @@ impl AppState {
video_path,
gif_path,
excluded_dirs,
ollama,
sms_client,
insight_generator,
}
}
@@ -49,13 +65,70 @@ impl AppState {
impl Default for AppState {
fn default() -> Self {
// Initialize AI clients
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
});
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
let ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url,
ollama_primary_model,
ollama_fallback_model,
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
let sms_api_token = env::var("SMS_API_TOKEN").ok();
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
// Initialize DAOs
let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Initialize Google Takeout DAOs
let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));
// Load base path
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
// Initialize InsightGenerator with all data sources
let insight_generator = InsightGenerator::new(
ollama.clone(),
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
daily_summary_dao.clone(),
calendar_dao.clone(),
location_dao.clone(),
search_dao.clone(),
base_path.clone(),
);
Self::new(
Arc::new(StreamActor {}.start()),
env::var("BASE_PATH").expect("BASE_PATH was not set in the env"),
base_path,
env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"),
env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"),
env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"),
Self::parse_excluded_dirs(),
ollama,
sms_client,
insight_generator,
)
}
}
@@ -74,14 +147,56 @@ impl AppState {
let video_path = create_test_subdir(&base_path, "videos");
let gif_path = create_test_subdir(&base_path, "gifs");
// Initialize test AI clients
let ollama = OllamaClient::new(
"http://localhost:11434".to_string(),
None,
"llama3.2".to_string(),
None,
);
let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None);
// Initialize test DAOs
let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Initialize test Google Takeout DAOs
let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));
// Initialize test InsightGenerator with all data sources
let base_path_str = base_path.to_string_lossy().to_string();
let insight_generator = InsightGenerator::new(
ollama.clone(),
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
daily_summary_dao.clone(),
calendar_dao.clone(),
location_dao.clone(),
search_dao.clone(),
base_path_str.clone(),
);
// Create the AppState with the temporary paths
AppState::new(
std::sync::Arc::new(crate::video::actors::StreamActor {}.start()),
base_path.to_string_lossy().to_string(),
Arc::new(StreamActor {}.start()),
base_path_str,
thumbnail_path.to_string_lossy().to_string(),
video_path.to_string_lossy().to_string(),
gif_path.to_string_lossy().to_string(),
Vec::new(), // No excluded directories for test state
ollama,
sms_client,
insight_generator,
)
}
}

View File

@@ -1,5 +1,6 @@
use crate::data::GetTagsRequest;
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
use crate::utils::normalize_path;
use crate::{Claims, ThumbnailRequest, connect, data::AddTagRequest, error::IntoHttpError, schema};
use actix_web::dev::{ServiceFactory, ServiceRequest};
use actix_web::{App, HttpRequest, HttpResponse, Responder, web};
@@ -41,6 +42,7 @@ async fn add_tag<D: TagDao>(
let span = tracer.start_with_context("add_tag", &context);
let span_context = opentelemetry::Context::current_with_span(span);
let tag_name = body.tag_name.clone();
let normalized_path = normalize_path(&body.file_name);
let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
@@ -52,12 +54,12 @@ async fn add_tag<D: TagDao>(
} else {
info!(
"Creating missing tag: '{:?}' for file: '{}'",
tag_name, &body.file_name
tag_name, &normalized_path
);
tag_dao.create_tag(&span_context, tag_name.trim())
}
})
.and_then(|tag| tag_dao.tag_file(&span_context, &body.file_name, tag.id))
.and_then(|tag| tag_dao.tag_file(&span_context, &normalized_path, tag.id))
.map(|_| {
span_context.span().set_status(Status::Ok);
HttpResponse::Ok()
@@ -74,9 +76,10 @@ async fn get_tags<D: TagDao>(
let context = extract_context_from_request(&http_request);
let span = global_tracer().start_with_context("get_tags", &context);
let span_context = opentelemetry::Context::current_with_span(span);
let normalized_path = normalize_path(&request.path);
let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
tag_dao
.get_tags_for_path(&span_context, &request.path)
.get_tags_for_path(&span_context, &normalized_path)
.map(|tags| {
span_context.span().set_status(Status::Ok);
HttpResponse::Ok().json(tags)
@@ -139,10 +142,11 @@ async fn remove_tagged_photo<D: TagDao>(
let context = extract_context_from_request(&http_request);
let span = global_tracer().start_with_context("remove_tagged_photo", &context);
let span_context = opentelemetry::Context::current_with_span(span);
let normalized_path = normalize_path(&request.file_name);
let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
tag_dao
.remove_tag(&span_context, &request.tag_name, &request.file_name)
.remove_tag(&span_context, &request.tag_name, &normalized_path)
.map(|result| {
span_context.span().set_status(Status::Ok);
@@ -165,8 +169,9 @@ async fn update_tags<D: TagDao>(
let context = extract_context_from_request(&http_request);
let span = global_tracer().start_with_context("update_tags", &context);
let span_context = opentelemetry::Context::current_with_span(span);
let normalized_path = normalize_path(&request.file_name);
dao.get_tags_for_path(&span_context, &request.file_name)
dao.get_tags_for_path(&span_context, &normalized_path)
.and_then(|existing_tags| {
dao.get_all_tags(&span_context, None)
.map(|all| (existing_tags, all))
@@ -180,9 +185,9 @@ async fn update_tags<D: TagDao>(
for tag in tags_to_remove {
info!(
"Removing tag {:?} from file: {:?}",
tag.name, request.file_name
tag.name, normalized_path
);
dao.remove_tag(&span_context, &tag.name, &request.file_name)
dao.remove_tag(&span_context, &tag.name, &normalized_path)
.unwrap_or_else(|err| panic!("{:?} Unable to remove tag {:?}", err, &tag.name));
}
@@ -194,14 +199,14 @@ async fn update_tags<D: TagDao>(
for (_, new_tag) in new_tags {
info!(
"Adding tag {:?} to file: {:?}",
new_tag.name, request.file_name
new_tag.name, normalized_path
);
dao.tag_file(&span_context, &request.file_name, new_tag.id)
dao.tag_file(&span_context, &normalized_path, new_tag.id)
.with_context(|| {
format!(
"Unable to tag file {:?} with tag: {:?}",
request.file_name, new_tag.name
normalized_path, new_tag.name
)
})
.unwrap();
@@ -255,9 +260,13 @@ pub struct InsertTaggedPhoto {
#[derive(Queryable, Clone, Debug)]
pub struct TaggedPhoto {
#[allow(dead_code)] // Part of API contract
pub id: i32,
#[allow(dead_code)] // Part of API contract
pub photo_name: String,
#[allow(dead_code)] // Part of API contract
pub tag_id: i32,
#[allow(dead_code)] // Part of API contract
pub created_time: i64,
}
@@ -778,8 +787,8 @@ mod tests {
fn get_files_with_all_tag_ids(
&mut self,
tag_ids: Vec<i32>,
exclude_tag_ids: Vec<i32>,
_tag_ids: Vec<i32>,
_exclude_tag_ids: Vec<i32>,
_context: &opentelemetry::Context,
) -> anyhow::Result<Vec<FileWithTagCount>> {
todo!()
@@ -787,8 +796,8 @@ mod tests {
fn get_files_with_any_tag_ids(
&mut self,
tag_ids: Vec<i32>,
exclude_tag_ids: Vec<i32>,
_tag_ids: Vec<i32>,
_exclude_tag_ids: Vec<i32>,
_context: &opentelemetry::Context,
) -> anyhow::Result<Vec<FileWithTagCount>> {
todo!()
@@ -796,16 +805,16 @@ mod tests {
fn update_photo_name(
&mut self,
old_name: &str,
new_name: &str,
context: &opentelemetry::Context,
_old_name: &str,
_new_name: &str,
_context: &opentelemetry::Context,
) -> anyhow::Result<()> {
todo!()
}
fn get_all_photo_names(
&mut self,
context: &opentelemetry::Context,
_context: &opentelemetry::Context,
) -> anyhow::Result<Vec<String>> {
todo!()
}

83
src/utils.rs Normal file
View File

@@ -0,0 +1,83 @@
/// Normalize a file path to use forward slashes for cross-platform consistency
/// This ensures paths stored in the database always use `/` regardless of OS
///
/// # Examples
/// ```
/// use image_api::utils::normalize_path;
///
/// assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
/// assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
/// ```
pub fn normalize_path(path: &str) -> String {
path.replace('\\', "/")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_normalize_path_with_backslashes() {
assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
}
#[test]
fn test_normalize_path_with_forward_slashes() {
assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
}
#[test]
fn test_normalize_path_mixed() {
assert_eq!(
normalize_path("foo\\bar/baz\\qux.jpg"),
"foo/bar/baz/qux.jpg"
);
}
#[test]
fn test_normalize_path_empty() {
assert_eq!(normalize_path(""), "");
}
#[test]
fn test_normalize_path_absolute_windows() {
assert_eq!(
normalize_path("C:\\Users\\Photos\\image.jpg"),
"C:/Users/Photos/image.jpg"
);
}
#[test]
fn test_normalize_path_unc_path() {
assert_eq!(
normalize_path("\\\\server\\share\\folder\\file.jpg"),
"//server/share/folder/file.jpg"
);
}
#[test]
fn test_normalize_path_single_filename() {
assert_eq!(normalize_path("image.jpg"), "image.jpg");
}
#[test]
fn test_normalize_path_trailing_slash() {
assert_eq!(normalize_path("foo\\bar\\"), "foo/bar/");
}
#[test]
fn test_normalize_path_multiple_consecutive_backslashes() {
assert_eq!(
normalize_path("foo\\\\bar\\\\\\baz.jpg"),
"foo//bar///baz.jpg"
);
}
#[test]
fn test_normalize_path_deep_nesting() {
assert_eq!(
normalize_path("a\\b\\c\\d\\e\\f\\g\\file.jpg"),
"a/b/c/d/e/f/g/file.jpg"
);
}
}

View File

@@ -10,6 +10,7 @@ use walkdir::WalkDir;
pub mod actors;
pub mod ffmpeg;
#[allow(dead_code)]
pub async fn generate_video_gifs() {
tokio::spawn(async {
info!("Starting to make video gifs");