Add Google Takeout data import infrastructure
Implements Phase 1 & 2 of Google Takeout RAG integration:
- Database migrations for calendar_events, location_history, search_history
- DAO implementations with hybrid time + semantic search
- Parsers for .ics, JSON, and HTML Google Takeout formats
- Import utilities with batch-insert optimization

Features:
- CalendarEventDao: hybrid time-range + semantic search for events
- LocationHistoryDao: GPS proximity with Haversine distance calculation
- SearchHistoryDao: semantic-first search (queries are embedding-rich)
- Batch inserts for performance (1M+ records in minutes instead of hours)
- OpenTelemetry tracing for all database operations

Import utilities:
- import_calendar: parse .ics files with optional embedding generation
- import_location_history: high-volume GPS data with batch inserts
- import_search_history: always generates embeddings for semantic search

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
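For orientation before the diff: a minimal sketch (not part of the commit) of how the hybrid calendar query added here is meant to be called. The names come from the CalendarEventDao trait further down; the photo timestamp and 768-dim query embedding are assumed inputs.

    use image_api::database::CalendarEventDao; // the trait from this commit
    use image_api::database::calendar_dao::SqliteCalendarEventDao;

    fn events_near_photo(photo_ts: i64, query_embedding: &[f32]) {
        let context = opentelemetry::Context::current();
        let mut dao = SqliteCalendarEventDao::new();
        // Indexed time filter first (±3 days around the photo), then
        // re-rank by cosine similarity against the query embedding.
        if let Ok(events) =
            dao.find_relevant_events_hybrid(&context, photo_ts, 3, Some(query_embedding), 10)
        {
            for e in &events {
                println!("{} ({} .. {})", e.summary, e.start_time, e.end_time);
            }
        }
    }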
Cargo.lock (generated, 342 lines added)

@@ -340,6 +340,19 @@ version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "getrandom 0.3.3",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]

 [[package]]
 name = "aho-corasick"
 version = "1.1.3"

@@ -848,6 +861,29 @@ dependencies = [
 "typenum",
 ]
+
+[[package]]
+name = "cssparser"
+version = "0.31.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
+dependencies = [
+ "cssparser-macros",
+ "dtoa-short",
+ "itoa",
+ "phf 0.11.3",
+ "smallvec",
+]
+
+[[package]]
+name = "cssparser-macros"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
+dependencies = [
+ "quote",
+ "syn",
+]

 [[package]]
 name = "darling"
 version = "0.20.11"

@@ -1024,6 +1060,27 @@ dependencies = [
 "syn",
 ]
+
+[[package]]
+name = "dtoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
+
+[[package]]
+name = "dtoa-short"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
+dependencies = [
+ "dtoa",
+]
+
+[[package]]
+name = "ego-tree"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"

 [[package]]
 name = "either"
 version = "1.15.0"

@@ -1165,6 +1222,16 @@ dependencies = [
 "percent-encoding",
 ]
+
+[[package]]
+name = "futf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
+dependencies = [
+ "mac",
+ "new_debug_unreachable",
+]

 [[package]]
 name = "futures"
 version = "0.3.31"

@@ -1260,6 +1327,15 @@ dependencies = [
 "slab",
 ]
+
+[[package]]
+name = "fxhash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
+dependencies = [
+ "byteorder",
+]

 [[package]]
 name = "generic-array"
 version = "0.14.7"

@@ -1270,6 +1346,15 @@ dependencies = [
 "version_check",
 ]
+
+[[package]]
+name = "getopts"
+version = "0.2.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
+dependencies = [
+ "unicode-width",
+]

 [[package]]
 name = "getrandom"
 version = "0.2.16"

@@ -1377,6 +1462,20 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "html5ever"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever",
+ "proc-macro2",
+ "quote",
+ "syn",
+]

 [[package]]
 name = "http"
 version = "0.2.12"

@@ -1557,6 +1656,15 @@ dependencies = [
 "cc",
 ]
+
+[[package]]
+name = "ical"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b7cab7543a8b7729a19e2c04309f902861293dcdae6558dfbeb634454d279f6"
+dependencies = [
+ "thiserror 1.0.69",
+]

 [[package]]
 name = "icu_collections"
 version = "2.0.0"

@@ -1708,6 +1816,7 @@ dependencies = [
 "dotenv",
 "env_logger",
 "futures",
+ "ical",
 "image",
 "infer",
 "jsonwebtoken",

@@ -1726,6 +1835,7 @@ dependencies = [
 "rayon",
 "regex",
 "reqwest",
+ "scraper",
 "serde",
 "serde_json",
 "tempfile",

@@ -2004,6 +2114,26 @@ dependencies = [
 "imgref",
 ]
+
+[[package]]
+name = "mac"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+
+[[package]]
+name = "markup5ever"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
+dependencies = [
+ "log",
+ "phf 0.11.3",
+ "phf_codegen 0.11.3",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]

 [[package]]
 name = "maybe-rayon"
 version = "0.1.1"

@@ -2439,6 +2569,96 @@ version = "2.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
+[[package]]
+name = "phf"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
+dependencies = [
+ "phf_shared 0.10.0",
+]
+
+[[package]]
+name = "phf"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
+dependencies = [
+ "phf_macros",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
+dependencies = [
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
+dependencies = [
+ "phf_shared 0.10.0",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
+dependencies = [
+ "phf_shared 0.11.3",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+dependencies = [
+ "siphasher 0.3.11",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher 1.0.1",
+]

 [[package]]
 name = "pin-project"
 version = "1.1.10"

@@ -2529,6 +2749,12 @@ dependencies = [
 "zerocopy",
 ]
+
+[[package]]
+name = "precomputed-hash"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"

 [[package]]
 name = "proc-macro2"
 version = "1.0.101"

@@ -2987,6 +3213,22 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "scraper"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
+dependencies = [
+ "ahash",
+ "cssparser",
+ "ego-tree",
+ "getopts",
+ "html5ever",
+ "once_cell",
+ "selectors",
+ "tendril",
+]

 [[package]]
 name = "security-framework"
 version = "2.11.1"

@@ -3010,6 +3252,25 @@ dependencies = [
 "libc",
 ]
+
+[[package]]
+name = "selectors"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
+dependencies = [
+ "bitflags",
+ "cssparser",
+ "derive_more 0.99.20",
+ "fxhash",
+ "log",
+ "new_debug_unreachable",
+ "phf 0.10.1",
+ "phf_codegen 0.10.0",
+ "precomputed-hash",
+ "servo_arc",
+ "smallvec",
+]

 [[package]]
 name = "semver"
 version = "1.0.26"

@@ -3087,6 +3348,15 @@ dependencies = [
 "serde",
 ]
+
+[[package]]
+name = "servo_arc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
+dependencies = [
+ "stable_deref_trait",
+]

 [[package]]
 name = "sha1"
 version = "0.10.6"

@@ -3140,6 +3410,18 @@ dependencies = [
 "time",
 ]
+
+[[package]]
+name = "siphasher"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
+
+[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"

 [[package]]
 name = "slab"
 version = "0.4.11"

@@ -3193,6 +3475,31 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "29fdc163db75f7b5ffa3daf0c5a7136fb0d4b2f35523cd1769da05e034159feb"
+
+[[package]]
+name = "string_cache"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
+dependencies = [
+ "new_debug_unreachable",
+ "parking_lot",
+ "phf_shared 0.11.3",
+ "precomputed-hash",
+ "serde",
+]
+
+[[package]]
+name = "string_cache_codegen"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+ "proc-macro2",
+ "quote",
+]

 [[package]]
 name = "strsim"
 version = "0.11.1"

@@ -3289,6 +3596,17 @@ dependencies = [
 "windows-sys 0.60.2",
 ]
+
+[[package]]
+name = "tendril"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
+dependencies = [
+ "futf",
+ "mac",
+ "utf-8",
+]

 [[package]]
 name = "thiserror"
 version = "1.0.69"

@@ -3385,9 +3703,21 @@ dependencies = [
 "signal-hook-registry",
 "slab",
 "socket2 0.6.0",
+ "tokio-macros",
 "windows-sys 0.59.0",
 ]
+
+[[package]]
+name = "tokio-macros"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]

 [[package]]
 name = "tokio-native-tls"
 version = "0.3.1"

@@ -3647,6 +3977,12 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"

 [[package]]
 name = "unicode-xid"
 version = "0.2.6"

@@ -3677,6 +4013,12 @@ version = "2.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"

 [[package]]
 name = "utf8_iter"
 version = "1.0.4"
Cargo.toml

@@ -13,7 +13,7 @@ lto = "thin"
 actix = "0.13.1"
 actix-web = "4"
 actix-rt = "2.6"
-tokio = { version = "1.42.0", features = ["default", "process", "sync"] }
+tokio = { version = "1.42.0", features = ["default", "process", "sync", "macros", "rt-multi-thread"] }
 actix-files = "0.6"
 actix-cors = "0.7"
 actix-multipart = "0.7.2"

@@ -52,3 +52,5 @@ exif = { package = "kamadak-exif", version = "0.6.1" }
 reqwest = { version = "0.12", features = ["json"] }
 urlencoding = "2.1"
 zerocopy = "0.8"
+ical = "0.11"
+scraper = "0.20"
migrations/2026-01-05-000000_add_calendar_events/down.sql (new file, 1 line)

DROP TABLE IF EXISTS calendar_events;

migrations/2026-01-05-000000_add_calendar_events/up.sql (new file, 20 lines)

CREATE TABLE calendar_events (
    id INTEGER PRIMARY KEY NOT NULL,
    event_uid TEXT,
    summary TEXT NOT NULL,
    description TEXT,
    location TEXT,
    start_time BIGINT NOT NULL,
    end_time BIGINT NOT NULL,
    all_day BOOLEAN NOT NULL DEFAULT 0,
    organizer TEXT,
    attendees TEXT,
    embedding BLOB,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(event_uid, start_time)
);

CREATE INDEX idx_calendar_start_time ON calendar_events(start_time);
CREATE INDEX idx_calendar_end_time ON calendar_events(end_time);
CREATE INDEX idx_calendar_time_range ON calendar_events(start_time, end_time);
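Since start_time and end_time are stored as epoch seconds (BIGINT), the "±N days around a photo" windows served by idx_calendar_time_range reduce to integer arithmetic. A small illustrative sketch (not in the commit):

    use chrono::{Duration, Utc};

    // Epoch-second bounds for "events within ±`days` of now", suitable as
    // the (start_ts, end_ts) arguments to find_events_in_range.
    fn window_around_now(days: i64) -> (i64, i64) {
        let center = Utc::now().timestamp();
        let half = Duration::days(days).num_seconds();
        (center - half, center + half)
    }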
migrations/2026-01-05-000100_add_location_history/down.sql (new file, 1 line)

DROP TABLE IF EXISTS location_history;

migrations/2026-01-05-000100_add_location_history/up.sql (new file, 19 lines)

CREATE TABLE location_history (
    id INTEGER PRIMARY KEY NOT NULL,
    timestamp BIGINT NOT NULL,
    latitude REAL NOT NULL,
    longitude REAL NOT NULL,
    accuracy INTEGER,
    activity TEXT,
    activity_confidence INTEGER,
    place_name TEXT,
    place_category TEXT,
    embedding BLOB,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(timestamp, latitude, longitude)
);

CREATE INDEX idx_location_timestamp ON location_history(timestamp);
CREATE INDEX idx_location_coords ON location_history(latitude, longitude);
CREATE INDEX idx_location_activity ON location_history(activity);
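The commit message names Haversine distance as LocationHistoryDao's proximity measure; that DAO is not shown in this excerpt, so here is a minimal sketch of the standard formula (the function name is hypothetical):

    /// Great-circle distance in meters between two GPS fixes (Haversine).
    /// Illustrative only; the actual LocationHistoryDao implementation is
    /// not part of this excerpt.
    fn haversine_m(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
        const EARTH_RADIUS_M: f64 = 6_371_000.0;
        let (phi1, phi2) = (lat1.to_radians(), lat2.to_radians());
        let dphi = (lat2 - lat1).to_radians();
        let dlambda = (lon2 - lon1).to_radians();
        let a = (dphi / 2.0).sin().powi(2)
            + phi1.cos() * phi2.cos() * (dlambda / 2.0).sin().powi(2);
        EARTH_RADIUS_M * 2.0 * a.sqrt().atan2((1.0 - a).sqrt())
    }

In practice idx_location_coords supports a cheap bounding-box pre-filter, with Haversine refining the candidates.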
migrations/2026-01-05-000200_add_search_history/down.sql (new file, 1 line)

DROP TABLE IF EXISTS search_history;

migrations/2026-01-05-000200_add_search_history/up.sql (new file, 13 lines)

CREATE TABLE search_history (
    id INTEGER PRIMARY KEY NOT NULL,
    timestamp BIGINT NOT NULL,
    query TEXT NOT NULL,
    search_engine TEXT,
    embedding BLOB NOT NULL,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(timestamp, query)
);

CREATE INDEX idx_search_timestamp ON search_history(timestamp);
CREATE INDEX idx_search_query ON search_history(query);
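Note that search_history is the only table where embedding is NOT NULL: per the commit message, queries are embedding-rich, so semantic search is the primary access path. The BLOB itself is just the 768 f32 components as raw 4-byte groups (3072 bytes); an illustrative sketch, which on little-endian targets matches the zerocopy-based serialize_vector in the DAO code below:

    // Illustrative only: 768 f32s -> 3072-byte little-endian blob.
    fn embedding_to_blob(embedding: &[f32]) -> Vec<u8> {
        embedding.iter().flat_map(|f| f.to_le_bytes()).collect()
    }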
@@ -339,7 +339,7 @@ impl InsightGenerator {
         let location = match exif {
             Some(ref exif) => {
                 if let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) {
-                    let loc = self.reverse_geocode(lat, lon).await;
+                    let loc = self.reverse_geocode(lat as f64, lon as f64).await;
                     if let Some(ref l) = loc {
                         insight_cx
                             .span()
src/bin/import_calendar.rs (new file, 167 lines)

use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
use image_api::parsers::ical_parser::parse_ics_file;
use log::{error, info};
use std::sync::{Arc, Mutex};

// Import the trait to use its methods
use image_api::database::CalendarEventDao;

#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Calendar data", long_about = None)]
struct Args {
    /// Path to the .ics calendar file
    #[arg(short, long)]
    path: String,

    /// Generate embeddings for calendar events (slower but enables semantic search)
    #[arg(long, default_value = "false")]
    generate_embeddings: bool,

    /// Skip events that already exist in the database
    #[arg(long, default_value = "true")]
    skip_existing: bool,

    /// Batch size for embedding generation
    #[arg(long, default_value = "128")]
    batch_size: usize,
}

#[tokio::main]
async fn main() -> Result<()> {
    dotenv::dotenv().ok();
    env_logger::init();

    let args = Args::parse();

    info!("Parsing calendar file: {}", args.path);
    let events = parse_ics_file(&args.path).context("Failed to parse .ics file")?;

    info!("Found {} calendar events", events.len());

    let context = opentelemetry::Context::current();

    let ollama = if args.generate_embeddings {
        let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
            .or_else(|_| dotenv::var("OLLAMA_URL"))
            .unwrap_or_else(|_| "http://localhost:11434".to_string());
        let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
        let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
            .or_else(|_| dotenv::var("OLLAMA_MODEL"))
            .unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
        let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();

        Some(OllamaClient::new(
            primary_url,
            fallback_url,
            primary_model,
            fallback_model,
        ))
    } else {
        None
    };

    let inserted_count = Arc::new(Mutex::new(0));
    let skipped_count = Arc::new(Mutex::new(0));
    let error_count = Arc::new(Mutex::new(0));

    // Process events sequentially: embedding generation is async, so rayon
    // cannot be used here.
    for event in &events {
        let mut dao_instance = SqliteCalendarEventDao::new();

        // Check whether the event already exists (idempotent re-imports)
        if args.skip_existing {
            if let Ok(exists) = dao_instance.event_exists(
                &context,
                event.event_uid.as_deref().unwrap_or(""),
                event.start_time,
            ) {
                if exists {
                    *skipped_count.lock().unwrap() += 1;
                    continue;
                }
            }
        }

        // Generate embedding if requested (blocking call)
        let embedding = if let Some(ref ollama_client) = ollama {
            let text = format!(
                "{} {} {}",
                event.summary,
                event.description.as_deref().unwrap_or(""),
                event.location.as_deref().unwrap_or("")
            );

            match tokio::task::block_in_place(|| {
                tokio::runtime::Handle::current()
                    .block_on(async { ollama_client.generate_embedding(&text).await })
            }) {
                Ok(emb) => Some(emb),
                Err(e) => {
                    error!(
                        "Failed to generate embedding for event '{}': {}",
                        event.summary, e
                    );
                    None
                }
            }
        } else {
            None
        };

        // Insert into database
        let insert_event = InsertCalendarEvent {
            event_uid: event.event_uid.clone(),
            summary: event.summary.clone(),
            description: event.description.clone(),
            location: event.location.clone(),
            start_time: event.start_time,
            end_time: event.end_time,
            all_day: event.all_day,
            organizer: event.organizer.clone(),
            attendees: if event.attendees.is_empty() {
                None
            } else {
                Some(serde_json::to_string(&event.attendees).unwrap_or_default())
            },
            embedding,
            created_at: Utc::now().timestamp(),
            source_file: Some(args.path.clone()),
        };

        match dao_instance.store_event(&context, insert_event) {
            Ok(_) => {
                *inserted_count.lock().unwrap() += 1;
                if *inserted_count.lock().unwrap() % 100 == 0 {
                    info!("Imported {} events...", *inserted_count.lock().unwrap());
                }
            }
            Err(e) => {
                error!("Failed to store event '{}': {:?}", event.summary, e);
                *error_count.lock().unwrap() += 1;
            }
        }
    }

    let final_inserted = *inserted_count.lock().unwrap();
    let final_skipped = *skipped_count.lock().unwrap();
    let final_errors = *error_count.lock().unwrap();

    info!("\n=== Import Summary ===");
    info!("Total events found: {}", events.len());
    info!("Successfully inserted: {}", final_inserted);
    info!("Skipped (already exist): {}", final_skipped);
    info!("Errors: {}", final_errors);

    if args.generate_embeddings {
        info!("Embeddings were generated for semantic search");
    } else {
        info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
    }

    Ok(())
}
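The block_in_place call above is the standard escape hatch for making one async call from otherwise synchronous loop code; a minimal generic sketch (not in the commit) of the pattern:

    use tokio::runtime::Handle;
    use tokio::task;

    // block_in_place tells the multi-threaded Tokio runtime this thread is
    // about to block, so Handle::block_on cannot starve the worker pool.
    // This is why Cargo.toml gains the "rt-multi-thread" feature:
    // block_in_place panics on the current-thread runtime.
    fn run_blocking<F: std::future::Future>(fut: F) -> F::Output {
        task::block_in_place(|| Handle::current().block_on(fut))
    }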
src/bin/import_location_history.rs (new file, 115 lines)

use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::database::location_dao::{InsertLocationRecord, SqliteLocationHistoryDao};
use image_api::parsers::location_json_parser::parse_location_json;
use log::{error, info};
// Import the trait to use its methods
use image_api::database::LocationHistoryDao;

#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Location History data", long_about = None)]
struct Args {
    /// Path to the Location History JSON file
    #[arg(short, long)]
    path: String,

    /// Skip locations that already exist in the database
    #[arg(long, default_value = "true")]
    skip_existing: bool,

    /// Batch size for database inserts
    #[arg(long, default_value = "1000")]
    batch_size: usize,
}

#[tokio::main]
async fn main() -> Result<()> {
    dotenv::dotenv().ok();
    env_logger::init();

    let args = Args::parse();

    info!("Parsing location history file: {}", args.path);
    let locations =
        parse_location_json(&args.path).context("Failed to parse location history JSON")?;

    info!("Found {} location records", locations.len());

    let context = opentelemetry::Context::current();

    let mut inserted_count = 0;
    let mut skipped_count = 0;
    let mut error_count = 0;

    let mut dao_instance = SqliteLocationHistoryDao::new();
    let created_at = Utc::now().timestamp();

    // Process in batches using batch insert for a massive speedup
    for (batch_idx, chunk) in locations.chunks(args.batch_size).enumerate() {
        info!(
            "Processing batch {} ({} records)...",
            batch_idx + 1,
            chunk.len()
        );

        // Convert to InsertLocationRecord
        let mut batch_inserts = Vec::with_capacity(chunk.len());

        for location in chunk {
            // Per-record existence check; on by default, but it makes large
            // imports much slower
            if args.skip_existing {
                if let Ok(exists) = dao_instance.location_exists(
                    &context,
                    location.timestamp,
                    location.latitude,
                    location.longitude,
                ) {
                    if exists {
                        skipped_count += 1;
                        continue;
                    }
                }
            }

            batch_inserts.push(InsertLocationRecord {
                timestamp: location.timestamp,
                latitude: location.latitude,
                longitude: location.longitude,
                accuracy: location.accuracy,
                activity: location.activity.clone(),
                activity_confidence: location.activity_confidence,
                place_name: None,
                place_category: None,
                embedding: None,
                created_at,
                source_file: Some(args.path.clone()),
            });
        }

        // Batch-insert the entire chunk in a single transaction
        if !batch_inserts.is_empty() {
            match dao_instance.store_locations_batch(&context, batch_inserts) {
                Ok(count) => {
                    inserted_count += count;
                    info!(
                        "Imported {} locations (total: {})...",
                        count, inserted_count
                    );
                }
                Err(e) => {
                    error!("Failed to store batch: {:?}", e);
                    error_count += chunk.len();
                }
            }
        }
    }

    info!("\n=== Import Summary ===");
    info!("Total locations found: {}", locations.len());
    info!("Successfully inserted: {}", inserted_count);
    info!("Skipped (already exist): {}", skipped_count);
    info!("Errors: {}", error_count);

    Ok(())
}
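The batch-insert win claimed in the commit message ("1M+ records in minutes vs hours") mostly comes from transaction count: SQLite syncs to disk once per committed transaction, so chunking turns millions of commits into thousands. Illustrative arithmetic only, not from the commit:

    // Number of transactions (and therefore fsyncs) an import performs:
    // 1_000_000 records at batch_size 1_000 -> 1_000 commits instead of 1M.
    fn transaction_count(total_records: usize, batch_size: usize) -> usize {
        total_records.div_ceil(batch_size)
    }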
src/bin/import_search_history.rs (new file, 154 lines)

use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
use image_api::parsers::search_html_parser::parse_search_html;
use log::{error, info, warn};

// Import the trait to use its methods
use image_api::database::SearchHistoryDao;

#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Search History data", long_about = None)]
struct Args {
    /// Path to the search history HTML file
    #[arg(short, long)]
    path: String,

    /// Skip searches that already exist in the database
    #[arg(long, default_value = "true")]
    skip_existing: bool,

    /// Batch size for embedding generation (max 128 recommended)
    #[arg(long, default_value = "64")]
    batch_size: usize,
}

#[tokio::main]
async fn main() -> Result<()> {
    dotenv::dotenv().ok();
    env_logger::init();

    let args = Args::parse();

    info!("Parsing search history file: {}", args.path);
    let searches = parse_search_html(&args.path).context("Failed to parse search history HTML")?;

    info!("Found {} search records", searches.len());

    let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
        .or_else(|_| dotenv::var("OLLAMA_URL"))
        .unwrap_or_else(|_| "http://localhost:11434".to_string());
    let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
    let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
        .or_else(|_| dotenv::var("OLLAMA_MODEL"))
        .unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
    let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();

    let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
    let context = opentelemetry::Context::current();

    let mut inserted_count = 0;
    let mut skipped_count = 0;
    let mut error_count = 0;

    let mut dao_instance = SqliteSearchHistoryDao::new();
    let created_at = Utc::now().timestamp();

    // Process searches in batches (embeddings are REQUIRED for searches)
    for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
        info!(
            "Processing batch {} ({} searches)...",
            batch_idx + 1,
            chunk.len()
        );

        // Generate embeddings for this batch
        let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();

        let embeddings_result = tokio::task::spawn({
            let ollama_client = ollama.clone();
            async move {
                // Generate embeddings sequentially for the batch
                let mut embeddings = Vec::new();
                for query in &queries {
                    match ollama_client.generate_embedding(query).await {
                        Ok(emb) => embeddings.push(Some(emb)),
                        Err(e) => {
                            warn!("Failed to generate embedding for query '{}': {}", query, e);
                            embeddings.push(None);
                        }
                    }
                }
                embeddings
            }
        })
        .await
        .context("Failed to generate embeddings for batch")?;

        // Build the batch of searches with embeddings
        let mut batch_inserts = Vec::new();

        for (search, embedding_opt) in chunk.iter().zip(embeddings_result.iter()) {
            // Check whether the search exists (optional, for speed)
            if args.skip_existing {
                if let Ok(exists) =
                    dao_instance.search_exists(&context, search.timestamp, &search.query)
                {
                    if exists {
                        skipped_count += 1;
                        continue;
                    }
                }
            }

            // Only insert if we have an embedding
            if let Some(embedding) = embedding_opt {
                batch_inserts.push(InsertSearchRecord {
                    timestamp: search.timestamp,
                    query: search.query.clone(),
                    search_engine: search.search_engine.clone(),
                    embedding: embedding.clone(),
                    created_at,
                    source_file: Some(args.path.clone()),
                });
            } else {
                error!(
                    "Skipping search '{}' due to missing embedding",
                    search.query
                );
                error_count += 1;
            }
        }

        // Batch-insert the entire chunk in a single transaction
        if !batch_inserts.is_empty() {
            match dao_instance.store_searches_batch(&context, batch_inserts) {
                Ok(count) => {
                    inserted_count += count;
                    info!("Imported {} searches (total: {})...", count, inserted_count);
                }
                Err(e) => {
                    error!("Failed to store batch: {:?}", e);
                    error_count += chunk.len();
                }
            }
        }

        // Rate limiting between batches
        if batch_idx < searches.len() / args.batch_size {
            info!("Waiting 500ms before next batch...");
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
        }
    }

    info!("\n=== Import Summary ===");
    info!("Total searches found: {}", searches.len());
    info!("Successfully inserted: {}", inserted_count);
    info!("Skipped (already exist): {}", skipped_count);
    info!("Errors: {}", error_count);
    info!("All imported searches have embeddings for semantic search");

    Ok(())
}
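One small observation on the rate-limiting guard above: `searches.len() / args.batch_size` truncates, so when the record count divides evenly by the batch size, the final batch is still followed by a 500ms sleep. Harmless, but an exact last-batch test would look like this (hypothetical alternative, not in the commit):

    // Exact "is this the last batch?" test using a ceiling division.
    fn is_last_batch(batch_idx: usize, total: usize, batch_size: usize) -> bool {
        batch_idx + 1 == total.div_ceil(batch_size)
    }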
@@ -102,11 +102,11 @@ fn main() -> anyhow::Result<()> {
         width: exif_data.width,
         height: exif_data.height,
         orientation: exif_data.orientation,
-        gps_latitude: exif_data.gps_latitude,
-        gps_longitude: exif_data.gps_longitude,
-        gps_altitude: exif_data.gps_altitude,
-        focal_length: exif_data.focal_length,
-        aperture: exif_data.aperture,
+        gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
+        gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
+        gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
+        focal_length: exif_data.focal_length.map(|v| v as f32),
+        aperture: exif_data.aperture.map(|v| v as f32),
         shutter_speed: exif_data.shutter_speed,
         iso: exif_data.iso,
         date_taken: exif_data.date_taken,

@@ -298,17 +298,17 @@ impl From<ImageExif> for ExifMetadata {
         },
         gps: if has_gps {
             Some(GpsCoordinates {
-                latitude: exif.gps_latitude,
-                longitude: exif.gps_longitude,
-                altitude: exif.gps_altitude,
+                latitude: exif.gps_latitude.map(|v| v as f64),
+                longitude: exif.gps_longitude.map(|v| v as f64),
+                altitude: exif.gps_altitude.map(|v| v as f64),
             })
         } else {
             None
         },
         capture_settings: if has_capture_settings {
             Some(CaptureSettings {
-                focal_length: exif.focal_length,
-                aperture: exif.aperture,
+                focal_length: exif.focal_length.map(|v| v as f64),
+                aperture: exif.aperture.map(|v| v as f64),
                 shutter_speed: exif.shutter_speed,
                 iso: exif.iso,
             })
src/database/calendar_dao.rs (new file, 553 lines)

use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};

use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;

/// Represents a calendar event
#[derive(Serialize, Clone, Debug)]
pub struct CalendarEvent {
    pub id: i32,
    pub event_uid: Option<String>,
    pub summary: String,
    pub description: Option<String>,
    pub location: Option<String>,
    pub start_time: i64,
    pub end_time: i64,
    pub all_day: bool,
    pub organizer: Option<String>,
    pub attendees: Option<String>, // JSON string
    pub created_at: i64,
    pub source_file: Option<String>,
}

/// Data for inserting a new calendar event
#[derive(Clone, Debug)]
pub struct InsertCalendarEvent {
    pub event_uid: Option<String>,
    pub summary: String,
    pub description: Option<String>,
    pub location: Option<String>,
    pub start_time: i64,
    pub end_time: i64,
    pub all_day: bool,
    pub organizer: Option<String>,
    pub attendees: Option<String>,
    pub embedding: Option<Vec<f32>>, // 768-dim, optional
    pub created_at: i64,
    pub source_file: Option<String>,
}

pub trait CalendarEventDao: Sync + Send {
    /// Store a calendar event with an optional embedding
    fn store_event(
        &mut self,
        context: &opentelemetry::Context,
        event: InsertCalendarEvent,
    ) -> Result<CalendarEvent, DbError>;

    /// Batch-insert events (for import efficiency)
    fn store_events_batch(
        &mut self,
        context: &opentelemetry::Context,
        events: Vec<InsertCalendarEvent>,
    ) -> Result<usize, DbError>;

    /// Find events in a time range (PRIMARY query method)
    fn find_events_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<CalendarEvent>, DbError>;

    /// Find semantically similar events (SECONDARY - requires embeddings)
    fn find_similar_events(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<CalendarEvent>, DbError>;

    /// Hybrid: time-filtered + semantic ranking.
    /// "Events during photo timestamp ±N days, ranked by similarity to context"
    fn find_relevant_events_hybrid(
        &mut self,
        context: &opentelemetry::Context,
        center_timestamp: i64,
        time_window_days: i64,
        query_embedding: Option<&[f32]>,
        limit: usize,
    ) -> Result<Vec<CalendarEvent>, DbError>;

    /// Check whether an event exists (idempotency)
    fn event_exists(
        &mut self,
        context: &opentelemetry::Context,
        event_uid: &str,
        start_time: i64,
    ) -> Result<bool, DbError>;

    /// Get the count of events
    fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}

pub struct SqliteCalendarEventDao {
    connection: Arc<Mutex<SqliteConnection>>,
}

impl Default for SqliteCalendarEventDao {
    fn default() -> Self {
        Self::new()
    }
}

impl SqliteCalendarEventDao {
    pub fn new() -> Self {
        SqliteCalendarEventDao {
            connection: Arc::new(Mutex::new(connect())),
        }
    }

    fn serialize_vector(vec: &[f32]) -> Vec<u8> {
        use zerocopy::IntoBytes;
        vec.as_bytes().to_vec()
    }

    fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
        if bytes.len() % 4 != 0 {
            return Err(DbError::new(DbErrorKind::QueryError));
        }

        let count = bytes.len() / 4;
        let mut vec = Vec::with_capacity(count);

        for chunk in bytes.chunks_exact(4) {
            let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            vec.push(float);
        }

        Ok(vec)
    }

    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if magnitude_a == 0.0 || magnitude_b == 0.0 {
            return 0.0;
        }

        dot_product / (magnitude_a * magnitude_b)
    }
}
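An illustrative check, not part of the commit, of the invariant the two helpers above rely on: the little-endian blob layout must round-trip exactly (this mirrors serialize_vector/deserialize_vector without depending on the private functions):

    #[cfg(test)]
    mod embedding_blob_tests {
        // 768 f32s -> 3072-byte blob -> the same 768 f32s.
        #[test]
        fn roundtrip() {
            let v: Vec<f32> = (0..768).map(|i| i as f32 / 768.0).collect();
            let blob: Vec<u8> = v.iter().flat_map(|f| f.to_le_bytes()).collect();
            let back: Vec<f32> = blob
                .chunks_exact(4)
                .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
                .collect();
            assert_eq!(v, back);
        }
    }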
#[derive(QueryableByName)]
struct CalendarEventWithVectorRow {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    event_uid: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Text)]
    summary: String,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    description: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    location: Option<String>,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    start_time: i64,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    end_time: i64,
    #[diesel(sql_type = diesel::sql_types::Bool)]
    all_day: bool,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    organizer: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    attendees: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
    embedding: Option<Vec<u8>>,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    created_at: i64,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    source_file: Option<String>,
}

impl CalendarEventWithVectorRow {
    fn to_calendar_event(&self) -> CalendarEvent {
        CalendarEvent {
            id: self.id,
            event_uid: self.event_uid.clone(),
            summary: self.summary.clone(),
            description: self.description.clone(),
            location: self.location.clone(),
            start_time: self.start_time,
            end_time: self.end_time,
            all_day: self.all_day,
            organizer: self.organizer.clone(),
            attendees: self.attendees.clone(),
            created_at: self.created_at,
            source_file: self.source_file.clone(),
        }
    }
}

#[derive(QueryableByName)]
struct LastInsertRowId {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
}

impl CalendarEventDao for SqliteCalendarEventDao {
    fn store_event(
        &mut self,
        context: &opentelemetry::Context,
        event: InsertCalendarEvent,
    ) -> Result<CalendarEvent, DbError> {
        trace_db_call(context, "insert", "store_event", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get CalendarEventDao");

            // Validate embedding dimensions if provided
            if let Some(ref emb) = event.embedding {
                if emb.len() != 768 {
                    return Err(anyhow::anyhow!(
                        "Invalid embedding dimensions: {} (expected 768)",
                        emb.len()
                    ));
                }
            }

            let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));

            // INSERT OR REPLACE to handle re-imports
            diesel::sql_query(
                "INSERT OR REPLACE INTO calendar_events
                 (event_uid, summary, description, location, start_time, end_time, all_day,
                  organizer, attendees, embedding, created_at, source_file)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
            )
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.event_uid)
            .bind::<diesel::sql_types::Text, _>(&event.summary)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.description)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.location)
            .bind::<diesel::sql_types::BigInt, _>(event.start_time)
            .bind::<diesel::sql_types::BigInt, _>(event.end_time)
            .bind::<diesel::sql_types::Bool, _>(event.all_day)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.organizer)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.attendees)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
            .bind::<diesel::sql_types::BigInt, _>(event.created_at)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.source_file)
            .execute(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;

            let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
                .get_result::<LastInsertRowId>(conn.deref_mut())
                .map(|r| r.id)
                .map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;

            Ok(CalendarEvent {
                id: row_id,
                event_uid: event.event_uid,
                summary: event.summary,
                description: event.description,
                location: event.location,
                start_time: event.start_time,
                end_time: event.end_time,
                all_day: event.all_day,
                organizer: event.organizer,
                attendees: event.attendees,
                created_at: event.created_at,
                source_file: event.source_file,
            })
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_events_batch(
        &mut self,
        context: &opentelemetry::Context,
        events: Vec<InsertCalendarEvent>,
    ) -> Result<usize, DbError> {
        trace_db_call(context, "insert", "store_events_batch", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get CalendarEventDao");
            let mut inserted = 0;

            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for event in events {
                    // Validate embedding if provided
                    if let Some(ref emb) = event.embedding {
                        if emb.len() != 768 {
                            log::warn!(
                                "Skipping event with invalid embedding dimensions: {}",
                                emb.len()
                            );
                            continue;
                        }
                    }

                    let embedding_bytes =
                        event.embedding.as_ref().map(|e| Self::serialize_vector(e));

                    diesel::sql_query(
                        "INSERT OR REPLACE INTO calendar_events
                         (event_uid, summary, description, location, start_time, end_time, all_day,
                          organizer, attendees, embedding, created_at, source_file)
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.event_uid,
                    )
                    .bind::<diesel::sql_types::Text, _>(&event.summary)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.description,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.location,
                    )
                    .bind::<diesel::sql_types::BigInt, _>(event.start_time)
                    .bind::<diesel::sql_types::BigInt, _>(event.end_time)
                    .bind::<diesel::sql_types::Bool, _>(event.all_day)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.organizer,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.attendees,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
                        &embedding_bytes,
                    )
                    .bind::<diesel::sql_types::BigInt, _>(event.created_at)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &event.source_file,
                    )
                    .execute(conn)
                    .map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;

                    inserted += 1;
                }
                Ok(())
            })
            .map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;

            Ok(inserted)
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_events_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<CalendarEvent>, DbError> {
        trace_db_call(context, "query", "find_events_in_range", |_span| {
            let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");

            diesel::sql_query(
                "SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
                        organizer, attendees, NULL as embedding, created_at, source_file
                 FROM calendar_events
                 WHERE start_time >= ?1 AND start_time <= ?2
                 ORDER BY start_time ASC",
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<CalendarEventWithVectorRow>(conn.deref_mut())
            .map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_similar_events(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<CalendarEvent>, DbError> {
        trace_db_call(context, "query", "find_similar_events", |_span| {
            let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");

            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
                    "Invalid query embedding dimensions: {} (expected 768)",
                    query_embedding.len()
                ));
            }

            // Load all events with embeddings
            let results = diesel::sql_query(
                "SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
                        organizer, attendees, embedding, created_at, source_file
                 FROM calendar_events
                 WHERE embedding IS NOT NULL",
            )
            .load::<CalendarEventWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Compute similarities
            let mut scored_events: Vec<(f32, CalendarEvent)> = results
                .into_iter()
                .filter_map(|row| {
                    if let Some(ref emb_bytes) = row.embedding {
                        if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
                            let similarity = Self::cosine_similarity(query_embedding, &emb);
                            Some((similarity, row.to_calendar_event()))
                        } else {
                            None
                        }
                    } else {
                        None
                    }
                })
                .collect();

            // Sort by similarity descending
            scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

            log::info!("Found {} similar calendar events", scored_events.len());
|
||||||
|
if !scored_events.is_empty() {
|
||||||
|
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_relevant_events_hybrid(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
center_timestamp: i64,
|
||||||
|
time_window_days: i64,
|
||||||
|
query_embedding: Option<&[f32]>,
|
||||||
|
limit: usize,
|
||||||
|
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||||
|
trace_db_call(context, "query", "find_relevant_events_hybrid", |_span| {
|
||||||
|
let window_seconds = time_window_days * 86400;
|
||||||
|
let start_ts = center_timestamp - window_seconds;
|
||||||
|
let end_ts = center_timestamp + window_seconds;
|
||||||
|
|
||||||
|
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||||
|
|
||||||
|
// Step 1: Time-based filter (fast, indexed)
|
||||||
|
let events_in_range = diesel::sql_query(
|
||||||
|
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||||
|
organizer, attendees, embedding, created_at, source_file
|
||||||
|
FROM calendar_events
|
||||||
|
WHERE start_time >= ?1 AND start_time <= ?2"
|
||||||
|
)
|
||||||
|
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||||
|
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||||
|
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||||
|
|
||||||
|
// Step 2: If query embedding provided, rank by semantic similarity
|
||||||
|
if let Some(query_emb) = query_embedding {
|
||||||
|
if query_emb.len() != 768 {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
|
"Invalid query embedding dimensions: {} (expected 768)",
|
||||||
|
query_emb.len()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut scored_events: Vec<(f32, CalendarEvent)> = events_in_range
|
||||||
|
.into_iter()
|
||||||
|
.map(|row| {
|
||||||
|
// Events with embeddings get semantic scoring
|
||||||
|
let similarity = if let Some(ref emb_bytes) = row.embedding {
|
||||||
|
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
|
||||||
|
Self::cosine_similarity(query_emb, &emb)
|
||||||
|
} else {
|
||||||
|
0.5 // Neutral score for deserialization errors
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
0.5 // Neutral score for events without embeddings
|
||||||
|
};
|
||||||
|
(similarity, row.to_calendar_event())
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Sort by similarity descending
|
||||||
|
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
|
||||||
|
log::info!("Hybrid query: {} events in time range, ranked by similarity", scored_events.len());
|
||||||
|
if !scored_events.is_empty() {
|
||||||
|
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||||
|
} else {
|
||||||
|
// No semantic ranking, just return time-sorted (limit applied)
|
||||||
|
log::info!("Time-only query: {} events in range", events_in_range.len());
|
||||||
|
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn event_exists(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
event_uid: &str,
|
||||||
|
start_time: i64,
|
||||||
|
) -> Result<bool, DbError> {
|
||||||
|
trace_db_call(context, "query", "event_exists", |_span| {
|
||||||
|
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||||
|
|
||||||
|
#[derive(QueryableByName)]
|
||||||
|
struct CountResult {
|
||||||
|
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||||
|
count: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: CountResult = diesel::sql_query(
|
||||||
|
"SELECT COUNT(*) as count FROM calendar_events WHERE event_uid = ?1 AND start_time = ?2"
|
||||||
|
)
|
||||||
|
.bind::<diesel::sql_types::Text, _>(event_uid)
|
||||||
|
.bind::<diesel::sql_types::BigInt, _>(start_time)
|
||||||
|
.get_result(conn.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||||
|
|
||||||
|
Ok(result.count > 0)
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||||
|
trace_db_call(context, "query", "get_event_count", |_span| {
|
||||||
|
let mut conn = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to get CalendarEventDao");
|
||||||
|
|
||||||
|
#[derive(QueryableByName)]
|
||||||
|
struct CountResult {
|
||||||
|
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||||
|
count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: CountResult =
|
||||||
|
diesel::sql_query("SELECT COUNT(*) as count FROM calendar_events")
|
||||||
|
.get_result(conn.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||||
|
|
||||||
|
Ok(result.count)
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
}
|
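For orientation, a minimal caller sketch for the hybrid query above. The wrapper function, the hard-coded timestamp, and the source of the 768-dim embedding are illustrative assumptions, not part of this commit:

// Sketch: hypothetical caller of CalendarEventDao::find_relevant_events_hybrid.
use opentelemetry::Context;

fn show_events_around(
    dao: &mut SqliteCalendarEventDao,
    query_embedding: &[f32], // 768-dim, produced elsewhere (assumption)
) -> Result<(), DbError> {
    let ctx = Context::current();
    let center_ts = 1_717_200_000; // 2024-06-01 00:00 UTC as a Unix timestamp
    // Indexed time filter first, cosine ranking over the survivors second.
    let events = dao.find_relevant_events_hybrid(
        &ctx,
        center_ts,
        7,                     // +/- 7 days around the center timestamp
        Some(query_embedding), // None falls back to time-only ordering
        10,                    // top-k after ranking
    )?;
    for e in &events {
        println!("{} [{} - {}]", e.summary, e.start_time, e.end_time);
    }
    Ok(())
}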
src/database/location_dao.rs (new file, 528 lines)
@@ -0,0 +1,528 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};

use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;

/// Represents a location history record
#[derive(Serialize, Clone, Debug)]
pub struct LocationRecord {
    pub id: i32,
    pub timestamp: i64,
    pub latitude: f64,
    pub longitude: f64,
    pub accuracy: Option<i32>,
    pub activity: Option<String>,
    pub activity_confidence: Option<i32>,
    pub place_name: Option<String>,
    pub place_category: Option<String>,
    pub created_at: i64,
    pub source_file: Option<String>,
}

/// Data for inserting a new location record
#[derive(Clone, Debug)]
pub struct InsertLocationRecord {
    pub timestamp: i64,
    pub latitude: f64,
    pub longitude: f64,
    pub accuracy: Option<i32>,
    pub activity: Option<String>,
    pub activity_confidence: Option<i32>,
    pub place_name: Option<String>,
    pub place_category: Option<String>,
    pub embedding: Option<Vec<f32>>, // 768-dim, optional (rarely used)
    pub created_at: i64,
    pub source_file: Option<String>,
}

pub trait LocationHistoryDao: Sync + Send {
    /// Store single location record
    fn store_location(
        &mut self,
        context: &opentelemetry::Context,
        location: InsertLocationRecord,
    ) -> Result<LocationRecord, DbError>;

    /// Batch insert locations (Google Takeout has millions of points)
    fn store_locations_batch(
        &mut self,
        context: &opentelemetry::Context,
        locations: Vec<InsertLocationRecord>,
    ) -> Result<usize, DbError>;

    /// Find nearest location to timestamp (PRIMARY query)
    /// "Where was I at photo timestamp ±N minutes?"
    fn find_nearest_location(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        max_time_diff_seconds: i64,
    ) -> Result<Option<LocationRecord>, DbError>;

    /// Find locations in time range
    fn find_locations_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<LocationRecord>, DbError>;

    /// Find locations near GPS coordinates (for "photos near this place")
    /// Uses approximate bounding box for performance
    fn find_locations_near_point(
        &mut self,
        context: &opentelemetry::Context,
        latitude: f64,
        longitude: f64,
        radius_km: f64,
    ) -> Result<Vec<LocationRecord>, DbError>;

    /// Deduplicate: check if location exists
    fn location_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        latitude: f64,
        longitude: f64,
    ) -> Result<bool, DbError>;

    /// Get count of location records
    fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}

pub struct SqliteLocationHistoryDao {
    connection: Arc<Mutex<SqliteConnection>>,
}

impl Default for SqliteLocationHistoryDao {
    fn default() -> Self {
        Self::new()
    }
}

impl SqliteLocationHistoryDao {
    pub fn new() -> Self {
        SqliteLocationHistoryDao {
            connection: Arc::new(Mutex::new(connect())),
        }
    }

    fn serialize_vector(vec: &[f32]) -> Vec<u8> {
        use zerocopy::IntoBytes;
        vec.as_bytes().to_vec()
    }

    /// Haversine distance calculation (in kilometers)
    /// Used for filtering locations by proximity to a point
    fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
        const R: f64 = 6371.0; // Earth radius in km

        let d_lat = (lat2 - lat1).to_radians();
        let d_lon = (lon2 - lon1).to_radians();

        let a = (d_lat / 2.0).sin().powi(2)
            + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);

        let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());

        R * c
    }

    /// Calculate approximate bounding box for spatial queries
    /// Returns (min_lat, max_lat, min_lon, max_lon)
    fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
        const KM_PER_DEGREE_LAT: f64 = 111.0;
        let km_per_degree_lon = 111.0 * lat.to_radians().cos();

        let delta_lat = radius_km / KM_PER_DEGREE_LAT;
        let delta_lon = radius_km / km_per_degree_lon;

        (
            lat - delta_lat, // min_lat
            lat + delta_lat, // max_lat
            lon - delta_lon, // min_lon
            lon + delta_lon, // max_lon
        )
    }
}

#[derive(QueryableByName)]
struct LocationRecordRow {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    timestamp: i64,
    #[diesel(sql_type = diesel::sql_types::Float)]
    latitude: f32,
    #[diesel(sql_type = diesel::sql_types::Float)]
    longitude: f32,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
    accuracy: Option<i32>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    activity: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
    activity_confidence: Option<i32>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    place_name: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    place_category: Option<String>,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    created_at: i64,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    source_file: Option<String>,
}

impl LocationRecordRow {
    fn to_location_record(&self) -> LocationRecord {
        LocationRecord {
            id: self.id,
            timestamp: self.timestamp,
            latitude: self.latitude as f64,
            longitude: self.longitude as f64,
            accuracy: self.accuracy,
            activity: self.activity.clone(),
            activity_confidence: self.activity_confidence,
            place_name: self.place_name.clone(),
            place_category: self.place_category.clone(),
            created_at: self.created_at,
            source_file: self.source_file.clone(),
        }
    }
}

#[derive(QueryableByName)]
struct LastInsertRowId {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
}

impl LocationHistoryDao for SqliteLocationHistoryDao {
    fn store_location(
        &mut self,
        context: &opentelemetry::Context,
        location: InsertLocationRecord,
    ) -> Result<LocationRecord, DbError> {
        trace_db_call(context, "insert", "store_location", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            // Validate embedding dimensions if provided (rare for location data)
            if let Some(ref emb) = location.embedding {
                if emb.len() != 768 {
                    return Err(anyhow::anyhow!(
                        "Invalid embedding dimensions: {} (expected 768)",
                        emb.len()
                    ));
                }
            }

            let embedding_bytes = location
                .embedding
                .as_ref()
                .map(|e| Self::serialize_vector(e));

            // INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+lat+lon)
            diesel::sql_query(
                "INSERT OR IGNORE INTO location_history
                 (timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                  place_name, place_category, embedding, created_at, source_file)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
            )
            .bind::<diesel::sql_types::BigInt, _>(location.timestamp)
            .bind::<diesel::sql_types::Float, _>(location.latitude as f32)
            .bind::<diesel::sql_types::Float, _>(location.longitude as f32)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(&location.accuracy)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.activity)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
                &location.activity_confidence,
            )
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.place_name)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                &location.place_category,
            )
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
            .bind::<diesel::sql_types::BigInt, _>(location.created_at)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.source_file)
            .execute(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;

            let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
                .get_result::<LastInsertRowId>(conn.deref_mut())
                .map(|r| r.id)
                .map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;

            Ok(LocationRecord {
                id: row_id,
                timestamp: location.timestamp,
                latitude: location.latitude,
                longitude: location.longitude,
                accuracy: location.accuracy,
                activity: location.activity,
                activity_confidence: location.activity_confidence,
                place_name: location.place_name,
                place_category: location.place_category,
                created_at: location.created_at,
                source_file: location.source_file,
            })
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_locations_batch(
        &mut self,
        context: &opentelemetry::Context,
        locations: Vec<InsertLocationRecord>,
    ) -> Result<usize, DbError> {
        trace_db_call(context, "insert", "store_locations_batch", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");
            let mut inserted = 0;

            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for location in locations {
                    // Validate embedding if provided (rare)
                    if let Some(ref emb) = location.embedding {
                        if emb.len() != 768 {
                            log::warn!(
                                "Skipping location with invalid embedding dimensions: {}",
                                emb.len()
                            );
                            continue;
                        }
                    }

                    let embedding_bytes = location
                        .embedding
                        .as_ref()
                        .map(|e| Self::serialize_vector(e));

                    let rows_affected = diesel::sql_query(
                        "INSERT OR IGNORE INTO location_history
                         (timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                          place_name, place_category, embedding, created_at, source_file)
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
                    )
                    .bind::<diesel::sql_types::BigInt, _>(location.timestamp)
                    .bind::<diesel::sql_types::Float, _>(location.latitude as f32)
                    .bind::<diesel::sql_types::Float, _>(location.longitude as f32)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
                        &location.accuracy,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.activity,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
                        &location.activity_confidence,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.place_name,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.place_category,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
                        &embedding_bytes,
                    )
                    .bind::<diesel::sql_types::BigInt, _>(location.created_at)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.source_file,
                    )
                    .execute(conn)
                    .map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;

                    if rows_affected > 0 {
                        inserted += 1;
                    }
                }
                Ok(())
            })
            .map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;

            Ok(inserted)
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_nearest_location(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        max_time_diff_seconds: i64,
    ) -> Result<Option<LocationRecord>, DbError> {
        trace_db_call(context, "query", "find_nearest_location", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            let start_ts = timestamp - max_time_diff_seconds;
            let end_ts = timestamp + max_time_diff_seconds;

            // Find location closest to target timestamp within window
            let results = diesel::sql_query(
                "SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                 place_name, place_category, created_at, source_file
                 FROM location_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2
                 ORDER BY ABS(timestamp - ?3) ASC
                 LIMIT 1"
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .bind::<diesel::sql_types::BigInt, _>(timestamp)
            .load::<LocationRecordRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(results.into_iter().next().map(|r| r.to_location_record()))
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_locations_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<LocationRecord>, DbError> {
        trace_db_call(context, "query", "find_locations_in_range", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            diesel::sql_query(
                "SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                 place_name, place_category, created_at, source_file
                 FROM location_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2
                 ORDER BY timestamp ASC"
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<LocationRecordRow>(conn.deref_mut())
            .map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_locations_near_point(
        &mut self,
        context: &opentelemetry::Context,
        latitude: f64,
        longitude: f64,
        radius_km: f64,
    ) -> Result<Vec<LocationRecord>, DbError> {
        trace_db_call(context, "query", "find_locations_near_point", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            // Use bounding box for initial filter (fast, indexed)
            let (min_lat, max_lat, min_lon, max_lon) =
                Self::bounding_box(latitude, longitude, radius_km);

            let results = diesel::sql_query(
                "SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                 place_name, place_category, created_at, source_file
                 FROM location_history
                 WHERE latitude >= ?1 AND latitude <= ?2
                   AND longitude >= ?3 AND longitude <= ?4"
            )
            .bind::<diesel::sql_types::Float, _>(min_lat as f32)
            .bind::<diesel::sql_types::Float, _>(max_lat as f32)
            .bind::<diesel::sql_types::Float, _>(min_lon as f32)
            .bind::<diesel::sql_types::Float, _>(max_lon as f32)
            .load::<LocationRecordRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Refine with Haversine distance (in-memory, post-filter)
            let filtered: Vec<LocationRecord> = results
                .into_iter()
                .map(|r| r.to_location_record())
                .filter(|loc| {
                    let distance =
                        Self::haversine_distance(latitude, longitude, loc.latitude, loc.longitude);
                    distance <= radius_km
                })
                .collect();

            log::info!(
                "Found {} locations within {} km of ({}, {})",
                filtered.len(),
                radius_km,
                latitude,
                longitude
            );

            Ok(filtered)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn location_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        latitude: f64,
        longitude: f64,
    ) -> Result<bool, DbError> {
        trace_db_call(context, "query", "location_exists", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::Integer)]
                count: i32,
            }

            let result: CountResult = diesel::sql_query(
                "SELECT COUNT(*) as count FROM location_history
                 WHERE timestamp = ?1 AND latitude = ?2 AND longitude = ?3",
            )
            .bind::<diesel::sql_types::BigInt, _>(timestamp)
            .bind::<diesel::sql_types::Float, _>(latitude as f32)
            .bind::<diesel::sql_types::Float, _>(longitude as f32)
            .get_result(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count > 0)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
        trace_db_call(context, "query", "get_location_count", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::BigInt)]
                count: i64,
            }

            let result: CountResult =
                diesel::sql_query("SELECT COUNT(*) as count FROM location_history")
                    .get_result(conn.deref_mut())
                    .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
}
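The two-stage spatial filter above (coarse bounding box in SQL, exact Haversine refinement in memory) can be sanity-checked standalone. A sketch with a dependency-free copy of the formula; the city coordinates are illustrative, not from this commit:

// Sketch: verifying the bounding-box + Haversine refinement logic.
fn haversine_km(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const R: f64 = 6371.0; // mean Earth radius in km
    let (d_lat, d_lon) = ((lat2 - lat1).to_radians(), (lon2 - lon1).to_radians());
    let a = (d_lat / 2.0).sin().powi(2)
        + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
    2.0 * R * a.sqrt().atan2((1.0 - a).sqrt())
}

fn main() {
    // Berlin (52.5200, 13.4050) to Potsdam (52.3906, 13.0645): about 27 km.
    let d = haversine_km(52.5200, 13.4050, 52.3906, 13.0645);
    println!("distance = {:.1} km", d);
    // A 30 km bounding box (+/- 30/111 deg lat, +/- 30/(111*cos lat) deg lon)
    // over-selects near its corners, which is exactly why the DAO re-checks
    // every candidate with the precise distance before returning it.
    assert!(d <= 30.0);
}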
@@ -9,15 +9,25 @@ use crate::database::models::{
 };
 use crate::otel::trace_db_call;
 
+pub mod calendar_dao;
 pub mod daily_summary_dao;
 pub mod embeddings_dao;
 pub mod insights_dao;
+pub mod location_dao;
 pub mod models;
 pub mod schema;
+pub mod search_dao;
 
+pub use calendar_dao::{
+    CalendarEvent, CalendarEventDao, InsertCalendarEvent, SqliteCalendarEventDao,
+};
 pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
 pub use embeddings_dao::{EmbeddingDao, InsertMessageEmbedding};
 pub use insights_dao::{InsightDao, SqliteInsightDao};
+pub use location_dao::{
+    InsertLocationRecord, LocationHistoryDao, LocationRecord, SqliteLocationHistoryDao,
+};
+pub use search_dao::{InsertSearchRecord, SearchHistoryDao, SearchRecord, SqliteSearchHistoryDao};
 
 pub trait UserDao {
     fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
@@ -485,8 +495,8 @@ impl ExifDao for SqliteExifDao {
         // GPS bounding box
         if let Some((min_lat, max_lat, min_lon, max_lon)) = gps_bounds {
             query = query
-                .filter(gps_latitude.between(min_lat, max_lat))
-                .filter(gps_longitude.between(min_lon, max_lon))
+                .filter(gps_latitude.between(min_lat as f32, max_lat as f32))
+                .filter(gps_longitude.between(min_lon as f32, max_lon as f32))
                 .filter(gps_latitude.is_not_null())
                 .filter(gps_longitude.is_not_null());
         }
@@ -40,11 +40,11 @@ pub struct InsertImageExif {
     pub width: Option<i32>,
     pub height: Option<i32>,
     pub orientation: Option<i32>,
-    pub gps_latitude: Option<f64>,
-    pub gps_longitude: Option<f64>,
-    pub gps_altitude: Option<f64>,
-    pub focal_length: Option<f64>,
-    pub aperture: Option<f64>,
+    pub gps_latitude: Option<f32>,
+    pub gps_longitude: Option<f32>,
+    pub gps_altitude: Option<f32>,
+    pub focal_length: Option<f32>,
+    pub aperture: Option<f32>,
     pub shutter_speed: Option<String>,
     pub iso: Option<i32>,
     pub date_taken: Option<i64>,
@@ -62,11 +62,11 @@ pub struct ImageExif {
     pub width: Option<i32>,
     pub height: Option<i32>,
     pub orientation: Option<i32>,
-    pub gps_latitude: Option<f64>,
-    pub gps_longitude: Option<f64>,
-    pub gps_altitude: Option<f64>,
-    pub focal_length: Option<f64>,
-    pub aperture: Option<f64>,
+    pub gps_latitude: Option<f32>,
+    pub gps_longitude: Option<f32>,
+    pub gps_altitude: Option<f32>,
+    pub focal_length: Option<f32>,
+    pub aperture: Option<f32>,
     pub shutter_speed: Option<String>,
     pub iso: Option<i32>,
     pub date_taken: Option<i64>,
@@ -1,4 +1,37 @@
-table! {
+// @generated automatically by Diesel CLI.
+
+diesel::table! {
+    calendar_events (id) {
+        id -> Integer,
+        event_uid -> Nullable<Text>,
+        summary -> Text,
+        description -> Nullable<Text>,
+        location -> Nullable<Text>,
+        start_time -> BigInt,
+        end_time -> BigInt,
+        all_day -> Bool,
+        organizer -> Nullable<Text>,
+        attendees -> Nullable<Text>,
+        embedding -> Nullable<Binary>,
+        created_at -> BigInt,
+        source_file -> Nullable<Text>,
+    }
+}
+
+diesel::table! {
+    daily_conversation_summaries (id) {
+        id -> Integer,
+        date -> Text,
+        contact -> Text,
+        summary -> Text,
+        message_count -> Integer,
+        embedding -> Binary,
+        created_at -> BigInt,
+        model_version -> Text,
+    }
+}
+
+diesel::table! {
     favorites (id) {
         id -> Integer,
         userid -> Integer,
@@ -6,7 +39,7 @@ table! {
     }
 }
 
-table! {
+diesel::table! {
     image_exif (id) {
         id -> Integer,
         file_path -> Text,
@@ -16,11 +49,11 @@ table! {
         width -> Nullable<Integer>,
         height -> Nullable<Integer>,
         orientation -> Nullable<Integer>,
-        gps_latitude -> Nullable<Double>,
-        gps_longitude -> Nullable<Double>,
-        gps_altitude -> Nullable<Double>,
-        focal_length -> Nullable<Double>,
-        aperture -> Nullable<Double>,
+        gps_latitude -> Nullable<Float>,
+        gps_longitude -> Nullable<Float>,
+        gps_altitude -> Nullable<Float>,
+        focal_length -> Nullable<Float>,
+        aperture -> Nullable<Float>,
         shutter_speed -> Nullable<Text>,
         iso -> Nullable<Integer>,
         date_taken -> Nullable<BigInt>,
@@ -29,24 +62,49 @@ table! {
     }
 }
 
-table! {
-    tagged_photo (id) {
+diesel::table! {
+    knowledge_embeddings (id) {
         id -> Integer,
-        photo_name -> Text,
-        tag_id -> Integer,
-        created_time -> BigInt,
+        keyword -> Text,
+        description -> Text,
+        category -> Nullable<Text>,
+        embedding -> Binary,
+        created_at -> BigInt,
+        model_version -> Text,
     }
 }
 
-table! {
-    tags (id) {
+diesel::table! {
+    location_history (id) {
         id -> Integer,
-        name -> Text,
-        created_time -> BigInt,
+        timestamp -> BigInt,
+        latitude -> Float,
+        longitude -> Float,
+        accuracy -> Nullable<Integer>,
+        activity -> Nullable<Text>,
+        activity_confidence -> Nullable<Integer>,
+        place_name -> Nullable<Text>,
+        place_category -> Nullable<Text>,
+        embedding -> Nullable<Binary>,
+        created_at -> BigInt,
+        source_file -> Nullable<Text>,
     }
 }
 
-table! {
+diesel::table! {
+    message_embeddings (id) {
+        id -> Integer,
+        contact -> Text,
+        body -> Text,
+        timestamp -> BigInt,
+        is_sent -> Bool,
+        embedding -> Binary,
+        created_at -> BigInt,
+        model_version -> Text,
+    }
+}
+
+diesel::table! {
     photo_insights (id) {
         id -> Integer,
         file_path -> Text,
@@ -57,7 +115,36 @@ table! {
     }
 }
 
-table! {
+diesel::table! {
+    search_history (id) {
+        id -> Integer,
+        timestamp -> BigInt,
+        query -> Text,
+        search_engine -> Nullable<Text>,
+        embedding -> Binary,
+        created_at -> BigInt,
+        source_file -> Nullable<Text>,
+    }
+}
+
+diesel::table! {
+    tagged_photo (id) {
+        id -> Integer,
+        photo_name -> Text,
+        tag_id -> Integer,
+        created_time -> BigInt,
+    }
+}
+
+diesel::table! {
+    tags (id) {
+        id -> Integer,
+        name -> Text,
+        created_time -> BigInt,
+    }
+}
+
+diesel::table! {
     users (id) {
         id -> Integer,
         username -> Text,
@@ -65,12 +152,18 @@ table! {
     }
 }
 
-joinable!(tagged_photo -> tags (tag_id));
+diesel::joinable!(tagged_photo -> tags (tag_id));
 
-allow_tables_to_appear_in_same_query!(
+diesel::allow_tables_to_appear_in_same_query!(
+    calendar_events,
+    daily_conversation_summaries,
     favorites,
     image_exif,
+    knowledge_embeddings,
+    location_history,
+    message_embeddings,
     photo_insights,
+    search_history,
     tagged_photo,
     tags,
     users,
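Since the new tables are declared through diesel::table!, time-range lookups could also be written against the generated DSL rather than sql_query. A sketch, assuming the schema module path used elsewhere in this commit; the DAOs themselves stick to raw SQL throughout:

// Sketch: typed Diesel query over the new location_history table (illustrative).
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use crate::database::schema::location_history::dsl::*;

fn point_ids_in_range(
    conn: &mut SqliteConnection,
    start: i64,
    end: i64,
) -> QueryResult<Vec<(i32, i64)>> {
    location_history
        .filter(timestamp.ge(start))      // same predicate as "timestamp >= ?1"
        .filter(timestamp.le(end))        // same predicate as "timestamp <= ?2"
        .order(timestamp.asc())
        .select((id, timestamp))
        .load(conn)
}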
src/database/search_dao.rs (new file, 516 lines)
@@ -0,0 +1,516 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};

use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;

/// Represents a search history record
#[derive(Serialize, Clone, Debug)]
pub struct SearchRecord {
    pub id: i32,
    pub timestamp: i64,
    pub query: String,
    pub search_engine: Option<String>,
    pub created_at: i64,
    pub source_file: Option<String>,
}

/// Data for inserting a new search record
#[derive(Clone, Debug)]
pub struct InsertSearchRecord {
    pub timestamp: i64,
    pub query: String,
    pub search_engine: Option<String>,
    pub embedding: Vec<f32>, // 768-dim, REQUIRED
    pub created_at: i64,
    pub source_file: Option<String>,
}

pub trait SearchHistoryDao: Sync + Send {
    /// Store search with embedding
    fn store_search(
        &mut self,
        context: &opentelemetry::Context,
        search: InsertSearchRecord,
    ) -> Result<SearchRecord, DbError>;

    /// Batch insert searches
    fn store_searches_batch(
        &mut self,
        context: &opentelemetry::Context,
        searches: Vec<InsertSearchRecord>,
    ) -> Result<usize, DbError>;

    /// Find searches in time range (for temporal context)
    fn find_searches_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Find semantically similar searches (PRIMARY - embeddings shine here)
    fn find_similar_searches(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Hybrid: Time window + semantic ranking
    fn find_relevant_searches_hybrid(
        &mut self,
        context: &opentelemetry::Context,
        center_timestamp: i64,
        time_window_days: i64,
        query_embedding: Option<&[f32]>,
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Deduplication check
    fn search_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        query: &str,
    ) -> Result<bool, DbError>;

    /// Get count of search records
    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}

pub struct SqliteSearchHistoryDao {
    connection: Arc<Mutex<SqliteConnection>>,
}

impl Default for SqliteSearchHistoryDao {
    fn default() -> Self {
        Self::new()
    }
}

impl SqliteSearchHistoryDao {
    pub fn new() -> Self {
        SqliteSearchHistoryDao {
            connection: Arc::new(Mutex::new(connect())),
        }
    }

    fn serialize_vector(vec: &[f32]) -> Vec<u8> {
        use zerocopy::IntoBytes;
        vec.as_bytes().to_vec()
    }

    fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
        if bytes.len() % 4 != 0 {
            return Err(DbError::new(DbErrorKind::QueryError));
        }

        let count = bytes.len() / 4;
        let mut vec = Vec::with_capacity(count);

        for chunk in bytes.chunks_exact(4) {
            let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            vec.push(float);
        }

        Ok(vec)
    }

    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if magnitude_a == 0.0 || magnitude_b == 0.0 {
            return 0.0;
        }

        dot_product / (magnitude_a * magnitude_b)
    }
}

#[derive(QueryableByName)]
struct SearchRecordWithVectorRow {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    timestamp: i64,
    #[diesel(sql_type = diesel::sql_types::Text)]
    query: String,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    search_engine: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Binary)]
    embedding: Vec<u8>,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    created_at: i64,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    source_file: Option<String>,
}

impl SearchRecordWithVectorRow {
    fn to_search_record(&self) -> SearchRecord {
        SearchRecord {
            id: self.id,
            timestamp: self.timestamp,
            query: self.query.clone(),
            search_engine: self.search_engine.clone(),
            created_at: self.created_at,
            source_file: self.source_file.clone(),
        }
    }
}

#[derive(QueryableByName)]
struct LastInsertRowId {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
}

impl SearchHistoryDao for SqliteSearchHistoryDao {
    fn store_search(
        &mut self,
        context: &opentelemetry::Context,
        search: InsertSearchRecord,
    ) -> Result<SearchRecord, DbError> {
        trace_db_call(context, "insert", "store_search", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            // Validate embedding dimensions (REQUIRED for searches)
            if search.embedding.len() != 768 {
                return Err(anyhow::anyhow!(
                    "Invalid embedding dimensions: {} (expected 768)",
                    search.embedding.len()
                ));
            }

            let embedding_bytes = Self::serialize_vector(&search.embedding);

            // INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+query)
            diesel::sql_query(
                "INSERT OR IGNORE INTO search_history
                 (timestamp, query, search_engine, embedding, created_at, source_file)
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
            )
            .bind::<diesel::sql_types::BigInt, _>(search.timestamp)
            .bind::<diesel::sql_types::Text, _>(&search.query)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.search_engine)
            .bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
            .bind::<diesel::sql_types::BigInt, _>(search.created_at)
            .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.source_file)
            .execute(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;

            let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
                .get_result::<LastInsertRowId>(conn.deref_mut())
                .map(|r| r.id)
                .map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;

            Ok(SearchRecord {
                id: row_id,
                timestamp: search.timestamp,
                query: search.query,
                search_engine: search.search_engine,
                created_at: search.created_at,
                source_file: search.source_file,
            })
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_searches_batch(
        &mut self,
        context: &opentelemetry::Context,
        searches: Vec<InsertSearchRecord>,
    ) -> Result<usize, DbError> {
        trace_db_call(context, "insert", "store_searches_batch", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");
            let mut inserted = 0;

            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for search in searches {
                    // Validate embedding (REQUIRED)
                    if search.embedding.len() != 768 {
                        log::warn!(
                            "Skipping search with invalid embedding dimensions: {}",
                            search.embedding.len()
                        );
                        continue;
                    }

                    let embedding_bytes = Self::serialize_vector(&search.embedding);

                    let rows_affected = diesel::sql_query(
                        "INSERT OR IGNORE INTO search_history
                         (timestamp, query, search_engine, embedding, created_at, source_file)
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                    )
                    .bind::<diesel::sql_types::BigInt, _>(search.timestamp)
                    .bind::<diesel::sql_types::Text, _>(&search.query)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &search.search_engine,
                    )
                    .bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
                    .bind::<diesel::sql_types::BigInt, _>(search.created_at)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &search.source_file,
                    )
                    .execute(conn)
                    .map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;

                    if rows_affected > 0 {
                        inserted += 1;
                    }
                }
                Ok(())
            })
            .map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;

            Ok(inserted)
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_searches_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_searches_in_range", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2
                 ORDER BY timestamp DESC",
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_similar_searches(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_similar_searches", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
                    "Invalid query embedding dimensions: {} (expected 768)",
                    query_embedding.len()
                ));
            }

            // Load all searches with embeddings
            let results = diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history",
            )
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Compute similarities
            let mut scored_searches: Vec<(f32, SearchRecord)> = results
                .into_iter()
                .filter_map(|row| {
                    if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
                        let similarity = Self::cosine_similarity(query_embedding, &emb);
                        Some((similarity, row.to_search_record()))
                    } else {
                        None
                    }
                })
                .collect();

            // Sort by similarity descending
            scored_searches
                .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

            log::info!("Found {} similar searches", scored_searches.len());
            if !scored_searches.is_empty() {
                log::info!(
                    "Top similarity: {:.4} for query: '{}'",
                    scored_searches[0].0,
                    scored_searches[0].1.query
                );
            }

            Ok(scored_searches
                .into_iter()
                .take(limit)
                .map(|(_, search)| search)
                .collect())
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_relevant_searches_hybrid(
        &mut self,
        context: &opentelemetry::Context,
        center_timestamp: i64,
        time_window_days: i64,
        query_embedding: Option<&[f32]>,
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_relevant_searches_hybrid", |_span| {
            let window_seconds = time_window_days * 86400;
            let start_ts = center_timestamp - window_seconds;
            let end_ts = center_timestamp + window_seconds;

            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            // Step 1: Time-based filter (fast, indexed)
            let searches_in_range = diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2",
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Step 2: If query embedding provided, rank by semantic similarity
            if let Some(query_emb) = query_embedding {
                if query_emb.len() != 768 {
                    return Err(anyhow::anyhow!(
                        "Invalid query embedding dimensions: {} (expected 768)",
                        query_emb.len()
                    ));
                }

                let mut scored_searches: Vec<(f32, SearchRecord)> = searches_in_range
                    .into_iter()
                    .filter_map(|row| {
                        if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
                            let similarity = Self::cosine_similarity(query_emb, &emb);
                            Some((similarity, row.to_search_record()))
                        } else {
                            None
                        }
                    })
                    .collect();

                // Sort by similarity descending
                scored_searches
                    .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

                log::info!(
                    "Hybrid query: {} searches in time range, ranked by similarity",
                    scored_searches.len()
                );
                if !scored_searches.is_empty() {
                    log::info!(
                        "Top similarity: {:.4} for '{}'",
                        scored_searches[0].0,
                        scored_searches[0].1.query
                    );
                }

                Ok(scored_searches
                    .into_iter()
                    .take(limit)
                    .map(|(_, search)| search)
                    .collect())
            } else {
                // No semantic ranking, just return time-sorted (most recent first)
                log::info!(
                    "Time-only query: {} searches in range",
                    searches_in_range.len()
                );
                Ok(searches_in_range
                    .into_iter()
                    .take(limit)
                    .map(|r| r.to_search_record())
                    .collect())
            }
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn search_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        query: &str,
    ) -> Result<bool, DbError> {
        trace_db_call(context, "query", "search_exists", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::Integer)]
                count: i32,
            }

            let result: CountResult = diesel::sql_query(
                "SELECT COUNT(*) as count FROM search_history WHERE timestamp = ?1 AND query = ?2",
            )
            .bind::<diesel::sql_types::BigInt, _>(timestamp)
            .bind::<diesel::sql_types::Text, _>(query)
            .get_result(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count > 0)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
        trace_db_call(context, "query", "get_search_count", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::BigInt)]
                count: i64,
            }

            let result: CountResult =
                diesel::sql_query("SELECT COUNT(*) as count FROM search_history")
                    .get_result(conn.deref_mut())
                    .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
}
||||||
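Note: the hybrid ranking above calls two private helpers, Self::deserialize_vector and Self::cosine_similarity, whose definitions fall outside this hunk. A minimal sketch of what they plausibly look like, assuming embeddings are stored as little-endian f32 bytes in a BLOB column (the storage encoding is an assumption, not shown in this diff):

// Sketch only: the real DAO defines these as associated functions.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        dot / (norm_a * norm_b)
    }
}

fn deserialize_vector(bytes: &[u8]) -> anyhow::Result<Vec<f32>> {
    // Assumes one little-endian f32 per 4 bytes.
    if bytes.len() % 4 != 0 {
        anyhow::bail!("embedding blob length {} is not a multiple of 4", bytes.len());
    }
    Ok(bytes
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
        .collect())
}

Cosine similarity is the natural ranking metric here, since relative direction (not magnitude) is what carries meaning for text embeddings.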
@@ -217,7 +217,12 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
                 if let (Some(photo_lat), Some(photo_lon)) =
                     (exif.gps_latitude, exif.gps_longitude)
                 {
-                    let distance = haversine_distance(lat, lon, photo_lat, photo_lon);
+                    let distance = haversine_distance(
+                        lat as f64,
+                        lon as f64,
+                        photo_lat as f64,
+                        photo_lon as f64,
+                    );
                     distance <= radius_km
                 } else {
                     false
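Note: the casts above exist because haversine_distance (defined in the crate's geo module, listed in the next hunk) takes f64 arguments. For reference, a sketch of the standard Haversine great-circle formula, assuming degree inputs and a kilometre result to match the radius_km comparison:

// Standard Haversine distance; inputs in decimal degrees, output in km.
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const EARTH_RADIUS_KM: f64 = 6371.0;
    let (phi1, phi2) = (lat1.to_radians(), lat2.to_radians());
    let d_phi = (lat2 - lat1).to_radians();
    let d_lambda = (lon2 - lon1).to_radians();
    let a = (d_phi / 2.0).sin().powi(2)
        + phi1.cos() * phi2.cos() * (d_lambda / 2.0).sin().powi(2);
    EARTH_RADIUS_KM * 2.0 * a.sqrt().atan2((1.0 - a).sqrt())
}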
@@ -13,6 +13,7 @@ pub mod files;
 pub mod geo;
 pub mod memories;
 pub mod otel;
+pub mod parsers;
 pub mod service;
 pub mod state;
 pub mod tags;
20 src/main.rs
@@ -303,11 +303,11 @@ async fn upload_image(
         width: exif_data.width,
         height: exif_data.height,
         orientation: exif_data.orientation,
-        gps_latitude: exif_data.gps_latitude,
-        gps_longitude: exif_data.gps_longitude,
-        gps_altitude: exif_data.gps_altitude,
-        focal_length: exif_data.focal_length,
-        aperture: exif_data.aperture,
+        gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
+        gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
+        gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
+        focal_length: exif_data.focal_length.map(|v| v as f32),
+        aperture: exif_data.aperture.map(|v| v as f32),
         shutter_speed: exif_data.shutter_speed,
         iso: exif_data.iso,
         date_taken: exif_data.date_taken,
@@ -1061,11 +1061,11 @@ fn process_new_files(
         width: exif_data.width,
         height: exif_data.height,
         orientation: exif_data.orientation,
-        gps_latitude: exif_data.gps_latitude,
-        gps_longitude: exif_data.gps_longitude,
-        gps_altitude: exif_data.gps_altitude,
-        focal_length: exif_data.focal_length,
-        aperture: exif_data.aperture,
+        gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
+        gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
+        gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
+        focal_length: exif_data.focal_length.map(|v| v as f32),
+        aperture: exif_data.aperture.map(|v| v as f32),
         shutter_speed: exif_data.shutter_speed,
         iso: exif_data.iso,
         date_taken: exif_data.date_taken,
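Note: these hunks narrow the optional EXIF values from f64 to f32, presumably to match the photo table's column types (the schema change itself is outside this excerpt, so that is an inference). Option::map applies the cast only when a value is present:

fn main() {
    // Standalone illustration of the pattern used above;
    // None passes through unchanged.
    let gps_latitude_f64: Option<f64> = Some(37.42283);
    let gps_latitude: Option<f32> = gps_latitude_f64.map(|v| v as f32);
    let missing: Option<f32> = None::<f64>.map(|v| v as f32);
    println!("{:?} {:?}", gps_latitude, missing);
}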
183 src/parsers/ical_parser.rs Normal file
@@ -0,0 +1,183 @@
use anyhow::{Context, Result};
use chrono::NaiveDateTime;
use ical::parser::ical::component::IcalCalendar;
use ical::property::Property;
use std::fs::File;
use std::io::BufReader;

#[derive(Debug, Clone)]
pub struct ParsedCalendarEvent {
    pub event_uid: Option<String>,
    pub summary: String,
    pub description: Option<String>,
    pub location: Option<String>,
    pub start_time: i64,
    pub end_time: i64,
    pub all_day: bool,
    pub organizer: Option<String>,
    pub attendees: Vec<String>,
}

pub fn parse_ics_file(path: &str) -> Result<Vec<ParsedCalendarEvent>> {
    let file = File::open(path).context("Failed to open .ics file")?;
    let reader = BufReader::new(file);

    let parser = ical::IcalParser::new(reader);
    let mut events = Vec::new();

    for calendar_result in parser {
        let calendar: IcalCalendar = calendar_result.context("Failed to parse calendar")?;

        for event in calendar.events {
            // Extract properties
            let mut event_uid = None;
            let mut summary = None;
            let mut description = None;
            let mut location = None;
            let mut start_time = None;
            let mut end_time = None;
            let mut all_day = false;
            let mut organizer = None;
            let mut attendees = Vec::new();

            for property in event.properties {
                match property.name.as_str() {
                    "UID" => {
                        event_uid = property.value;
                    }
                    "SUMMARY" => {
                        summary = property.value;
                    }
                    "DESCRIPTION" => {
                        description = property.value;
                    }
                    "LOCATION" => {
                        location = property.value;
                    }
                    "DTSTART" => {
                        if let Some(ref value) = property.value {
                            start_time = parse_ical_datetime(value, &property)?;
                            // Check if it's an all-day event (no time component)
                            all_day = value.len() == 8; // YYYYMMDD format
                        }
                    }
                    "DTEND" => {
                        if let Some(ref value) = property.value {
                            end_time = parse_ical_datetime(value, &property)?;
                        }
                    }
                    "ORGANIZER" => {
                        organizer = extract_email_from_mailto(property.value.as_deref());
                    }
                    "ATTENDEE" => {
                        if let Some(email) = extract_email_from_mailto(property.value.as_deref()) {
                            attendees.push(email);
                        }
                    }
                    _ => {}
                }
            }

            // Only include events with all required fields
            if let (Some(summary_text), Some(start), Some(end)) = (summary, start_time, end_time) {
                events.push(ParsedCalendarEvent {
                    event_uid,
                    summary: summary_text,
                    description,
                    location,
                    start_time: start,
                    end_time: end,
                    all_day,
                    organizer,
                    attendees,
                });
            }
        }
    }

    Ok(events)
}

fn parse_ical_datetime(value: &str, property: &Property) -> Result<Option<i64>> {
    // Check for a TZID parameter (currently extracted but unused)
    let _tzid = property.params.as_ref().and_then(|params| {
        params
            .iter()
            .find(|(key, _)| key == "TZID")
            .and_then(|(_, values)| values.first())
            .cloned()
    });

    // iCal datetime formats:
    // - 20240815T140000Z (UTC)
    // - 20240815T140000 (local/TZID)
    // - 20240815 (all-day)

    let cleaned = value.replace("Z", "").replace("T", "");

    // All-day event (YYYYMMDD)
    if cleaned.len() == 8 {
        let dt = NaiveDateTime::parse_from_str(&format!("{}000000", cleaned), "%Y%m%d%H%M%S")
            .context("Failed to parse all-day date")?;
        return Ok(Some(dt.and_utc().timestamp()));
    }

    // DateTime event (YYYYMMDDHHMMSS after cleaning)
    if cleaned.len() >= 14 {
        let dt = NaiveDateTime::parse_from_str(&cleaned[..14], "%Y%m%d%H%M%S")
            .context("Failed to parse datetime")?;

        // If the original value ended in 'Z', it is UTC
        let timestamp = if value.ends_with('Z') {
            dt.and_utc().timestamp()
        } else {
            // Treat as UTC for simplicity (proper TZID handling is complex)
            dt.and_utc().timestamp()
        };

        return Ok(Some(timestamp));
    }

    Ok(None)
}
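Note: parse_ical_datetime extracts the TZID parameter but deliberately ignores it, treating zoned local times as UTC. If timezone fidelity mattered, a chrono-tz based variant could resolve it; chrono-tz is not a dependency of this commit as far as the diff shows, so the following is a sketch under that assumption:

use chrono::NaiveDateTime;
use chrono_tz::Tz;

// Resolve a naive iCal datetime against its TZID and return a UTC epoch.
// Returns None for unknown zone names or ambiguous/skipped local times.
fn to_utc_timestamp(naive: NaiveDateTime, tzid: &str) -> Option<i64> {
    let tz: Tz = tzid.parse().ok()?;
    naive.and_local_timezone(tz).single().map(|dt| dt.timestamp())
}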

fn extract_email_from_mailto(value: Option<&str>) -> Option<String> {
    value.and_then(|v| {
        // ORGANIZER and ATTENDEE values often have the form: mailto:user@example.com
        if v.starts_with("mailto:") {
            Some(v.trim_start_matches("mailto:").to_string())
        } else {
            Some(v.to_string())
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_ical_datetime() {
        let prop = Property {
            name: "DTSTART".to_string(),
            params: None,
            value: Some("20240815T140000Z".to_string()),
        };

        let timestamp = parse_ical_datetime("20240815T140000Z", &prop).unwrap();
        assert!(timestamp.is_some());
    }

    #[test]
    fn test_extract_email() {
        assert_eq!(
            extract_email_from_mailto(Some("mailto:user@example.com")),
            Some("user@example.com".to_string())
        );

        assert_eq!(
            extract_email_from_mailto(Some("user@example.com")),
            Some("user@example.com".to_string())
        );
    }
}
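For orientation, a minimal driver showing how parse_ics_file might be called; the path and printed fields are illustrative only (the real entry point is the import_calendar utility described in the commit message, whose signature this diff does not show):

fn main() -> anyhow::Result<()> {
    // Hypothetical path; adjust to your Takeout archive layout.
    let events = parse_ics_file("Takeout/Calendar/events.ics")?;
    println!("parsed {} events", events.len());
    for event in events.iter().take(3) {
        println!("{} [{} - {}]", event.summary, event.start_time, event.end_time);
    }
    Ok(())
}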
133 src/parsers/location_json_parser.rs Normal file
@@ -0,0 +1,133 @@
use anyhow::{Context, Result};
use chrono::DateTime;
use serde::Deserialize;
use std::fs::File;
use std::io::BufReader;

#[derive(Debug, Clone)]
pub struct ParsedLocationRecord {
    pub timestamp: i64,
    pub latitude: f64,
    pub longitude: f64,
    pub accuracy: Option<i32>,
    pub activity: Option<String>,
    pub activity_confidence: Option<i32>,
}

// Google Takeout Location History JSON structures
#[derive(Debug, Deserialize)]
struct LocationHistory {
    locations: Vec<LocationPoint>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LocationPoint {
    timestamp_ms: Option<String>, // Older format (ms since epoch, as a string)
    timestamp: Option<String>,    // Newer format (ISO8601)
    latitude_e7: Option<i64>,
    longitude_e7: Option<i64>,
    accuracy: Option<i32>,
    activity: Option<Vec<ActivityRecord>>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")] // Takeout uses camelCase keys here as well
struct ActivityRecord {
    activity: Vec<ActivityType>,
    timestamp_ms: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ActivityType {
    #[serde(rename = "type")]
    activity_type: String,
    confidence: i32,
}

pub fn parse_location_json(path: &str) -> Result<Vec<ParsedLocationRecord>> {
    let file = File::open(path).context("Failed to open location JSON file")?;
    let reader = BufReader::new(file);

    let history: LocationHistory =
        serde_json::from_reader(reader).context("Failed to parse location history JSON")?;

    let mut records = Vec::new();

    for point in history.locations {
        // Parse the timestamp (try both formats)
        let timestamp = if let Some(ts_ms) = point.timestamp_ms {
            // Milliseconds since epoch
            ts_ms
                .parse::<i64>()
                .context("Failed to parse timestamp_ms")?
                / 1000
        } else if let Some(ts_iso) = point.timestamp {
            // ISO8601 format
            DateTime::parse_from_rfc3339(&ts_iso)
                .context("Failed to parse ISO8601 timestamp")?
                .timestamp()
        } else {
            continue; // Skip points without a timestamp
        };

        // Convert E7 format to decimal degrees
        let latitude = point.latitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
        let longitude = point.longitude_e7.map(|e7| e7 as f64 / 10_000_000.0);

        // Extract the highest-confidence activity
        let (activity, activity_confidence) = point
            .activity
            .as_ref()
            .and_then(|activities| activities.first())
            .and_then(|record| {
                record
                    .activity
                    .iter()
                    .max_by_key(|a| a.confidence)
                    .map(|a| (a.activity_type.clone(), a.confidence))
            })
            .unzip();

        if let (Some(lat), Some(lon)) = (latitude, longitude) {
            records.push(ParsedLocationRecord {
                timestamp,
                latitude: lat,
                longitude: lon,
                accuracy: point.accuracy,
                activity,
                activity_confidence,
            });
        }
    }

    Ok(records)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_e7_conversion() {
        let lat_e7 = 374228300_i64;
        let lat = lat_e7 as f64 / 10_000_000.0;
        assert!((lat - 37.42283).abs() < 0.00001);
    }

    #[test]
    fn test_parse_sample_json() {
        let json = r#"{
            "locations": [
                {
                    "latitudeE7": 374228300,
                    "longitudeE7": -1221086100,
                    "accuracy": 20,
                    "timestampMs": "1692115200000"
                }
            ]
        }"#;

        let history: LocationHistory = serde_json::from_str(json).unwrap();
        assert_eq!(history.locations.len(), 1);
    }
}
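Note: location history is the high-volume format (the commit message cites 1M+ records), which is why import_location_history batches its inserts. A sketch of the chunking shape; the batch size and the insert helper are both hypothetical, not taken from this diff:

const BATCH_SIZE: usize = 5_000; // hypothetical; tune against SQLite bind-variable limits

fn import_in_batches(records: &[ParsedLocationRecord]) -> anyhow::Result<()> {
    for chunk in records.chunks(BATCH_SIZE) {
        // insert_location_batch is a stand-in for the DAO's real batch insert.
        insert_location_batch(chunk)?;
    }
    Ok(())
}

fn insert_location_batch(chunk: &[ParsedLocationRecord]) -> anyhow::Result<()> {
    // Stub for the sketch: a real implementation would run one multi-row
    // INSERT (or one transaction) per chunk instead of one per record.
    println!("inserting {} records", chunk.len());
    Ok(())
}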
7 src/parsers/mod.rs Normal file
@@ -0,0 +1,7 @@
pub mod ical_parser;
pub mod location_json_parser;
pub mod search_html_parser;

pub use ical_parser::{ParsedCalendarEvent, parse_ics_file};
pub use location_json_parser::{ParsedLocationRecord, parse_location_json};
pub use search_html_parser::{ParsedSearchRecord, parse_search_html};
210 src/parsers/search_html_parser.rs Normal file
@@ -0,0 +1,210 @@
use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDateTime, Utc};
use scraper::{Html, Selector};
use std::fs;

#[derive(Debug, Clone)]
pub struct ParsedSearchRecord {
    pub timestamp: i64,
    pub query: String,
    pub search_engine: Option<String>,
}

pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
    let html_content =
        fs::read_to_string(path).context("Failed to read search history HTML file")?;

    let document = Html::parse_document(&html_content);
    let mut records = Vec::new();

    // Try multiple selector strategies, as the Google Takeout format varies

    // Strategy 1: Look for the specific cell structure
    if let Ok(cell_selector) = Selector::parse("div.content-cell") {
        for cell in document.select(&cell_selector) {
            if let Some(record) = parse_content_cell(&cell) {
                records.push(record);
            }
        }
    }

    // Strategy 2: Look for the outer-cell structure (older format)
    if records.is_empty() {
        if let Ok(outer_selector) = Selector::parse("div.outer-cell") {
            for cell in document.select(&outer_selector) {
                if let Some(record) = parse_outer_cell(&cell) {
                    records.push(record);
                }
            }
        }
    }

    // Strategy 3: Generic approach - look for links and timestamps
    if records.is_empty() {
        if let Ok(link_selector) = Selector::parse("a") {
            for link in document.select(&link_selector) {
                if let Some(href) = link.value().attr("href") {
                    // Check whether it is a search URL
                    if href.contains("google.com/search?q=") || href.contains("search?q=") {
                        if let Some(query) = extract_query_from_url(href) {
                            // Try to find a nearby timestamp
                            let timestamp = find_nearby_timestamp(&link);

                            records.push(ParsedSearchRecord {
                                timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
                                query,
                                search_engine: Some("Google".to_string()),
                            });
                        }
                    }
                }
            }
        }
    }

    Ok(records)
}

fn parse_content_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
    let link_selector = Selector::parse("a").ok()?;

    let link = cell.select(&link_selector).next()?;
    let href = link.value().attr("href")?;
    let query = extract_query_from_url(href)?;

    // Extract the timestamp from the cell text
    let cell_text = cell.text().collect::<Vec<_>>().join(" ");
    let timestamp = parse_timestamp_from_text(&cell_text);

    Some(ParsedSearchRecord {
        timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
        query,
        search_engine: Some("Google".to_string()),
    })
}

fn parse_outer_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
    let link_selector = Selector::parse("a").ok()?;

    let link = cell.select(&link_selector).next()?;
    let href = link.value().attr("href")?;
    let query = extract_query_from_url(href)?;

    let cell_text = cell.text().collect::<Vec<_>>().join(" ");
    let timestamp = parse_timestamp_from_text(&cell_text);

    Some(ParsedSearchRecord {
        timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
        query,
        search_engine: Some("Google".to_string()),
    })
}

fn extract_query_from_url(url: &str) -> Option<String> {
    // Extract the q= parameter from a URL
    // Example: https://www.google.com/search?q=rust+programming

    if let Some(query_start) = url.find("?q=").or_else(|| url.find("&q=")) {
        let query_part = &url[query_start + 3..];
        let query_end = query_part.find('&').unwrap_or(query_part.len());
        // In query strings '+' encodes a space; urlencoding::decode only
        // handles percent-escapes, so normalize '+' first.
        let encoded_query = query_part[..query_end].replace('+', " ");

        // URL decode
        urlencoding::decode(&encoded_query)
            .ok()
            .map(|s| s.to_string())
    } else {
        None
    }
}

fn find_nearby_timestamp(element: &scraper::ElementRef) -> Option<i64> {
    // Look for a timestamp in the parent element
    if let Some(parent) = element.parent() {
        if parent.value().as_element().is_some() {
            let parent_ref = scraper::ElementRef::wrap(parent)?;
            let text = parent_ref.text().collect::<Vec<_>>().join(" ");
            return parse_timestamp_from_text(&text);
        }
    }
    None
}

fn parse_timestamp_from_text(text: &str) -> Option<i64> {
    // Google Takeout timestamps often look like:
    // "Aug 15, 2024, 2:34:56 PM PDT"
    // "2024-08-15T14:34:56Z"

    // Try ISO8601 first
    if let Some(iso_match) = text
        .split_whitespace()
        .find(|s| s.contains('T') && s.contains('-'))
    {
        if let Ok(dt) = DateTime::parse_from_rfc3339(iso_match) {
            return Some(dt.timestamp());
        }
    }

    // Try common date patterns
    let patterns = [
        "%b %d, %Y, %I:%M:%S %p", // Aug 15, 2024, 2:34:56 PM
        "%Y-%m-%d %H:%M:%S",      // 2024-08-15 14:34:56
        "%m/%d/%Y %H:%M:%S",      // 08/15/2024 14:34:56
    ];

    for pattern in patterns {
        // Extract a potential date string
        if let Some(date_part) = extract_date_substring(text) {
            if let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern) {
                return Some(dt.and_utc().timestamp());
            }
        }
    }

    None
}

fn extract_date_substring(text: &str) -> Option<String> {
    // Try to extract a date-like substring from the text.
    // This is a heuristic approach for varied formats.

    // Look for patterns like "Aug 15, 2024, 2:34:56 PM"
    if let Some(pos) = text.find(|c: char| c.is_numeric()) {
        let rest = &text[pos..];
        if let Some(end) =
            rest.find(|c: char| !c.is_alphanumeric() && c != ':' && c != ',' && c != ' ')
        {
            Some(rest[..end].trim().to_string())
        } else {
            Some(rest.trim().to_string())
        }
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_query_from_url() {
        let url = "https://www.google.com/search?q=rust+programming&oq=rust";
        let query = extract_query_from_url(url);
        assert_eq!(query, Some("rust programming".to_string()));
    }

    #[test]
    fn test_extract_query_with_encoding() {
        let url = "https://www.google.com/search?q=hello%20world";
        let query = extract_query_from_url(url);
        assert_eq!(query, Some("hello world".to_string()));
    }

    #[test]
    fn test_parse_iso_timestamp() {
        let text = "Some text 2024-08-15T14:34:56Z more text";
        let timestamp = parse_timestamp_from_text(text);
        assert!(timestamp.is_some());
    }
}
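A minimal driver for the HTML parser, analogous to the calendar sketch above; the file path is illustrative only:

fn main() -> anyhow::Result<()> {
    // Hypothetical path; Google Takeout places search activity under "My Activity".
    let records = parse_search_html("Takeout/My Activity/Search/MyActivity.html")?;
    println!("parsed {} searches", records.len());
    if let Some(first) = records.first() {
        println!("first: '{}' at {}", first.query, first.timestamp);
    }
    Ok(())
}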