Compare commits: master...8a59a70dc4 (4 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 8a59a70dc4 | |
| | fcc520af1a | |
| | 19dea67e3f | |
| | 55725e2b3c | |

.gitignore (vendored, 11 changed lines)

@@ -2,14 +2,3 @@
```
database/target
*.db
.env
/tmp

# Default ignored files
.idea/shelf/
.idea/workspace.xml
# Datasource local storage ignored files
.idea/dataSources*
.idea/dataSources.local.xml
# Editor-based HTTP Client requests
.idea/httpRequests/
/.claude/settings.local.json
```

.idea/image-api.iml (generated, 12 removed lines)

@@ -1,12 +0,0 @@
```xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
      <excludeFolder url="file://$MODULE_DIR$/.idea/dataSources" />
      <excludeFolder url="file://$MODULE_DIR$/target" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
```

.idea/misc.xml (generated, 6 removed lines)

@@ -1,6 +0,0 @@
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
</project>
```

.idea/modules.xml (generated, 8 removed lines)

@@ -1,8 +0,0 @@
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/image-api.iml" filepath="$PROJECT_DIR$/.idea/image-api.iml" />
    </modules>
  </component>
</project>
```

.idea/sqldialects.xml (generated, 7 removed lines)

@@ -1,7 +0,0 @@
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="SqlDialectMappings">
    <file url="file://$PROJECT_DIR$/migrations/2021-09-02-000740_create_tags/up.sql" dialect="GenericSQL" />
    <file url="PROJECT" dialect="SQLite" />
  </component>
</project>
```

.idea/vcs.xml (generated, 6 removed lines)

@@ -1,6 +0,0 @@
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
```

CLAUDE.md (289 removed lines)

@@ -1,289 +0,0 @@

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

An Actix-web REST API for serving images and videos from a filesystem with automatic thumbnail generation, EXIF extraction, tag organization, and a memories feature for browsing photos by date. Uses SQLite/Diesel ORM for data persistence and ffmpeg for video processing.

## Development Commands

### Building & Running
```bash
# Build for development
cargo build

# Build for release (uses thin LTO optimization)
cargo build --release

# Run the server (requires .env file with DATABASE_URL, BASE_PATH, THUMBNAILS, VIDEO_PATH, BIND_URL, SECRET_KEY)
cargo run

# Run with specific log level
RUST_LOG=debug cargo run
```

### Testing
```bash
# Run all tests (requires BASE_PATH in .env)
cargo test

# Run specific test
cargo test test_name

# Run tests with output
cargo test -- --nocapture
```

### Database Migrations
```bash
# Install diesel CLI (one-time setup)
cargo install diesel_cli --no-default-features --features sqlite

# Create new migration
diesel migration generate migration_name

# Run migrations (also runs automatically on app startup)
diesel migration run

# Revert last migration
diesel migration revert

# Regenerate schema.rs after manual migration changes
diesel print-schema > src/database/schema.rs
```

### Code Quality
```bash
# Format code
cargo fmt

# Run clippy linter
cargo clippy

# Fix automatically fixable issues
cargo fix
```

### Utility Binaries
```bash
# Two-phase cleanup: resolve missing files and validate file types
cargo run --bin cleanup_files -- --base-path /path/to/media --database-url ./database.db

# Batch extract EXIF for existing files
cargo run --bin migrate_exif
```

## Architecture Overview

### Core Components

**Layered Architecture:**
- **HTTP Layer** (`main.rs`): Route handlers for images, videos, metadata, tags, favorites, memories
- **Auth Layer** (`auth.rs`): JWT token validation, Claims extraction via FromRequest trait
- **Service Layer** (`files.rs`, `exif.rs`, `memories.rs`): Business logic for file operations and EXIF extraction
- **DAO Layer** (`database/mod.rs`): Trait-based data access (ExifDao, UserDao, FavoriteDao, TagDao)
- **Database Layer**: Diesel ORM with SQLite, schema in `database/schema.rs`

**Async Actor System (Actix):**
- `StreamActor`: Manages ffmpeg video processing lifecycle
- `VideoPlaylistManager`: Scans directories and queues videos
- `PlaylistGenerator`: Creates HLS playlists for video streaming

### Database Schema & Patterns

**Tables:**
- `users`: Authentication (id, username, password_hash)
- `favorites`: User-specific favorites (userid, path)
- `tags`: Custom labels with timestamps
- `tagged_photo`: Many-to-many photo-tag relationships
- `image_exif`: Rich metadata (file_path + 16 EXIF fields: camera, GPS, dates, exposure settings)

**DAO Pattern:**
All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifDao`). Connection pooling uses `Arc<Mutex<SqliteConnection>>`. All DB operations are traced with OpenTelemetry in release builds.

**Key DAO Methods:**
- `store_exif()`, `get_exif()`, `get_exif_batch()`: EXIF CRUD operations
- `query_by_exif()`: Complex filtering by camera, GPS bounds, date ranges
- Batch operations minimize DB hits during file watching
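
The diff does not include `database/mod.rs` itself, but the shape described above looks roughly like the sketch below; the `ImageExif` fields, method signatures, and error type are illustrative assumptions, not the repository's actual definitions.

```rust
use std::sync::{Arc, Mutex};

use diesel::sqlite::SqliteConnection;

// Illustrative row type; the real schema has file_path plus ~16 EXIF columns.
pub struct ImageExif {
    pub file_path: String,
    pub camera_make: Option<String>,
    pub date_taken: Option<i64>,
}

// Trait-based DAO: handlers and services depend on the trait, not on SQLite.
// Signatures here are guesses based on the method names listed above.
pub trait ExifDao: Send {
    fn store_exif(&mut self, exif: &ImageExif) -> anyhow::Result<()>;
    fn get_exif(&mut self, path: &str) -> anyhow::Result<Option<ImageExif>>;
    fn get_exif_batch(&mut self, paths: &[String]) -> anyhow::Result<Vec<ImageExif>>;
}

// Concrete implementation holds the shared connection mentioned above.
pub struct SqliteExifDao {
    conn: Arc<Mutex<SqliteConnection>>,
}
```

Elsewhere in this diff the handlers take `web::Data<Mutex<Box<dyn InsightDao>>>`, which is the same swappable-trait-object pattern.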

### File Processing Pipeline

**Thumbnail Generation:**
1. Startup scan: Rayon parallel walk of BASE_PATH
2. Creates 200x200 thumbnails in THUMBNAILS directory (mirrors source structure)
3. Videos: extracts frame at 3-second mark via ffmpeg
4. Images: uses `image` crate for JPEG/PNG processing

**File Watching:**
Runs in background thread with two-tier strategy:
- **Quick scan** (default 60s): Recently modified files only
- **Full scan** (default 3600s): Comprehensive directory check
- Batch queries EXIF DB to detect new files
- Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS`

**EXIF Extraction:**
- Uses `kamadak-exif` crate
- Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP
- Extracts: camera make/model, lens, dimensions, GPS coordinates, focal length, aperture, shutter speed, ISO, date taken
- Triggered on upload and during file watching
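
For orientation, a minimal sketch of pulling a few fields with the `kamadak-exif` crate (imported under the `exif` name, as in Cargo.toml); the real `exif.rs` extracts many more fields and maps them into `image_exif` rows.

```rust
use std::{fs::File, io::BufReader, path::Path};

use exif::{In, Reader, Tag};

fn read_basic_exif(path: &Path) -> anyhow::Result<()> {
    let file = File::open(path)?;
    let exif = Reader::new().read_from_container(&mut BufReader::new(file))?;

    // Camera make/model and original capture date, when present.
    for tag in [Tag::Make, Tag::Model, Tag::DateTimeOriginal] {
        if let Some(field) = exif.get_field(tag, In::PRIMARY) {
            println!("{}: {}", tag, field.display_value().with_unit(&exif));
        }
    }
    Ok(())
}
```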

**File Upload Behavior:**
If file exists, appends timestamp to filename (`photo_1735124234.jpg`) to preserve history without overwrites.
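
A small sketch of that rename rule; the actual helper name and its location in the codebase are not shown in this diff.

```rust
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};

// photo.jpg -> photo_1735124234.jpg when the target already exists.
fn dedup_upload_path(target: &Path) -> PathBuf {
    if !target.exists() {
        return target.to_path_buf();
    }
    let stem = target.file_stem().and_then(|s| s.to_str()).unwrap_or("upload");
    let ext = target.extension().and_then(|e| e.to_str()).unwrap_or("");
    let ts = SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0);
    let name = if ext.is_empty() {
        format!("{stem}_{ts}")
    } else {
        format!("{stem}_{ts}.{ext}")
    };
    target.with_file_name(name)
}
```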

### Authentication Flow

**Login:**
1. POST `/login` with username/password
2. Verify with `bcrypt::verify()` against password_hash
3. Generate JWT with claims: `{ sub: user_id, exp: 5_days_from_now }`
4. Sign with HS256 using `SECRET_KEY` environment variable

**Authorization:**
All protected endpoints extract `Claims` via `FromRequest` trait implementation. Token passed as `Authorization: Bearer <token>` header.
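
A condensed sketch of the login and verification steps with `bcrypt` and `jsonwebtoken`; the real `Claims` struct and error handling live in `auth.rs` and will differ in detail.

```rust
use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Claims {
    sub: i32,   // user id
    exp: usize, // expiry, roughly 5 days out
}

fn issue_token(user_id: i32, password: &str, hash: &str, secret: &str) -> anyhow::Result<String> {
    // 2. Verify the submitted password against the stored bcrypt hash.
    if !bcrypt::verify(password, hash)? {
        anyhow::bail!("invalid credentials");
    }
    // 3-4. Build claims and sign with HS256 (jsonwebtoken's default header).
    let exp = (chrono::Utc::now() + chrono::Duration::days(5)).timestamp() as usize;
    let claims = Claims { sub: user_id, exp };
    Ok(encode(&Header::default(), &claims, &EncodingKey::from_secret(secret.as_bytes()))?)
}

fn check_token(token: &str, secret: &str) -> anyhow::Result<Claims> {
    let data = decode::<Claims>(token, &DecodingKey::from_secret(secret.as_bytes()), &Validation::default())?;
    Ok(data.claims)
}
```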

### API Structure

**Key Endpoint Patterns:**

```rust
// Image serving & upload
GET /image?path=...&size=...&format=...
POST /image (multipart file upload)

// Metadata & EXIF
GET /image/metadata?path=...

// Advanced search with filters
GET /photos?path=...&recursive=true&sort=DateTakenDesc&camera_make=Canon&gps_lat=...&gps_lon=...&gps_radius_km=10&date_from=...&date_to=...&tag_ids=1,2,3&media_type=Photo

// Video streaming (HLS)
POST /video/generate (creates .m3u8 playlist + .ts segments)
GET /video/stream?path=... (serves playlist)

// Tags
GET /image/tags/all
POST /image/tags (add tag to file)
DELETE /image/tags (remove tag from file)
POST /image/tags/batch (bulk tag updates)

// Memories (week-based grouping)
GET /memories?path=...&recursive=true
```

**Request Types:**
- `FilesRequest`: Supports complex filtering (tags, EXIF fields, GPS radius, date ranges)
- `SortType`: Shuffle, NameAsc/Desc, TagCountAsc/Desc, DateTakenAsc/Desc

### Important Patterns

**Service Builder Pattern:**
Routes are registered via composable `ServiceBuilder` trait in `service.rs`. Allows modular feature addition.

**Path Validation:**
Always use `is_valid_full_path(&base_path, &requested_path, check_exists)` to prevent directory traversal attacks.
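
The body of `is_valid_full_path` is not part of this diff; a plausible sketch of the check, built on `path-absolutize`, looks like this.

```rust
use std::path::Path;

use path_absolutize::Absolutize;

// Normalize `..` components without touching the filesystem, then require
// the result to stay under base_path.
fn is_valid_full_path(base_path: &Path, requested: &Path, check_exists: bool) -> bool {
    let full = base_path.join(requested);
    match full.absolutize() {
        Ok(resolved) => resolved.starts_with(base_path) && (!check_exists || resolved.exists()),
        Err(_) => false,
    }
}
```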

**File Type Detection:**
Centralized in `file_types.rs` with constants `IMAGE_EXTENSIONS` and `VIDEO_EXTENSIONS`. Provides both `Path` and `DirEntry` variants for performance.

**OpenTelemetry Tracing:**
All database operations and HTTP handlers wrapped in spans. In release builds, exports to OTLP endpoint via `OTLP_OTLS_ENDPOINT`. Debug builds use basic logger.

**Memory Exclusion:**
`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns).
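
A sketch of what that exclusion check amounts to; the real `PathExcluder` API may differ.

```rust
pub struct PathExcluder {
    patterns: Vec<String>,
}

impl PathExcluder {
    // EXCLUDED_DIRS=/private,/archive  (paths or substring patterns)
    pub fn from_env() -> Self {
        let patterns = std::env::var("EXCLUDED_DIRS")
            .unwrap_or_default()
            .split(',')
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty())
            .collect();
        Self { patterns }
    }

    pub fn is_excluded(&self, path: &str) -> bool {
        self.patterns.iter().any(|p| path.contains(p.as_str()))
    }
}
```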

### Startup Sequence

1. Load `.env` file
2. Run embedded Diesel migrations
3. Spawn file watcher thread
4. Create initial thumbnails (parallel scan)
5. Generate video GIF thumbnails
6. Initialize AppState with Actix actors
7. Set up Prometheus metrics (`imageserver_image_total`, `imageserver_video_total`)
8. Scan directory for videos and queue HLS processing
9. Start HTTP server on `BIND_URL` + localhost:8088

## Testing Patterns

Tests require `BASE_PATH` environment variable. Many integration tests create temporary directories and files.

When testing database code:
- Use in-memory SQLite: `DATABASE_URL=":memory:"`
- Run migrations in test setup
- Clean up with `DROP TABLE` or use `#[serial]` from `serial_test` crate if parallel tests conflict
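
A sketch of that setup with Diesel 2 and `diesel_migrations`; the `migrations/` path is the crate default, and the helper itself is illustrative.

```rust
#[cfg(test)]
mod tests {
    use diesel::prelude::*;
    use diesel::sqlite::SqliteConnection;
    use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness};

    const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations");

    fn test_connection() -> SqliteConnection {
        // Fresh in-memory database per test; nothing to clean up afterwards.
        let mut conn = SqliteConnection::establish(":memory:")
            .expect("failed to open in-memory SQLite");
        conn.run_pending_migrations(MIGRATIONS)
            .expect("migrations failed");
        conn
    }

    #[test]
    fn migrations_apply_cleanly() {
        let _conn = test_connection();
    }
}
```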

## Common Gotchas

**EXIF Date Parsing:**
Multiple formats supported (EXIF DateTime, ISO8601, Unix timestamp). Fallback chain attempts multiple parsers.

**Video Processing:**
ffmpeg processes run asynchronously via actors. Use `StreamActor` to track completion. HLS segments written to `VIDEO_PATH`.

**File Extensions:**
Extension detection is case-insensitive. Use `file_types.rs` helpers rather than manual string matching.

**Migration Workflow:**
After creating a migration, manually edit the SQL, then regenerate `schema.rs` with `diesel print-schema`. Migrations auto-run on startup via `embedded_migrations!()` macro.

**Path Absolutization:**
Use `path-absolutize` crate's `.absolutize()` method when converting user-provided paths to ensure they're within `BASE_PATH`.

## Required Environment Variables

```bash
DATABASE_URL=./database.db        # SQLite database path
BASE_PATH=/path/to/media          # Root media directory
THUMBNAILS=/path/to/thumbnails    # Thumbnail storage
VIDEO_PATH=/path/to/video/hls     # HLS playlist output
GIFS_DIRECTORY=/path/to/gifs      # Video GIF thumbnails
BIND_URL=0.0.0.0:8080             # Server binding
CORS_ALLOWED_ORIGINS=http://localhost:3000
SECRET_KEY=your-secret-key-here   # JWT signing secret
RUST_LOG=info                     # Log level
EXCLUDED_DIRS=/private,/archive   # Comma-separated paths to exclude from memories
```

Optional:
```bash
WATCH_QUICK_INTERVAL_SECONDS=60   # Quick scan interval
WATCH_FULL_INTERVAL_SECONDS=3600  # Full scan interval
OTLP_OTLS_ENDPOINT=http://...     # OpenTelemetry collector (release builds)

# AI Insights Configuration
OLLAMA_PRIMARY_URL=http://desktop:11434    # Primary Ollama server (e.g., desktop)
OLLAMA_FALLBACK_URL=http://server:11434    # Fallback Ollama server (optional, always-on)
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b   # Model for primary server (default: nemotron-3-nano:30b)
OLLAMA_FALLBACK_MODEL=llama3.2:3b          # Model for fallback server (optional, uses primary if not set)
SMS_API_URL=http://localhost:8000          # SMS message API endpoint (default: localhost:8000)
SMS_API_TOKEN=your-api-token               # SMS API authentication token (optional)
```

**AI Insights Fallback Behavior:**
- Primary server is tried first with its configured model (5-second connection timeout)
- On connection failure, automatically falls back to secondary server with its model (if configured)
- If `OLLAMA_FALLBACK_MODEL` not set, uses same model as primary server on fallback
- Total request timeout is 120 seconds to accommodate slow LLM inference
- Logs indicate which server and model was used (info level) and failover attempts (warn level)
- Backwards compatible: `OLLAMA_URL` and `OLLAMA_MODEL` still supported as fallbacks

**Model Discovery:**
The `OllamaClient` provides methods to query available models:
- `OllamaClient::list_models(url)` - Returns list of all models on a server
- `OllamaClient::is_model_available(url, model_name)` - Checks if a specific model exists

This allows runtime verification of model availability before generating insights.
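
A short usage sketch of those two methods; the surrounding `pick_model` helper is hypothetical, only `list_models` and `is_model_available` come from the API described above.

```rust
use crate::ai::OllamaClient;

// Hypothetical pre-flight check before kicking off insight generation.
async fn pick_model(primary_url: &str, preferred: &str) -> anyhow::Result<String> {
    if OllamaClient::is_model_available(primary_url, preferred).await? {
        return Ok(preferred.to_string());
    }
    // Otherwise fall back to whatever the server actually has loaded.
    let models = OllamaClient::list_models(primary_url).await?;
    models
        .into_iter()
        .next()
        .ok_or_else(|| anyhow::anyhow!("no models available on {primary_url}"))
}
```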

## Dependencies of Note

- **actix-web**: HTTP framework
- **diesel**: ORM for SQLite
- **jsonwebtoken**: JWT implementation
- **kamadak-exif**: EXIF parsing
- **image**: Thumbnail generation
- **walkdir**: Directory traversal
- **rayon**: Parallel processing
- **opentelemetry**: Distributed tracing
- **bcrypt**: Password hashing
- **infer**: Magic number file type detection

Cargo.lock (generated, 4612 changed lines): file diff suppressed because it is too large

Cargo.toml (65 changed lines)

@@ -1,57 +1,30 @@
```toml
[package]
name = "image-api"
version = "0.5.2"
version = "0.1.0"
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
edition = "2024"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[profile.release]
lto = "thin"

[dependencies]
actix = "0.13.1"
actix-web = "4"
actix-rt = "2.6"
tokio = { version = "1.42.0", features = ["default", "process", "sync", "macros", "rt-multi-thread"] }
actix-files = "0.6"
actix-cors = "0.7"
actix-multipart = "0.7.2"
actix-governor = "0.5"
actix-web = "3"
actix-rt = "1"
actix-files = "0.4"
actix-multipart = "0.3.0"
actix-cors="0.5"
futures = "0.3.5"
jsonwebtoken = "9.3.0"
jsonwebtoken = "7.2.0"
serde = "1"
serde_json = "1"
diesel = { version = "2.2.10", features = ["sqlite"] }
libsqlite3-sys = { version = "0.35", features = ["bundled"] }
diesel_migrations = "2.2.0"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
diesel = { version = "1.4.5", features = ["sqlite"] }
hmac = "0.7.1"
sha2 = "0.8.2"
chrono = "0.4.11"
dotenv = "0.15"
bcrypt = "0.17.1"
image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] }
infer = "0.16"
walkdir = "2.4.0"
rayon = "1.5"
path-absolutize = "3.1"
log = "0.4"
env_logger = "0.11.5"
actix-web-prom = "0.9.0"
prometheus = "0.13"
lazy_static = "1.5"
anyhow = "1.0"
rand = "0.8.5"
opentelemetry = { version = "0.31.0", features = ["default", "metrics", "tracing"] }
opentelemetry_sdk = { version = "0.31.0", features = ["default", "rt-tokio-current-thread", "metrics"] }
opentelemetry-otlp = { version = "0.31.0", features = ["default", "metrics", "tracing", "grpc-tonic"] }
opentelemetry-stdout = "0.31.0"
opentelemetry-appender-log = "0.31.0"
tempfile = "3.20.0"
regex = "1.11.1"
exif = { package = "kamadak-exif", version = "0.6.1" }
reqwest = { version = "0.12", features = ["json"] }
urlencoding = "2.1"
zerocopy = "0.8"
ical = "0.11"
scraper = "0.20"
base64 = "0.22"
bcrypt = "0.8.1"
image = "0.23.7"
walkdir = "2"
rayon = "1.3"
notify = "4.0"
tokio = "0.2"
path-absolutize = "3.0.6"
```

Jenkinsfile (vendored, 19 changed lines)

@@ -1,30 +1,25 @@
```groovy
pipeline {
    agent {
        docker {
            image 'rust:1.59'
            args '-v "$PWD":/usr/src/image-api'
            image 'rust:1.48'
            args "-v $PWD:/usr/src/image-api"
        }
    }

    stages {
        stage('build') {
            steps {
                sh 'cargo build --release'
                archiveArtifacts artifacts: '**/target/release/image-api', fingerprint: true
                echo $PWD
                sh 'cargo build --release'
                archiveArtifacts artifacts: '**/target/release/**', fingerprint: true
            }
        }

        stage('test') {
            steps {
                sh 'echo "BASE_PATH=$PWD" > .env'
                sh 'cargo test'
            }

            post {
                always {
                    sh 'rm -f .env'
                }
                sh 'cargo test'
            }
        }
    }
}
```

README.md (52 changed lines)

@@ -2,65 +2,13 @@

This is an Actix-web server for serving images and videos from a filesystem.
Upon first run it will generate thumbnails for all images and videos at `BASE_PATH`.

## Features
- Automatic thumbnail generation for images and videos
- EXIF data extraction and storage for photos
- File watching with NFS support (polling-based)
- Video streaming with HLS
- Tag-based organization
- Memories API for browsing photos by date
- **AI-Powered Photo Insights** - Generate contextual insights from photos using LLMs
- **RAG-based Context Retrieval** - Semantic search over daily conversation summaries
- **Automatic Daily Summaries** - LLM-generated summaries of daily conversations with embeddings

## Environment
There are a handful of required environment variables to have the API run.
They should be defined where the binary is located or above it in an `.env` file.
You must have `ffmpeg` installed for streaming video and generating video thumbnails.

- `DATABASE_URL` is a path or url to a database (currently only SQLite is tested)
- `BASE_PATH` is the root from which you want to serve images and videos
- `THUMBNAILS` is a path where generated thumbnails should be stored
- `VIDEO_PATH` is a path where HLS playlists and video parts should be stored
- `BIND_URL` is the url and port to bind to (typically your own IP address)
- `SECRET_KEY` is the *hopefully* random string to sign Tokens with
- `RUST_LOG` is one of `off, error, warn, info, debug, trace`, from least to most noisy [error is default]
- `EXCLUDED_DIRS` is a comma separated list of directories to exclude from the Memories API
- `WATCH_QUICK_INTERVAL_SECONDS` (optional) is the interval in seconds for quick file scans [default: 60]
- `WATCH_FULL_INTERVAL_SECONDS` (optional) is the interval in seconds for full file scans [default: 3600]

### AI Insights Configuration (Optional)

The following environment variables configure AI-powered photo insights and daily conversation summaries:

#### Ollama Configuration
- `OLLAMA_PRIMARY_URL` - Primary Ollama server URL [default: `http://localhost:11434`]
  - Example: `http://desktop:11434` (your main/powerful server)
- `OLLAMA_FALLBACK_URL` - Fallback Ollama server URL (optional)
  - Example: `http://server:11434` (always-on backup server)
- `OLLAMA_PRIMARY_MODEL` - Model to use on primary server [default: `nemotron-3-nano:30b`]
  - Example: `nemotron-3-nano:30b`, `llama3.2:3b`, etc.
- `OLLAMA_FALLBACK_MODEL` - Model to use on fallback server (optional)
  - If not set, uses `OLLAMA_PRIMARY_MODEL` on fallback server

**Legacy Variables** (still supported):
- `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` not set
- `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` not set

#### SMS API Configuration
- `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`]
  - Used to fetch conversation data for context in insights
- `SMS_API_TOKEN` - Authentication token for SMS API (optional)

#### Fallback Behavior
- Primary server is tried first with 5-second connection timeout
- On failure, automatically falls back to secondary server (if configured)
- Total request timeout is 120 seconds to accommodate LLM inference
- Logs indicate which server/model was used and any failover attempts
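
Pulling the configuration above together, a sketch of reading these variables, including the legacy fallbacks (the helper itself is illustrative; the variable names and defaults are the ones documented here):

```rust
use std::env;

// Resolve (primary_url, fallback_url, primary_model, fallback_model),
// honoring the legacy OLLAMA_URL / OLLAMA_MODEL variables when the newer
// ones are absent.
fn ollama_config_from_env() -> (String, Option<String>, String, Option<String>) {
    let primary_url = env::var("OLLAMA_PRIMARY_URL")
        .or_else(|_| env::var("OLLAMA_URL"))
        .unwrap_or_else(|_| "http://localhost:11434".to_string());
    let fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
    let primary_model = env::var("OLLAMA_PRIMARY_MODEL")
        .or_else(|_| env::var("OLLAMA_MODEL"))
        .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
    // If unset, the fallback server reuses the primary model.
    let fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
    (primary_url, fallback_url, primary_model, fallback_model)
}
```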

#### Daily Summary Generation
Daily conversation summaries are generated automatically on server startup. Configure in `src/main.rs`:
- Date range for summary generation
- Contacts to process
- Model version used for embeddings: `nomic-embed-text:v1.5`

@@ -1,3 +0,0 @@
```sql
DROP TABLE tags;
DROP TABLE tagged_photo;
```

@@ -1,13 +0,0 @@
```sql
CREATE TABLE tags (
    id INTEGER PRIMARY KEY NOT NULL,
    name TEXT NOT NULL,
    created_time BIGINT NOT NULL
);

CREATE TABLE tagged_photo (
    id INTEGER PRIMARY KEY NOT NULL,
    photo_name TEXT NOT NULL,
    tag_id INTEGER NOT NULL,
    created_time BIGINT NOT NULL,
    CONSTRAINT tagid FOREIGN KEY (tag_id) REFERENCES tags (id) ON DELETE CASCADE ON UPDATE CASCADE
);
```

@@ -1,2 +0,0 @@
```sql
DROP INDEX IF EXISTS idx_image_exif_file_path;
DROP TABLE IF EXISTS image_exif;
```

@@ -1,32 +0,0 @@
```sql
CREATE TABLE image_exif (
    id INTEGER PRIMARY KEY NOT NULL,
    file_path TEXT NOT NULL UNIQUE,

    -- Camera Information
    camera_make TEXT,
    camera_model TEXT,
    lens_model TEXT,

    -- Image Properties
    width INTEGER,
    height INTEGER,
    orientation INTEGER,

    -- GPS Coordinates
    gps_latitude REAL,
    gps_longitude REAL,
    gps_altitude REAL,

    -- Capture Settings
    focal_length REAL,
    aperture REAL,
    shutter_speed TEXT,
    iso INTEGER,
    date_taken BIGINT,

    -- Housekeeping
    created_time BIGINT NOT NULL,
    last_modified BIGINT NOT NULL
);

CREATE INDEX idx_image_exif_file_path ON image_exif(file_path);
```

@@ -1,9 +0,0 @@
```sql
-- Rollback indexes

DROP INDEX IF EXISTS idx_favorites_userid;
DROP INDEX IF EXISTS idx_favorites_path;
DROP INDEX IF EXISTS idx_tags_name;
DROP INDEX IF EXISTS idx_tagged_photo_photo_name;
DROP INDEX IF EXISTS idx_tagged_photo_tag_id;
DROP INDEX IF EXISTS idx_image_exif_camera;
DROP INDEX IF EXISTS idx_image_exif_gps;
```

@@ -1,17 +0,0 @@
```sql
-- Add indexes for improved query performance

-- Favorites table indexes
CREATE INDEX IF NOT EXISTS idx_favorites_userid ON favorites(userid);
CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path);

-- Tags table indexes
CREATE INDEX IF NOT EXISTS idx_tags_name ON tags(name);

-- Tagged photos indexes
CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name);
CREATE INDEX IF NOT EXISTS idx_tagged_photo_tag_id ON tagged_photo(tag_id);

-- EXIF table indexes (date_taken already has index from previous migration)
-- Adding composite index for common EXIF queries
CREATE INDEX IF NOT EXISTS idx_image_exif_camera ON image_exif(camera_make, camera_model);
CREATE INDEX IF NOT EXISTS idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude);
```

@@ -1,3 +0,0 @@
```sql
-- Rollback unique constraint on favorites

DROP INDEX IF EXISTS idx_favorites_unique;
```

@@ -1,12 +0,0 @@
```sql
-- Add unique constraint to prevent duplicate favorites per user

-- First, remove any existing duplicates (keep the oldest one)
DELETE FROM favorites
WHERE rowid NOT IN (
    SELECT MIN(rowid)
    FROM favorites
    GROUP BY userid, path
);

-- Add unique index to enforce constraint
CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, path);
```

@@ -1,2 +0,0 @@
```sql
-- Remove date_taken index
DROP INDEX IF EXISTS idx_image_exif_date_taken;
```

@@ -1,2 +0,0 @@
```sql
-- Add index on date_taken for efficient date range queries
CREATE INDEX IF NOT EXISTS idx_image_exif_date_taken ON image_exif(date_taken);
```

@@ -1,3 +0,0 @@
```sql
-- Rollback AI insights table
DROP INDEX IF EXISTS idx_photo_insights_path;
DROP TABLE IF EXISTS photo_insights;
```

@@ -1,11 +0,0 @@
```sql
-- AI-generated insights for individual photos
CREATE TABLE IF NOT EXISTS photo_insights (
    id INTEGER PRIMARY KEY NOT NULL,
    file_path TEXT NOT NULL UNIQUE, -- Full path to the photo
    title TEXT NOT NULL,            -- "At the beach with Sarah"
    summary TEXT NOT NULL,          -- 2-3 sentence description
    generated_at BIGINT NOT NULL,
    model_version TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_photo_insights_path ON photo_insights(file_path);
```

@@ -1 +0,0 @@
```sql
DROP TABLE daily_conversation_summaries;
```

@@ -1,19 +0,0 @@
```sql
-- Daily conversation summaries for improved RAG quality
-- Each row = one day's conversation with a contact, summarized by LLM and embedded

CREATE TABLE daily_conversation_summaries (
    id INTEGER PRIMARY KEY NOT NULL,
    date TEXT NOT NULL,             -- ISO date "2024-08-15"
    contact TEXT NOT NULL,          -- Contact name
    summary TEXT NOT NULL,          -- LLM-generated 3-5 sentence summary
    message_count INTEGER NOT NULL, -- Number of messages in this day
    embedding BLOB NOT NULL,        -- 768-dim vector of the summary
    created_at BIGINT NOT NULL,     -- When this summary was generated
    model_version TEXT NOT NULL,    -- "nomic-embed-text:v1.5"
    UNIQUE(date, contact)
);

-- Indexes for efficient querying
CREATE INDEX idx_daily_summaries_date ON daily_conversation_summaries(date);
CREATE INDEX idx_daily_summaries_contact ON daily_conversation_summaries(contact);
CREATE INDEX idx_daily_summaries_date_contact ON daily_conversation_summaries(date, contact);
```

@@ -1 +0,0 @@
```sql
DROP TABLE IF EXISTS calendar_events;
```

@@ -1,20 +0,0 @@
```sql
CREATE TABLE calendar_events (
    id INTEGER PRIMARY KEY NOT NULL,
    event_uid TEXT,
    summary TEXT NOT NULL,
    description TEXT,
    location TEXT,
    start_time BIGINT NOT NULL,
    end_time BIGINT NOT NULL,
    all_day BOOLEAN NOT NULL DEFAULT 0,
    organizer TEXT,
    attendees TEXT,
    embedding BLOB,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(event_uid, start_time)
);

CREATE INDEX idx_calendar_start_time ON calendar_events(start_time);
CREATE INDEX idx_calendar_end_time ON calendar_events(end_time);
CREATE INDEX idx_calendar_time_range ON calendar_events(start_time, end_time);
```

@@ -1 +0,0 @@
```sql
DROP TABLE IF EXISTS location_history;
```

@@ -1,19 +0,0 @@
```sql
CREATE TABLE location_history (
    id INTEGER PRIMARY KEY NOT NULL,
    timestamp BIGINT NOT NULL,
    latitude REAL NOT NULL,
    longitude REAL NOT NULL,
    accuracy INTEGER,
    activity TEXT,
    activity_confidence INTEGER,
    place_name TEXT,
    place_category TEXT,
    embedding BLOB,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(timestamp, latitude, longitude)
);

CREATE INDEX idx_location_timestamp ON location_history(timestamp);
CREATE INDEX idx_location_coords ON location_history(latitude, longitude);
CREATE INDEX idx_location_activity ON location_history(activity);
```

@@ -1 +0,0 @@
```sql
DROP TABLE IF EXISTS search_history;
```

@@ -1,13 +0,0 @@
```sql
CREATE TABLE search_history (
    id INTEGER PRIMARY KEY NOT NULL,
    timestamp BIGINT NOT NULL,
    query TEXT NOT NULL,
    search_engine TEXT,
    embedding BLOB NOT NULL,
    created_at BIGINT NOT NULL,
    source_file TEXT,
    UNIQUE(timestamp, query)
);

CREATE INDEX idx_search_timestamp ON search_history(timestamp);
CREATE INDEX idx_search_query ON search_history(query);
```

@@ -1,4 +0,0 @@
```sql
-- Revert search performance optimization indexes

DROP INDEX IF EXISTS idx_image_exif_date_path;
DROP INDEX IF EXISTS idx_tagged_photo_count;
```

@@ -1,15 +0,0 @@
```sql
-- Add composite indexes for search performance optimization
-- This migration addresses N+1 query issues and enables database-level sorting

-- Covering index for date-sorted queries (supports ORDER BY + pagination)
-- Enables efficient date-based sorting without loading all files into memory
CREATE INDEX IF NOT EXISTS idx_image_exif_date_path
    ON image_exif(date_taken DESC, file_path);

-- Optimize batch tag count queries with GROUP BY
-- Reduces N individual queries to a single batch query
CREATE INDEX IF NOT EXISTS idx_tagged_photo_count
    ON tagged_photo(photo_name, tag_id);

-- Update query planner statistics to optimize query execution
ANALYZE;
```
@@ -1,403 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use chrono::{NaiveDate, Utc};
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
|
||||
use crate::database::{DailySummaryDao, InsertDailySummary};
|
||||
use crate::otel::global_tracer;
|
||||
|
||||
/// Strip boilerplate prefixes and common phrases from summaries before embedding.
|
||||
/// This improves embedding diversity by removing structural similarity.
|
||||
pub fn strip_summary_boilerplate(summary: &str) -> String {
|
||||
let mut text = summary.trim().to_string();
|
||||
|
||||
// Remove markdown headers
|
||||
while text.starts_with('#') {
|
||||
if let Some(pos) = text.find('\n') {
|
||||
text = text[pos..].trim_start().to_string();
|
||||
} else {
|
||||
// Single line with just headers, try to extract content after #s
|
||||
text = text.trim_start_matches('#').trim().to_string();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove "Summary:" prefix variations (with optional markdown bold)
|
||||
let prefixes = [
|
||||
"**Summary:**",
|
||||
"**Summary**:",
|
||||
"*Summary:*",
|
||||
"Summary:",
|
||||
"**summary:**",
|
||||
"summary:",
|
||||
];
|
||||
for prefix in prefixes {
|
||||
if text.to_lowercase().starts_with(&prefix.to_lowercase()) {
|
||||
text = text[prefix.len()..].trim_start().to_string();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove common opening phrases that add no semantic value
|
||||
let opening_phrases = [
|
||||
"Today, Melissa and I discussed",
|
||||
"Today, Amanda and I discussed",
|
||||
"Today Melissa and I discussed",
|
||||
"Today Amanda and I discussed",
|
||||
"Melissa and I discussed",
|
||||
"Amanda and I discussed",
|
||||
"Today, I discussed",
|
||||
"Today I discussed",
|
||||
"The conversation covered",
|
||||
"This conversation covered",
|
||||
"In this conversation,",
|
||||
"During this conversation,",
|
||||
];
|
||||
|
||||
for phrase in opening_phrases {
|
||||
if text.to_lowercase().starts_with(&phrase.to_lowercase()) {
|
||||
text = text[phrase.len()..].trim_start().to_string();
|
||||
// Remove leading punctuation/articles after stripping phrase
|
||||
text = text
|
||||
.trim_start_matches([',', ':', '-'])
|
||||
.trim_start()
|
||||
.to_string();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove any remaining leading markdown bold markers
|
||||
if text.starts_with("**")
|
||||
&& let Some(end) = text[2..].find("**")
|
||||
{
|
||||
// Keep the content between ** but remove the markers
|
||||
let bold_content = &text[2..2 + end];
|
||||
text = format!("{}{}", bold_content, &text[4 + end..]);
|
||||
}
|
||||
|
||||
text.trim().to_string()
|
||||
}
|
||||
|
||||
/// Generate and embed daily conversation summaries for a date range
|
||||
/// Default: August 2024 ±30 days (July 1 - September 30, 2024)
|
||||
pub async fn generate_daily_summaries(
|
||||
contact: &str,
|
||||
start_date: Option<NaiveDate>,
|
||||
end_date: Option<NaiveDate>,
|
||||
ollama: &OllamaClient,
|
||||
sms_client: &SmsApiClient,
|
||||
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
|
||||
) -> Result<()> {
|
||||
let tracer = global_tracer();
|
||||
|
||||
// Get current context (empty in background task) and start span with it
|
||||
let current_cx = opentelemetry::Context::current();
|
||||
let mut span = tracer.start_with_context("ai.daily_summary.generate_batch", ¤t_cx);
|
||||
span.set_attribute(KeyValue::new("contact", contact.to_string()));
|
||||
|
||||
// Create context with this span for child operations
|
||||
let parent_cx = current_cx.with_span(span);
|
||||
|
||||
// Default to August 2024 ±30 days
|
||||
let start = start_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 7, 1).unwrap());
|
||||
let end = end_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 9, 30).unwrap());
|
||||
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("start_date", start.to_string()));
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("end_date", end.to_string()));
|
||||
parent_cx.span().set_attribute(KeyValue::new(
|
||||
"date_range_days",
|
||||
(end - start).num_days() + 1,
|
||||
));
|
||||
|
||||
log::info!("========================================");
|
||||
log::info!("Starting daily summary generation for {}", contact);
|
||||
log::info!(
|
||||
"Date range: {} to {} ({} days)",
|
||||
start,
|
||||
end,
|
||||
(end - start).num_days() + 1
|
||||
);
|
||||
log::info!("========================================");
|
||||
|
||||
// Fetch all messages for the contact in the date range
|
||||
log::info!("Fetching messages for date range...");
|
||||
let _start_timestamp = start.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
|
||||
let _end_timestamp = end.and_hms_opt(23, 59, 59).unwrap().and_utc().timestamp();
|
||||
|
||||
let all_messages = sms_client.fetch_all_messages_for_contact(contact).await?;
|
||||
|
||||
// Filter to date range and group by date
|
||||
let mut messages_by_date: HashMap<NaiveDate, Vec<SmsMessage>> = HashMap::new();
|
||||
|
||||
for msg in all_messages {
|
||||
let msg_dt = chrono::DateTime::from_timestamp(msg.timestamp, 0);
|
||||
if let Some(dt) = msg_dt {
|
||||
let date = dt.date_naive();
|
||||
if date >= start && date <= end {
|
||||
messages_by_date.entry(date).or_default().push(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"Grouped messages into {} days with activity",
|
||||
messages_by_date.len()
|
||||
);
|
||||
|
||||
if messages_by_date.is_empty() {
|
||||
log::warn!("No messages found in date range");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Sort dates for ordered processing
|
||||
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
|
||||
dates.sort();
|
||||
|
||||
let total_days = dates.len();
|
||||
let mut processed = 0;
|
||||
let mut skipped = 0;
|
||||
let mut failed = 0;
|
||||
|
||||
log::info!("Processing {} days with messages...", total_days);
|
||||
|
||||
for (idx, date) in dates.iter().enumerate() {
|
||||
let messages = messages_by_date.get(date).unwrap();
|
||||
let date_str = date.format("%Y-%m-%d").to_string();
|
||||
|
||||
// Check if summary already exists
|
||||
{
|
||||
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
|
||||
if dao
|
||||
.summary_exists(&otel_context, &date_str, contact)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
skipped += 1;
|
||||
if idx % 10 == 0 {
|
||||
log::info!(
|
||||
"Progress: {}/{} ({} processed, {} skipped)",
|
||||
idx + 1,
|
||||
total_days,
|
||||
processed,
|
||||
skipped
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate summary for this day
|
||||
match generate_and_store_daily_summary(
|
||||
&parent_cx,
|
||||
date,
|
||||
contact,
|
||||
messages,
|
||||
ollama,
|
||||
summary_dao.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
processed += 1;
|
||||
log::info!(
|
||||
"✓ {}/{}: {} ({} messages)",
|
||||
idx + 1,
|
||||
total_days,
|
||||
date_str,
|
||||
messages.len()
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
failed += 1;
|
||||
log::error!("✗ Failed to process {}: {:?}", date_str, e);
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting: sleep 500ms between summaries
|
||||
if idx < total_days - 1 {
|
||||
sleep(std::time::Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
// Progress logging every 10 days
|
||||
if idx % 10 == 0 && idx > 0 {
|
||||
log::info!(
|
||||
"Progress: {}/{} ({} processed, {} skipped, {} failed)",
|
||||
idx + 1,
|
||||
total_days,
|
||||
processed,
|
||||
skipped,
|
||||
failed
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
log::info!("========================================");
|
||||
log::info!("Daily summary generation complete!");
|
||||
log::info!(
|
||||
"Processed: {}, Skipped: {}, Failed: {}",
|
||||
processed,
|
||||
skipped,
|
||||
failed
|
||||
);
|
||||
log::info!("========================================");
|
||||
|
||||
// Record final metrics in span
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("days_processed", processed as i64));
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("days_skipped", skipped as i64));
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("days_failed", failed as i64));
|
||||
parent_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("total_days", total_days as i64));
|
||||
|
||||
if failed > 0 {
|
||||
parent_cx
|
||||
.span()
|
||||
.set_status(Status::error(format!("{} days failed to process", failed)));
|
||||
} else {
|
||||
parent_cx.span().set_status(Status::Ok);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generate and store a single day's summary
|
||||
async fn generate_and_store_daily_summary(
|
||||
parent_cx: &opentelemetry::Context,
|
||||
date: &NaiveDate,
|
||||
contact: &str,
|
||||
messages: &[SmsMessage],
|
||||
ollama: &OllamaClient,
|
||||
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
|
||||
) -> Result<()> {
|
||||
let tracer = global_tracer();
|
||||
let mut span = tracer.start_with_context("ai.daily_summary.generate_single", parent_cx);
|
||||
span.set_attribute(KeyValue::new("date", date.to_string()));
|
||||
span.set_attribute(KeyValue::new("contact", contact.to_string()));
|
||||
span.set_attribute(KeyValue::new("message_count", messages.len() as i64));
|
||||
|
||||
// Format messages for LLM
|
||||
let messages_text: String = messages
|
||||
.iter()
|
||||
.take(200) // Limit to 200 messages per day to avoid token overflow
|
||||
.map(|m| {
|
||||
if m.is_sent {
|
||||
format!("Me: {}", m.body)
|
||||
} else {
|
||||
format!("{}: {}", m.contact, m.body)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let weekday = date.format("%A");
|
||||
|
||||
let prompt = format!(
|
||||
r#"Summarize this day's conversation between me and {}.
|
||||
|
||||
CRITICAL FORMAT RULES:
|
||||
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
|
||||
- Do NOT repeat the date at the beginning
|
||||
- Start DIRECTLY with the content - begin with a person's name or action
|
||||
- Write in past tense, as if recording what happened
|
||||
|
||||
NARRATIVE (3-5 sentences):
|
||||
- What specific topics, activities, or events were discussed?
|
||||
- What places, people, or organizations were mentioned?
|
||||
- What plans were made or decisions discussed?
|
||||
- Clearly distinguish between what "I" did versus what {} did
|
||||
|
||||
KEYWORDS (comma-separated):
|
||||
5-10 specific keywords that capture this conversation's unique content:
|
||||
- Proper nouns (people, places, brands)
|
||||
- Specific activities ("drum corps audition" not just "music")
|
||||
- Distinctive terms that make this day unique
|
||||
|
||||
Date: {} ({})
|
||||
Messages:
|
||||
{}
|
||||
|
||||
YOUR RESPONSE (follow this format EXACTLY):
|
||||
Summary: [Start directly with content, NO preamble]
|
||||
|
||||
Keywords: [specific, unique terms]"#,
|
||||
contact,
|
||||
contact,
|
||||
date.format("%B %d, %Y"),
|
||||
weekday,
|
||||
messages_text
|
||||
);
|
||||
|
||||
// Generate summary with LLM
|
||||
let summary = ollama
|
||||
.generate(
|
||||
&prompt,
|
||||
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
|
||||
)
|
||||
.await?;
|
||||
|
||||
log::debug!(
|
||||
"Generated summary for {}: {}",
|
||||
date,
|
||||
summary.chars().take(100).collect::<String>()
|
||||
);
|
||||
|
||||
span.set_attribute(KeyValue::new("summary_length", summary.len() as i64));
|
||||
|
||||
// Strip boilerplate before embedding to improve vector diversity
|
||||
let stripped_summary = strip_summary_boilerplate(&summary);
|
||||
log::debug!(
|
||||
"Stripped summary for embedding: {}",
|
||||
stripped_summary.chars().take(100).collect::<String>()
|
||||
);
|
||||
|
||||
// Embed the stripped summary (store original summary in DB)
|
||||
let embedding = ollama.generate_embedding(&stripped_summary).await?;
|
||||
|
||||
span.set_attribute(KeyValue::new(
|
||||
"embedding_dimensions",
|
||||
embedding.len() as i64,
|
||||
));
|
||||
|
||||
// Store in database
|
||||
let insert = InsertDailySummary {
|
||||
date: date.format("%Y-%m-%d").to_string(),
|
||||
contact: contact.to_string(),
|
||||
summary: summary.trim().to_string(),
|
||||
message_count: messages.len() as i32,
|
||||
embedding,
|
||||
created_at: Utc::now().timestamp(),
|
||||
// model_version: "nomic-embed-text:v1.5".to_string(),
|
||||
model_version: "mxbai-embed-large:335m".to_string(),
|
||||
};
|
||||
|
||||
// Create context from current span for DB operation
|
||||
let child_cx = opentelemetry::Context::current_with_span(span);
|
||||
|
||||
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
|
||||
let result = dao
|
||||
.store_summary(&child_cx, insert)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to store summary: {:?}", e));
|
||||
|
||||
match &result {
|
||||
Ok(_) => child_cx.span().set_status(Status::Ok),
|
||||
Err(e) => child_cx.span().set_status(Status::error(e.to_string())),
|
||||
}
|
||||
|
||||
result?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,263 +0,0 @@
|
||||
use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::{Span, Status, Tracer};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
|
||||
use crate::data::Claims;
|
||||
use crate::database::InsightDao;
|
||||
use crate::otel::{extract_context_from_request, global_tracer};
|
||||
use crate::utils::normalize_path;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GeneratePhotoInsightRequest {
|
||||
pub file_path: String,
|
||||
#[serde(default)]
|
||||
pub model: Option<String>,
|
||||
#[serde(default)]
|
||||
pub system_prompt: Option<String>,
|
||||
#[serde(default)]
|
||||
pub num_ctx: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GetPhotoInsightQuery {
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct PhotoInsightResponse {
|
||||
pub id: i32,
|
||||
pub file_path: String,
|
||||
pub title: String,
|
||||
pub summary: String,
|
||||
pub generated_at: i64,
|
||||
pub model_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct AvailableModelsResponse {
|
||||
pub primary: ServerModels,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fallback: Option<ServerModels>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ServerModels {
|
||||
pub url: String,
|
||||
pub models: Vec<ModelCapabilities>,
|
||||
pub default_model: String,
|
||||
}
|
||||
|
||||
/// POST /insights/generate - Generate insight for a specific photo
|
||||
#[post("/insights/generate")]
|
||||
pub async fn generate_insight_handler(
|
||||
http_request: HttpRequest,
|
||||
_claims: Claims,
|
||||
request: web::Json<GeneratePhotoInsightRequest>,
|
||||
insight_generator: web::Data<InsightGenerator>,
|
||||
) -> impl Responder {
|
||||
let parent_context = extract_context_from_request(&http_request);
|
||||
let tracer = global_tracer();
|
||||
let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
|
||||
|
||||
let normalized_path = normalize_path(&request.file_path);
|
||||
|
||||
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
|
||||
if let Some(ref model) = request.model {
|
||||
span.set_attribute(KeyValue::new("model", model.clone()));
|
||||
}
|
||||
if let Some(ref prompt) = request.system_prompt {
|
||||
span.set_attribute(KeyValue::new("has_custom_prompt", true));
|
||||
span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
|
||||
}
|
||||
if let Some(ctx) = request.num_ctx {
|
||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
|
||||
normalized_path,
|
||||
request.model,
|
||||
request.system_prompt.is_some(),
|
||||
request.num_ctx
|
||||
);
|
||||
|
||||
// Generate insight with optional custom model, system prompt, and context size
|
||||
let result = insight_generator
|
||||
.generate_insight_for_photo_with_config(
|
||||
&normalized_path,
|
||||
request.model.clone(),
|
||||
request.system_prompt.clone(),
|
||||
request.num_ctx,
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(()) => {
|
||||
span.set_status(Status::Ok);
|
||||
HttpResponse::Ok().json(serde_json::json!({
|
||||
"success": true,
|
||||
"message": "Insight generated successfully"
|
||||
}))
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to generate insight: {:?}", e);
|
||||
span.set_status(Status::error(e.to_string()));
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||
"error": format!("Failed to generate insight: {:?}", e)
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
|
||||
#[get("/insights")]
|
||||
pub async fn get_insight_handler(
|
||||
_claims: Claims,
|
||||
query: web::Query<GetPhotoInsightQuery>,
|
||||
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
|
||||
) -> impl Responder {
|
||||
let normalized_path = normalize_path(&query.path);
|
||||
log::debug!("Fetching insight for {}", normalized_path);
|
||||
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
|
||||
match dao.get_insight(&otel_context, &normalized_path) {
|
||||
Ok(Some(insight)) => {
|
||||
let response = PhotoInsightResponse {
|
||||
id: insight.id,
|
||||
file_path: insight.file_path,
|
||||
title: insight.title,
|
||||
summary: insight.summary,
|
||||
generated_at: insight.generated_at,
|
||||
model_version: insight.model_version,
|
||||
};
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
|
||||
"error": "Insight not found"
|
||||
})),
|
||||
Err(e) => {
|
||||
log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||
"error": format!("Failed to fetch insight: {:?}", e)
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
|
||||
#[delete("/insights")]
|
||||
pub async fn delete_insight_handler(
|
||||
_claims: Claims,
|
||||
query: web::Query<GetPhotoInsightQuery>,
|
||||
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
|
||||
) -> impl Responder {
|
||||
let normalized_path = normalize_path(&query.path);
|
||||
log::info!("Deleting insight for {}", normalized_path);
|
||||
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
|
||||
match dao.delete_insight(&otel_context, &normalized_path) {
|
||||
Ok(()) => HttpResponse::Ok().json(serde_json::json!({
|
||||
"success": true,
|
||||
"message": "Insight deleted successfully"
|
||||
})),
|
||||
Err(e) => {
|
||||
log::error!("Failed to delete insight: {:?}", e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||
"error": format!("Failed to delete insight: {:?}", e)
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GET /insights/all - Get all insights
|
||||
#[get("/insights/all")]
|
||||
pub async fn get_all_insights_handler(
|
||||
_claims: Claims,
|
||||
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
|
||||
) -> impl Responder {
|
||||
log::debug!("Fetching all insights");
|
||||
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
|
||||
match dao.get_all_insights(&otel_context) {
|
||||
Ok(insights) => {
|
||||
let responses: Vec<PhotoInsightResponse> = insights
|
||||
.into_iter()
|
||||
.map(|insight| PhotoInsightResponse {
|
||||
id: insight.id,
|
||||
file_path: insight.file_path,
|
||||
title: insight.title,
|
||||
summary: insight.summary,
|
||||
generated_at: insight.generated_at,
|
||||
model_version: insight.model_version,
|
||||
})
|
||||
.collect();
|
||||
|
||||
HttpResponse::Ok().json(responses)
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to fetch all insights: {:?}", e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({
|
||||
"error": format!("Failed to fetch insights: {:?}", e)
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GET /insights/models - List available models from both servers with capabilities
|
||||
#[get("/insights/models")]
|
||||
pub async fn get_available_models_handler(
|
||||
_claims: Claims,
|
||||
app_state: web::Data<crate::state::AppState>,
|
||||
) -> impl Responder {
|
||||
log::debug!("Fetching available models with capabilities");
|
||||
|
||||
let ollama_client = &app_state.ollama;
|
||||
|
||||
// Fetch models with capabilities from primary server
|
||||
let primary_models =
|
||||
match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
|
||||
Ok(models) => models,
|
||||
Err(e) => {
|
||||
log::warn!("Failed to fetch models from primary server: {:?}", e);
|
||||
vec![]
|
||||
}
|
||||
};
|
||||
|
||||
let primary = ServerModels {
|
||||
url: ollama_client.primary_url.clone(),
|
||||
models: primary_models,
|
||||
default_model: ollama_client.primary_model.clone(),
|
||||
};
|
||||
|
||||
// Fetch models with capabilities from fallback server if configured
|
||||
let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
|
||||
match OllamaClient::list_models_with_capabilities(fallback_url).await {
|
||||
Ok(models) => Some(ServerModels {
|
||||
url: fallback_url.clone(),
|
||||
models,
|
||||
default_model: ollama_client
|
||||
.fallback_model
|
||||
.clone()
|
||||
.unwrap_or_else(|| ollama_client.primary_model.clone()),
|
||||
}),
|
||||
Err(e) => {
|
||||
log::warn!("Failed to fetch models from fallback server: {:?}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let response = AvailableModelsResponse { primary, fallback };
|
||||
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
File diff suppressed because it is too large

@@ -1,16 +0,0 @@
```rust
pub mod daily_summary_job;
pub mod handlers;
pub mod insight_generator;
pub mod ollama;
pub mod sms_client;

// strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
#[allow(unused_imports)]
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
pub use handlers::{
    delete_insight_handler, generate_insight_handler, get_all_insights_handler,
    get_available_models_handler, get_insight_handler,
};
pub use insight_generator::InsightGenerator;
pub use ollama::{ModelCapabilities, OllamaClient};
pub use sms_client::{SmsApiClient, SmsMessage};
```

src/ai/ollama.rs (735 removed lines)
@@ -1,735 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDate;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
// Cache duration: 15 minutes
|
||||
const CACHE_DURATION_SECS: u64 = 15 * 60;
|
||||
|
||||
// Cached entry with timestamp
|
||||
#[derive(Clone)]
|
||||
struct CachedEntry<T> {
|
||||
data: T,
|
||||
cached_at: Instant,
|
||||
}
|
||||
|
||||
impl<T> CachedEntry<T> {
|
||||
fn new(data: T) -> Self {
|
||||
Self {
|
||||
data,
|
||||
cached_at: Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_expired(&self) -> bool {
|
||||
self.cached_at.elapsed().as_secs() > CACHE_DURATION_SECS
|
||||
}
|
||||
}
|
||||
|
||||
// Global cache for model lists and capabilities
|
||||
lazy_static::lazy_static! {
|
||||
static ref MODEL_LIST_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<String>>>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
|
||||
static ref MODEL_CAPABILITIES_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<ModelCapabilities>>>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OllamaClient {
|
||||
client: Client,
|
||||
pub primary_url: String,
|
||||
pub fallback_url: Option<String>,
|
||||
pub primary_model: String,
|
||||
pub fallback_model: Option<String>,
|
||||
num_ctx: Option<i32>,
|
||||
}
|
||||
|
||||
impl OllamaClient {
|
||||
pub fn new(
|
||||
primary_url: String,
|
||||
fallback_url: Option<String>,
|
||||
primary_model: String,
|
||||
fallback_model: Option<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
client: Client::builder()
|
||||
.connect_timeout(Duration::from_secs(5)) // Quick connection timeout
|
||||
.timeout(Duration::from_secs(120)) // Total request timeout for generation
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
primary_url,
|
||||
fallback_url,
|
||||
primary_model,
|
||||
fallback_model,
|
||||
num_ctx: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
|
||||
self.num_ctx = num_ctx;
|
||||
}
|
||||
|
||||
/// List available models on an Ollama server (cached for 15 minutes)
|
||||
pub async fn list_models(url: &str) -> Result<Vec<String>> {
|
||||
// Check cache first
|
||||
{
|
||||
let cache = MODEL_LIST_CACHE.lock().unwrap();
|
||||
if let Some(entry) = cache.get(url)
|
||||
&& !entry.is_expired()
|
||||
{
|
||||
log::debug!("Returning cached model list for {}", url);
|
||||
return Ok(entry.data.clone());
|
||||
}
|
||||
}
|
||||
|
||||
log::debug!("Fetching fresh model list from {}", url);
|
||||
|
||||
let client = Client::builder()
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
let response = client.get(format!("{}/api/tags", url)).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow::anyhow!("Failed to list models from {}", url));
|
||||
}
|
||||
|
||||
let tags_response: OllamaTagsResponse = response.json().await?;
|
||||
let models: Vec<String> = tags_response.models.into_iter().map(|m| m.name).collect();
|
||||
|
||||
// Store in cache
|
||||
{
|
||||
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
|
||||
cache.insert(url.to_string(), CachedEntry::new(models.clone()));
|
||||
}
|
||||
|
||||
Ok(models)
|
||||
}
|
||||
|
||||
/// Check if a model is available on a server
|
||||
pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
|
||||
let models = Self::list_models(url).await?;
|
||||
Ok(models.iter().any(|m| m == model_name))
|
||||
}
|
||||
|
||||
/// Clear the model list cache for a specific URL or all URLs
|
||||
pub fn clear_model_cache(url: Option<&str>) {
|
||||
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
|
||||
if let Some(url) = url {
|
||||
cache.remove(url);
|
||||
log::debug!("Cleared model list cache for {}", url);
|
||||
} else {
|
||||
cache.clear();
|
||||
log::debug!("Cleared all model list cache entries");
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the model capabilities cache for a specific URL or all URLs
|
||||
pub fn clear_capabilities_cache(url: Option<&str>) {
|
||||
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
|
||||
if let Some(url) = url {
|
||||
cache.remove(url);
|
||||
log::debug!("Cleared model capabilities cache for {}", url);
|
||||
} else {
|
||||
cache.clear();
|
||||
log::debug!("Cleared all model capabilities cache entries");
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a model has vision capabilities using the /api/show endpoint
|
||||
pub async fn check_model_capabilities(
|
||||
url: &str,
|
||||
model_name: &str,
|
||||
) -> Result<ModelCapabilities> {
|
||||
let client = Client::builder()
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct ShowRequest {
|
||||
model: String,
|
||||
}
|
||||
|
||||
let response = client
|
||||
.post(format!("{}/api/show", url))
|
||||
.json(&ShowRequest {
|
||||
model: model_name.to_string(),
|
||||
})
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Failed to get model details for {} from {}",
|
||||
model_name,
|
||||
url
|
||||
));
|
||||
}
|
||||
|
||||
let show_response: OllamaShowResponse = response.json().await?;
|
||||
|
||||
// Check if "vision" is in the capabilities array
|
||||
let has_vision = show_response.capabilities.iter().any(|cap| cap == "vision");
|
||||
|
||||
Ok(ModelCapabilities {
|
||||
name: model_name.to_string(),
|
||||
has_vision,
|
||||
})
|
||||
}
|
||||
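// Illustrative /api/show response shape this relies on (other fields are
// ignored; exact capability names can vary by Ollama version):
//   { "capabilities": ["completion", "vision"], ... }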
|
||||
/// List all models with their capabilities from a server (cached for 15 minutes)
|
||||
pub async fn list_models_with_capabilities(url: &str) -> Result<Vec<ModelCapabilities>> {
|
||||
// Check cache first
|
||||
{
|
||||
let cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
|
||||
if let Some(entry) = cache.get(url)
|
||||
&& !entry.is_expired()
|
||||
{
|
||||
log::debug!("Returning cached model capabilities for {}", url);
|
||||
return Ok(entry.data.clone());
|
||||
}
|
||||
}
|
||||
|
||||
log::debug!("Fetching fresh model capabilities from {}", url);
|
||||
|
||||
let models = Self::list_models(url).await?;
|
||||
let mut capabilities = Vec::new();
|
||||
|
||||
for model_name in models {
|
||||
match Self::check_model_capabilities(url, &model_name).await {
|
||||
Ok(cap) => capabilities.push(cap),
|
||||
Err(e) => {
|
||||
log::warn!("Failed to get capabilities for model {}: {}", model_name, e);
|
||||
// Fallback: assume no vision if we can't check
|
||||
capabilities.push(ModelCapabilities {
|
||||
name: model_name,
|
||||
has_vision: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store in cache
|
||||
{
|
||||
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
|
||||
cache.insert(url.to_string(), CachedEntry::new(capabilities.clone()));
|
||||
}
|
||||
|
||||
Ok(capabilities)
|
||||
}
|
||||
|
||||
/// Extract final answer from thinking model output
|
||||
/// Handles <think>...</think> tags and takes everything after
|
||||
fn extract_final_answer(&self, response: &str) -> String {
|
||||
let response = response.trim();
|
||||
|
||||
// Look for </think> tag and take everything after it
|
||||
if let Some(pos) = response.find("</think>") {
|
||||
let answer = response[pos + 8..].trim();
|
||||
if !answer.is_empty() {
|
||||
return answer.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: return the whole response trimmed
|
||||
response.to_string()
|
||||
}
|
||||
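// Example: extract_final_answer("<think>internal reasoning</think>\nBeach day")
// returns "Beach day"; a response with no </think> tag is returned trimmed as-is.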
|
||||
async fn try_generate(
|
||||
&self,
|
||||
url: &str,
|
||||
model: &str,
|
||||
prompt: &str,
|
||||
system: Option<&str>,
|
||||
images: Option<Vec<String>>,
|
||||
) -> Result<String> {
|
||||
let request = OllamaRequest {
|
||||
model: model.to_string(),
|
||||
prompt: prompt.to_string(),
|
||||
stream: false,
|
||||
system: system.map(|s| s.to_string()),
|
||||
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: ctx }),
|
||||
images,
|
||||
};
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(format!("{}/api/generate", url))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_body = response.text().await.unwrap_or_default();
|
||||
return Err(anyhow::anyhow!(
|
||||
"Ollama request failed: {} - {}",
|
||||
status,
|
||||
error_body
|
||||
));
|
||||
}
|
||||
|
||||
let result: OllamaResponse = response.json().await?;
|
||||
Ok(result.response)
|
||||
}
|
||||
|
||||
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
|
||||
self.generate_with_images(prompt, system, None).await
|
||||
}
|
||||
|
||||
pub async fn generate_with_images(
|
||||
&self,
|
||||
prompt: &str,
|
||||
system: Option<&str>,
|
||||
images: Option<Vec<String>>,
|
||||
) -> Result<String> {
|
||||
log::debug!("=== Ollama Request ===");
|
||||
log::debug!("Primary model: {}", self.primary_model);
|
||||
if let Some(sys) = system {
|
||||
log::debug!("System: {}", sys);
|
||||
}
|
||||
log::debug!("Prompt:\n{}", prompt);
|
||||
if let Some(ref imgs) = images {
|
||||
log::debug!("Images: {} image(s) included", imgs.len());
|
||||
}
|
||||
log::debug!("=====================");
|
||||
|
||||
// Try primary server first with primary model
|
||||
log::info!(
|
||||
"Attempting to generate with primary server: {} (model: {})",
|
||||
self.primary_url,
|
||||
self.primary_model
|
||||
);
|
||||
let primary_result = self
|
||||
.try_generate(
|
||||
&self.primary_url,
|
||||
&self.primary_model,
|
||||
prompt,
|
||||
system,
|
||||
images.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let raw_response = match primary_result {
|
||||
Ok(response) => {
|
||||
log::info!("Successfully generated response from primary server");
|
||||
response
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Primary server failed: {}", e);
|
||||
|
||||
// Try fallback server if available
|
||||
if let Some(fallback_url) = &self.fallback_url {
|
||||
// Use fallback model if specified, otherwise use primary model
|
||||
let fallback_model =
|
||||
self.fallback_model.as_ref().unwrap_or(&self.primary_model);
|
||||
|
||||
log::info!(
|
||||
"Attempting to generate with fallback server: {} (model: {})",
|
||||
fallback_url,
|
||||
fallback_model
|
||||
);
|
||||
match self
|
||||
.try_generate(fallback_url, fallback_model, prompt, system, images.clone())
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
log::info!("Successfully generated response from fallback server");
|
||||
response
|
||||
}
|
||||
Err(fallback_e) => {
|
||||
log::error!("Fallback server also failed: {}", fallback_e);
|
||||
return Err(anyhow::anyhow!(
|
||||
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
|
||||
e,
|
||||
fallback_e
|
||||
));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log::error!("No fallback server configured");
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
log::debug!("=== Ollama Response ===");
|
||||
log::debug!("Raw response: {}", raw_response.trim());
|
||||
log::debug!("=======================");
|
||||
|
||||
// Extract final answer from thinking model output
|
||||
let cleaned = self.extract_final_answer(&raw_response);
|
||||
|
||||
log::debug!("=== Cleaned Response ===");
|
||||
log::debug!("Final answer: {}", cleaned);
|
||||
log::debug!("========================");
|
||||
|
||||
Ok(cleaned)
|
||||
}
|
||||
|
||||
/// Generate a title for a single photo based on its context
|
||||
pub async fn generate_photo_title(
|
||||
&self,
|
||||
date: NaiveDate,
|
||||
location: Option<&str>,
|
||||
contact: Option<&str>,
|
||||
sms_summary: Option<&str>,
|
||||
custom_system: Option<&str>,
|
||||
image_base64: Option<String>,
|
||||
) -> Result<String> {
|
||||
let location_str = location.unwrap_or("Unknown location");
|
||||
let sms_str = sms_summary.unwrap_or("No messages");
|
||||
|
||||
let prompt = if image_base64.is_some() {
|
||||
if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. If limited information is available, use a simple descriptive title based on what you see.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Analyze the image and use specific details from both the visual content and the context above. If limited information is available, use a simple descriptive title based on what you see.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
} else if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Use specific details from the context above. The photo is from a folder for {}, so they are likely related to this moment. If no specific details are available, use a simple descriptive title.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Use specific details from the context above. If no specific details are available, use a simple descriptive title.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
};
|
||||
|
||||
let system = custom_system.unwrap_or("You are my long term memory assistant. Use only the information provided. Do not invent details.");
|
||||
|
||||
let images = image_base64.map(|img| vec![img]);
|
||||
let title = self
|
||||
.generate_with_images(&prompt, Some(system), images)
|
||||
.await?;
|
||||
Ok(title.trim().trim_matches('"').to_string())
|
||||
}
|
||||
|
||||
/// Generate a summary for a single photo based on its context
|
||||
pub async fn generate_photo_summary(
|
||||
&self,
|
||||
date: NaiveDate,
|
||||
location: Option<&str>,
|
||||
contact: Option<&str>,
|
||||
sms_summary: Option<&str>,
|
||||
custom_system: Option<&str>,
|
||||
image_base64: Option<String>,
|
||||
) -> Result<String> {
|
||||
let location_str = location.unwrap_or("Unknown");
|
||||
let sms_str = sms_summary.unwrap_or("No messages");
|
||||
|
||||
let prompt = if image_base64.is_some() {
|
||||
if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
} else if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
};
|
||||
|
||||
let system = custom_system.unwrap_or("You are a memory refreshing assistant who is able to provide insights through analyzing past conversations. Use only the information provided. Do not invent details.");
|
||||
|
||||
let images = image_base64.map(|img| vec![img]);
|
||||
self.generate_with_images(&prompt, Some(system), images)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Generate an embedding vector for text using nomic-embed-text:v1.5
|
||||
/// Returns a 768-dimensional vector as Vec<f32>
|
||||
pub async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
|
||||
let embeddings = self.generate_embeddings(&[text]).await?;
|
||||
embeddings
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| anyhow::anyhow!("No embedding returned"))
|
||||
}
|
||||
|
||||
/// Generate embeddings for multiple texts in a single API call (batch mode)
|
||||
/// Returns a vector of 768-dimensional vectors
|
||||
/// This is much more efficient than calling generate_embedding multiple times
|
||||
pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
|
||||
let embedding_model = "nomic-embed-text:v1.5";
|
||||
|
||||
log::debug!("=== Ollama Batch Embedding Request ===");
|
||||
log::debug!("Model: {}", embedding_model);
|
||||
log::debug!("Batch size: {} texts", texts.len());
|
||||
log::debug!("======================================");
|
||||
|
||||
// Try primary server first
|
||||
log::debug!(
|
||||
"Attempting to generate {} embeddings with primary server: {} (model: {})",
|
||||
texts.len(),
|
||||
self.primary_url,
|
||||
embedding_model
|
||||
);
|
||||
let primary_result = self
|
||||
.try_generate_embeddings(&self.primary_url, embedding_model, texts)
|
||||
.await;
|
||||
|
||||
let embeddings = match primary_result {
|
||||
Ok(embeddings) => {
|
||||
log::debug!(
|
||||
"Successfully generated {} embeddings from primary server",
|
||||
embeddings.len()
|
||||
);
|
||||
embeddings
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Primary server batch embedding failed: {}", e);
|
||||
|
||||
// Try fallback server if available
|
||||
if let Some(fallback_url) = &self.fallback_url {
|
||||
log::info!(
|
||||
"Attempting to generate {} embeddings with fallback server: {} (model: {})",
|
||||
texts.len(),
|
||||
fallback_url,
|
||||
embedding_model
|
||||
);
|
||||
match self
|
||||
.try_generate_embeddings(fallback_url, embedding_model, texts)
|
||||
.await
|
||||
{
|
||||
Ok(embeddings) => {
|
||||
log::info!(
|
||||
"Successfully generated {} embeddings from fallback server",
|
||||
embeddings.len()
|
||||
);
|
||||
embeddings
|
||||
}
|
||||
Err(fallback_e) => {
|
||||
log::error!(
|
||||
"Fallback server batch embedding also failed: {}",
|
||||
fallback_e
|
||||
);
|
||||
return Err(anyhow::anyhow!(
|
||||
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
|
||||
e,
|
||||
fallback_e
|
||||
));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log::error!("No fallback server configured");
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5)
|
||||
for (i, embedding) in embeddings.iter().enumerate() {
|
||||
if embedding.len() != 768 {
|
||||
log::warn!(
|
||||
"Unexpected embedding dimensions for item {}: {} (expected 768)",
|
||||
i,
|
||||
embedding.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(embeddings)
|
||||
}
|
||||
|
||||
/// Internal helper to try generating embeddings for multiple texts from a specific server
|
||||
async fn try_generate_embeddings(
|
||||
&self,
|
||||
url: &str,
|
||||
model: &str,
|
||||
texts: &[&str],
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
let request = OllamaBatchEmbedRequest {
|
||||
model: model.to_string(),
|
||||
input: texts.iter().map(|s| s.to_string()).collect(),
|
||||
};
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(format!("{}/api/embed", url))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_body = response.text().await.unwrap_or_default();
|
||||
return Err(anyhow::anyhow!(
|
||||
"Ollama batch embedding request failed: {} - {}",
|
||||
status,
|
||||
error_body
|
||||
));
|
||||
}
|
||||
|
||||
let result: OllamaEmbedResponse = response.json().await?;
|
||||
Ok(result.embeddings)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaRequest {
|
||||
model: String,
|
||||
prompt: String,
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
system: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
options: Option<OllamaOptions>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
images: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaOptions {
|
||||
num_ctx: i32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaResponse {
|
||||
response: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaTagsResponse {
|
||||
models: Vec<OllamaModel>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaModel {
|
||||
name: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaShowResponse {
|
||||
#[serde(default)]
|
||||
capabilities: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct ModelCapabilities {
|
||||
pub name: String,
|
||||
pub has_vision: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaBatchEmbedRequest {
|
||||
model: String,
|
||||
input: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaEmbedResponse {
|
||||
embeddings: Vec<Vec<f32>>,
|
||||
}
|
||||
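For reference, a minimal usage sketch of the OllamaClient removed above. The URL and model name are placeholders rather than values from this repository's configuration, and the sketch assumes the crate's `image_api::ai` re-export plus `tokio` and `anyhow`, which the binaries in this diff already use.

```rust
use image_api::ai::OllamaClient;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Primary server only; the fallback URL and model are optional and left as None.
    let client = OllamaClient::new(
        "http://localhost:11434".to_string(), // placeholder URL
        None,
        "llama3".to_string(), // placeholder model name
        None,
    );

    // Text generation: tries the primary server, then the fallback if one is configured.
    let title = client
        .generate("Create a short title for a photo from a beach trip.", None)
        .await?;
    println!("title: {}", title);

    // Batch embeddings via /api/embed using nomic-embed-text:v1.5 (768 dimensions each).
    let vectors = client.generate_embeddings(&["first text", "second text"]).await?;
    println!("got {} embeddings", vectors.len());

    Ok(())
}
```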
@@ -1,316 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use reqwest::Client;
|
||||
use serde::Deserialize;
|
||||
|
||||
use super::ollama::OllamaClient;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SmsApiClient {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
token: Option<String>,
|
||||
}
|
||||
|
||||
impl SmsApiClient {
|
||||
pub fn new(base_url: String, token: Option<String>) -> Self {
|
||||
Self {
|
||||
client: Client::new(),
|
||||
base_url,
|
||||
token,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch messages for a specific contact within ±2 days of the given timestamp
|
||||
/// Falls back to all contacts if no messages found for the specific contact
|
||||
/// Messages are sorted by proximity to the center timestamp
|
||||
pub async fn fetch_messages_for_contact(
|
||||
&self,
|
||||
contact: Option<&str>,
|
||||
center_timestamp: i64,
|
||||
) -> Result<Vec<SmsMessage>> {
|
||||
use chrono::Duration;
|
||||
|
||||
// Calculate ±2 days range around the center timestamp
|
||||
let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
|
||||
.ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;
|
||||
|
||||
let start_dt = center_dt - Duration::days(2);
|
||||
let end_dt = center_dt + Duration::days(2);
|
||||
|
||||
let start_ts = start_dt.timestamp();
|
||||
let end_ts = end_dt.timestamp();
|
||||
|
||||
// If contact specified, try fetching for that contact first
|
||||
if let Some(contact_name) = contact {
|
||||
log::info!(
|
||||
"Fetching SMS for contact: {} (±2 days from {})",
|
||||
contact_name,
|
||||
center_dt.format("%Y-%m-%d %H:%M:%S")
|
||||
);
|
||||
let messages = self
|
||||
.fetch_messages(start_ts, end_ts, Some(contact_name), Some(center_timestamp))
|
||||
.await?;
|
||||
|
||||
if !messages.is_empty() {
|
||||
log::info!(
|
||||
"Found {} messages for contact {}",
|
||||
messages.len(),
|
||||
contact_name
|
||||
);
|
||||
return Ok(messages);
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"No messages found for contact {}, falling back to all contacts",
|
||||
contact_name
|
||||
);
|
||||
}
|
||||
|
||||
// Fallback to all contacts
|
||||
log::info!(
|
||||
"Fetching all SMS messages (±1 day from {})",
|
||||
center_dt.format("%Y-%m-%d %H:%M:%S")
|
||||
);
|
||||
self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Fetch all messages for a specific contact across all time
|
||||
/// Used for embedding generation - retrieves complete message history
|
||||
/// Handles pagination automatically if the API returns a limited number of results
|
||||
pub async fn fetch_all_messages_for_contact(&self, contact: &str) -> Result<Vec<SmsMessage>> {
|
||||
let start_ts = chrono::DateTime::parse_from_rfc3339("2000-01-01T00:00:00Z")
|
||||
.unwrap()
|
||||
.timestamp();
|
||||
let end_ts = chrono::Utc::now().timestamp();
|
||||
|
||||
log::info!("Fetching all historical messages for contact: {}", contact);
|
||||
|
||||
let mut all_messages = Vec::new();
|
||||
let mut offset = 0;
|
||||
let limit = 1000; // Fetch in batches of 1000
|
||||
|
||||
loop {
|
||||
log::debug!(
|
||||
"Fetching batch at offset {} for contact {}",
|
||||
offset,
|
||||
contact
|
||||
);
|
||||
|
||||
let batch = self
|
||||
.fetch_messages_paginated(start_ts, end_ts, Some(contact), None, limit, offset)
|
||||
.await?;
|
||||
|
||||
let batch_size = batch.len();
|
||||
all_messages.extend(batch);
|
||||
|
||||
log::debug!(
|
||||
"Fetched {} messages (total so far: {})",
|
||||
batch_size,
|
||||
all_messages.len()
|
||||
);
|
||||
|
||||
// If we got fewer messages than the limit, we've reached the end
|
||||
if batch_size < limit {
|
||||
break;
|
||||
}
|
||||
|
||||
offset += limit;
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"Fetched {} total messages for contact {}",
|
||||
all_messages.len(),
|
||||
contact
|
||||
);
|
||||
|
||||
Ok(all_messages)
|
||||
}
|
||||
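// The loop above assumes the endpoint honours the `limit`/`offset` query
// parameters and returns at most `limit` rows per call; a short batch is the
// signal that the final page has been reached.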
|
||||
/// Internal method to fetch messages with pagination support
|
||||
async fn fetch_messages_paginated(
|
||||
&self,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
contact: Option<&str>,
|
||||
center_timestamp: Option<i64>,
|
||||
limit: usize,
|
||||
offset: usize,
|
||||
) -> Result<Vec<SmsMessage>> {
|
||||
let mut url = format!(
|
||||
"{}/api/messages/by-date-range/?start_date={}&end_date={}&limit={}&offset={}",
|
||||
self.base_url, start_ts, end_ts, limit, offset
|
||||
);
|
||||
|
||||
if let Some(contact_name) = contact {
|
||||
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
|
||||
}
|
||||
|
||||
if let Some(ts) = center_timestamp {
|
||||
url.push_str(&format!("&timestamp={}", ts));
|
||||
}
|
||||
|
||||
log::debug!("Fetching SMS messages from: {}", url);
|
||||
|
||||
let mut request = self.client.get(&url);
|
||||
|
||||
if let Some(token) = &self.token {
|
||||
request = request.header("Authorization", format!("Bearer {}", token));
|
||||
}
|
||||
|
||||
let response = request.send().await?;
|
||||
|
||||
log::debug!("SMS API response status: {}", response.status());
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_body = response.text().await.unwrap_or_default();
|
||||
log::error!("SMS API request failed: {} - {}", status, error_body);
|
||||
return Err(anyhow::anyhow!(
|
||||
"SMS API request failed: {} - {}",
|
||||
status,
|
||||
error_body
|
||||
));
|
||||
}
|
||||
|
||||
let data: SmsApiResponse = response.json().await?;
|
||||
|
||||
Ok(data
|
||||
.messages
|
||||
.into_iter()
|
||||
.map(|m| SmsMessage {
|
||||
contact: m.contact_name,
|
||||
body: m.body,
|
||||
timestamp: m.date,
|
||||
is_sent: m.type_ == 2,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Internal method to fetch messages with optional contact filter and timestamp sorting
|
||||
async fn fetch_messages(
|
||||
&self,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
contact: Option<&str>,
|
||||
center_timestamp: Option<i64>,
|
||||
) -> Result<Vec<SmsMessage>> {
|
||||
// Call Django endpoint
|
||||
let mut url = format!(
|
||||
"{}/api/messages/by-date-range/?start_date={}&end_date={}",
|
||||
self.base_url, start_ts, end_ts
|
||||
);
|
||||
|
||||
// Add contact filter if provided
|
||||
if let Some(contact_name) = contact {
|
||||
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
|
||||
}
|
||||
|
||||
// Add timestamp for proximity sorting if provided
|
||||
if let Some(ts) = center_timestamp {
|
||||
url.push_str(&format!("&timestamp={}", ts));
|
||||
}
|
||||
|
||||
log::debug!("Fetching SMS messages from: {}", url);
|
||||
|
||||
let mut request = self.client.get(&url);
|
||||
|
||||
// Add authorization header if token exists
|
||||
if let Some(token) = &self.token {
|
||||
request = request.header("Authorization", format!("Bearer {}", token));
|
||||
}
|
||||
|
||||
let response = request.send().await?;
|
||||
|
||||
log::debug!("SMS API response status: {}", response.status());
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_body = response.text().await.unwrap_or_default();
|
||||
log::error!("SMS API request failed: {} - {}", status, error_body);
|
||||
return Err(anyhow::anyhow!(
|
||||
"SMS API request failed: {} - {}",
|
||||
status,
|
||||
error_body
|
||||
));
|
||||
}
|
||||
|
||||
let data: SmsApiResponse = response.json().await?;
|
||||
|
||||
// Convert to internal format
|
||||
Ok(data
|
||||
.messages
|
||||
.into_iter()
|
||||
.map(|m| SmsMessage {
|
||||
contact: m.contact_name,
|
||||
body: m.body,
|
||||
timestamp: m.date,
|
||||
is_sent: m.type_ == 2, // type 2 = sent
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub async fn summarize_context(
|
||||
&self,
|
||||
messages: &[SmsMessage],
|
||||
ollama: &OllamaClient,
|
||||
) -> Result<String> {
|
||||
if messages.is_empty() {
|
||||
return Ok(String::from("No messages on this day"));
|
||||
}
|
||||
|
||||
// Create prompt for Ollama with sender/receiver distinction
|
||||
let messages_text: String = messages
|
||||
.iter()
|
||||
.take(60) // Limit to avoid token overflow
|
||||
.map(|m| {
|
||||
if m.is_sent {
|
||||
format!("Me: {}", m.body)
|
||||
} else {
|
||||
format!("{}: {}", m.contact, m.body)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let prompt = format!(
|
||||
r#"Summarize these messages in up to 4-5 sentences. Focus on key topics, places, people mentioned, and the overall context of the conversations.
|
||||
|
||||
Messages:
|
||||
{}
|
||||
|
||||
Summary:"#,
|
||||
messages_text
|
||||
);
|
||||
|
||||
ollama
|
||||
.generate(
|
||||
&prompt,
|
||||
// Some("You are a summarizer for the purposes of jogging my memory and highlighting events and situations."),
|
||||
Some("You are the keeper of memories, ingest the context and give me a casual summary of the moment."),
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SmsMessage {
|
||||
pub contact: String,
|
||||
pub body: String,
|
||||
pub timestamp: i64,
|
||||
pub is_sent: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SmsApiResponse {
|
||||
messages: Vec<SmsApiMessage>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SmsApiMessage {
|
||||
contact_name: String,
|
||||
body: String,
|
||||
date: i64,
|
||||
#[serde(rename = "type")]
|
||||
type_: i32,
|
||||
}
|
||||
144
src/auth.rs
@@ -1,144 +0,0 @@
|
||||
use actix_web::Responder;
|
||||
use actix_web::{
|
||||
HttpResponse,
|
||||
web::{self, Json},
|
||||
};
|
||||
use chrono::{Duration, Utc};
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use log::{error, info};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use crate::{
|
||||
data::{Claims, CreateAccountRequest, LoginRequest, Token, secret_key},
|
||||
database::UserDao,
|
||||
};
|
||||
|
||||
/// Validate password meets security requirements
|
||||
fn validate_password(password: &str) -> Result<(), String> {
|
||||
if password.len() < 12 {
|
||||
return Err("Password must be at least 12 characters".into());
|
||||
}
|
||||
if !password.chars().any(|c| c.is_uppercase()) {
|
||||
return Err("Password must contain at least one uppercase letter".into());
|
||||
}
|
||||
if !password.chars().any(|c| c.is_lowercase()) {
|
||||
return Err("Password must contain at least one lowercase letter".into());
|
||||
}
|
||||
if !password.chars().any(|c| c.is_numeric()) {
|
||||
return Err("Password must contain at least one number".into());
|
||||
}
|
||||
if !password.chars().any(|c| !c.is_alphanumeric()) {
|
||||
return Err("Password must contain at least one special character".into());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
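// Illustrative: "Passw0rd!" fails the 12-character minimum, "correcthorse99!"
// fails the uppercase rule, while "Correct-Horse-99" satisfies all five checks
// (length, uppercase, lowercase, digit, special character).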
|
||||
#[allow(dead_code)]
|
||||
async fn register<D: UserDao>(
|
||||
user: Json<CreateAccountRequest>,
|
||||
user_dao: web::Data<Mutex<D>>,
|
||||
) -> impl Responder {
|
||||
// Validate password strength
|
||||
if let Err(msg) = validate_password(&user.password) {
|
||||
return HttpResponse::BadRequest().body(msg);
|
||||
}
|
||||
|
||||
if !user.username.is_empty() && user.password == user.confirmation {
|
||||
let mut dao = user_dao.lock().expect("Unable to get UserDao");
|
||||
if dao.user_exists(&user.username) {
|
||||
HttpResponse::BadRequest().finish()
|
||||
} else if let Some(_user) = dao.create_user(&user.username, &user.password) {
|
||||
HttpResponse::Ok().finish()
|
||||
} else {
|
||||
HttpResponse::InternalServerError().finish()
|
||||
}
|
||||
} else {
|
||||
HttpResponse::BadRequest().finish()
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn login<D: UserDao>(
|
||||
creds: Json<LoginRequest>,
|
||||
user_dao: web::Data<Mutex<D>>,
|
||||
) -> HttpResponse {
|
||||
info!("Logging in: {}", creds.username);
|
||||
|
||||
let mut user_dao = user_dao.lock().expect("Unable to get UserDao");
|
||||
|
||||
if let Some(user) = user_dao.get_user(&creds.username, &creds.password) {
|
||||
let claims = Claims {
|
||||
sub: user.id.to_string(),
|
||||
exp: (Utc::now() + Duration::days(5)).timestamp(),
|
||||
};
|
||||
let token = match encode(
|
||||
&Header::default(),
|
||||
&claims,
|
||||
&EncodingKey::from_secret(secret_key().as_bytes()),
|
||||
) {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
error!("Failed to encode JWT: {}", e);
|
||||
return HttpResponse::InternalServerError().finish();
|
||||
}
|
||||
};
|
||||
|
||||
HttpResponse::Ok().json(Token { token: &token })
|
||||
} else {
|
||||
error!("Failed login attempt for user: '{}'", creds.username);
|
||||
HttpResponse::NotFound().finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::testhelpers::{BodyReader, TestUserDao};
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_login_reports_200_when_user_exists() {
|
||||
let mut dao = TestUserDao::new();
|
||||
dao.create_user("user", "pass");
|
||||
|
||||
let j = Json(LoginRequest {
|
||||
username: "user".to_string(),
|
||||
password: "pass".to_string(),
|
||||
});
|
||||
|
||||
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
|
||||
|
||||
assert_eq!(response.status(), 200);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_login_returns_token_on_success() {
|
||||
let mut dao = TestUserDao::new();
|
||||
dao.create_user("user", "password");
|
||||
|
||||
let j = Json(LoginRequest {
|
||||
username: "user".to_string(),
|
||||
password: "password".to_string(),
|
||||
});
|
||||
|
||||
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
|
||||
|
||||
assert_eq!(response.status(), 200);
|
||||
let response_text: String = response.read_to_str();
|
||||
|
||||
assert!(response_text.contains("\"token\""));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_login_reports_404_when_user_does_not_exist() {
|
||||
let mut dao = TestUserDao::new();
|
||||
dao.create_user("user", "password");
|
||||
|
||||
let j = Json(LoginRequest {
|
||||
username: "doesnotexist".to_string(),
|
||||
password: "password".to_string(),
|
||||
});
|
||||
|
||||
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
|
||||
|
||||
assert_eq!(response.status(), 404);
|
||||
}
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
use image_api::cleanup::{
|
||||
CleanupConfig, DatabaseUpdater, resolve_missing_files, validate_file_types,
|
||||
};
|
||||
use image_api::database::{SqliteExifDao, SqliteFavoriteDao};
|
||||
use image_api::tags::SqliteTagDao;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "cleanup_files")]
|
||||
#[command(about = "File cleanup and fix utility for ImageApi", long_about = None)]
|
||||
struct Args {
|
||||
#[arg(long, help = "Preview changes without making them")]
|
||||
dry_run: bool,
|
||||
|
||||
#[arg(long, help = "Auto-fix all issues without prompting")]
|
||||
auto_fix: bool,
|
||||
|
||||
#[arg(long, help = "Skip phase 1 (missing file resolution)")]
|
||||
skip_phase1: bool,
|
||||
|
||||
#[arg(long, help = "Skip phase 2 (file type validation)")]
|
||||
skip_phase2: bool,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
// Initialize logging
|
||||
env_logger::init();
|
||||
|
||||
// Load environment variables
|
||||
dotenv::dotenv()?;
|
||||
|
||||
// Parse CLI arguments
|
||||
let args = Args::parse();
|
||||
|
||||
// Get base path from environment
|
||||
let base_path = dotenv::var("BASE_PATH")?;
|
||||
let base = PathBuf::from(&base_path);
|
||||
|
||||
println!("File Cleanup and Fix Utility");
|
||||
println!("============================");
|
||||
println!("Base path: {}", base.display());
|
||||
println!("Dry run: {}", args.dry_run);
|
||||
println!("Auto fix: {}", args.auto_fix);
|
||||
println!();
|
||||
|
||||
// Pre-flight checks
|
||||
if !base.exists() {
|
||||
eprintln!("Error: Base path does not exist: {}", base.display());
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if !base.is_dir() {
|
||||
eprintln!("Error: Base path is not a directory: {}", base.display());
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
// Create configuration
|
||||
let config = CleanupConfig {
|
||||
base_path: base,
|
||||
dry_run: args.dry_run,
|
||||
auto_fix: args.auto_fix,
|
||||
};
|
||||
|
||||
// Create DAOs
|
||||
println!("Connecting to database...");
|
||||
let tag_dao: Arc<Mutex<dyn image_api::tags::TagDao>> =
|
||||
Arc::new(Mutex::new(SqliteTagDao::default()));
|
||||
let exif_dao: Arc<Mutex<dyn image_api::database::ExifDao>> =
|
||||
Arc::new(Mutex::new(SqliteExifDao::new()));
|
||||
let favorites_dao: Arc<Mutex<dyn image_api::database::FavoriteDao>> =
|
||||
Arc::new(Mutex::new(SqliteFavoriteDao::new()));
|
||||
|
||||
// Create database updater
|
||||
let mut db_updater = DatabaseUpdater::new(tag_dao, exif_dao, favorites_dao);
|
||||
|
||||
println!("✓ Database connected\n");
|
||||
|
||||
// Track overall statistics
|
||||
let mut total_issues_found = 0;
|
||||
let mut total_issues_fixed = 0;
|
||||
let mut total_errors = Vec::new();
|
||||
|
||||
// Phase 1: Missing file resolution
|
||||
if !args.skip_phase1 {
|
||||
match resolve_missing_files(&config, &mut db_updater) {
|
||||
Ok(stats) => {
|
||||
total_issues_found += stats.issues_found;
|
||||
total_issues_fixed += stats.issues_fixed;
|
||||
total_errors.extend(stats.errors);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Phase 1 failed: {:?}", e);
|
||||
total_errors.push(format!("Phase 1 error: {}", e));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("Phase 1: Skipped (--skip-phase1)");
|
||||
}
|
||||
|
||||
// Phase 2: File type validation
|
||||
if !args.skip_phase2 {
|
||||
match validate_file_types(&config, &mut db_updater) {
|
||||
Ok(stats) => {
|
||||
total_issues_found += stats.issues_found;
|
||||
total_issues_fixed += stats.issues_fixed;
|
||||
total_errors.extend(stats.errors);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Phase 2 failed: {:?}", e);
|
||||
total_errors.push(format!("Phase 2 error: {}", e));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("\nPhase 2: Skipped (--skip-phase2)");
|
||||
}
|
||||
|
||||
// Final summary
|
||||
println!("\n============================");
|
||||
println!("Cleanup Complete!");
|
||||
println!("============================");
|
||||
println!("Total issues found: {}", total_issues_found);
|
||||
if config.dry_run {
|
||||
println!("Total issues that would be fixed: {}", total_issues_found);
|
||||
} else {
|
||||
println!("Total issues fixed: {}", total_issues_fixed);
|
||||
}
|
||||
|
||||
if !total_errors.is_empty() {
|
||||
println!("\nErrors encountered:");
|
||||
for (i, error) in total_errors.iter().enumerate() {
|
||||
println!(" {}. {}", i + 1, error);
|
||||
}
|
||||
println!("\nSome operations failed. Review errors above.");
|
||||
} else {
|
||||
println!("\n✓ No errors encountered");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,307 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use diesel::prelude::*;
|
||||
use diesel::sql_query;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use std::env;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about = "Diagnose embedding distribution and identify problematic summaries", long_about = None)]
|
||||
struct Args {
|
||||
/// Show detailed per-summary statistics
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
verbose: bool,
|
||||
|
||||
/// Number of top "central" summaries to show (ones that match everything)
|
||||
#[arg(short, long, default_value_t = 10)]
|
||||
top: usize,
|
||||
|
||||
/// Test a specific query to see what matches
|
||||
#[arg(short, long)]
|
||||
query: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(QueryableByName, Debug)]
|
||||
struct EmbeddingRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
date: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
contact: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
summary: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Binary)]
|
||||
embedding: Vec<u8>,
|
||||
}
|
||||
|
||||
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>> {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(anyhow::anyhow!("Invalid embedding byte length"));
|
||||
}
|
||||
|
||||
let count = bytes.len() / 4;
|
||||
let mut vec = Vec::with_capacity(count);
|
||||
|
||||
for chunk in bytes.chunks_exact(4) {
|
||||
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
||||
vec.push(float);
|
||||
}
|
||||
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if magnitude_a == 0.0 || magnitude_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (magnitude_a * magnitude_b)
|
||||
}
|
||||
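// Cosine similarity: cos(a, b) = (a · b) / (|a| * |b|), ranging from -1 to 1.
// e.g. cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]) == 0.0 (orthogonal), and
// cosine_similarity(&[1.0, 2.0], &[2.0, 4.0]) ≈ 1.0 (same direction).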
|
||||
fn main() -> Result<()> {
|
||||
dotenv::dotenv().ok();
|
||||
let args = Args::parse();
|
||||
|
||||
let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string());
|
||||
println!("Connecting to database: {}", database_url);
|
||||
|
||||
let mut conn = SqliteConnection::establish(&database_url)?;
|
||||
|
||||
// Load all embeddings
|
||||
println!("\nLoading embeddings from daily_conversation_summaries...");
|
||||
let rows: Vec<EmbeddingRow> = sql_query(
|
||||
"SELECT id, date, contact, summary, embedding FROM daily_conversation_summaries ORDER BY date"
|
||||
)
|
||||
.load(&mut conn)?;
|
||||
|
||||
println!("Found {} summaries with embeddings\n", rows.len());
|
||||
|
||||
if rows.is_empty() {
|
||||
println!("No summaries found!");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Parse all embeddings
|
||||
let mut embeddings: Vec<(i32, String, String, String, Vec<f32>)> = Vec::new();
|
||||
for row in &rows {
|
||||
match deserialize_embedding(&row.embedding) {
|
||||
Ok(emb) => {
|
||||
embeddings.push((
|
||||
row.id,
|
||||
row.date.clone(),
|
||||
row.contact.clone(),
|
||||
row.summary.clone(),
|
||||
emb,
|
||||
));
|
||||
}
|
||||
Err(e) => {
|
||||
println!(
|
||||
"Warning: Failed to parse embedding for id {}: {}",
|
||||
row.id, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("Successfully parsed {} embeddings\n", embeddings.len());
|
||||
|
||||
// Compute embedding statistics
|
||||
println!("========================================");
|
||||
println!("EMBEDDING STATISTICS");
|
||||
println!("========================================\n");
|
||||
|
||||
// Check embedding variance (are values clustered or spread out?)
|
||||
let first_emb = &embeddings[0].4;
|
||||
let dim = first_emb.len();
|
||||
println!("Embedding dimensions: {}", dim);
|
||||
|
||||
// Calculate mean and std dev per dimension
|
||||
let mut dim_means: Vec<f32> = vec![0.0; dim];
|
||||
let mut dim_vars: Vec<f32> = vec![0.0; dim];
|
||||
|
||||
for (_, _, _, _, emb) in &embeddings {
|
||||
for (i, &val) in emb.iter().enumerate() {
|
||||
dim_means[i] += val;
|
||||
}
|
||||
}
|
||||
for m in &mut dim_means {
|
||||
*m /= embeddings.len() as f32;
|
||||
}
|
||||
|
||||
for (_, _, _, _, emb) in &embeddings {
|
||||
for (i, &val) in emb.iter().enumerate() {
|
||||
let diff = val - dim_means[i];
|
||||
dim_vars[i] += diff * diff;
|
||||
}
|
||||
}
|
||||
for v in &mut dim_vars {
|
||||
*v = (*v / embeddings.len() as f32).sqrt();
|
||||
}
|
||||
|
||||
let avg_std_dev: f32 = dim_vars.iter().sum::<f32>() / dim as f32;
|
||||
let min_std_dev: f32 = dim_vars.iter().cloned().fold(f32::INFINITY, f32::min);
|
||||
let max_std_dev: f32 = dim_vars.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
||||
|
||||
println!("Per-dimension standard deviation:");
|
||||
println!(" Average: {:.6}", avg_std_dev);
|
||||
println!(" Min: {:.6}", min_std_dev);
|
||||
println!(" Max: {:.6}", max_std_dev);
|
||||
println!();
|
||||
|
||||
// Compute pairwise similarities
|
||||
println!("Computing pairwise similarities (this may take a moment)...\n");
|
||||
|
||||
let mut all_similarities: Vec<f32> = Vec::new();
|
||||
let mut per_embedding_avg: Vec<(usize, f32)> = Vec::new();
|
||||
|
||||
for i in 0..embeddings.len() {
|
||||
let mut sum = 0.0;
|
||||
let mut count = 0;
|
||||
for j in 0..embeddings.len() {
|
||||
if i != j {
|
||||
let sim = cosine_similarity(&embeddings[i].4, &embeddings[j].4);
|
||||
all_similarities.push(sim);
|
||||
sum += sim;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
per_embedding_avg.push((i, sum / count as f32));
|
||||
}
|
||||
|
||||
// Sort similarities for percentile analysis
|
||||
all_similarities.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
|
||||
let min_sim = all_similarities.first().copied().unwrap_or(0.0);
|
||||
let max_sim = all_similarities.last().copied().unwrap_or(0.0);
|
||||
let median_sim = all_similarities[all_similarities.len() / 2];
|
||||
let p25 = all_similarities[all_similarities.len() / 4];
|
||||
let p75 = all_similarities[3 * all_similarities.len() / 4];
|
||||
let mean_sim: f32 = all_similarities.iter().sum::<f32>() / all_similarities.len() as f32;
|
||||
|
||||
println!("========================================");
|
||||
println!("PAIRWISE SIMILARITY DISTRIBUTION");
|
||||
println!("========================================\n");
|
||||
println!("Total pairs analyzed: {}", all_similarities.len());
|
||||
println!();
|
||||
println!("Min similarity: {:.4}", min_sim);
|
||||
println!("25th percentile: {:.4}", p25);
|
||||
println!("Median similarity: {:.4}", median_sim);
|
||||
println!("Mean similarity: {:.4}", mean_sim);
|
||||
println!("75th percentile: {:.4}", p75);
|
||||
println!("Max similarity: {:.4}", max_sim);
|
||||
println!();
|
||||
|
||||
// Analyze distribution
|
||||
let count_above_08 = all_similarities.iter().filter(|&&s| s > 0.8).count();
|
||||
let count_above_07 = all_similarities.iter().filter(|&&s| s > 0.7).count();
|
||||
let count_above_06 = all_similarities.iter().filter(|&&s| s > 0.6).count();
|
||||
let count_above_05 = all_similarities.iter().filter(|&&s| s > 0.5).count();
|
||||
let count_below_03 = all_similarities.iter().filter(|&&s| s < 0.3).count();
|
||||
|
||||
println!("Similarity distribution:");
|
||||
println!(
|
||||
" > 0.8: {} ({:.1}%)",
|
||||
count_above_08,
|
||||
100.0 * count_above_08 as f32 / all_similarities.len() as f32
|
||||
);
|
||||
println!(
|
||||
" > 0.7: {} ({:.1}%)",
|
||||
count_above_07,
|
||||
100.0 * count_above_07 as f32 / all_similarities.len() as f32
|
||||
);
|
||||
println!(
|
||||
" > 0.6: {} ({:.1}%)",
|
||||
count_above_06,
|
||||
100.0 * count_above_06 as f32 / all_similarities.len() as f32
|
||||
);
|
||||
println!(
|
||||
" > 0.5: {} ({:.1}%)",
|
||||
count_above_05,
|
||||
100.0 * count_above_05 as f32 / all_similarities.len() as f32
|
||||
);
|
||||
println!(
|
||||
" < 0.3: {} ({:.1}%)",
|
||||
count_below_03,
|
||||
100.0 * count_below_03 as f32 / all_similarities.len() as f32
|
||||
);
|
||||
println!();
|
||||
|
||||
// Identify "central" embeddings (high average similarity to all others)
|
||||
per_embedding_avg.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
|
||||
println!("========================================");
|
||||
println!("TOP {} MOST 'CENTRAL' SUMMARIES", args.top);
|
||||
println!("(These match everything with high similarity)");
|
||||
println!("========================================\n");
|
||||
|
||||
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().take(args.top).enumerate() {
|
||||
let (id, date, contact, summary, _) = &embeddings[*idx];
|
||||
let preview: String = summary.chars().take(80).collect();
|
||||
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
|
||||
println!(" Date: {}, Contact: {}", date, contact);
|
||||
println!(" Preview: {}...", preview.replace('\n', " "));
|
||||
println!();
|
||||
}
|
||||
|
||||
// Also show the least central (most unique)
|
||||
println!("========================================");
|
||||
println!("TOP {} MOST UNIQUE SUMMARIES", args.top);
|
||||
println!("(These are most different from others)");
|
||||
println!("========================================\n");
|
||||
|
||||
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().rev().take(args.top).enumerate() {
|
||||
let (id, date, contact, summary, _) = &embeddings[*idx];
|
||||
let preview: String = summary.chars().take(80).collect();
|
||||
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
|
||||
println!(" Date: {}, Contact: {}", date, contact);
|
||||
println!(" Preview: {}...", preview.replace('\n', " "));
|
||||
println!();
|
||||
}
|
||||
|
||||
// Diagnosis
|
||||
println!("========================================");
|
||||
println!("DIAGNOSIS");
|
||||
println!("========================================\n");
|
||||
|
||||
if mean_sim > 0.7 {
|
||||
println!("⚠️ HIGH AVERAGE SIMILARITY ({:.4})", mean_sim);
|
||||
println!(" All embeddings are very similar to each other.");
|
||||
println!(" This explains why the same summaries always match.");
|
||||
println!();
|
||||
println!(" Possible causes:");
|
||||
println!(
|
||||
" 1. Summaries have similar structure/phrasing (e.g., all start with 'Summary:')"
|
||||
);
|
||||
println!(" 2. Embedding model isn't capturing semantic differences well");
|
||||
println!(" 3. Daily conversations have similar topics (e.g., 'good morning', plans)");
|
||||
println!();
|
||||
println!(" Recommendations:");
|
||||
println!(" 1. Try a different embedding model (mxbai-embed-large, bge-large)");
|
||||
println!(" 2. Improve summary diversity by varying the prompt");
|
||||
println!(" 3. Extract and embed only keywords/entities, not full summaries");
|
||||
} else if mean_sim > 0.5 {
|
||||
println!("⚡ MODERATE AVERAGE SIMILARITY ({:.4})", mean_sim);
|
||||
println!(" Some clustering in embeddings, but some differentiation exists.");
|
||||
println!();
|
||||
println!(" The 'central' summaries above are likely dominating search results.");
|
||||
println!(" Consider:");
|
||||
println!(" 1. Filtering out summaries with very high centrality");
|
||||
println!(" 2. Adding time-based weighting to prefer recent/relevant dates");
|
||||
println!(" 3. Increasing the similarity threshold from 0.3 to 0.5");
|
||||
} else {
|
||||
println!("✅ GOOD EMBEDDING DIVERSITY ({:.4})", mean_sim);
|
||||
println!(" Embeddings are well-differentiated.");
|
||||
println!(" If same results keep appearing, the issue may be elsewhere.");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,166 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use image_api::ai::ollama::OllamaClient;
|
||||
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
|
||||
use image_api::parsers::ical_parser::parse_ics_file;
|
||||
use log::{error, info};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
// Import the trait to use its methods
|
||||
use image_api::database::CalendarEventDao;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about = "Import Google Takeout Calendar data", long_about = None)]
|
||||
struct Args {
|
||||
/// Path to the .ics calendar file
|
||||
#[arg(short, long)]
|
||||
path: String,
|
||||
|
||||
/// Generate embeddings for calendar events (slower but enables semantic search)
|
||||
#[arg(long, default_value = "false")]
|
||||
generate_embeddings: bool,
|
||||
|
||||
/// Skip events that already exist in the database
|
||||
#[arg(long, default_value = "true")]
|
||||
skip_existing: bool,
|
||||
|
||||
/// Batch size for embedding generation
|
||||
#[arg(long, default_value = "128")]
|
||||
batch_size: usize,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv::dotenv().ok();
|
||||
env_logger::init();
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
info!("Parsing calendar file: {}", args.path);
|
||||
let events = parse_ics_file(&args.path).context("Failed to parse .ics file")?;
|
||||
|
||||
info!("Found {} calendar events", events.len());
|
||||
|
||||
let context = opentelemetry::Context::current();
|
||||
|
||||
let ollama = if args.generate_embeddings {
|
||||
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
|
||||
.or_else(|_| dotenv::var("OLLAMA_URL"))
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
||||
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
|
||||
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
|
||||
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
|
||||
|
||||
Some(OllamaClient::new(
|
||||
primary_url,
|
||||
fallback_url,
|
||||
primary_model,
|
||||
fallback_model,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let inserted_count = Arc::new(Mutex::new(0));
|
||||
let skipped_count = Arc::new(Mutex::new(0));
|
||||
let error_count = Arc::new(Mutex::new(0));
|
||||
|
||||
// Process events in batches
|
||||
// Can't use rayon with async, so process sequentially
|
||||
for event in &events {
|
||||
let mut dao_instance = SqliteCalendarEventDao::new();
|
||||
|
||||
// Check if event exists
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) = dao_instance.event_exists(
|
||||
&context,
|
||||
event.event_uid.as_deref().unwrap_or(""),
|
||||
event.start_time,
|
||||
)
|
||||
&& exists
|
||||
{
|
||||
*skipped_count.lock().unwrap() += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate embedding if requested (blocking call)
|
||||
let embedding = if let Some(ref ollama_client) = ollama {
|
||||
let text = format!(
|
||||
"{} {} {}",
|
||||
event.summary,
|
||||
event.description.as_deref().unwrap_or(""),
|
||||
event.location.as_deref().unwrap_or("")
|
||||
);
|
||||
|
||||
match tokio::task::block_in_place(|| {
|
||||
tokio::runtime::Handle::current()
|
||||
.block_on(async { ollama_client.generate_embedding(&text).await })
|
||||
}) {
|
||||
Ok(emb) => Some(emb),
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to generate embedding for event '{}': {}",
|
||||
event.summary, e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Insert into database
|
||||
let insert_event = InsertCalendarEvent {
|
||||
event_uid: event.event_uid.clone(),
|
||||
summary: event.summary.clone(),
|
||||
description: event.description.clone(),
|
||||
location: event.location.clone(),
|
||||
start_time: event.start_time,
|
||||
end_time: event.end_time,
|
||||
all_day: event.all_day,
|
||||
organizer: event.organizer.clone(),
|
||||
attendees: if event.attendees.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(serde_json::to_string(&event.attendees).unwrap_or_default())
|
||||
},
|
||||
embedding,
|
||||
created_at: Utc::now().timestamp(),
|
||||
source_file: Some(args.path.clone()),
|
||||
};
|
||||
|
||||
match dao_instance.store_event(&context, insert_event) {
|
||||
Ok(_) => {
|
||||
*inserted_count.lock().unwrap() += 1;
|
||||
if *inserted_count.lock().unwrap() % 100 == 0 {
|
||||
info!("Imported {} events...", *inserted_count.lock().unwrap());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to store event '{}': {:?}", event.summary, e);
|
||||
*error_count.lock().unwrap() += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_inserted = *inserted_count.lock().unwrap();
|
||||
let final_skipped = *skipped_count.lock().unwrap();
|
||||
let final_errors = *error_count.lock().unwrap();
|
||||
|
||||
info!("\n=== Import Summary ===");
|
||||
info!("Total events found: {}", events.len());
|
||||
info!("Successfully inserted: {}", final_inserted);
|
||||
info!("Skipped (already exist): {}", final_skipped);
|
||||
info!("Errors: {}", final_errors);
|
||||
|
||||
if args.generate_embeddings {
|
||||
info!("Embeddings were generated for semantic search");
|
||||
} else {
|
||||
info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use image_api::database::location_dao::{InsertLocationRecord, SqliteLocationHistoryDao};
|
||||
use image_api::parsers::location_json_parser::parse_location_json;
|
||||
use log::{error, info};
|
||||
// Import the trait to use its methods
|
||||
use image_api::database::LocationHistoryDao;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about = "Import Google Takeout Location History data", long_about = None)]
|
||||
struct Args {
|
||||
/// Path to the Location History JSON file
|
||||
#[arg(short, long)]
|
||||
path: String,
|
||||
|
||||
/// Skip locations that already exist in the database
|
||||
#[arg(long, default_value = "true")]
|
||||
skip_existing: bool,
|
||||
|
||||
/// Batch size for database inserts
|
||||
#[arg(long, default_value = "1000")]
|
||||
batch_size: usize,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv::dotenv().ok();
|
||||
env_logger::init();
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
info!("Parsing location history file: {}", args.path);
|
||||
let locations =
|
||||
parse_location_json(&args.path).context("Failed to parse location history JSON")?;
|
||||
|
||||
info!("Found {} location records", locations.len());
|
||||
|
||||
let context = opentelemetry::Context::current();
|
||||
|
||||
let mut inserted_count = 0;
|
||||
let mut skipped_count = 0;
|
||||
let mut error_count = 0;
|
||||
|
||||
let mut dao_instance = SqliteLocationHistoryDao::new();
|
||||
let created_at = Utc::now().timestamp();
|
||||
|
||||
// Process in batches using batch insert for massive speedup
|
||||
for (batch_idx, chunk) in locations.chunks(args.batch_size).enumerate() {
|
||||
info!(
|
||||
"Processing batch {} ({} records)...",
|
||||
batch_idx + 1,
|
||||
chunk.len()
|
||||
);
|
||||
|
||||
// Convert to InsertLocationRecord
|
||||
let mut batch_inserts = Vec::with_capacity(chunk.len());
|
||||
|
||||
for location in chunk {
|
||||
// Skip existing check if requested (makes import much slower)
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) = dao_instance.location_exists(
|
||||
&context,
|
||||
location.timestamp,
|
||||
location.latitude,
|
||||
location.longitude,
|
||||
)
|
||||
&& exists
|
||||
{
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
batch_inserts.push(InsertLocationRecord {
|
||||
timestamp: location.timestamp,
|
||||
latitude: location.latitude,
|
||||
longitude: location.longitude,
|
||||
accuracy: location.accuracy,
|
||||
activity: location.activity.clone(),
|
||||
activity_confidence: location.activity_confidence,
|
||||
place_name: None,
|
||||
place_category: None,
|
||||
embedding: None,
|
||||
created_at,
|
||||
source_file: Some(args.path.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
// Batch insert entire chunk in single transaction
|
||||
if !batch_inserts.is_empty() {
|
||||
match dao_instance.store_locations_batch(&context, batch_inserts) {
|
||||
Ok(count) => {
|
||||
inserted_count += count;
|
||||
info!(
|
||||
"Imported {} locations (total: {})...",
|
||||
count, inserted_count
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to store batch: {:?}", e);
|
||||
error_count += chunk.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("\n=== Import Summary ===");
|
||||
info!("Total locations found: {}", locations.len());
|
||||
info!("Successfully inserted: {}", inserted_count);
|
||||
info!("Skipped (already exist): {}", skipped_count);
|
||||
info!("Errors: {}", error_count);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,152 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use image_api::ai::ollama::OllamaClient;
|
||||
use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
|
||||
use image_api::parsers::search_html_parser::parse_search_html;
|
||||
use log::{error, info, warn};
|
||||
|
||||
// Import the trait to use its methods
|
||||
use image_api::database::SearchHistoryDao;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about = "Import Google Takeout Search History data", long_about = None)]
|
||||
struct Args {
|
||||
/// Path to the search history HTML file
|
||||
#[arg(short, long)]
|
||||
path: String,
|
||||
|
||||
/// Skip searches that already exist in the database
|
||||
#[arg(long, default_value = "true")]
|
||||
skip_existing: bool,
|
||||
|
||||
/// Batch size for embedding generation (max 128 recommended)
|
||||
#[arg(long, default_value = "64")]
|
||||
batch_size: usize,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv::dotenv().ok();
|
||||
env_logger::init();
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
info!("Parsing search history file: {}", args.path);
|
||||
let searches = parse_search_html(&args.path).context("Failed to parse search history HTML")?;
|
||||
|
||||
info!("Found {} search records", searches.len());
|
||||
|
||||
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
|
||||
.or_else(|_| dotenv::var("OLLAMA_URL"))
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
||||
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
|
||||
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
|
||||
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
|
||||
|
||||
let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
|
||||
let context = opentelemetry::Context::current();
|
||||
|
||||
let mut inserted_count = 0;
|
||||
let mut skipped_count = 0;
|
||||
let mut error_count = 0;
|
||||
|
||||
let mut dao_instance = SqliteSearchHistoryDao::new();
|
||||
let created_at = Utc::now().timestamp();
|
||||
|
||||
// Process searches in batches (embeddings are REQUIRED for searches)
|
||||
for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
|
||||
info!(
|
||||
"Processing batch {} ({} searches)...",
|
||||
batch_idx + 1,
|
||||
chunk.len()
|
||||
);
|
||||
|
||||
// Generate embeddings for this batch
|
||||
let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
|
||||
|
||||
let embeddings_result = tokio::task::spawn({
|
||||
let ollama_client = ollama.clone();
|
||||
async move {
|
||||
// Generate embeddings in parallel for the batch
|
||||
let mut embeddings = Vec::new();
|
||||
for query in &queries {
|
||||
match ollama_client.generate_embedding(query).await {
|
||||
Ok(emb) => embeddings.push(Some(emb)),
|
||||
Err(e) => {
|
||||
warn!("Failed to generate embedding for query '{}': {}", query, e);
|
||||
embeddings.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
embeddings
|
||||
}
|
||||
})
|
||||
.await
|
||||
.context("Failed to generate embeddings for batch")?;
|
||||
|
||||
// Build batch of searches with embeddings
|
||||
let mut batch_inserts = Vec::new();
|
||||
|
||||
for (search, embedding_opt) in chunk.iter().zip(embeddings_result.iter()) {
|
||||
// Check if search exists (optional for speed)
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) =
|
||||
dao_instance.search_exists(&context, search.timestamp, &search.query)
|
||||
&& exists
|
||||
{
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only insert if we have an embedding
|
||||
if let Some(embedding) = embedding_opt {
|
||||
batch_inserts.push(InsertSearchRecord {
|
||||
timestamp: search.timestamp,
|
||||
query: search.query.clone(),
|
||||
search_engine: search.search_engine.clone(),
|
||||
embedding: embedding.clone(),
|
||||
created_at,
|
||||
source_file: Some(args.path.clone()),
|
||||
});
|
||||
} else {
|
||||
error!(
|
||||
"Skipping search '{}' due to missing embedding",
|
||||
search.query
|
||||
);
|
||||
error_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Batch insert entire chunk in single transaction
|
||||
if !batch_inserts.is_empty() {
|
||||
match dao_instance.store_searches_batch(&context, batch_inserts) {
|
||||
Ok(count) => {
|
||||
inserted_count += count;
|
||||
info!("Imported {} searches (total: {})...", count, inserted_count);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to store batch: {:?}", e);
|
||||
error_count += chunk.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting between batches
|
||||
if batch_idx < searches.len() / args.batch_size {
|
||||
info!("Waiting 500ms before next batch...");
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
}
|
||||
}
|
||||
|
||||
info!("\n=== Import Summary ===");
|
||||
info!("Total searches found: {}", searches.len());
|
||||
info!("Successfully inserted: {}", inserted_count);
|
||||
info!("Skipped (already exist): {}", skipped_count);
|
||||
info!("Errors: {}", error_count);
|
||||
info!("All imported searches have embeddings for semantic search");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,195 +0,0 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use rayon::prelude::*;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use image_api::database::models::InsertImageExif;
|
||||
use image_api::database::{ExifDao, SqliteExifDao};
|
||||
use image_api::exif;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "migrate_exif")]
|
||||
#[command(about = "Extract and store EXIF data from images", long_about = None)]
|
||||
struct Args {
|
||||
#[arg(long, help = "Skip files that already have EXIF data in database")]
|
||||
skip_existing: bool,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
env_logger::init();
|
||||
dotenv::dotenv()?;
|
||||
|
||||
let args = Args::parse();
|
||||
let base_path = dotenv::var("BASE_PATH")?;
|
||||
let base = PathBuf::from(&base_path);
|
||||
|
||||
println!("EXIF Migration Tool");
|
||||
println!("===================");
|
||||
println!("Base path: {}", base.display());
|
||||
if args.skip_existing {
|
||||
println!("Mode: Skip existing (incremental)");
|
||||
} else {
|
||||
println!("Mode: Upsert (insert new, update existing)");
|
||||
}
|
||||
println!();
|
||||
|
||||
// Collect all image files that support EXIF
|
||||
println!("Scanning for images...");
|
||||
let image_files: Vec<PathBuf> = WalkDir::new(&base)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
.filter(|e| exif::supports_exif(e.path()))
|
||||
.map(|e| e.path().to_path_buf())
|
||||
.collect();
|
||||
|
||||
println!("Found {} images to process", image_files.len());
|
||||
|
||||
if image_files.is_empty() {
|
||||
println!("No EXIF-supporting images found. Exiting.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!();
|
||||
println!("Extracting EXIF data...");
|
||||
|
||||
// Create a thread-safe DAO
|
||||
let dao = Arc::new(Mutex::new(SqliteExifDao::new()));
|
||||
|
||||
// Process in parallel using rayon
|
||||
let results: Vec<_> = image_files
|
||||
.par_iter()
|
||||
.map(|path| {
|
||||
// Create context for this processing iteration
|
||||
let context = opentelemetry::Context::new();
|
||||
|
||||
let relative_path = match path.strip_prefix(&base) {
|
||||
Ok(p) => p.to_str().unwrap().to_string(),
|
||||
Err(_) => {
|
||||
eprintln!(
|
||||
"Error: Could not create relative path for {}",
|
||||
path.display()
|
||||
);
|
||||
return Err(anyhow::anyhow!("Path error"));
|
||||
}
|
||||
};
|
||||
|
||||
// Check if EXIF data already exists
|
||||
let existing = if let Ok(mut dao_lock) = dao.lock() {
|
||||
dao_lock.get_exif(&context, &relative_path).ok().flatten()
|
||||
} else {
|
||||
eprintln!("✗ {} - Failed to acquire database lock", relative_path);
|
||||
return Err(anyhow::anyhow!("Lock error"));
|
||||
};
|
||||
|
||||
// Skip if exists and skip_existing flag is set
|
||||
if args.skip_existing && existing.is_some() {
|
||||
return Ok(("skip".to_string(), relative_path));
|
||||
}
|
||||
|
||||
match exif::extract_exif_from_path(path) {
|
||||
Ok(exif_data) => {
|
||||
let timestamp = Utc::now().timestamp();
|
||||
let insert_exif = InsertImageExif {
|
||||
file_path: relative_path.clone(),
|
||||
camera_make: exif_data.camera_make,
|
||||
camera_model: exif_data.camera_model,
|
||||
lens_model: exif_data.lens_model,
|
||||
width: exif_data.width,
|
||||
height: exif_data.height,
|
||||
orientation: exif_data.orientation,
|
||||
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
|
||||
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
|
||||
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
|
||||
focal_length: exif_data.focal_length.map(|v| v as f32),
|
||||
aperture: exif_data.aperture.map(|v| v as f32),
|
||||
shutter_speed: exif_data.shutter_speed,
|
||||
iso: exif_data.iso,
|
||||
date_taken: exif_data.date_taken,
|
||||
created_time: existing
|
||||
.as_ref()
|
||||
.map(|e| e.created_time)
|
||||
.unwrap_or(timestamp),
|
||||
last_modified: timestamp,
|
||||
};
|
||||
|
||||
// Store or update in database
|
||||
if let Ok(mut dao_lock) = dao.lock() {
|
||||
let result = if existing.is_some() {
|
||||
// Update existing record
|
||||
dao_lock
|
||||
.update_exif(&context, insert_exif)
|
||||
.map(|_| "update")
|
||||
} else {
|
||||
// Insert new record
|
||||
dao_lock.store_exif(&context, insert_exif).map(|_| "insert")
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(action) => {
|
||||
if action == "update" {
|
||||
println!("↻ {} (updated)", relative_path);
|
||||
} else {
|
||||
println!("✓ {} (inserted)", relative_path);
|
||||
}
|
||||
Ok((action.to_string(), relative_path))
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("✗ {} - Database error: {:?}", relative_path, e);
|
||||
Err(anyhow::anyhow!("Database error"))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("✗ {} - Failed to acquire database lock", relative_path);
|
||||
Err(anyhow::anyhow!("Lock error"))
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("✗ {} - No EXIF data: {:?}", relative_path, e);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Count results
|
||||
let mut success_count = 0;
|
||||
let mut inserted_count = 0;
|
||||
let mut updated_count = 0;
|
||||
let mut skipped_count = 0;
|
||||
|
||||
for (action, _) in results.iter().flatten() {
|
||||
success_count += 1;
|
||||
match action.as_str() {
|
||||
"insert" => inserted_count += 1,
|
||||
"update" => updated_count += 1,
|
||||
"skip" => skipped_count += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let error_count = results.len() - success_count - skipped_count;
|
||||
|
||||
println!();
|
||||
println!("===================");
|
||||
println!("Migration complete!");
|
||||
println!("Total images processed: {}", image_files.len());
|
||||
|
||||
if inserted_count > 0 {
|
||||
println!(" New EXIF records inserted: {}", inserted_count);
|
||||
}
|
||||
if updated_count > 0 {
|
||||
println!(" Existing records updated: {}", updated_count);
|
||||
}
|
||||
if skipped_count > 0 {
|
||||
println!(" Skipped (already exists): {}", skipped_count);
|
||||
}
|
||||
if error_count > 0 {
|
||||
println!(" Errors (no EXIF data or failures): {}", error_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,288 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDate;
|
||||
use clap::Parser;
|
||||
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
|
||||
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||
use std::env;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about = "Test daily summary generation with different models and prompts", long_about = None)]
|
||||
struct Args {
|
||||
/// Contact name to generate summaries for
|
||||
#[arg(short, long)]
|
||||
contact: String,
|
||||
|
||||
/// Start date (YYYY-MM-DD)
|
||||
#[arg(short, long)]
|
||||
start: String,
|
||||
|
||||
/// End date (YYYY-MM-DD)
|
||||
#[arg(short, long)]
|
||||
end: String,
|
||||
|
||||
/// Optional: Override the model to use (e.g., "qwen2.5:32b", "llama3.1:30b")
|
||||
#[arg(short, long)]
|
||||
model: Option<String>,
|
||||
|
||||
/// Test mode: Generate but don't save to database (shows output only)
|
||||
#[arg(short = 't', long, default_value_t = false)]
|
||||
test_mode: bool,
|
||||
|
||||
/// Show message count and preview
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Load .env file
|
||||
dotenv::dotenv().ok();
|
||||
|
||||
// Initialize logging
|
||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
// Parse dates
|
||||
let start_date = NaiveDate::parse_from_str(&args.start, "%Y-%m-%d")
|
||||
.expect("Invalid start date format. Use YYYY-MM-DD");
|
||||
let end_date = NaiveDate::parse_from_str(&args.end, "%Y-%m-%d")
|
||||
.expect("Invalid end date format. Use YYYY-MM-DD");
|
||||
|
||||
println!("========================================");
|
||||
println!("Daily Summary Generation Test Tool");
|
||||
println!("========================================");
|
||||
println!("Contact: {}", args.contact);
|
||||
println!("Date range: {} to {}", start_date, end_date);
|
||||
println!("Days: {}", (end_date - start_date).num_days() + 1);
|
||||
if let Some(ref model) = args.model {
|
||||
println!("Model: {}", model);
|
||||
} else {
|
||||
println!(
|
||||
"Model: {} (from env)",
|
||||
env::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| env::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
|
||||
);
|
||||
}
|
||||
if args.test_mode {
|
||||
println!("⚠ TEST MODE: Results will NOT be saved to database");
|
||||
}
|
||||
println!("========================================");
|
||||
println!();
|
||||
|
||||
// Initialize AI clients
|
||||
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL")
|
||||
.or_else(|_| env::var("OLLAMA_URL"))
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string());
|
||||
|
||||
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
|
||||
|
||||
// Use provided model or fallback to env
|
||||
let model_to_use = args.model.clone().unwrap_or_else(|| {
|
||||
env::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| env::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
|
||||
});
|
||||
|
||||
let ollama = OllamaClient::new(
|
||||
ollama_primary_url,
|
||||
ollama_fallback_url.clone(),
|
||||
model_to_use.clone(),
|
||||
Some(model_to_use), // Use same model for fallback
|
||||
);
|
||||
|
||||
let sms_api_url =
|
||||
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
|
||||
let sms_api_token = env::var("SMS_API_TOKEN").ok();
|
||||
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
|
||||
|
||||
// Initialize DAO
|
||||
let summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
|
||||
|
||||
// Fetch messages for contact
|
||||
println!("Fetching messages for {}...", args.contact);
|
||||
let all_messages = sms_client
|
||||
.fetch_all_messages_for_contact(&args.contact)
|
||||
.await?;
|
||||
|
||||
println!(
|
||||
"Found {} total messages for {}",
|
||||
all_messages.len(),
|
||||
args.contact
|
||||
);
|
||||
println!();
|
||||
|
||||
// Filter to date range and group by date
|
||||
let mut messages_by_date = std::collections::HashMap::new();
|
||||
|
||||
for msg in all_messages {
|
||||
if let Some(dt) = chrono::DateTime::from_timestamp(msg.timestamp, 0) {
|
||||
let date = dt.date_naive();
|
||||
if date >= start_date && date <= end_date {
|
||||
messages_by_date
|
||||
.entry(date)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if messages_by_date.is_empty() {
|
||||
println!("⚠ No messages found in date range");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Found {} days with messages", messages_by_date.len());
|
||||
println!();
|
||||
|
||||
// Sort dates
|
||||
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
|
||||
dates.sort();
|
||||
|
||||
// Process each day
|
||||
for (idx, date) in dates.iter().enumerate() {
|
||||
let messages = messages_by_date.get(date).unwrap();
|
||||
let date_str = date.format("%Y-%m-%d").to_string();
|
||||
let weekday = date.format("%A");
|
||||
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!(
|
||||
"Day {}/{}: {} ({}) - {} messages",
|
||||
idx + 1,
|
||||
dates.len(),
|
||||
date_str,
|
||||
weekday,
|
||||
messages.len()
|
||||
);
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
|
||||
if args.verbose {
|
||||
println!("\nMessage preview:");
|
||||
for (i, msg) in messages.iter().take(3).enumerate() {
|
||||
let sender = if msg.is_sent { "Me" } else { &msg.contact };
|
||||
let preview = msg.body.chars().take(60).collect::<String>();
|
||||
println!(" {}. {}: {}...", i + 1, sender, preview);
|
||||
}
|
||||
if messages.len() > 3 {
|
||||
println!(" ... and {} more", messages.len() - 3);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
// Format messages for LLM
|
||||
let messages_text: String = messages
|
||||
.iter()
|
||||
.take(200)
|
||||
.map(|m| {
|
||||
if m.is_sent {
|
||||
format!("Me: {}", m.body)
|
||||
} else {
|
||||
format!("{}: {}", m.contact, m.body)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let prompt = format!(
|
||||
r#"Summarize this day's conversation between me and {}.
|
||||
|
||||
CRITICAL FORMAT RULES:
|
||||
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
|
||||
- Do NOT repeat the date at the beginning
|
||||
- Start DIRECTLY with the content - begin with a person's name or action
|
||||
- Write in past tense, as if recording what happened
|
||||
|
||||
NARRATIVE (3-5 sentences):
|
||||
- What specific topics, activities, or events were discussed?
|
||||
- What places, people, or organizations were mentioned?
|
||||
- What plans were made or decisions discussed?
|
||||
- Clearly distinguish between what "I" did versus what {} did
|
||||
|
||||
KEYWORDS (comma-separated):
|
||||
5-10 specific keywords that capture this conversation's unique content:
|
||||
- Proper nouns (people, places, brands)
|
||||
- Specific activities ("drum corps audition" not just "music")
|
||||
- Distinctive terms that make this day unique
|
||||
|
||||
Date: {} ({})
|
||||
Messages:
|
||||
{}
|
||||
|
||||
YOUR RESPONSE (follow this format EXACTLY):
|
||||
Summary: [Start directly with content, NO preamble]
|
||||
|
||||
Keywords: [specific, unique terms]"#,
|
||||
args.contact,
|
||||
args.contact,
|
||||
date.format("%B %d, %Y"),
|
||||
weekday,
|
||||
messages_text
|
||||
);
|
||||
|
||||
println!("Generating summary...");
|
||||
|
||||
let summary = ollama
|
||||
.generate(
|
||||
&prompt,
|
||||
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
|
||||
)
|
||||
.await?;
|
||||
|
||||
println!("\n📝 GENERATED SUMMARY:");
|
||||
println!("─────────────────────────────────────────");
|
||||
println!("{}", summary.trim());
|
||||
println!("─────────────────────────────────────────");
|
||||
|
||||
if !args.test_mode {
|
||||
println!("\nStripping boilerplate for embedding...");
|
||||
let stripped = strip_summary_boilerplate(&summary);
|
||||
println!(
|
||||
"Stripped: {}...",
|
||||
stripped.chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
println!("\nGenerating embedding...");
|
||||
let embedding = ollama.generate_embedding(&stripped).await?;
|
||||
println!("✓ Embedding generated ({} dimensions)", embedding.len());
|
||||
|
||||
println!("Saving to database...");
|
||||
let insert = InsertDailySummary {
|
||||
date: date_str.clone(),
|
||||
contact: args.contact.clone(),
|
||||
summary: summary.trim().to_string(),
|
||||
message_count: messages.len() as i32,
|
||||
embedding,
|
||||
created_at: chrono::Utc::now().timestamp(),
|
||||
// model_version: "nomic-embed-text:v1.5".to_string(),
|
||||
model_version: "mxbai-embed-large:335m".to_string(),
|
||||
};
|
||||
|
||||
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
|
||||
let context = opentelemetry::Context::new();
|
||||
|
||||
match dao.store_summary(&context, insert) {
|
||||
Ok(_) => println!("✓ Saved to database"),
|
||||
Err(e) => println!("✗ Database error: {:?}", e),
|
||||
}
|
||||
} else {
|
||||
println!("\n⚠ TEST MODE: Not saved to database");
|
||||
}
|
||||
|
||||
println!();
|
||||
|
||||
// Rate limiting between days
|
||||
if idx < dates.len() - 1 {
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
}
|
||||
}
|
||||
|
||||
println!("========================================");
|
||||
println!("✓ Complete!");
|
||||
println!("Processed {} days", dates.len());
|
||||
println!("========================================");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,154 +0,0 @@
|
||||
use crate::database::{ExifDao, FavoriteDao};
|
||||
use crate::tags::TagDao;
|
||||
use anyhow::Result;
|
||||
use log::{error, info};
|
||||
use opentelemetry;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
pub struct DatabaseUpdater {
|
||||
tag_dao: Arc<Mutex<dyn TagDao>>,
|
||||
exif_dao: Arc<Mutex<dyn ExifDao>>,
|
||||
favorites_dao: Arc<Mutex<dyn FavoriteDao>>,
|
||||
}
|
||||
|
||||
impl DatabaseUpdater {
|
||||
pub fn new(
|
||||
tag_dao: Arc<Mutex<dyn TagDao>>,
|
||||
exif_dao: Arc<Mutex<dyn ExifDao>>,
|
||||
favorites_dao: Arc<Mutex<dyn FavoriteDao>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
tag_dao,
|
||||
exif_dao,
|
||||
favorites_dao,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update file path across all three database tables
|
||||
/// Returns Ok(()) if successful, continues on partial failures but logs errors
|
||||
pub fn update_file_path(&mut self, old_path: &str, new_path: &str) -> Result<()> {
|
||||
let context = opentelemetry::Context::current();
|
||||
let mut success_count = 0;
|
||||
let mut error_count = 0;
|
||||
|
||||
// Update tagged_photo table
|
||||
if let Ok(mut dao) = self.tag_dao.lock() {
|
||||
match dao.update_photo_name(old_path, new_path, &context) {
|
||||
Ok(_) => {
|
||||
info!("Updated tagged_photo: {} -> {}", old_path, new_path);
|
||||
success_count += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to update tagged_photo for {}: {:?}", old_path, e);
|
||||
error_count += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!("Failed to acquire lock on TagDao");
|
||||
error_count += 1;
|
||||
}
|
||||
|
||||
// Update image_exif table
|
||||
if let Ok(mut dao) = self.exif_dao.lock() {
|
||||
match dao.update_file_path(&context, old_path, new_path) {
|
||||
Ok(_) => {
|
||||
info!("Updated image_exif: {} -> {}", old_path, new_path);
|
||||
success_count += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to update image_exif for {}: {:?}", old_path, e);
|
||||
error_count += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!("Failed to acquire lock on ExifDao");
|
||||
error_count += 1;
|
||||
}
|
||||
|
||||
// Update favorites table
|
||||
if let Ok(mut dao) = self.favorites_dao.lock() {
|
||||
match dao.update_path(old_path, new_path) {
|
||||
Ok(_) => {
|
||||
info!("Updated favorites: {} -> {}", old_path, new_path);
|
||||
success_count += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to update favorites for {}: {:?}", old_path, e);
|
||||
error_count += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!("Failed to acquire lock on FavoriteDao");
|
||||
error_count += 1;
|
||||
}
|
||||
|
||||
if success_count > 0 {
|
||||
info!(
|
||||
"Updated {}/{} tables for {} -> {}",
|
||||
success_count,
|
||||
success_count + error_count,
|
||||
old_path,
|
||||
new_path
|
||||
);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!(
|
||||
"Failed to update any tables for {} -> {}",
|
||||
old_path,
|
||||
new_path
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all file paths from all three database tables
|
||||
pub fn get_all_file_paths(&mut self) -> Result<Vec<String>> {
|
||||
let context = opentelemetry::Context::current();
|
||||
let mut all_paths = Vec::new();
|
||||
|
||||
// Get from tagged_photo
|
||||
if let Ok(mut dao) = self.tag_dao.lock() {
|
||||
match dao.get_all_photo_names(&context) {
|
||||
Ok(paths) => {
|
||||
info!("Found {} paths in tagged_photo", paths.len());
|
||||
all_paths.extend(paths);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to get paths from tagged_photo: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get from image_exif
|
||||
if let Ok(mut dao) = self.exif_dao.lock() {
|
||||
match dao.get_all_file_paths(&context) {
|
||||
Ok(paths) => {
|
||||
info!("Found {} paths in image_exif", paths.len());
|
||||
all_paths.extend(paths);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to get paths from image_exif: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get from favorites
|
||||
if let Ok(mut dao) = self.favorites_dao.lock() {
|
||||
match dao.get_all_paths() {
|
||||
Ok(paths) => {
|
||||
info!("Found {} paths in favorites", paths.len());
|
||||
all_paths.extend(paths);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to get paths from favorites: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate
|
||||
all_paths.sort();
|
||||
all_paths.dedup();
|
||||
|
||||
info!("Total unique paths across all tables: {}", all_paths.len());
|
||||
Ok(all_paths)
|
||||
}
|
||||
}
|
||||
@@ -1,103 +0,0 @@
use anyhow::{Context, Result};
use std::fs::File;
use std::io::Read;
use std::path::Path;

/// Detect the actual file type by reading the magic number (file header)
/// Returns the canonical extension for the detected type, or None if unknown
pub fn detect_file_type(path: &Path) -> Result<Option<String>> {
    let mut file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?;

    // Read first 512 bytes for magic number detection
    let mut buffer = vec![0; 512];
    let bytes_read = file
        .read(&mut buffer)
        .with_context(|| format!("Failed to read file: {:?}", path))?;
    buffer.truncate(bytes_read);

    // Detect type using infer crate
    let detected_type = infer::get(&buffer);

    Ok(detected_type.map(|t| get_canonical_extension(t.mime_type())))
}

/// Map MIME type to canonical file extension
pub fn get_canonical_extension(mime_type: &str) -> String {
    match mime_type {
        // Images
        "image/jpeg" => "jpg",
        "image/png" => "png",
        "image/webp" => "webp",
        "image/tiff" => "tiff",
        "image/heif" | "image/heic" => "heic",
        "image/avif" => "avif",

        // Videos
        "video/mp4" => "mp4",
        "video/quicktime" => "mov",

        // Fallback: use the last part of MIME type
        _ => mime_type.split('/').next_back().unwrap_or("unknown"),
    }
    .to_string()
}

/// Check if a file should be renamed based on current vs detected extension
/// Handles aliases (jpg/jpeg are equivalent)
pub fn should_rename(current_ext: &str, detected_ext: &str) -> bool {
    let current = current_ext.to_lowercase();
    let detected = detected_ext.to_lowercase();

    // Direct match
    if current == detected {
        return false;
    }

    // Handle JPEG aliases (jpg and jpeg are equivalent)
    if (current == "jpg" || current == "jpeg") && (detected == "jpg" || detected == "jpeg") {
        return false;
    }

    // Handle TIFF aliases (tiff and tif are equivalent)
    if (current == "tiff" || current == "tif") && (detected == "tiff" || detected == "tif") {
        return false;
    }

    // Extensions differ and are not aliases
    true
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_canonical_extension() {
        assert_eq!(get_canonical_extension("image/jpeg"), "jpg");
        assert_eq!(get_canonical_extension("image/png"), "png");
        assert_eq!(get_canonical_extension("image/webp"), "webp");
        assert_eq!(get_canonical_extension("video/mp4"), "mp4");
        assert_eq!(get_canonical_extension("video/quicktime"), "mov");
    }

    #[test]
    fn test_should_rename() {
        // Same extension - no rename
        assert!(!should_rename("jpg", "jpg"));
        assert!(!should_rename("png", "png"));

        // JPEG aliases - no rename
        assert!(!should_rename("jpg", "jpeg"));
        assert!(!should_rename("jpeg", "jpg"));
        assert!(!should_rename("JPG", "jpeg"));

        // TIFF aliases - no rename
        assert!(!should_rename("tiff", "tif"));
        assert!(!should_rename("tif", "tiff"));

        // Different types - should rename
        assert!(should_rename("png", "jpg"));
        assert!(should_rename("jpg", "png"));
        assert!(should_rename("webp", "png"));
    }
}
@@ -1,11 +0,0 @@
pub mod database_updater;
pub mod file_type_detector;
pub mod phase1;
pub mod phase2;
pub mod types;

pub use database_updater::DatabaseUpdater;
pub use file_type_detector::{detect_file_type, get_canonical_extension, should_rename};
pub use phase1::resolve_missing_files;
pub use phase2::validate_file_types;
pub use types::{CleanupConfig, CleanupStats, FileIssue, IssueType};
@@ -1,147 +0,0 @@
use crate::cleanup::database_updater::DatabaseUpdater;
use crate::cleanup::types::{CleanupConfig, CleanupStats};
use crate::file_types::IMAGE_EXTENSIONS;
use anyhow::Result;
use log::{error, warn};
use std::path::PathBuf;

// All supported image extensions to try
const SUPPORTED_EXTENSIONS: &[&str] = IMAGE_EXTENSIONS;

/// Phase 1: Resolve missing files by searching for alternative extensions
pub fn resolve_missing_files(
    config: &CleanupConfig,
    db_updater: &mut DatabaseUpdater,
) -> Result<CleanupStats> {
    let mut stats = CleanupStats::new();

    println!("\nPhase 1: Missing File Resolution");
    println!("---------------------------------");

    // Get all file paths from database
    println!("Scanning database for file references...");
    let all_paths = db_updater.get_all_file_paths()?;
    println!("Found {} unique file paths\n", all_paths.len());

    stats.files_checked = all_paths.len();

    println!("Checking file existence...");
    let mut missing_count = 0;
    let mut resolved_count = 0;

    for path_str in all_paths {
        let full_path = config.base_path.join(&path_str);

        // Check if file exists
        if full_path.exists() {
            continue;
        }

        missing_count += 1;
        stats.issues_found += 1;

        // Try to find the file with different extensions
        match find_file_with_alternative_extension(&config.base_path, &path_str) {
            Some(new_path_str) => {
                println!(
                    "✓ {} → found as {} {}",
                    path_str,
                    new_path_str,
                    if config.dry_run {
                        "(dry-run, not updated)"
                    } else {
                        ""
                    }
                );

                if !config.dry_run {
                    // Update database
                    match db_updater.update_file_path(&path_str, &new_path_str) {
                        Ok(_) => {
                            resolved_count += 1;
                            stats.issues_fixed += 1;
                        }
                        Err(e) => {
                            error!("Failed to update database for {}: {:?}", path_str, e);
                            stats.add_error(format!("DB update failed for {}: {}", path_str, e));
                        }
                    }
                } else {
                    resolved_count += 1;
                }
            }
            None => {
                warn!("✗ {} → not found with any extension", path_str);
            }
        }
    }

    println!("\nResults:");
    println!("- Files checked: {}", stats.files_checked);
    println!("- Missing files: {}", missing_count);
    println!("- Resolved: {}", resolved_count);
    println!(
        "- Still missing: {}",
        missing_count - if config.dry_run { 0 } else { resolved_count }
    );

    if !stats.errors.is_empty() {
        println!("- Errors: {}", stats.errors.len());
    }

    Ok(stats)
}

/// Find a file with an alternative extension
/// Returns the relative path with the new extension if found
fn find_file_with_alternative_extension(
    base_path: &PathBuf,
    relative_path: &str,
) -> Option<String> {
    let full_path = base_path.join(relative_path);

    // Get the parent directory and file stem (name without extension)
    let parent = full_path.parent()?;
    let stem = full_path.file_stem()?.to_str()?;

    // Try each supported extension
    for ext in SUPPORTED_EXTENSIONS {
        let test_path = parent.join(format!("{}.{}", stem, ext));
        if test_path.exists() {
            // Convert back to relative path
            if let Ok(rel) = test_path.strip_prefix(base_path)
                && let Some(rel_str) = rel.to_str()
            {
                return Some(rel_str.to_string());
            }
        }
    }

    None
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn test_find_file_with_alternative_extension() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path().to_path_buf();

        // Create a test file with .jpeg extension
        let test_file = base_path.join("test.jpeg");
        fs::write(&test_file, b"test").unwrap();

        // Try to find it as .jpg
        let result = find_file_with_alternative_extension(&base_path, "test.jpg");
        assert!(result.is_some());
        assert_eq!(result.unwrap(), "test.jpeg");

        // Try to find non-existent file
        let result = find_file_with_alternative_extension(&base_path, "nonexistent.jpg");
        assert!(result.is_none());
    }
}
@@ -1,241 +0,0 @@
use crate::cleanup::database_updater::DatabaseUpdater;
use crate::cleanup::file_type_detector::{detect_file_type, should_rename};
use crate::cleanup::types::{CleanupConfig, CleanupStats};
use anyhow::Result;
use log::{error, warn};
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

/// Phase 2: Validate file types and rename mismatches
pub fn validate_file_types(
    config: &CleanupConfig,
    db_updater: &mut DatabaseUpdater,
) -> Result<CleanupStats> {
    let mut stats = CleanupStats::new();
    let mut auto_fix_all = config.auto_fix;
    let mut skip_all = false;

    println!("\nPhase 2: File Type Validation");
    println!("------------------------------");

    // Walk the filesystem
    println!("Scanning filesystem...");
    let files: Vec<PathBuf> = WalkDir::new(&config.base_path)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
        .filter(|e| is_supported_media_file(e.path()))
        .map(|e| e.path().to_path_buf())
        .collect();

    println!("Files found: {}\n", files.len());
    stats.files_checked = files.len();

    println!("Detecting file types...");
    let mut mismatches_found = 0;
    let mut files_renamed = 0;
    let mut user_skipped = 0;

    for file_path in files {
        // Get current extension
        let current_ext = match file_path.extension() {
            Some(ext) => ext.to_str().unwrap_or(""),
            None => continue, // Skip files without extensions
        };

        // Detect actual file type
        match detect_file_type(&file_path) {
            Ok(Some(detected_ext)) => {
                // Check if we should rename
                if should_rename(current_ext, &detected_ext) {
                    mismatches_found += 1;
                    stats.issues_found += 1;

                    // Get relative path for display and database
                    let relative_path = match file_path.strip_prefix(&config.base_path) {
                        Ok(rel) => rel.to_str().unwrap_or(""),
                        Err(_) => {
                            error!("Failed to get relative path for {:?}", file_path);
                            continue;
                        }
                    };

                    println!("\nFile type mismatch:");
                    println!("  Path: {}", relative_path);
                    println!("  Current: .{}", current_ext);
                    println!("  Actual: .{}", detected_ext);

                    // Calculate new path
                    let new_file_path = file_path.with_extension(&detected_ext);
                    let new_relative_path = match new_file_path.strip_prefix(&config.base_path) {
                        Ok(rel) => rel.to_str().unwrap_or(""),
                        Err(_) => {
                            error!("Failed to get new relative path for {:?}", new_file_path);
                            continue;
                        }
                    };

                    // Check if destination already exists
                    if new_file_path.exists() {
                        warn!("✗ Destination already exists: {}", new_relative_path);
                        stats.add_error(format!(
                            "Destination exists for {}: {}",
                            relative_path, new_relative_path
                        ));
                        continue;
                    }

                    // Determine if we should proceed
                    let should_proceed = if config.dry_run {
                        println!("  (dry-run mode - would rename to {})", new_relative_path);
                        false
                    } else if skip_all {
                        println!("  Skipped (skip all)");
                        user_skipped += 1;
                        false
                    } else if auto_fix_all {
                        true
                    } else {
                        // Interactive prompt
                        match prompt_for_rename(new_relative_path) {
                            RenameDecision::Yes => true,
                            RenameDecision::No => {
                                user_skipped += 1;
                                false
                            }
                            RenameDecision::All => {
                                auto_fix_all = true;
                                true
                            }
                            RenameDecision::SkipAll => {
                                skip_all = true;
                                user_skipped += 1;
                                false
                            }
                        }
                    };

                    if should_proceed {
                        // Rename the file
                        match fs::rename(&file_path, &new_file_path) {
                            Ok(_) => {
                                println!("✓ Renamed file");

                                // Update database
                                match db_updater.update_file_path(relative_path, new_relative_path)
                                {
                                    Ok(_) => {
                                        files_renamed += 1;
                                        stats.issues_fixed += 1;
                                    }
                                    Err(e) => {
                                        error!(
                                            "File renamed but DB update failed for {}: {:?}",
                                            relative_path, e
                                        );
                                        stats.add_error(format!(
                                            "DB update failed for {}: {}",
                                            relative_path, e
                                        ));
                                    }
                                }
                            }
                            Err(e) => {
                                error!("✗ Failed to rename file: {:?}", e);
                                stats.add_error(format!(
                                    "Rename failed for {}: {}",
                                    relative_path, e
                                ));
                            }
                        }
                    }
                }
            }
            Ok(None) => {
                // Could not detect file type - skip
                // This is normal for some RAW formats or corrupted files
            }
            Err(e) => {
                warn!("Failed to detect type for {:?}: {:?}", file_path, e);
            }
        }
    }

    println!("\nResults:");
    println!("- Files scanned: {}", stats.files_checked);
    println!("- Mismatches found: {}", mismatches_found);
    if config.dry_run {
        println!("- Would rename: {}", mismatches_found);
    } else {
        println!("- Files renamed: {}", files_renamed);
        if user_skipped > 0 {
            println!("- User skipped: {}", user_skipped);
        }
    }

    if !stats.errors.is_empty() {
        println!("- Errors: {}", stats.errors.len());
    }

    Ok(stats)
}

/// Check if a file is a supported media file based on extension
fn is_supported_media_file(path: &Path) -> bool {
    use crate::file_types::is_media_file;
    is_media_file(path)
}

#[derive(Debug)]
enum RenameDecision {
    Yes,
    No,
    All,
    SkipAll,
}

/// Prompt the user for rename decision
fn prompt_for_rename(new_path: &str) -> RenameDecision {
    println!("\nRename to {}?", new_path);
    println!("  [y] Yes");
    println!("  [n] No (default)");
    println!("  [a] Yes to all");
    println!("  [s] Skip all remaining");
    print!("Choice: ");

    // Force flush stdout
    use std::io::{self, Write};
    let _ = io::stdout().flush();

    let mut input = String::new();
    match io::stdin().read_line(&mut input) {
        Ok(_) => {
            let choice = input.trim().to_lowercase();
            match choice.as_str() {
                "y" | "yes" => RenameDecision::Yes,
                "a" | "all" => RenameDecision::All,
                "s" | "skip" => RenameDecision::SkipAll,
                _ => RenameDecision::No,
            }
        }
        Err(_) => RenameDecision::No,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_supported_media_file() {
        assert!(is_supported_media_file(Path::new("test.jpg")));
        assert!(is_supported_media_file(Path::new("test.JPG")));
        assert!(is_supported_media_file(Path::new("test.png")));
        assert!(is_supported_media_file(Path::new("test.webp")));
        assert!(is_supported_media_file(Path::new("test.mp4")));
        assert!(is_supported_media_file(Path::new("test.mov")));
        assert!(!is_supported_media_file(Path::new("test.txt")));
        assert!(!is_supported_media_file(Path::new("test")));
    }
}
@@ -1,39 +0,0 @@
use std::path::PathBuf;

#[derive(Debug, Clone)]
pub struct CleanupConfig {
    pub base_path: PathBuf,
    pub dry_run: bool,
    pub auto_fix: bool,
}

#[derive(Debug, Clone)]
pub struct FileIssue {
    pub current_path: String,
    pub issue_type: IssueType,
    pub suggested_path: Option<String>,
}

#[derive(Debug, Clone)]
pub enum IssueType {
    MissingFile,
    ExtensionMismatch { current: String, actual: String },
}

#[derive(Debug, Clone, Default)]
pub struct CleanupStats {
    pub files_checked: usize,
    pub issues_found: usize,
    pub issues_fixed: usize,
    pub errors: Vec<String>,
}

impl CleanupStats {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn add_error(&mut self, error: String) {
        self.errors.push(error);
    }
}
383 src/data/mod.rs
@@ -1,15 +1,9 @@
|
||||
use std::{fs, str::FromStr};
|
||||
|
||||
use crate::database::models::ImageExif;
|
||||
use anyhow::{Context, anyhow};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::error;
|
||||
use std::str::FromStr;
|
||||
|
||||
use actix_web::{dev, Error, FromRequest, http::header, HttpRequest};
|
||||
use actix_web::error::ErrorUnauthorized;
|
||||
use actix_web::{Error, FromRequest, HttpRequest, dev, http::header};
|
||||
use futures::future::{Ready, err, ok};
|
||||
use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode};
|
||||
use futures::future::{err, ok, Ready};
|
||||
use jsonwebtoken::{Algorithm, decode, DecodingKey, Validation};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -17,51 +11,30 @@ pub struct Token<'a> {
|
||||
pub token: &'a str,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub struct Claims {
|
||||
pub sub: String,
|
||||
pub exp: i64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod helper {
|
||||
use super::Claims;
|
||||
use chrono::{Duration, Utc};
|
||||
|
||||
impl Claims {
|
||||
pub fn valid_user(user_id: String) -> Self {
|
||||
Claims {
|
||||
sub: user_id,
|
||||
exp: (Utc::now() + Duration::minutes(1)).timestamp(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn secret_key() -> String {
|
||||
if cfg!(test) {
|
||||
String::from("test_key")
|
||||
} else {
|
||||
dotenv::var("SECRET_KEY").expect("SECRET_KEY env not set!")
|
||||
}
|
||||
dotenv::var("SECRET_KEY").expect("SECRET_KEY env not set!")
|
||||
}
|
||||
|
||||
impl FromStr for Claims {
|
||||
type Err = jsonwebtoken::errors::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let token = s.strip_prefix("Bearer ").ok_or_else(|| {
|
||||
jsonwebtoken::errors::Error::from(jsonwebtoken::errors::ErrorKind::InvalidToken)
|
||||
})?;
|
||||
let token = *(s.split("Bearer ").collect::<Vec<_>>().last().unwrap_or(&""));
|
||||
|
||||
match decode::<Claims>(
|
||||
token,
|
||||
&token,
|
||||
&DecodingKey::from_secret(secret_key().as_bytes()),
|
||||
&Validation::new(Algorithm::HS256),
|
||||
) {
|
||||
Ok(data) => Ok(data.claims),
|
||||
Err(other) => {
|
||||
error!("DecodeError: {}", other);
|
||||
println!("DecodeError: {}", other);
|
||||
Err(other)
|
||||
}
|
||||
}
|
||||
@@ -71,139 +44,28 @@ impl FromStr for Claims {
|
||||
impl FromRequest for Claims {
|
||||
type Error = Error;
|
||||
type Future = Ready<Result<Self, Self::Error>>;
|
||||
type Config = ();
|
||||
|
||||
fn from_request(req: &HttpRequest, _payload: &mut dev::Payload) -> Self::Future {
|
||||
req.headers()
|
||||
.get(header::AUTHORIZATION)
|
||||
.map_or_else(
|
||||
|| Err(anyhow!("No authorization header")),
|
||||
|header| {
|
||||
header
|
||||
.to_str()
|
||||
.context("Unable to read Authorization header to string")
|
||||
},
|
||||
)
|
||||
.and_then(|header| {
|
||||
Claims::from_str(header)
|
||||
.with_context(|| format!("Unable to decode token from: {}", header))
|
||||
})
|
||||
.map_or_else(
|
||||
|e| {
|
||||
error!("{}", e);
|
||||
err(ErrorUnauthorized("Bad token"))
|
||||
},
|
||||
ok,
|
||||
)
|
||||
let claims = match req.headers().get(header::AUTHORIZATION) {
|
||||
Some(header) => Claims::from_str(header.to_str().unwrap_or_else(|_| "")),
|
||||
None => Err(jsonwebtoken::errors::Error::from(
|
||||
jsonwebtoken::errors::ErrorKind::InvalidToken,
|
||||
)),
|
||||
};
|
||||
|
||||
if let Ok(claims) = claims {
|
||||
ok(claims)
|
||||
} else {
|
||||
err(ErrorUnauthorized("Bad token"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct PhotosResponse {
|
||||
pub photos: Vec<String>,
|
||||
pub dirs: Vec<String>,
|
||||
|
||||
// Pagination metadata (only present when limit is set)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub total_count: Option<i64>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub has_more: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub next_offset: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum SortType {
|
||||
Shuffle,
|
||||
NameAsc,
|
||||
NameDesc,
|
||||
TagCountAsc,
|
||||
TagCountDesc,
|
||||
DateTakenAsc,
|
||||
DateTakenDesc,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FilesRequest {
|
||||
pub path: String,
|
||||
// comma separated numbers
|
||||
pub tag_ids: Option<String>,
|
||||
pub exclude_tag_ids: Option<String>,
|
||||
pub tag_filter_mode: Option<FilterMode>,
|
||||
pub recursive: Option<bool>,
|
||||
pub sort: Option<SortType>,
|
||||
|
||||
// EXIF-based search parameters
|
||||
pub camera_make: Option<String>,
|
||||
pub camera_model: Option<String>,
|
||||
pub lens_model: Option<String>,
|
||||
|
||||
// GPS location search
|
||||
pub gps_lat: Option<f64>,
|
||||
pub gps_lon: Option<f64>,
|
||||
pub gps_radius_km: Option<f64>,
|
||||
|
||||
// Date range filtering (Unix timestamps)
|
||||
pub date_from: Option<i64>,
|
||||
pub date_to: Option<i64>,
|
||||
|
||||
// Media type filtering
|
||||
pub media_type: Option<MediaType>,
|
||||
|
||||
// Pagination parameters (optional - backward compatible)
|
||||
pub limit: Option<i64>,
|
||||
pub offset: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
|
||||
pub enum FilterMode {
|
||||
Any,
|
||||
All,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum MediaType {
|
||||
Photo,
|
||||
Video,
|
||||
All,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum PhotoSize {
|
||||
Full,
|
||||
Thumb,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ThumbnailRequest {
|
||||
pub(crate) path: String,
|
||||
#[allow(dead_code)] // Part of API contract, may be used in future
|
||||
pub(crate) size: Option<PhotoSize>,
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)] // Part of API contract, may be used in future
|
||||
pub(crate) format: Option<ThumbnailFormat>,
|
||||
#[serde(default)]
|
||||
pub(crate) shape: Option<ThumbnailShape>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, PartialEq)]
|
||||
pub enum ThumbnailFormat {
|
||||
#[serde(rename = "gif")]
|
||||
Gif,
|
||||
#[serde(rename = "image")]
|
||||
Image,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, PartialEq)]
|
||||
pub enum ThumbnailShape {
|
||||
#[serde(rename = "circle")]
|
||||
Circle,
|
||||
#[serde(rename = "square")]
|
||||
Square,
|
||||
pub path: String,
|
||||
pub size: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@@ -223,200 +85,3 @@ pub struct CreateAccountRequest {
|
||||
pub struct AddFavoriteRequest {
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct MetadataResponse {
|
||||
pub created: Option<i64>,
|
||||
pub modified: Option<i64>,
|
||||
pub size: u64,
|
||||
pub exif: Option<ExifMetadata>,
|
||||
pub filename_date: Option<i64>, // Date extracted from filename
|
||||
}
|
||||
|
||||
impl From<fs::Metadata> for MetadataResponse {
|
||||
fn from(metadata: fs::Metadata) -> Self {
|
||||
MetadataResponse {
|
||||
created: metadata.created().ok().map(|created| {
|
||||
let utc: DateTime<Utc> = created.into();
|
||||
utc.timestamp()
|
||||
}),
|
||||
modified: metadata.modified().ok().map(|modified| {
|
||||
let utc: DateTime<Utc> = modified.into();
|
||||
utc.timestamp()
|
||||
}),
|
||||
size: metadata.len(),
|
||||
exif: None,
|
||||
filename_date: None, // Will be set in endpoint handler
|
||||
}
|
||||
}
|
||||
}

#[derive(Debug, Serialize)]
pub struct ExifMetadata {
    pub camera: Option<CameraInfo>,
    pub image_properties: Option<ImageProperties>,
    pub gps: Option<GpsCoordinates>,
    pub capture_settings: Option<CaptureSettings>,
    pub date_taken: Option<i64>,
}

#[derive(Debug, Serialize)]
pub struct CameraInfo {
    pub make: Option<String>,
    pub model: Option<String>,
    pub lens: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct ImageProperties {
    pub width: Option<i32>,
    pub height: Option<i32>,
    pub orientation: Option<i32>,
}

#[derive(Debug, Serialize)]
pub struct GpsCoordinates {
    pub latitude: Option<f64>,
    pub longitude: Option<f64>,
    pub altitude: Option<f64>,
}

#[derive(Debug, Serialize)]
pub struct CaptureSettings {
    pub focal_length: Option<f64>,
    pub aperture: Option<f64>,
    pub shutter_speed: Option<String>,
    pub iso: Option<i32>,
}

impl From<ImageExif> for ExifMetadata {
    fn from(exif: ImageExif) -> Self {
        let has_camera_info =
            exif.camera_make.is_some() || exif.camera_model.is_some() || exif.lens_model.is_some();
        let has_image_properties =
            exif.width.is_some() || exif.height.is_some() || exif.orientation.is_some();
        let has_gps = exif.gps_latitude.is_some()
            || exif.gps_longitude.is_some()
            || exif.gps_altitude.is_some();
        let has_capture_settings = exif.focal_length.is_some()
            || exif.aperture.is_some()
            || exif.shutter_speed.is_some()
            || exif.iso.is_some();

        ExifMetadata {
            camera: if has_camera_info {
                Some(CameraInfo {
                    make: exif.camera_make,
                    model: exif.camera_model,
                    lens: exif.lens_model,
                })
            } else {
                None
            },
            image_properties: if has_image_properties {
                Some(ImageProperties {
                    width: exif.width,
                    height: exif.height,
                    orientation: exif.orientation,
                })
            } else {
                None
            },
            gps: if has_gps {
                Some(GpsCoordinates {
                    latitude: exif.gps_latitude.map(|v| v as f64),
                    longitude: exif.gps_longitude.map(|v| v as f64),
                    altitude: exif.gps_altitude.map(|v| v as f64),
                })
            } else {
                None
            },
            capture_settings: if has_capture_settings {
                Some(CaptureSettings {
                    focal_length: exif.focal_length.map(|v| v as f64),
                    aperture: exif.aperture.map(|v| v as f64),
                    shutter_speed: exif.shutter_speed,
                    iso: exif.iso,
                })
            } else {
                None
            },
            date_taken: exif.date_taken,
        }
    }
}
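
Because each group is wrapped in `Option`, a photo that only carries camera EXIF serializes with the other groups as `null`. A small illustration with `serde_json` (assumed available since the structs derive `Serialize`); all values are made up and the structs above are assumed to be in scope:

```rust
fn main() -> Result<(), serde_json::Error> {
    // Only the camera group is present; the rest stays None.
    let meta = ExifMetadata {
        camera: Some(CameraInfo {
            make: Some("Canon".to_string()),
            model: Some("EOS R6".to_string()),
            lens: None,
        }),
        image_properties: None,
        gps: None,
        capture_settings: None,
        date_taken: Some(1_630_540_800),
    };

    // -> {"camera":{"make":"Canon","model":"EOS R6","lens":null},
    //     "image_properties":null,"gps":null,"capture_settings":null,
    //     "date_taken":1630540800}
    println!("{}", serde_json::to_string(&meta)?);
    Ok(())
}
```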

#[derive(Debug, Deserialize)]
pub struct AddTagRequest {
    pub file_name: String,
    pub tag_name: String,
}

#[derive(Deserialize)]
pub struct GetTagsRequest {
    pub path: Option<String>,
}

#[derive(Debug, Serialize)]
pub struct GpsPhotoSummary {
    pub path: String,
    pub lat: f64,
    pub lon: f64,
    pub date_taken: Option<i64>,
}

#[derive(Debug, Serialize)]
pub struct GpsPhotosResponse {
    pub photos: Vec<GpsPhotoSummary>,
    pub total: usize,
}
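
`FilesRequest` exposes `gps_lat`, `gps_lon`, and `gps_radius_km`, and `GpsPhotoSummary` carries the coordinates to test against, so a plausible way to apply that filter is a great-circle distance check. This sketch restates the haversine formula that the location DAO further down also uses; the `within_radius` helper is illustrative only, not code from the repository:

```rust
// Illustrative helpers, not taken from the repository.
fn haversine_km(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const R: f64 = 6371.0; // mean Earth radius in km
    let d_lat = (lat2 - lat1).to_radians();
    let d_lon = (lon2 - lon1).to_radians();
    let a = (d_lat / 2.0).sin().powi(2)
        + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
    R * 2.0 * a.sqrt().atan2((1.0 - a).sqrt())
}

// Keep only the photos within `radius_km` of the requested point.
fn within_radius(photos: &[GpsPhotoSummary], lat: f64, lon: f64, radius_km: f64) -> Vec<&GpsPhotoSummary> {
    photos
        .iter()
        .filter(|p| haversine_km(lat, lon, p.lat, p.lon) <= radius_km)
        .collect()
}
```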

#[cfg(test)]
mod tests {
    use super::Claims;
    use jsonwebtoken::errors::ErrorKind;
    use std::str::FromStr;

    #[test]
    fn test_token_from_claims() {
        let claims = Claims {
            exp: 16136164790, // 2481-ish
            sub: String::from("9"),
        };

        let c = Claims::from_str(
            "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5IiwiZXhwIjoxNjEzNjE2NDc5MH0.9wwK4l8vhvq55YoueEljMbN_5uVTaAsGLLRPr0AuymE")
            .unwrap();

        assert_eq!(claims.sub, c.sub);
        assert_eq!(claims.exp, c.exp);
    }

    #[test]
    fn test_expired_token() {
        let err = Claims::from_str(
            "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5IiwiZXhwIjoxNn0.eZnfaNfiD54VMbphIqeBICeG9SzAtwNXntLwtTBihjY",
        );

        match err.unwrap_err().into_kind() {
            ErrorKind::ExpiredSignature => assert!(true),
            kind => {
                println!("Unexpected error: {:?}", kind);
                assert!(false)
            }
        }
    }

    #[test]
    fn test_junk_token_is_invalid() {
        let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ");

        match err.unwrap_err().into_kind() {
            ErrorKind::InvalidToken => assert!(true),
            kind => {
                println!("Unexpected error: {:?}", kind);
                assert!(false)
            }
        }
    }
}

@@ -1,554 +0,0 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a calendar event
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct CalendarEvent {
|
||||
pub id: i32,
|
||||
pub event_uid: Option<String>,
|
||||
pub summary: String,
|
||||
pub description: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub start_time: i64,
|
||||
pub end_time: i64,
|
||||
pub all_day: bool,
|
||||
pub organizer: Option<String>,
|
||||
pub attendees: Option<String>, // JSON string
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
/// Data for inserting a new calendar event
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub struct InsertCalendarEvent {
|
||||
pub event_uid: Option<String>,
|
||||
pub summary: String,
|
||||
pub description: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub start_time: i64,
|
||||
pub end_time: i64,
|
||||
pub all_day: bool,
|
||||
pub organizer: Option<String>,
|
||||
pub attendees: Option<String>,
|
||||
pub embedding: Option<Vec<f32>>, // 768-dim, optional
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
pub trait CalendarEventDao: Sync + Send {
|
||||
/// Store calendar event with optional embedding
|
||||
fn store_event(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event: InsertCalendarEvent,
|
||||
) -> Result<CalendarEvent, DbError>;
|
||||
|
||||
/// Batch insert events (for import efficiency)
|
||||
fn store_events_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
events: Vec<InsertCalendarEvent>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Find events in time range (PRIMARY query method)
|
||||
fn find_events_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Find semantically similar events (SECONDARY - requires embeddings)
|
||||
fn find_similar_events(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Hybrid: Time-filtered + semantic ranking
|
||||
/// "Events during photo timestamp ±N days, ranked by similarity to context"
|
||||
fn find_relevant_events_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Check if event exists (idempotency)
|
||||
fn event_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event_uid: &str,
|
||||
start_time: i64,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of events
|
||||
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
|
||||
}
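
The hybrid method is the one the trait comments single out ("events during photo timestamp ±N days, ranked by similarity to context"). A rough usage sketch under stated assumptions: a DAO implementing the trait above, the current OpenTelemetry context, and a made-up photo timestamp; passing `None` for the embedding falls back to the pure time filter:

```rust
fn nearby_events(dao: &mut dyn CalendarEventDao) -> Result<Vec<CalendarEvent>, DbError> {
    let ctx = opentelemetry::Context::current();

    // Made-up example values: a photo taken 2022-09-02T00:00:00Z,
    // a ±3 day window, and no caption embedding yet.
    let photo_ts: i64 = 1_662_076_800;
    let caption_embedding: Option<Vec<f32>> = None;

    dao.find_relevant_events_hybrid(
        &ctx,
        photo_ts,
        3,                            // time_window_days
        caption_embedding.as_deref(), // Option<&[f32]>
        5,                            // limit
    )
}
```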
|
||||
|
||||
pub struct SqliteCalendarEventDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteCalendarEventDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteCalendarEventDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteCalendarEventDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
let count = bytes.len() / 4;
|
||||
let mut vec = Vec::with_capacity(count);
|
||||
|
||||
for chunk in bytes.chunks_exact(4) {
|
||||
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
||||
vec.push(float);
|
||||
}
|
||||
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if magnitude_a == 0.0 || magnitude_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (magnitude_a * magnitude_b)
|
||||
}
|
||||
}
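
Embeddings are stored as raw little-endian `f32` bytes: 768 floats, so 3,072 bytes per row. An in-module test sketch, not present in the original file, that exercises the round trip and the cosine helper defined just above:

```rust
#[cfg(test)]
mod vector_roundtrip_tests {
    use super::SqliteCalendarEventDao as Dao;

    #[test]
    fn serialize_then_deserialize_is_identity() {
        let v: Vec<f32> = (0..768).map(|i| i as f32 / 768.0).collect();
        let bytes = Dao::serialize_vector(&v);
        assert_eq!(bytes.len(), 768 * 4); // 3072 bytes per embedding
        let back = Dao::deserialize_vector(&bytes).ok().expect("round trip should succeed");
        assert_eq!(back, v);
    }

    #[test]
    fn cosine_similarity_of_identical_vectors_is_one() {
        let v = vec![0.5f32; 768];
        let sim = Dao::cosine_similarity(&v, &v);
        assert!((sim - 1.0).abs() < 1e-5);
    }
}
```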
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CalendarEventWithVectorRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
event_uid: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
summary: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
description: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
location: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
start_time: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
end_time: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Bool)]
|
||||
all_day: bool,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
organizer: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
attendees: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
|
||||
embedding: Option<Vec<u8>>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
source_file: Option<String>,
|
||||
}
|
||||
|
||||
impl CalendarEventWithVectorRow {
|
||||
fn to_calendar_event(&self) -> CalendarEvent {
|
||||
CalendarEvent {
|
||||
id: self.id,
|
||||
event_uid: self.event_uid.clone(),
|
||||
summary: self.summary.clone(),
|
||||
description: self.description.clone(),
|
||||
location: self.location.clone(),
|
||||
start_time: self.start_time,
|
||||
end_time: self.end_time,
|
||||
all_day: self.all_day,
|
||||
organizer: self.organizer.clone(),
|
||||
attendees: self.attendees.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
}
|
||||
|
||||
impl CalendarEventDao for SqliteCalendarEventDao {
|
||||
fn store_event(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event: InsertCalendarEvent,
|
||||
) -> Result<CalendarEvent, DbError> {
|
||||
trace_db_call(context, "insert", "store_event", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
|
||||
// Validate embedding dimensions if provided
|
||||
if let Some(ref emb) = event.embedding
|
||||
&& emb.len() != 768
|
||||
{
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
|
||||
let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
// INSERT OR REPLACE to handle re-imports
|
||||
diesel::sql_query(
|
||||
"INSERT OR REPLACE INTO calendar_events
|
||||
(event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.event_uid)
|
||||
.bind::<diesel::sql_types::Text, _>(&event.summary)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.description)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.location)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
|
||||
.bind::<diesel::sql_types::Bool, _>(event.all_day)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.organizer)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.attendees)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(CalendarEvent {
|
||||
id: row_id,
|
||||
event_uid: event.event_uid,
|
||||
summary: event.summary,
|
||||
description: event.description,
|
||||
location: event.location,
|
||||
start_time: event.start_time,
|
||||
end_time: event.end_time,
|
||||
all_day: event.all_day,
|
||||
organizer: event.organizer,
|
||||
attendees: event.attendees,
|
||||
created_at: event.created_at,
|
||||
source_file: event.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn store_events_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
events: Vec<InsertCalendarEvent>,
|
||||
) -> Result<usize, DbError> {
|
||||
trace_db_call(context, "insert", "store_events_batch", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
let mut inserted = 0;
|
||||
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for event in events {
|
||||
// Validate embedding if provided
|
||||
if let Some(ref emb) = event.embedding
|
||||
&& emb.len() != 768
|
||||
{
|
||||
log::warn!(
|
||||
"Skipping event with invalid embedding dimensions: {}",
|
||||
emb.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let embedding_bytes =
|
||||
event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
diesel::sql_query(
|
||||
"INSERT OR REPLACE INTO calendar_events
|
||||
(event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.event_uid,
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(&event.summary)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.description,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.location,
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
|
||||
.bind::<diesel::sql_types::Bool, _>(event.all_day)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.organizer,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.attendees,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
|
||||
&embedding_bytes,
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.source_file,
|
||||
)
|
||||
.execute(conn)
|
||||
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
|
||||
|
||||
inserted += 1;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
|
||||
|
||||
Ok(inserted)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn find_events_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_events_in_range", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, NULL as embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE start_time >= ?1 AND start_time <= ?2
|
||||
ORDER BY start_time ASC"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_similar_events(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_similar_events", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
if query_embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
// Load all events with embeddings
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE embedding IS NOT NULL"
|
||||
)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Compute similarities
|
||||
let mut scored_events: Vec<(f32, CalendarEvent)> = results
|
||||
.into_iter()
|
||||
.filter_map(|row| {
|
||||
if let Some(ref emb_bytes) = row.embedding {
|
||||
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
|
||||
let similarity = Self::cosine_similarity(query_embedding, &emb);
|
||||
Some((similarity, row.to_calendar_event()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending
|
||||
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
log::info!("Found {} similar calendar events", scored_events.len());
|
||||
if !scored_events.is_empty() {
|
||||
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||
}
|
||||
|
||||
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_relevant_events_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_relevant_events_hybrid", |_span| {
|
||||
let window_seconds = time_window_days * 86400;
|
||||
let start_ts = center_timestamp - window_seconds;
|
||||
let end_ts = center_timestamp + window_seconds;
|
||||
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
// Step 1: Time-based filter (fast, indexed)
|
||||
let events_in_range = diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE start_time >= ?1 AND start_time <= ?2"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Step 2: If query embedding provided, rank by semantic similarity
|
||||
if let Some(query_emb) = query_embedding {
|
||||
if query_emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_emb.len()
|
||||
));
|
||||
}
|
||||
|
||||
let mut scored_events: Vec<(f32, CalendarEvent)> = events_in_range
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
// Events with embeddings get semantic scoring
|
||||
let similarity = if let Some(ref emb_bytes) = row.embedding {
|
||||
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
|
||||
Self::cosine_similarity(query_emb, &emb)
|
||||
} else {
|
||||
0.5 // Neutral score for deserialization errors
|
||||
}
|
||||
} else {
|
||||
0.5 // Neutral score for events without embeddings
|
||||
};
|
||||
(similarity, row.to_calendar_event())
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending
|
||||
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
log::info!("Hybrid query: {} events in time range, ranked by similarity", scored_events.len());
|
||||
if !scored_events.is_empty() {
|
||||
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||
}
|
||||
|
||||
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||
} else {
|
||||
// No semantic ranking: return the events from the time-window query as-is (no ORDER BY is applied here), truncated to `limit`
|
||||
log::info!("Time-only query: {} events in range", events_in_range.len());
|
||||
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
|
||||
}
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn event_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event_uid: &str,
|
||||
start_time: i64,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "event_exists", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
count: i32,
|
||||
}
|
||||
|
||||
let result: CountResult = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM calendar_events WHERE event_uid = ?1 AND start_time = ?2"
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(event_uid)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_time)
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_event_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
let result: CountResult =
|
||||
diesel::sql_query("SELECT COUNT(*) as count FROM calendar_events")
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
@@ -1,489 +0,0 @@
|
||||
use chrono::NaiveDate;
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a daily conversation summary
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct DailySummary {
|
||||
pub id: i32,
|
||||
pub date: String,
|
||||
pub contact: String,
|
||||
pub summary: String,
|
||||
pub message_count: i32,
|
||||
pub created_at: i64,
|
||||
pub model_version: String,
|
||||
}
|
||||
|
||||
/// Data for inserting a new daily summary
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertDailySummary {
|
||||
pub date: String,
|
||||
pub contact: String,
|
||||
pub summary: String,
|
||||
pub message_count: i32,
|
||||
pub embedding: Vec<f32>,
|
||||
pub created_at: i64,
|
||||
pub model_version: String,
|
||||
}
|
||||
|
||||
pub trait DailySummaryDao: Sync + Send {
|
||||
/// Store a daily summary with its embedding
|
||||
fn store_summary(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
summary: InsertDailySummary,
|
||||
) -> Result<DailySummary, DbError>;
|
||||
|
||||
/// Find semantically similar daily summaries using vector similarity
|
||||
fn find_similar_summaries(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<DailySummary>, DbError>;
|
||||
|
||||
/// Find semantically similar daily summaries with time-based weighting
|
||||
/// Combines cosine similarity with temporal proximity to target_date
|
||||
/// Final score = similarity * sqrt(time_weight), where time_weight decays exponentially with distance from target_date
|
||||
fn find_similar_summaries_with_time_weight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
target_date: &str,
|
||||
limit: usize,
|
||||
) -> Result<Vec<DailySummary>, DbError>;
|
||||
|
||||
/// Check if a summary exists for a given date and contact
|
||||
fn summary_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
date: &str,
|
||||
contact: &str,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of summaries for a contact
|
||||
fn get_summary_count(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
contact: &str,
|
||||
) -> Result<i64, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteDailySummaryDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteDailySummaryDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteDailySummaryDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteDailySummaryDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
let count = bytes.len() / 4;
|
||||
let mut vec = Vec::with_capacity(count);
|
||||
|
||||
for chunk in bytes.chunks_exact(4) {
|
||||
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
||||
vec.push(float);
|
||||
}
|
||||
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if magnitude_a == 0.0 || magnitude_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (magnitude_a * magnitude_b)
|
||||
}
|
||||
}
|
||||
|
||||
impl DailySummaryDao for SqliteDailySummaryDao {
|
||||
fn store_summary(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
summary: InsertDailySummary,
|
||||
) -> Result<DailySummary, DbError> {
|
||||
trace_db_call(context, "insert", "store_summary", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get DailySummaryDao");
|
||||
|
||||
// Validate embedding dimensions
|
||||
if summary.embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
summary.embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
let embedding_bytes = Self::serialize_vector(&summary.embedding);
|
||||
|
||||
// INSERT OR REPLACE to handle updates if summary needs regeneration
|
||||
diesel::sql_query(
|
||||
"INSERT OR REPLACE INTO daily_conversation_summaries
|
||||
(date, contact, summary, message_count, embedding, created_at, model_version)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(&summary.date)
|
||||
.bind::<diesel::sql_types::Text, _>(&summary.contact)
|
||||
.bind::<diesel::sql_types::Text, _>(&summary.summary)
|
||||
.bind::<diesel::sql_types::Integer, _>(summary.message_count)
|
||||
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(summary.created_at)
|
||||
.bind::<diesel::sql_types::Text, _>(&summary.model_version)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id as i32)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(DailySummary {
|
||||
id: row_id,
|
||||
date: summary.date,
|
||||
contact: summary.contact,
|
||||
summary: summary.summary,
|
||||
message_count: summary.message_count,
|
||||
created_at: summary.created_at,
|
||||
model_version: summary.model_version,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn find_similar_summaries(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<DailySummary>, DbError> {
|
||||
trace_db_call(context, "query", "find_similar_summaries", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
|
||||
|
||||
if query_embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
// Load all summaries with embeddings
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
|
||||
FROM daily_conversation_summaries"
|
||||
)
|
||||
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
log::info!("Loaded {} daily summaries for similarity comparison", results.len());
|
||||
|
||||
// Compute similarity for each summary
|
||||
let mut scored_summaries: Vec<(f32, DailySummary)> = results
|
||||
.into_iter()
|
||||
.filter_map(|row| {
|
||||
match Self::deserialize_vector(&row.embedding) {
|
||||
Ok(embedding) => {
|
||||
let similarity = Self::cosine_similarity(query_embedding, &embedding);
|
||||
Some((
|
||||
similarity,
|
||||
DailySummary {
|
||||
id: row.id,
|
||||
date: row.date,
|
||||
contact: row.contact,
|
||||
summary: row.summary,
|
||||
message_count: row.message_count,
|
||||
created_at: row.created_at,
|
||||
model_version: row.model_version,
|
||||
},
|
||||
))
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity (highest first)
|
||||
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
// Filter out poor matches (similarity < 0.3 is likely noise)
|
||||
scored_summaries.retain(|(similarity, _)| *similarity >= 0.3);
|
||||
|
||||
// Log similarity distribution
|
||||
if !scored_summaries.is_empty() {
|
||||
let top_score = scored_summaries.first().map(|(s, _)| *s).unwrap_or(0.0);
|
||||
let median_score = scored_summaries.get(scored_summaries.len() / 2).map(|(s, _)| *s).unwrap_or(0.0);
|
||||
|
||||
log::info!(
|
||||
"Daily summary similarity - Top: {:.3}, Median: {:.3}, Count: {} (after 0.3 threshold)",
|
||||
top_score,
|
||||
median_score,
|
||||
scored_summaries.len()
|
||||
);
|
||||
} else {
|
||||
log::warn!("No daily summaries met the 0.3 similarity threshold");
|
||||
}
|
||||
|
||||
// Take top N and log matches
|
||||
let top_results: Vec<DailySummary> = scored_summaries
|
||||
.into_iter()
|
||||
.take(limit)
|
||||
.map(|(similarity, summary)| {
|
||||
log::info!(
|
||||
"Summary match: similarity={:.3}, date={}, contact={}, summary=\"{}\"",
|
||||
similarity,
|
||||
summary.date,
|
||||
summary.contact,
|
||||
summary.summary.chars().take(100).collect::<String>()
|
||||
);
|
||||
summary
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(top_results)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_similar_summaries_with_time_weight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
target_date: &str,
|
||||
limit: usize,
|
||||
) -> Result<Vec<DailySummary>, DbError> {
|
||||
trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
|
||||
|
||||
if query_embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
// Parse target date
|
||||
let target = NaiveDate::parse_from_str(target_date, "%Y-%m-%d")
|
||||
.map_err(|e| anyhow::anyhow!("Invalid target date: {}", e))?;
|
||||
|
||||
// Load all summaries with embeddings
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
|
||||
FROM daily_conversation_summaries"
|
||||
)
|
||||
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
log::info!("Loaded {} daily summaries for time-weighted similarity (target: {})", results.len(), target_date);
|
||||
|
||||
// Compute time-weighted similarity for each summary
// combined_score = cosine_similarity * sqrt(time_weight)
// time_weight = 0.5^(days_distance / 30), an exponential decay with a 30-day half-life
|
||||
let mut scored_summaries: Vec<(f32, f32, i64, DailySummary)> = results
|
||||
.into_iter()
|
||||
.filter_map(|row| {
|
||||
match Self::deserialize_vector(&row.embedding) {
|
||||
Ok(embedding) => {
|
||||
let similarity = Self::cosine_similarity(query_embedding, &embedding);
|
||||
|
||||
// Calculate time weight
|
||||
let summary_date = NaiveDate::parse_from_str(&row.date, "%Y-%m-%d").ok()?;
|
||||
let days_distance = (target - summary_date).num_days().abs();
|
||||
|
||||
// Exponential decay with 30-day half-life
|
||||
// At 0 days: weight = 1.0
|
||||
// At 30 days: weight = 0.5
|
||||
// At 60 days: weight = 0.25
|
||||
// At 365 days: weight ~= 0.0001
|
||||
let time_weight = 0.5_f32.powf(days_distance as f32 / 30.0);
|
||||
|
||||
// Combined score - but ensure semantic similarity still matters
|
||||
// We use sqrt to soften the time weight's impact
|
||||
let combined_score = similarity * time_weight.sqrt();
|
||||
|
||||
Some((
|
||||
combined_score,
|
||||
similarity,
|
||||
days_distance,
|
||||
DailySummary {
|
||||
id: row.id,
|
||||
date: row.date,
|
||||
contact: row.contact,
|
||||
summary: row.summary,
|
||||
message_count: row.message_count,
|
||||
created_at: row.created_at,
|
||||
model_version: row.model_version,
|
||||
},
|
||||
))
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by combined score (highest first)
|
||||
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
// Filter out poor matches (base similarity < 0.5 - stricter than before since we have time weighting)
|
||||
scored_summaries.retain(|(_, similarity, _, _)| *similarity >= 0.5);
|
||||
|
||||
// Log similarity distribution
|
||||
if !scored_summaries.is_empty() {
|
||||
let (top_combined, top_sim, top_days, _) = &scored_summaries[0];
|
||||
log::info!(
|
||||
"Time-weighted similarity - Top: combined={:.3} (sim={:.3}, days={}), Count: {} matches",
|
||||
top_combined,
|
||||
top_sim,
|
||||
top_days,
|
||||
scored_summaries.len()
|
||||
);
|
||||
} else {
|
||||
log::warn!("No daily summaries met the 0.5 similarity threshold");
|
||||
}
|
||||
|
||||
// Take top N and log matches
|
||||
let top_results: Vec<DailySummary> = scored_summaries
|
||||
.into_iter()
|
||||
.take(limit)
|
||||
.map(|(combined, similarity, days, summary)| {
|
||||
log::info!(
|
||||
"Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"",
|
||||
combined,
|
||||
similarity,
|
||||
days,
|
||||
summary.date,
|
||||
summary.contact,
|
||||
summary.summary.chars().take(80).collect::<String>()
|
||||
);
|
||||
summary
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(top_results)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
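
To make the decay concrete: with a 30-day half-life, a summary from the same day keeps its full similarity, one from a month earlier keeps half the weight, and anything a year away is effectively ignored. A standalone sketch of just the weighting term (values rounded, illustration only):

```rust
fn time_weight(days_distance: f32) -> f32 {
    // Same decay as above: halves every 30 days.
    0.5_f32.powf(days_distance / 30.0)
}

fn main() {
    for days in [0.0_f32, 30.0, 60.0, 365.0] {
        // 0 -> 1.0000, 30 -> 0.5000, 60 -> 0.2500, 365 -> ~0.0002
        println!("{days:>5} days -> weight {:.4}", time_weight(days));
    }
}
```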
|
||||
|
||||
fn summary_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
date: &str,
|
||||
contact: &str,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "summary_exists", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get DailySummaryDao");
|
||||
|
||||
let count = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM daily_conversation_summaries
|
||||
WHERE date = ?1 AND contact = ?2",
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(date)
|
||||
.bind::<diesel::sql_types::Text, _>(contact)
|
||||
.get_result::<CountResult>(conn.deref_mut())
|
||||
.map(|r| r.count)
|
||||
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))?;
|
||||
|
||||
Ok(count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_summary_count(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
contact: &str,
|
||||
) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_summary_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get DailySummaryDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM daily_conversation_summaries WHERE contact = ?1",
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(contact)
|
||||
.get_result::<CountResult>(conn.deref_mut())
|
||||
.map(|r| r.count)
|
||||
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
|
||||
// Helper structs for raw SQL queries
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
id: i64,
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct DailySummaryWithVectorRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
date: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
contact: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
summary: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
message_count: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::Binary)]
|
||||
embedding: Vec<u8>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
model_version: String,
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::models::{InsertPhotoInsight, PhotoInsight};
|
||||
use crate::database::schema;
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
pub trait InsightDao: Sync + Send {
|
||||
fn store_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
insight: InsertPhotoInsight,
|
||||
) -> Result<PhotoInsight, DbError>;
|
||||
|
||||
fn get_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_path: &str,
|
||||
) -> Result<Option<PhotoInsight>, DbError>;
|
||||
|
||||
fn delete_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_path: &str,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
fn get_all_insights(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<PhotoInsight>, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteInsightDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteInsightDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteInsightDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteInsightDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InsightDao for SqliteInsightDao {
|
||||
fn store_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
insight: InsertPhotoInsight,
|
||||
) -> Result<PhotoInsight, DbError> {
|
||||
trace_db_call(context, "insert", "store_insight", |_span| {
|
||||
use schema::photo_insights::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||
|
||||
// Insert or replace on conflict (UNIQUE constraint on file_path)
|
||||
diesel::replace_into(photo_insights)
|
||||
.values(&insight)
|
||||
.execute(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Insert error"))?;
|
||||
|
||||
// Retrieve the inserted record
|
||||
photo_insights
|
||||
.filter(file_path.eq(&insight.file_path))
|
||||
.first::<PhotoInsight>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn get_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
path: &str,
|
||||
) -> Result<Option<PhotoInsight>, DbError> {
|
||||
trace_db_call(context, "query", "get_insight", |_span| {
|
||||
use schema::photo_insights::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||
|
||||
photo_insights
|
||||
.filter(file_path.eq(path))
|
||||
.first::<PhotoInsight>(connection.deref_mut())
|
||||
.optional()
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn delete_insight(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
path: &str,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "delete", "delete_insight", |_span| {
|
||||
use schema::photo_insights::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||
|
||||
diesel::delete(photo_insights.filter(file_path.eq(path)))
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Delete error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_all_insights(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<PhotoInsight>, DbError> {
|
||||
trace_db_call(context, "query", "get_all_insights", |_span| {
|
||||
use schema::photo_insights::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
|
||||
|
||||
photo_insights
|
||||
.order(generated_at.desc())
|
||||
.load::<PhotoInsight>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
@@ -1,528 +0,0 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a location history record
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct LocationRecord {
|
||||
pub id: i32,
|
||||
pub timestamp: i64,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
pub accuracy: Option<i32>,
|
||||
pub activity: Option<String>,
|
||||
pub activity_confidence: Option<i32>,
|
||||
pub place_name: Option<String>,
|
||||
pub place_category: Option<String>,
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
/// Data for inserting a new location record
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertLocationRecord {
|
||||
pub timestamp: i64,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
pub accuracy: Option<i32>,
|
||||
pub activity: Option<String>,
|
||||
pub activity_confidence: Option<i32>,
|
||||
pub place_name: Option<String>,
|
||||
pub place_category: Option<String>,
|
||||
pub embedding: Option<Vec<f32>>, // 768-dim, optional (rarely used)
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
pub trait LocationHistoryDao: Sync + Send {
|
||||
/// Store single location record
|
||||
fn store_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
location: InsertLocationRecord,
|
||||
) -> Result<LocationRecord, DbError>;
|
||||
|
||||
/// Batch insert locations (Google Takeout has millions of points)
|
||||
fn store_locations_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
locations: Vec<InsertLocationRecord>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Find nearest location to timestamp (PRIMARY query)
|
||||
/// "Where was I at photo timestamp ±N minutes?"
|
||||
fn find_nearest_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
max_time_diff_seconds: i64,
|
||||
) -> Result<Option<LocationRecord>, DbError>;
|
||||
|
||||
/// Find locations in time range
|
||||
fn find_locations_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<LocationRecord>, DbError>;
|
||||
|
||||
/// Find locations near GPS coordinates (for "photos near this place")
|
||||
/// Uses approximate bounding box for performance
|
||||
fn find_locations_near_point(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
radius_km: f64,
|
||||
) -> Result<Vec<LocationRecord>, DbError>;
|
||||
|
||||
/// Deduplicate: check if location exists
|
||||
fn location_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of location records
|
||||
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
|
||||
}
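
The primary lookup answers "where was I when this photo was taken, within ±N minutes". A usage sketch against the trait above, with made-up values; a ±30 minute window is expressed as 1,800 seconds:

```rust
fn place_for_photo(
    dao: &mut dyn LocationHistoryDao,
    photo_ts: i64,
) -> Result<Option<LocationRecord>, DbError> {
    let ctx = opentelemetry::Context::current();
    // Accept the closest fix within ±30 minutes of the photo timestamp.
    dao.find_nearest_location(&ctx, photo_ts, 30 * 60)
}
```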
|
||||
|
||||
pub struct SqliteLocationHistoryDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteLocationHistoryDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteLocationHistoryDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteLocationHistoryDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
/// Haversine distance calculation (in kilometers)
|
||||
/// Used for filtering locations by proximity to a point
|
||||
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
|
||||
const R: f64 = 6371.0; // Earth radius in km
|
||||
|
||||
let d_lat = (lat2 - lat1).to_radians();
|
||||
let d_lon = (lon2 - lon1).to_radians();
|
||||
|
||||
let a = (d_lat / 2.0).sin().powi(2)
|
||||
+ lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
|
||||
|
||||
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
|
||||
|
||||
R * c
|
||||
}
|
||||
|
||||
/// Calculate approximate bounding box for spatial queries
|
||||
/// Returns (min_lat, max_lat, min_lon, max_lon)
|
||||
fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
|
||||
const KM_PER_DEGREE_LAT: f64 = 111.0;
|
||||
let km_per_degree_lon = 111.0 * lat.to_radians().cos();
|
||||
|
||||
let delta_lat = radius_km / KM_PER_DEGREE_LAT;
|
||||
let delta_lon = radius_km / km_per_degree_lon;
|
||||
|
||||
(
|
||||
lat - delta_lat, // min_lat
|
||||
lat + delta_lat, // max_lat
|
||||
lon - delta_lon, // min_lon
|
||||
lon + delta_lon, // max_lon
|
||||
)
|
||||
}
|
||||
}
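
The bounding box is a cheap pre-filter (roughly 111 km per degree of latitude, shrinking with `cos(lat)` for longitude), presumably refined afterwards by the exact haversine check. An in-module test sketch, not in the original file, that ties the two helpers together for a point about one kilometre away:

```rust
#[cfg(test)]
mod geo_helper_tests {
    use super::SqliteLocationHistoryDao as Dao;

    #[test]
    fn one_km_north_is_about_one_km_away_and_inside_a_generous_box() {
        let (lat, lon) = (52.0_f64, 13.0_f64);
        // ~1 km north: one kilometre is roughly 0.009 degrees of latitude.
        let (lat2, lon2) = (lat + 0.009, lon);

        let d = Dao::haversine_distance(lat, lon, lat2, lon2);
        assert!((d - 1.0).abs() < 0.05, "distance was {d} km");

        // A 1.5 km box around the origin should contain the offset point.
        let (min_lat, max_lat, min_lon, max_lon) = Dao::bounding_box(lat, lon, 1.5);
        assert!(lat2 >= min_lat && lat2 <= max_lat);
        assert!(lon2 >= min_lon && lon2 <= max_lon);
    }
}
```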
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LocationRecordRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
timestamp: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Float)]
|
||||
latitude: f32,
|
||||
#[diesel(sql_type = diesel::sql_types::Float)]
|
||||
longitude: f32,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
|
||||
accuracy: Option<i32>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
activity: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
|
||||
activity_confidence: Option<i32>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
place_name: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
place_category: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
source_file: Option<String>,
|
||||
}
|
||||
|
||||
impl LocationRecordRow {
|
||||
fn to_location_record(&self) -> LocationRecord {
|
||||
LocationRecord {
|
||||
id: self.id,
|
||||
timestamp: self.timestamp,
|
||||
latitude: self.latitude as f64,
|
||||
longitude: self.longitude as f64,
|
||||
accuracy: self.accuracy,
|
||||
activity: self.activity.clone(),
|
||||
activity_confidence: self.activity_confidence,
|
||||
place_name: self.place_name.clone(),
|
||||
place_category: self.place_category.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
}
|
||||
|
||||
impl LocationHistoryDao for SqliteLocationHistoryDao {
|
||||
fn store_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
location: InsertLocationRecord,
|
||||
) -> Result<LocationRecord, DbError> {
|
||||
trace_db_call(context, "insert", "store_location", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
// Validate embedding dimensions if provided (rare for location data)
|
||||
if let Some(ref emb) = location.embedding
|
||||
&& emb.len() != 768
|
||||
{
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
|
||||
let embedding_bytes = location
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|e| Self::serialize_vector(e));
|
||||
|
||||
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+lat+lon)
|
||||
diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO location_history
|
||||
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
|
||||
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(&location.accuracy)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.activity)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
|
||||
&location.activity_confidence,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.place_name)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.place_category,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(LocationRecord {
|
||||
id: row_id,
|
||||
timestamp: location.timestamp,
|
||||
latitude: location.latitude,
|
||||
longitude: location.longitude,
|
||||
accuracy: location.accuracy,
|
||||
activity: location.activity,
|
||||
activity_confidence: location.activity_confidence,
|
||||
place_name: location.place_name,
|
||||
place_category: location.place_category,
|
||||
created_at: location.created_at,
|
||||
source_file: location.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn store_locations_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
locations: Vec<InsertLocationRecord>,
|
||||
) -> Result<usize, DbError> {
|
||||
trace_db_call(context, "insert", "store_locations_batch", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
let mut inserted = 0;
|
||||
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for location in locations {
|
||||
// Validate embedding if provided (rare)
|
||||
if let Some(ref emb) = location.embedding
|
||||
&& emb.len() != 768
|
||||
{
|
||||
log::warn!(
|
||||
"Skipping location with invalid embedding dimensions: {}",
|
||||
emb.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let embedding_bytes = location
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|e| Self::serialize_vector(e));
|
||||
|
||||
let rows_affected = diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO location_history
|
||||
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
|
||||
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
|
||||
&location.accuracy,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.activity,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
|
||||
&location.activity_confidence,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.place_name,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.place_category,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
|
||||
&embedding_bytes,
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.source_file,
|
||||
)
|
||||
.execute(conn)
|
||||
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
|
||||
|
||||
if rows_affected > 0 {
|
||||
inserted += 1;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
|
||||
|
||||
Ok(inserted)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn find_nearest_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
max_time_diff_seconds: i64,
|
||||
) -> Result<Option<LocationRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_nearest_location", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
let start_ts = timestamp - max_time_diff_seconds;
|
||||
let end_ts = timestamp + max_time_diff_seconds;
|
||||
|
||||
// Find location closest to target timestamp within window
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, created_at, source_file
|
||||
FROM location_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY ABS(timestamp - ?3) ASC
|
||||
LIMIT 1"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(timestamp)
|
||||
.load::<LocationRecordRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(results.into_iter().next().map(|r| r.to_location_record()))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_locations_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<LocationRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_locations_in_range", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, created_at, source_file
|
||||
FROM location_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY timestamp ASC"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<LocationRecordRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_locations_near_point(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
radius_km: f64,
|
||||
) -> Result<Vec<LocationRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_locations_near_point", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
// Use bounding box for initial filter (fast, indexed)
|
||||
let (min_lat, max_lat, min_lon, max_lon) =
|
||||
Self::bounding_box(latitude, longitude, radius_km);
|
||||
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, created_at, source_file
|
||||
FROM location_history
|
||||
WHERE latitude >= ?1 AND latitude <= ?2
|
||||
AND longitude >= ?3 AND longitude <= ?4"
|
||||
)
|
||||
.bind::<diesel::sql_types::Float, _>(min_lat as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(max_lat as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(min_lon as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(max_lon as f32)
|
||||
.load::<LocationRecordRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Refine with Haversine distance (in-memory, post-filter)
|
||||
let filtered: Vec<LocationRecord> = results
|
||||
.into_iter()
|
||||
.map(|r| r.to_location_record())
|
||||
.filter(|loc| {
|
||||
let distance =
|
||||
Self::haversine_distance(latitude, longitude, loc.latitude, loc.longitude);
|
||||
distance <= radius_km
|
||||
})
|
||||
.collect();
|
||||
|
||||
log::info!(
|
||||
"Found {} locations within {} km of ({}, {})",
|
||||
filtered.len(),
|
||||
radius_km,
|
||||
latitude,
|
||||
longitude
|
||||
);
|
||||
|
||||
Ok(filtered)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn location_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "location_exists", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
count: i32,
|
||||
}
|
||||
|
||||
let result: CountResult = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM location_history
|
||||
WHERE timestamp = ?1 AND latitude = ?2 AND longitude = ?3",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(timestamp)
|
||||
.bind::<diesel::sql_types::Float, _>(latitude as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(longitude as f32)
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_location_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
let result: CountResult =
|
||||
diesel::sql_query("SELECT COUNT(*) as count FROM location_history")
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
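The near-point query above relies on two geo helpers, Self::bounding_box and Self::haversine_distance, whose bodies fall outside this hunk. A minimal sketch of how such helpers are commonly written, assuming only the signatures implied by the call sites; the free-function form and the constants here are illustrative, not the repository's implementation:

const EARTH_RADIUS_KM: f64 = 6371.0;

// Great-circle distance in kilometres between two (lat, lon) points.
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    let d_lat = (lat2 - lat1).to_radians();
    let d_lon = (lon2 - lon1).to_radians();
    let a = (d_lat / 2.0).sin().powi(2)
        + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
    2.0 * EARTH_RADIUS_KM * a.sqrt().asin()
}

// Coarse bounding box (min_lat, max_lat, min_lon, max_lon) used as the cheap,
// indexable SQL pre-filter before the exact Haversine check refines the result.
fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
    let lat_delta = radius_km / 111.0; // roughly 111 km per degree of latitude
    let lon_delta = radius_km / (111.0 * lat.to_radians().cos().max(0.01));
    (lat - lat_delta, lat + lat_delta, lon - lon_delta, lon + lon_delta)
}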
@@ -1,733 +1,92 @@
|
||||
use bcrypt::{DEFAULT_COST, hash, verify};
|
||||
use bcrypt::{hash, verify, DEFAULT_COST};
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use dotenv::dotenv;
|
||||
|
||||
use crate::database::models::{
|
||||
Favorite, ImageExif, InsertFavorite, InsertImageExif, InsertUser, User,
|
||||
};
|
||||
use crate::otel::trace_db_call;
|
||||
use crate::database::models::{Favorite, InsertFavorite, InsertUser, User};
|
||||
|
||||
pub mod calendar_dao;
|
||||
pub mod daily_summary_dao;
|
||||
pub mod insights_dao;
|
||||
pub mod location_dao;
|
||||
pub mod models;
|
||||
pub mod schema;
|
||||
pub mod search_dao;
|
||||
mod models;
|
||||
mod schema;
|
||||
|
||||
pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
|
||||
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||
pub use insights_dao::{InsightDao, SqliteInsightDao};
|
||||
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
||||
pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
|
||||
fn connect() -> SqliteConnection {
|
||||
dotenv().ok();
|
||||
|
||||
pub trait UserDao {
|
||||
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
|
||||
fn get_user(&mut self, user: &str, password: &str) -> Option<User>;
|
||||
fn user_exists(&mut self, user: &str) -> bool;
|
||||
}
|
||||
|
||||
pub struct SqliteUserDao {
|
||||
connection: SqliteConnection,
|
||||
}
|
||||
|
||||
impl Default for SqliteUserDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteUserDao {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
connection: connect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use diesel::{Connection, SqliteConnection};
|
||||
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
|
||||
|
||||
const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
|
||||
|
||||
pub fn in_memory_db_connection() -> SqliteConnection {
|
||||
let mut connection = SqliteConnection::establish(":memory:")
|
||||
.expect("Unable to create in-memory db connection");
|
||||
connection
|
||||
.run_pending_migrations(DB_MIGRATIONS)
|
||||
.expect("Failure running DB migrations");
|
||||
|
||||
connection
|
||||
}
|
||||
}
|
||||
|
||||
impl UserDao for SqliteUserDao {
|
||||
// TODO: Should probably use Result here
|
||||
fn create_user(&mut self, user: &str, pass: &str) -> Option<User> {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
let hashed = hash(pass, DEFAULT_COST);
|
||||
if let Ok(hash) = hashed {
|
||||
diesel::insert_into(users)
|
||||
.values(InsertUser {
|
||||
username: user,
|
||||
password: &hash,
|
||||
})
|
||||
.execute(&mut self.connection)
|
||||
.unwrap();
|
||||
|
||||
users
|
||||
.filter(username.eq(user)) // filter by the requested username, not the column against itself
|
||||
.load::<User>(&mut self.connection)
|
||||
.unwrap()
|
||||
.first()
|
||||
.cloned()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn get_user(&mut self, user: &str, pass: &str) -> Option<User> {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
match users
|
||||
.filter(username.eq(user))
|
||||
.load::<User>(&mut self.connection)
|
||||
.unwrap_or_default()
|
||||
.first()
|
||||
{
|
||||
Some(u) if verify(pass, &u.password).unwrap_or(false) => Some(u.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn user_exists(&mut self, user: &str) -> bool {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
!users
|
||||
.filter(username.eq(user))
|
||||
.load::<User>(&mut self.connection)
|
||||
.unwrap_or_default()
|
||||
.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn connect() -> SqliteConnection {
|
||||
let db_url = dotenv::var("DATABASE_URL").expect("DATABASE_URL must be set");
|
||||
SqliteConnection::establish(&db_url).expect("Error connecting to DB")
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DbError {
|
||||
pub kind: DbErrorKind,
|
||||
}
|
||||
// TODO: Should probably use Result here
|
||||
pub fn create_user(user: &str, pass: &str) -> Option<User> {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
impl DbError {
|
||||
fn new(kind: DbErrorKind) -> Self {
|
||||
DbError { kind }
|
||||
}
|
||||
|
||||
fn exists() -> Self {
|
||||
DbError::new(DbErrorKind::AlreadyExists)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum DbErrorKind {
|
||||
AlreadyExists,
|
||||
InsertError,
|
||||
QueryError,
|
||||
UpdateError,
|
||||
}
|
||||
|
||||
pub trait FavoriteDao: Sync + Send {
|
||||
fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError>;
|
||||
fn remove_favorite(&mut self, user_id: i32, favorite_path: String);
|
||||
fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError>;
|
||||
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>;
|
||||
fn get_all_paths(&mut self) -> Result<Vec<String>, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteFavoriteDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteFavoriteDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteFavoriteDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteFavoriteDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FavoriteDao for SqliteFavoriteDao {
|
||||
fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError> {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get FavoriteDao");
|
||||
|
||||
if favorites
|
||||
.filter(userid.eq(user_id).and(path.eq(&favorite_path)))
|
||||
.first::<Favorite>(connection.deref_mut())
|
||||
.is_err()
|
||||
{
|
||||
diesel::insert_into(favorites)
|
||||
.values(InsertFavorite {
|
||||
userid: &user_id,
|
||||
path: favorite_path,
|
||||
})
|
||||
.execute(connection.deref_mut())
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
} else {
|
||||
Err(DbError::exists())
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_favorite(&mut self, user_id: i32, favorite_path: String) {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
diesel::delete(favorites)
|
||||
.filter(userid.eq(user_id).and(path.eq(favorite_path)))
|
||||
.execute(self.connection.lock().unwrap().deref_mut())
|
||||
let hashed = hash(pass, DEFAULT_COST);
|
||||
if let Ok(hash) = hashed {
|
||||
let connection = connect();
|
||||
diesel::insert_into(users)
|
||||
.values(InsertUser {
|
||||
username: user,
|
||||
password: &hash,
|
||||
})
|
||||
.execute(&connection)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError> {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
favorites
|
||||
.filter(userid.eq(user_id))
|
||||
.load::<Favorite>(self.connection.lock().unwrap().deref_mut())
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
diesel::update(favorites.filter(path.eq(old_path)))
|
||||
.set(path.eq(new_path))
|
||||
.execute(self.connection.lock().unwrap().deref_mut())
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_all_paths(&mut self) -> Result<Vec<String>, DbError> {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
favorites
|
||||
.select(path)
|
||||
.distinct()
|
||||
.load(self.connection.lock().unwrap().deref_mut())
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ExifDao: Sync + Send {
|
||||
fn store_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
exif_data: InsertImageExif,
|
||||
) -> Result<ImageExif, DbError>;
|
||||
fn get_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_path: &str,
|
||||
) -> Result<Option<ImageExif>, DbError>;
|
||||
fn update_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
exif_data: InsertImageExif,
|
||||
) -> Result<ImageExif, DbError>;
|
||||
fn delete_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_path: &str,
|
||||
) -> Result<(), DbError>;
|
||||
fn get_all_with_date_taken(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<(String, i64)>, DbError>;
|
||||
|
||||
/// Batch load EXIF data for multiple file paths (single query)
|
||||
fn get_exif_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_paths: &[String],
|
||||
) -> Result<Vec<ImageExif>, DbError>;
|
||||
|
||||
/// Query files by EXIF criteria with optional filters
|
||||
fn query_by_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
camera_make: Option<&str>,
|
||||
camera_model: Option<&str>,
|
||||
lens_model: Option<&str>,
|
||||
gps_bounds: Option<(f64, f64, f64, f64)>, // (min_lat, max_lat, min_lon, max_lon)
|
||||
date_from: Option<i64>,
|
||||
date_to: Option<i64>,
|
||||
) -> Result<Vec<ImageExif>, DbError>;
|
||||
|
||||
/// Get distinct camera makes with counts
|
||||
fn get_camera_makes(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<(String, i64)>, DbError>;
|
||||
|
||||
/// Update file path in EXIF database
|
||||
fn update_file_path(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
old_path: &str,
|
||||
new_path: &str,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Get all file paths from EXIF database
|
||||
fn get_all_file_paths(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<String>, DbError>;
|
||||
|
||||
/// Get files sorted by date with optional pagination
|
||||
/// Returns (sorted_file_paths, total_count)
|
||||
fn get_files_sorted_by_date(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_paths: &[String],
|
||||
ascending: bool,
|
||||
limit: Option<i64>,
|
||||
offset: i64,
|
||||
) -> Result<(Vec<String>, i64), DbError>;
|
||||
|
||||
/// Get all photos with GPS coordinates
|
||||
/// Returns Vec<(file_path, latitude, longitude, date_taken)>
|
||||
fn get_all_with_gps(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
base_path: &str,
|
||||
recursive: bool,
|
||||
) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteExifDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteExifDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteExifDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteExifDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
match users
|
||||
.filter(username.eq(user))
|
||||
.load::<User>(&connection)
|
||||
.unwrap()
|
||||
.first()
|
||||
{
|
||||
Some(u) => Some(u.clone()),
|
||||
None => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl ExifDao for SqliteExifDao {
|
||||
fn store_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
exif_data: InsertImageExif,
|
||||
) -> Result<ImageExif, DbError> {
|
||||
trace_db_call(context, "insert", "store_exif", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
pub fn get_user(user: &str, pass: &str) -> Option<User> {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::insert_into(image_exif)
|
||||
.values(&exif_data)
|
||||
.execute(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Insert error"))?;
|
||||
|
||||
image_exif
|
||||
.filter(file_path.eq(&exif_data.file_path))
|
||||
.first::<ImageExif>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn get_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
path: &str,
|
||||
) -> Result<Option<ImageExif>, DbError> {
|
||||
trace_db_call(context, "query", "get_exif", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// Try both normalized (forward slash) and Windows (backslash) paths
|
||||
// since database may contain either format
|
||||
let normalized = path.replace('\\', "/");
|
||||
let windows_path = path.replace('/', "\\");
|
||||
|
||||
match image_exif
|
||||
.filter(file_path.eq(&normalized).or(file_path.eq(&windows_path)))
|
||||
.first::<ImageExif>(connection.deref_mut())
|
||||
{
|
||||
Ok(exif) => Ok(Some(exif)),
|
||||
Err(diesel::result::Error::NotFound) => Ok(None),
|
||||
Err(_) => Err(anyhow::anyhow!("Query error")),
|
||||
}
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn update_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
exif_data: InsertImageExif,
|
||||
) -> Result<ImageExif, DbError> {
|
||||
trace_db_call(context, "update", "update_exif", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::update(image_exif.filter(file_path.eq(&exif_data.file_path)))
|
||||
.set((
|
||||
camera_make.eq(&exif_data.camera_make),
|
||||
camera_model.eq(&exif_data.camera_model),
|
||||
lens_model.eq(&exif_data.lens_model),
|
||||
width.eq(&exif_data.width),
|
||||
height.eq(&exif_data.height),
|
||||
orientation.eq(&exif_data.orientation),
|
||||
gps_latitude.eq(&exif_data.gps_latitude),
|
||||
gps_longitude.eq(&exif_data.gps_longitude),
|
||||
gps_altitude.eq(&exif_data.gps_altitude),
|
||||
focal_length.eq(&exif_data.focal_length),
|
||||
aperture.eq(&exif_data.aperture),
|
||||
shutter_speed.eq(&exif_data.shutter_speed),
|
||||
iso.eq(&exif_data.iso),
|
||||
date_taken.eq(&exif_data.date_taken),
|
||||
last_modified.eq(&exif_data.last_modified),
|
||||
))
|
||||
.execute(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))?;
|
||||
|
||||
image_exif
|
||||
.filter(file_path.eq(&exif_data.file_path))
|
||||
.first::<ImageExif>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn delete_exif(&mut self, context: &opentelemetry::Context, path: &str) -> Result<(), DbError> {
|
||||
trace_db_call(context, "delete", "delete_exif", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
diesel::delete(image_exif.filter(file_path.eq(path)))
|
||||
.execute(self.connection.lock().unwrap().deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Delete error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_all_with_date_taken(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<(String, i64)>, DbError> {
|
||||
trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
image_exif
|
||||
.select((file_path, date_taken))
|
||||
.filter(date_taken.is_not_null())
|
||||
.load::<(String, Option<i64>)>(connection.deref_mut())
|
||||
.map(|records| {
|
||||
records
|
||||
.into_iter()
|
||||
.filter_map(|(path, dt)| dt.map(|ts| (path, ts)))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_exif_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_paths: &[String],
|
||||
) -> Result<Vec<ImageExif>, DbError> {
|
||||
trace_db_call(context, "query", "get_exif_batch", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
if file_paths.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
image_exif
|
||||
.filter(file_path.eq_any(file_paths))
|
||||
.load::<ImageExif>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn query_by_exif(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
camera_make_filter: Option<&str>,
|
||||
camera_model_filter: Option<&str>,
|
||||
lens_model_filter: Option<&str>,
|
||||
gps_bounds: Option<(f64, f64, f64, f64)>,
|
||||
date_from: Option<i64>,
|
||||
date_to: Option<i64>,
|
||||
) -> Result<Vec<ImageExif>, DbError> {
|
||||
trace_db_call(context, "query", "query_by_exif", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
let mut query = image_exif.into_boxed();
|
||||
|
||||
// Camera filters (case-insensitive partial match)
|
||||
if let Some(make) = camera_make_filter {
|
||||
query = query.filter(camera_make.like(format!("%{}%", make)));
|
||||
}
|
||||
if let Some(model) = camera_model_filter {
|
||||
query = query.filter(camera_model.like(format!("%{}%", model)));
|
||||
}
|
||||
if let Some(lens) = lens_model_filter {
|
||||
query = query.filter(lens_model.like(format!("%{}%", lens)));
|
||||
}
|
||||
|
||||
// GPS bounding box
|
||||
if let Some((min_lat, max_lat, min_lon, max_lon)) = gps_bounds {
|
||||
query = query
|
||||
.filter(gps_latitude.between(min_lat as f32, max_lat as f32))
|
||||
.filter(gps_longitude.between(min_lon as f32, max_lon as f32))
|
||||
.filter(gps_latitude.is_not_null())
|
||||
.filter(gps_longitude.is_not_null());
|
||||
}
|
||||
|
||||
// Date range
|
||||
if let Some(from) = date_from {
|
||||
query = query.filter(date_taken.ge(from));
|
||||
}
|
||||
if let Some(to) = date_to {
|
||||
query = query.filter(date_taken.le(to));
|
||||
}
|
||||
if date_from.is_some() || date_to.is_some() {
|
||||
query = query.filter(date_taken.is_not_null());
|
||||
}
|
||||
|
||||
query
|
||||
.load::<ImageExif>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_camera_makes(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<(String, i64)>, DbError> {
|
||||
trace_db_call(context, "query", "get_camera_makes", |_span| {
|
||||
use diesel::dsl::count;
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
image_exif
|
||||
.filter(camera_make.is_not_null())
|
||||
.group_by(camera_make)
|
||||
.select((camera_make, count(id)))
|
||||
.order(count(id).desc())
|
||||
.load::<(Option<String>, i64)>(connection.deref_mut())
|
||||
.map(|records| {
|
||||
records
|
||||
.into_iter()
|
||||
.filter_map(|(make, cnt)| make.map(|m| (m, cnt)))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn update_file_path(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
old_path: &str,
|
||||
new_path: &str,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "update", "update_file_path", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::update(image_exif.filter(file_path.eq(old_path)))
|
||||
.set(file_path.eq(new_path))
|
||||
.execute(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))?;
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn get_all_file_paths(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
) -> Result<Vec<String>, DbError> {
|
||||
trace_db_call(context, "query", "get_all_file_paths", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
image_exif
|
||||
.select(file_path)
|
||||
.load(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_files_sorted_by_date(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
file_paths: &[String],
|
||||
ascending: bool,
|
||||
limit: Option<i64>,
|
||||
offset: i64,
|
||||
) -> Result<(Vec<String>, i64), DbError> {
|
||||
trace_db_call(context, "query", "get_files_sorted_by_date", |span| {
|
||||
use diesel::dsl::count_star;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::Span;
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
span.set_attributes(vec![
|
||||
KeyValue::new("file_count", file_paths.len() as i64),
|
||||
KeyValue::new("ascending", ascending.to_string()),
|
||||
KeyValue::new("limit", limit.map(|l| l.to_string()).unwrap_or_default()),
|
||||
KeyValue::new("offset", offset.to_string()),
|
||||
]);
|
||||
|
||||
if file_paths.is_empty() {
|
||||
return Ok((Vec::new(), 0));
|
||||
}
|
||||
|
||||
let connection = &mut *self.connection.lock().unwrap();
|
||||
|
||||
// Get total count of files that have EXIF data
|
||||
let total_count: i64 = image_exif
|
||||
.filter(file_path.eq_any(file_paths))
|
||||
.select(count_star())
|
||||
.first(connection)
|
||||
.map_err(|_| anyhow::anyhow!("Count query error"))?;
|
||||
|
||||
// Build sorted query
|
||||
let mut query = image_exif.filter(file_path.eq_any(file_paths)).into_boxed();
|
||||
|
||||
// Apply sorting
|
||||
// Note: SQLite NULL handling varies - NULLs appear first for ASC, last for DESC by default
|
||||
if ascending {
|
||||
query = query.order(date_taken.asc());
|
||||
} else {
|
||||
query = query.order(date_taken.desc());
|
||||
}
|
||||
|
||||
// Apply pagination if requested
|
||||
if let Some(limit_val) = limit {
|
||||
query = query.limit(limit_val).offset(offset);
|
||||
}
|
||||
|
||||
// Execute and extract file paths
|
||||
let results: Vec<String> = query
|
||||
.select(file_path)
|
||||
.load::<String>(connection)
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))?;
|
||||
|
||||
Ok((results, total_count))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_all_with_gps(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
base_path: &str,
|
||||
recursive: bool,
|
||||
) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError> {
|
||||
trace_db_call(context, "query", "get_all_with_gps", |span| {
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::Span;
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
span.set_attributes(vec![
|
||||
KeyValue::new("base_path", base_path.to_string()),
|
||||
KeyValue::new("recursive", recursive.to_string()),
|
||||
]);
|
||||
|
||||
let connection = &mut *self.connection.lock().unwrap();
|
||||
|
||||
// Query all photos with non-null GPS coordinates
|
||||
let mut query = image_exif
|
||||
.filter(gps_latitude.is_not_null().and(gps_longitude.is_not_null()))
|
||||
.into_boxed();
|
||||
|
||||
// Apply path filtering
|
||||
// If base_path is empty or "/", return all GPS photos (no filter)
|
||||
// Otherwise filter by path prefix
|
||||
if !base_path.is_empty() && base_path != "/" {
|
||||
// Match base path as prefix (with wildcard)
|
||||
query = query.filter(file_path.like(format!("{}%", base_path)));
|
||||
|
||||
span.set_attribute(KeyValue::new("path_filter_applied", true));
|
||||
} else {
|
||||
span.set_attribute(KeyValue::new("path_filter_applied", false));
|
||||
span.set_attribute(KeyValue::new("returning_all_gps_photos", true));
|
||||
}
|
||||
|
||||
// Load full ImageExif records
|
||||
let results: Vec<ImageExif> = query
|
||||
.load::<ImageExif>(connection)
|
||||
.map_err(|e| anyhow::anyhow!("GPS query error: {}", e))?;
|
||||
|
||||
// Convert to tuple format (path, lat, lon, date_taken)
|
||||
// Filter out any rows where GPS is still None (shouldn't happen due to filter)
|
||||
// Cast f32 GPS values to f64 for API compatibility
|
||||
let filtered: Vec<(String, f64, f64, Option<i64>)> = results
|
||||
.into_iter()
|
||||
.filter_map(|exif| {
|
||||
if let (Some(lat_val), Some(lon_val)) = (exif.gps_latitude, exif.gps_longitude)
|
||||
{
|
||||
Some((
|
||||
exif.file_path,
|
||||
lat_val as f64,
|
||||
lon_val as f64,
|
||||
exif.date_taken,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
span.set_attribute(KeyValue::new("result_count", filtered.len() as i64));
|
||||
|
||||
Ok(filtered)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
match users
|
||||
.filter(username.eq(user))
|
||||
.load::<User>(&connect())
|
||||
.unwrap_or_default()
|
||||
.first()
|
||||
{
|
||||
Some(u) if verify(pass, &u.password).unwrap_or(false) => Some(u.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn user_exists(name: &str) -> bool {
|
||||
use schema::users::dsl::*;
|
||||
|
||||
users
|
||||
.filter(username.eq(name))
|
||||
.load::<User>(&connect())
|
||||
.unwrap_or_default()
|
||||
.first()
|
||||
.is_some()
|
||||
}
|
||||
|
||||
pub fn add_favorite(user_id: i32, favorite_path: String) {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
let connection = connect();
|
||||
diesel::insert_into(favorites)
|
||||
.values(InsertFavorite {
|
||||
userid: &user_id,
|
||||
path: &favorite_path,
|
||||
})
|
||||
.execute(&connection)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn get_favorites(user_id: i32) -> Vec<Favorite> {
|
||||
use schema::favorites::dsl::*;
|
||||
|
||||
favorites
|
||||
.filter(userid.eq(user_id))
|
||||
.load::<Favorite>(&connect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use crate::database::schema::{favorites, image_exif, photo_insights, users};
|
||||
use crate::database::schema::{favorites, users};
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Insertable)]
|
||||
#[diesel(table_name = users)]
|
||||
#[table_name = "users"]
|
||||
pub struct InsertUser<'a> {
|
||||
pub username: &'a str,
|
||||
pub password: &'a str,
|
||||
@@ -17,7 +17,7 @@ pub struct User {
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
#[diesel(table_name = favorites)]
|
||||
#[table_name = "favorites"]
|
||||
pub struct InsertFavorite<'a> {
|
||||
pub userid: &'a i32,
|
||||
pub path: &'a str,
|
||||
@@ -29,67 +29,3 @@ pub struct Favorite {
|
||||
pub userid: i32,
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
#[diesel(table_name = image_exif)]
|
||||
pub struct InsertImageExif {
|
||||
pub file_path: String,
|
||||
pub camera_make: Option<String>,
|
||||
pub camera_model: Option<String>,
|
||||
pub lens_model: Option<String>,
|
||||
pub width: Option<i32>,
|
||||
pub height: Option<i32>,
|
||||
pub orientation: Option<i32>,
|
||||
pub gps_latitude: Option<f32>,
|
||||
pub gps_longitude: Option<f32>,
|
||||
pub gps_altitude: Option<f32>,
|
||||
pub focal_length: Option<f32>,
|
||||
pub aperture: Option<f32>,
|
||||
pub shutter_speed: Option<String>,
|
||||
pub iso: Option<i32>,
|
||||
pub date_taken: Option<i64>,
|
||||
pub created_time: i64,
|
||||
pub last_modified: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||
pub struct ImageExif {
|
||||
pub id: i32,
|
||||
pub file_path: String,
|
||||
pub camera_make: Option<String>,
|
||||
pub camera_model: Option<String>,
|
||||
pub lens_model: Option<String>,
|
||||
pub width: Option<i32>,
|
||||
pub height: Option<i32>,
|
||||
pub orientation: Option<i32>,
|
||||
pub gps_latitude: Option<f32>,
|
||||
pub gps_longitude: Option<f32>,
|
||||
pub gps_altitude: Option<f32>,
|
||||
pub focal_length: Option<f32>,
|
||||
pub aperture: Option<f32>,
|
||||
pub shutter_speed: Option<String>,
|
||||
pub iso: Option<i32>,
|
||||
pub date_taken: Option<i64>,
|
||||
pub created_time: i64,
|
||||
pub last_modified: i64,
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
#[diesel(table_name = photo_insights)]
|
||||
pub struct InsertPhotoInsight {
|
||||
pub file_path: String,
|
||||
pub title: String,
|
||||
pub summary: String,
|
||||
pub generated_at: i64,
|
||||
pub model_version: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||
pub struct PhotoInsight {
|
||||
pub id: i32,
|
||||
pub file_path: String,
|
||||
pub title: String,
|
||||
pub summary: String,
|
||||
pub generated_at: i64,
|
||||
pub model_version: String,
|
||||
}
|
||||
|
||||
@@ -1,37 +1,4 @@
|
||||
// @generated automatically by Diesel CLI.
|
||||
|
||||
diesel::table! {
|
||||
calendar_events (id) {
|
||||
id -> Integer,
|
||||
event_uid -> Nullable<Text>,
|
||||
summary -> Text,
|
||||
description -> Nullable<Text>,
|
||||
location -> Nullable<Text>,
|
||||
start_time -> BigInt,
|
||||
end_time -> BigInt,
|
||||
all_day -> Bool,
|
||||
organizer -> Nullable<Text>,
|
||||
attendees -> Nullable<Text>,
|
||||
embedding -> Nullable<Binary>,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
daily_conversation_summaries (id) {
|
||||
id -> Integer,
|
||||
date -> Text,
|
||||
contact -> Text,
|
||||
summary -> Text,
|
||||
message_count -> Integer,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
table! {
|
||||
favorites (id) {
|
||||
id -> Integer,
|
||||
userid -> Integer,
|
||||
@@ -39,112 +6,7 @@ diesel::table! {
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
image_exif (id) {
|
||||
id -> Integer,
|
||||
file_path -> Text,
|
||||
camera_make -> Nullable<Text>,
|
||||
camera_model -> Nullable<Text>,
|
||||
lens_model -> Nullable<Text>,
|
||||
width -> Nullable<Integer>,
|
||||
height -> Nullable<Integer>,
|
||||
orientation -> Nullable<Integer>,
|
||||
gps_latitude -> Nullable<Float>,
|
||||
gps_longitude -> Nullable<Float>,
|
||||
gps_altitude -> Nullable<Float>,
|
||||
focal_length -> Nullable<Float>,
|
||||
aperture -> Nullable<Float>,
|
||||
shutter_speed -> Nullable<Text>,
|
||||
iso -> Nullable<Integer>,
|
||||
date_taken -> Nullable<BigInt>,
|
||||
created_time -> BigInt,
|
||||
last_modified -> BigInt,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
knowledge_embeddings (id) {
|
||||
id -> Integer,
|
||||
keyword -> Text,
|
||||
description -> Text,
|
||||
category -> Nullable<Text>,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
location_history (id) {
|
||||
id -> Integer,
|
||||
timestamp -> BigInt,
|
||||
latitude -> Float,
|
||||
longitude -> Float,
|
||||
accuracy -> Nullable<Integer>,
|
||||
activity -> Nullable<Text>,
|
||||
activity_confidence -> Nullable<Integer>,
|
||||
place_name -> Nullable<Text>,
|
||||
place_category -> Nullable<Text>,
|
||||
embedding -> Nullable<Binary>,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
message_embeddings (id) {
|
||||
id -> Integer,
|
||||
contact -> Text,
|
||||
body -> Text,
|
||||
timestamp -> BigInt,
|
||||
is_sent -> Bool,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
photo_insights (id) {
|
||||
id -> Integer,
|
||||
file_path -> Text,
|
||||
title -> Text,
|
||||
summary -> Text,
|
||||
generated_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
search_history (id) {
|
||||
id -> Integer,
|
||||
timestamp -> BigInt,
|
||||
query -> Text,
|
||||
search_engine -> Nullable<Text>,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
tagged_photo (id) {
|
||||
id -> Integer,
|
||||
photo_name -> Text,
|
||||
tag_id -> Integer,
|
||||
created_time -> BigInt,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
tags (id) {
|
||||
id -> Integer,
|
||||
name -> Text,
|
||||
created_time -> BigInt,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
table! {
|
||||
users (id) {
|
||||
id -> Integer,
|
||||
username -> Text,
|
||||
@@ -152,19 +14,4 @@ diesel::table! {
|
||||
}
|
||||
}
|
||||
|
||||
diesel::joinable!(tagged_photo -> tags (tag_id));
|
||||
|
||||
diesel::allow_tables_to_appear_in_same_query!(
|
||||
calendar_events,
|
||||
daily_conversation_summaries,
|
||||
favorites,
|
||||
image_exif,
|
||||
knowledge_embeddings,
|
||||
location_history,
|
||||
message_embeddings,
|
||||
photo_insights,
|
||||
search_history,
|
||||
tagged_photo,
|
||||
tags,
|
||||
users,
|
||||
);
|
||||
allow_tables_to_appear_in_same_query!(favorites, users,);
|
||||
|
||||
@@ -1,516 +0,0 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a search history record
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct SearchRecord {
|
||||
pub id: i32,
|
||||
pub timestamp: i64,
|
||||
pub query: String,
|
||||
pub search_engine: Option<String>,
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
/// Data for inserting a new search record
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertSearchRecord {
|
||||
pub timestamp: i64,
|
||||
pub query: String,
|
||||
pub search_engine: Option<String>,
|
||||
pub embedding: Vec<f32>, // 768-dim, REQUIRED
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
pub trait SearchHistoryDao: Sync + Send {
|
||||
/// Store search with embedding
|
||||
fn store_search(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
search: InsertSearchRecord,
|
||||
) -> Result<SearchRecord, DbError>;
|
||||
|
||||
/// Batch insert searches
|
||||
fn store_searches_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
searches: Vec<InsertSearchRecord>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Find searches in time range (for temporal context)
|
||||
fn find_searches_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<SearchRecord>, DbError>;
|
||||
|
||||
/// Find semantically similar searches (PRIMARY - embeddings shine here)
|
||||
fn find_similar_searches(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<SearchRecord>, DbError>;
|
||||
|
||||
/// Hybrid: Time window + semantic ranking
|
||||
fn find_relevant_searches_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<SearchRecord>, DbError>;
|
||||
|
||||
/// Deduplication check
|
||||
fn search_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
query: &str,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of search records
|
||||
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteSearchHistoryDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteSearchHistoryDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteSearchHistoryDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteSearchHistoryDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
    fn serialize_vector(vec: &[f32]) -> Vec<u8> {
        use zerocopy::IntoBytes;
        vec.as_bytes().to_vec()
    }

    fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
        if !bytes.len().is_multiple_of(4) {
            return Err(DbError::new(DbErrorKind::QueryError));
        }

        let count = bytes.len() / 4;
        let mut vec = Vec::with_capacity(count);

        for chunk in bytes.chunks_exact(4) {
            let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            vec.push(float);
        }

        Ok(vec)
    }

    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if magnitude_a == 0.0 || magnitude_b == 0.0 {
            return 0.0;
        }

        dot_product / (magnitude_a * magnitude_b)
    }
}
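The helper block above stores each 768-dimension embedding as raw little-endian f32 bytes and ranks candidates in memory by cosine similarity. A self-contained illustration of that round trip and of the ranking step used by find_similar_searches; the demo function and its values are made up, not part of the codebase:

// Hypothetical demo, not repository code: shows only the byte layout and the ranking step.
fn embedding_roundtrip_demo() {
    let v: Vec<f32> = vec![0.25, -1.5, 3.0];
    let bytes: Vec<u8> = v.iter().flat_map(|f| f.to_le_bytes()).collect();
    // deserialize_vector reads the same little-endian layout back.
    let decoded: Vec<f32> = bytes
        .chunks_exact(4)
        .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
        .collect();
    assert_eq!(v, decoded);

    // Cosine ranking: score every stored embedding against the query, highest first.
    let query = [1.0_f32, 0.0, 0.0];
    let stored = [[1.0_f32, 0.0, 0.0], [0.0, 1.0, 0.0]];
    let mut scored: Vec<(f32, usize)> = stored
        .iter()
        .enumerate()
        .map(|(i, e)| {
            let dot: f32 = query.iter().zip(e.iter()).map(|(x, y)| x * y).sum();
            let na = query.iter().map(|x| x * x).sum::<f32>().sqrt();
            let nb = e.iter().map(|x| x * x).sum::<f32>().sqrt();
            (dot / (na * nb), i)
        })
        .collect();
    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
    assert_eq!(scored[0].1, 0); // the identical vector ranks first
}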
|
||||
#[derive(QueryableByName)]
|
||||
struct SearchRecordWithVectorRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
timestamp: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
query: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
search_engine: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Binary)]
|
||||
embedding: Vec<u8>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
source_file: Option<String>,
|
||||
}
|
||||
|
||||
impl SearchRecordWithVectorRow {
|
||||
fn to_search_record(&self) -> SearchRecord {
|
||||
SearchRecord {
|
||||
id: self.id,
|
||||
timestamp: self.timestamp,
|
||||
query: self.query.clone(),
|
||||
search_engine: self.search_engine.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
}
|
||||
|
||||
impl SearchHistoryDao for SqliteSearchHistoryDao {
|
||||
fn store_search(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
search: InsertSearchRecord,
|
||||
) -> Result<SearchRecord, DbError> {
|
||||
trace_db_call(context, "insert", "store_search", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
// Validate embedding dimensions (REQUIRED for searches)
|
||||
if search.embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
search.embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
let embedding_bytes = Self::serialize_vector(&search.embedding);
|
||||
|
||||
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+query)
|
||||
diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO search_history
|
||||
(timestamp, query, search_engine, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
|
||||
.bind::<diesel::sql_types::Text, _>(&search.query)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.search_engine)
|
||||
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(SearchRecord {
|
||||
id: row_id,
|
||||
timestamp: search.timestamp,
|
||||
query: search.query,
|
||||
search_engine: search.search_engine,
|
||||
created_at: search.created_at,
|
||||
source_file: search.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn store_searches_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
searches: Vec<InsertSearchRecord>,
|
||||
) -> Result<usize, DbError> {
|
||||
trace_db_call(context, "insert", "store_searches_batch", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
let mut inserted = 0;
|
||||
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for search in searches {
|
||||
// Validate embedding (REQUIRED)
|
||||
if search.embedding.len() != 768 {
|
||||
log::warn!(
|
||||
"Skipping search with invalid embedding dimensions: {}",
|
||||
search.embedding.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let embedding_bytes = Self::serialize_vector(&search.embedding);
|
||||
|
||||
let rows_affected = diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO search_history
|
||||
(timestamp, query, search_engine, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
|
||||
.bind::<diesel::sql_types::Text, _>(&search.query)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&search.search_engine,
|
||||
)
|
||||
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&search.source_file,
|
||||
)
|
||||
.execute(conn)
|
||||
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
|
||||
|
||||
if rows_affected > 0 {
|
||||
inserted += 1;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
|
||||
|
||||
Ok(inserted)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn find_searches_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<SearchRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_searches_in_range", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
|
||||
FROM search_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY timestamp DESC",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_similar_searches(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<SearchRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_similar_searches", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
if query_embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
// Load all searches with embeddings
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
|
||||
FROM search_history",
|
||||
)
|
||||
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Compute similarities
|
||||
let mut scored_searches: Vec<(f32, SearchRecord)> = results
|
||||
.into_iter()
|
||||
.filter_map(|row| {
|
||||
if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
|
||||
let similarity = Self::cosine_similarity(query_embedding, &emb);
|
||||
Some((similarity, row.to_search_record()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending
|
||||
scored_searches
|
||||
.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
log::info!("Found {} similar searches", scored_searches.len());
|
||||
if !scored_searches.is_empty() {
|
||||
log::info!(
|
||||
"Top similarity: {:.4} for query: '{}'",
|
||||
scored_searches[0].0,
|
||||
scored_searches[0].1.query
|
||||
);
|
||||
}
|
||||
|
||||
Ok(scored_searches
|
||||
.into_iter()
|
||||
.take(limit)
|
||||
.map(|(_, search)| search)
|
||||
.collect())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_relevant_searches_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_relevant_searches_hybrid", |_span| {
            let window_seconds = time_window_days * 86400;
            let start_ts = center_timestamp - window_seconds;
            let end_ts = center_timestamp + window_seconds;

            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            // Step 1: Time-based filter (fast, indexed)
            let searches_in_range = diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2",
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Step 2: If query embedding provided, rank by semantic similarity
            if let Some(query_emb) = query_embedding {
                if query_emb.len() != 768 {
                    return Err(anyhow::anyhow!(
                        "Invalid query embedding dimensions: {} (expected 768)",
                        query_emb.len()
                    ));
                }

                let mut scored_searches: Vec<(f32, SearchRecord)> = searches_in_range
                    .into_iter()
                    .filter_map(|row| {
                        if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
                            let similarity = Self::cosine_similarity(query_emb, &emb);
                            Some((similarity, row.to_search_record()))
                        } else {
                            None
                        }
                    })
                    .collect();

                // Sort by similarity descending
                scored_searches
                    .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

                log::info!(
                    "Hybrid query: {} searches in time range, ranked by similarity",
                    scored_searches.len()
                );
                if !scored_searches.is_empty() {
                    log::info!(
                        "Top similarity: {:.4} for '{}'",
                        scored_searches[0].0,
                        scored_searches[0].1.query
                    );
                }

                Ok(scored_searches
                    .into_iter()
                    .take(limit)
                    .map(|(_, search)| search)
                    .collect())
            } else {
                // No semantic ranking, just return time-sorted (most recent first)
                log::info!(
                    "Time-only query: {} searches in range",
                    searches_in_range.len()
                );
                Ok(searches_in_range
                    .into_iter()
                    .take(limit)
                    .map(|r| r.to_search_record())
                    .collect())
            }
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn search_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        query: &str,
    ) -> Result<bool, DbError> {
        trace_db_call(context, "query", "search_exists", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::Integer)]
                count: i32,
            }

            let result: CountResult = diesel::sql_query(
                "SELECT COUNT(*) as count FROM search_history WHERE timestamp = ?1 AND query = ?2",
            )
            .bind::<diesel::sql_types::BigInt, _>(timestamp)
            .bind::<diesel::sql_types::Text, _>(query)
            .get_result(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count > 0)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
        trace_db_call(context, "query", "get_search_count", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            #[derive(QueryableByName)]
            struct CountResult {
                #[diesel(sql_type = diesel::sql_types::BigInt)]
                count: i64,
            }

            let result: CountResult =
                diesel::sql_query("SELECT COUNT(*) as count FROM search_history")
                    .get_result(conn.deref_mut())
                    .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            Ok(result.count)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
}
src/error.rs (14 lines deleted)
@@ -1,14 +0,0 @@
use actix_web::{error::InternalError, http::StatusCode};

pub trait IntoHttpError<T> {
    fn into_http_internal_err(self) -> Result<T, actix_web::Error>;
}

impl<T> IntoHttpError<T> for Result<T, anyhow::Error> {
    fn into_http_internal_err(self) -> Result<T, actix_web::Error> {
        self.map_err(|e| {
            log::error!("Map to err: {:?}", e);
            InternalError::new(e, StatusCode::INTERNAL_SERVER_ERROR).into()
        })
    }
}
src/exif.rs (319 lines deleted)
@@ -1,319 +0,0 @@
use std::fs::File;
use std::io::BufReader;
use std::path::Path;

use anyhow::{Result, anyhow};
use exif::{In, Reader, Tag, Value};
use log::debug;
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExifData {
    pub camera_make: Option<String>,
    pub camera_model: Option<String>,
    pub lens_model: Option<String>,
    pub width: Option<i32>,
    pub height: Option<i32>,
    pub orientation: Option<i32>,
    pub gps_latitude: Option<f64>,
    pub gps_longitude: Option<f64>,
    pub gps_altitude: Option<f64>,
    pub focal_length: Option<f64>,
    pub aperture: Option<f64>,
    pub shutter_speed: Option<String>,
    pub iso: Option<i32>,
    pub date_taken: Option<i64>,
}

pub fn supports_exif(path: &Path) -> bool {
    if let Some(ext) = path.extension() {
        let ext_lower = ext.to_string_lossy().to_lowercase();
        matches!(
            ext_lower.as_str(),
            // JPEG formats
            "jpg" | "jpeg" |
            // TIFF and RAW formats based on TIFF
            "tiff" | "tif" | "nef" | "cr2" | "cr3" | "arw" | "dng" | "raf" | "orf" | "rw2" | "pef" | "srw" |
            // HEIF and variants
            "heif" | "heic" | "avif" |
            // PNG
            "png" |
            // WebP
            "webp"
        )
    } else {
        false
    }
}

pub fn extract_exif_from_path(path: &Path) -> Result<ExifData> {
    debug!("Extracting EXIF from: {:?}", path);

    if !supports_exif(path) {
        return Err(anyhow!("File type does not support EXIF"));
    }

    let file = File::open(path)?;
    let mut bufreader = BufReader::new(file);

    let exifreader = Reader::new();
    let exif = exifreader.read_from_container(&mut bufreader)?;

    let mut data = ExifData::default();

    for field in exif.fields() {
        match field.tag {
            Tag::Make => {
                data.camera_make = get_string_value(field);
            }
            Tag::Model => {
                data.camera_model = get_string_value(field);
            }
            Tag::LensModel => {
                data.lens_model = get_string_value(field);
            }
            Tag::PixelXDimension | Tag::ImageWidth => {
                if data.width.is_none() {
                    data.width = get_u32_value(field).map(|v| v as i32);
                }
            }
            Tag::PixelYDimension | Tag::ImageLength => {
                if data.height.is_none() {
                    data.height = get_u32_value(field).map(|v| v as i32);
                }
            }
            Tag::Orientation => {
                data.orientation = get_u32_value(field).map(|v| v as i32);
            }
            Tag::FocalLength => {
                data.focal_length = get_rational_value(field);
            }
            Tag::FNumber => {
                data.aperture = get_rational_value(field);
            }
            Tag::ExposureTime => {
                data.shutter_speed = get_rational_string(field);
            }
            Tag::PhotographicSensitivity | Tag::ISOSpeed => {
                if data.iso.is_none() {
                    data.iso = get_u32_value(field).map(|v| v as i32);
                }
            }
            Tag::DateTime | Tag::DateTimeOriginal => {
                if data.date_taken.is_none() {
                    data.date_taken = parse_exif_datetime(field);
                }
            }
            _ => {}
        }
    }

    // Extract GPS coordinates
    if let Some(lat) = extract_gps_coordinate(&exif, Tag::GPSLatitude, Tag::GPSLatitudeRef) {
        data.gps_latitude = Some(lat);
    }
    if let Some(lon) = extract_gps_coordinate(&exif, Tag::GPSLongitude, Tag::GPSLongitudeRef) {
        data.gps_longitude = Some(lon);
    }
    if let Some(alt) = extract_gps_altitude(&exif) {
        data.gps_altitude = Some(alt);
    }

    debug!("Extracted EXIF data: {:?}", data);
    Ok(data)
}

fn get_string_value(field: &exif::Field) -> Option<String> {
    match &field.value {
        Value::Ascii(vec) => {
            if let Some(bytes) = vec.first() {
                String::from_utf8(bytes.to_vec())
                    .ok()
                    .map(|s| s.trim_end_matches('\0').to_string())
            } else {
                None
            }
        }
        _ => {
            let display = field.display_value().to_string();
            if display.is_empty() {
                None
            } else {
                Some(display)
            }
        }
    }
}

fn get_u32_value(field: &exif::Field) -> Option<u32> {
    match &field.value {
        Value::Short(vec) => vec.first().map(|&v| v as u32),
        Value::Long(vec) => vec.first().copied(),
        _ => None,
    }
}

fn get_rational_value(field: &exif::Field) -> Option<f64> {
    match &field.value {
        Value::Rational(vec) => {
            if let Some(rational) = vec.first() {
                if rational.denom == 0 {
                    None
                } else {
                    Some(rational.num as f64 / rational.denom as f64)
                }
            } else {
                None
            }
        }
        _ => None,
    }
}

fn get_rational_string(field: &exif::Field) -> Option<String> {
    match &field.value {
        Value::Rational(vec) => {
            if let Some(rational) = vec.first() {
                if rational.denom == 0 {
                    None
                } else if rational.num < rational.denom {
                    Some(format!("{}/{}", rational.num, rational.denom))
                } else {
                    let value = rational.num as f64 / rational.denom as f64;
                    Some(format!("{:.2}", value))
                }
            } else {
                None
            }
        }
        _ => None,
    }
}

fn parse_exif_datetime(field: &exif::Field) -> Option<i64> {
    if let Some(datetime_str) = get_string_value(field) {
        use chrono::NaiveDateTime;

        // EXIF datetime format: "YYYY:MM:DD HH:MM:SS"
        // Note: EXIF dates are local time without timezone info
        // We return the timestamp as if it were UTC, and the client will display it as-is
        NaiveDateTime::parse_from_str(&datetime_str, "%Y:%m:%d %H:%M:%S")
            .ok()
            .map(|dt| dt.and_utc().timestamp())
    } else {
        None
    }
}

fn extract_gps_coordinate(exif: &exif::Exif, coord_tag: Tag, ref_tag: Tag) -> Option<f64> {
    let coord_field = exif.get_field(coord_tag, In::PRIMARY)?;
    let ref_field = exif.get_field(ref_tag, In::PRIMARY)?;

    let coordinates = match &coord_field.value {
        Value::Rational(vec) => {
            if vec.len() < 3 {
                return None;
            }
            let degrees = vec[0].num as f64 / vec[0].denom as f64;
            let minutes = vec[1].num as f64 / vec[1].denom as f64;
            let seconds = vec[2].num as f64 / vec[2].denom as f64;
            degrees + (minutes / 60.0) + (seconds / 3600.0)
        }
        _ => return None,
    };

    let reference = get_string_value(ref_field)?;
    let sign = if reference.starts_with('S') || reference.starts_with('W') {
        -1.0
    } else {
        1.0
    };

    Some(coordinates * sign)
}

fn extract_gps_altitude(exif: &exif::Exif) -> Option<f64> {
    let alt_field = exif.get_field(Tag::GPSAltitude, In::PRIMARY)?;

    match &alt_field.value {
        Value::Rational(vec) => {
            if let Some(rational) = vec.first() {
                if rational.denom == 0 {
                    None
                } else {
                    let altitude = rational.num as f64 / rational.denom as f64;

                    // Check if below sea level
                    if let Some(ref_field) = exif.get_field(Tag::GPSAltitudeRef, In::PRIMARY)
                        && let Some(ref_val) = get_u32_value(ref_field)
                        && ref_val == 1
                    {
                        return Some(-altitude);
                    }

                    Some(altitude)
                }
            } else {
                None
            }
        }
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_supports_exif_jpeg() {
        assert!(supports_exif(Path::new("test.jpg")));
        assert!(supports_exif(Path::new("test.jpeg")));
        assert!(supports_exif(Path::new("test.JPG")));
    }

    #[test]
    fn test_supports_exif_raw_formats() {
        assert!(supports_exif(Path::new("test.nef"))); // Nikon
        assert!(supports_exif(Path::new("test.NEF")));
        assert!(supports_exif(Path::new("test.cr2"))); // Canon
        assert!(supports_exif(Path::new("test.cr3"))); // Canon
        assert!(supports_exif(Path::new("test.arw"))); // Sony
        assert!(supports_exif(Path::new("test.dng"))); // Adobe DNG
    }

    #[test]
    fn test_supports_exif_tiff() {
        assert!(supports_exif(Path::new("test.tiff")));
        assert!(supports_exif(Path::new("test.tif")));
        assert!(supports_exif(Path::new("test.TIFF")));
    }

    #[test]
    fn test_supports_exif_heif() {
        assert!(supports_exif(Path::new("test.heif")));
        assert!(supports_exif(Path::new("test.heic")));
        assert!(supports_exif(Path::new("test.avif")));
    }

    #[test]
    fn test_supports_exif_png_webp() {
        assert!(supports_exif(Path::new("test.png")));
        assert!(supports_exif(Path::new("test.PNG")));
        assert!(supports_exif(Path::new("test.webp")));
        assert!(supports_exif(Path::new("test.WEBP")));
    }

    #[test]
    fn test_supports_exif_unsupported() {
        assert!(!supports_exif(Path::new("test.mp4")));
        assert!(!supports_exif(Path::new("test.mov")));
        assert!(!supports_exif(Path::new("test.txt")));
        assert!(!supports_exif(Path::new("test.gif")));
    }

    #[test]
    fn test_supports_exif_no_extension() {
        assert!(!supports_exif(Path::new("test")));
    }
}
@@ -1,88 +0,0 @@
use std::path::Path;
use walkdir::DirEntry;

/// Supported image file extensions
pub const IMAGE_EXTENSIONS: &[&str] = &[
    "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef",
];

/// Supported video file extensions
pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];

/// Check if a path has an image extension
pub fn is_image_file(path: &Path) -> bool {
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
    } else {
        false
    }
}

/// Check if a path has a video extension
pub fn is_video_file(path: &Path) -> bool {
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
    } else {
        false
    }
}

/// Check if a path has a supported media extension (image or video)
pub fn is_media_file(path: &Path) -> bool {
    is_image_file(path) || is_video_file(path)
}

/// Check if a DirEntry is an image file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_image(entry: &DirEntry) -> bool {
    is_image_file(entry.path())
}

/// Check if a DirEntry is a video file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_video(entry: &DirEntry) -> bool {
    is_video_file(entry.path())
}

/// Check if a DirEntry is a media file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_media(entry: &DirEntry) -> bool {
    is_media_file(entry.path())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn test_is_image_file() {
        assert!(is_image_file(Path::new("photo.jpg")));
        assert!(is_image_file(Path::new("photo.JPG")));
        assert!(is_image_file(Path::new("photo.png")));
        assert!(is_image_file(Path::new("photo.nef")));
        assert!(!is_image_file(Path::new("video.mp4")));
        assert!(!is_image_file(Path::new("document.txt")));
    }

    #[test]
    fn test_is_video_file() {
        assert!(is_video_file(Path::new("video.mp4")));
        assert!(is_video_file(Path::new("video.MP4")));
        assert!(is_video_file(Path::new("video.mov")));
        assert!(is_video_file(Path::new("video.avi")));
        assert!(!is_video_file(Path::new("photo.jpg")));
        assert!(!is_video_file(Path::new("document.txt")));
    }

    #[test]
    fn test_is_media_file() {
        assert!(is_media_file(Path::new("photo.jpg")));
        assert!(is_media_file(Path::new("video.mp4")));
        assert!(is_media_file(Path::new("photo.PNG")));
        assert!(!is_media_file(Path::new("document.txt")));
        assert!(!is_media_file(Path::new("no_extension")));
    }
}
src/files.rs (1715 lines): file diff suppressed because it is too large
src/geo.rs (121 lines deleted)
@@ -1,121 +0,0 @@
/// Geographic calculation utilities for GPS-based search
use std::f64;

/// Calculate distance between two GPS coordinates using the Haversine formula.
/// Returns distance in kilometers.
///
/// # Arguments
/// * `lat1` - Latitude of first point in decimal degrees
/// * `lon1` - Longitude of first point in decimal degrees
/// * `lat2` - Latitude of second point in decimal degrees
/// * `lon2` - Longitude of second point in decimal degrees
///
/// # Example
/// ```
/// use image_api::geo::haversine_distance;
/// let distance = haversine_distance(37.7749, -122.4194, 34.0522, -118.2437);
/// // Distance between San Francisco and Los Angeles (~559 km)
/// ```
pub fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const EARTH_RADIUS_KM: f64 = 6371.0;

    let lat1_rad = lat1.to_radians();
    let lat2_rad = lat2.to_radians();
    let delta_lat = (lat2 - lat1).to_radians();
    let delta_lon = (lon2 - lon1).to_radians();

    let a = (delta_lat / 2.0).sin().powi(2)
        + lat1_rad.cos() * lat2_rad.cos() * (delta_lon / 2.0).sin().powi(2);
    let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());

    EARTH_RADIUS_KM * c
}

/// Calculate bounding box for GPS radius query.
/// Returns (min_lat, max_lat, min_lon, max_lon) that encompasses the search radius.
///
/// This is used as a fast first-pass filter for GPS queries, narrowing down
/// candidates before applying the more expensive Haversine distance calculation.
///
/// # Arguments
/// * `lat` - Center latitude in decimal degrees
/// * `lon` - Center longitude in decimal degrees
/// * `radius_km` - Search radius in kilometers
///
/// # Returns
/// A tuple of (min_lat, max_lat, min_lon, max_lon) in decimal degrees
pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
    const EARTH_RADIUS_KM: f64 = 6371.0;

    // Calculate latitude delta (same at all latitudes)
    let lat_delta = (radius_km / EARTH_RADIUS_KM) * (180.0 / f64::consts::PI);

    // Calculate longitude delta (varies with latitude)
    let lon_delta = lat_delta / lat.to_radians().cos();

    (
        lat - lat_delta, // min_lat
        lat + lat_delta, // max_lat
        lon - lon_delta, // min_lon
        lon + lon_delta, // max_lon
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_haversine_distance_sf_to_la() {
        // San Francisco to Los Angeles
        let distance = haversine_distance(37.7749, -122.4194, 34.0522, -118.2437);
        // Should be approximately 559 km
        assert!(
            (distance - 559.0).abs() < 10.0,
            "Distance should be ~559km, got {}",
            distance
        );
    }

    #[test]
    fn test_haversine_distance_same_point() {
        // Same point should have zero distance
        let distance = haversine_distance(37.7749, -122.4194, 37.7749, -122.4194);
        assert!(
            distance < 0.001,
            "Same point should have ~0 distance, got {}",
            distance
        );
    }

    #[test]
    fn test_gps_bounding_box() {
        // Test bounding box calculation for 10km radius around San Francisco
        let (min_lat, max_lat, min_lon, max_lon) = gps_bounding_box(37.7749, -122.4194, 10.0);

        // Verify the bounds are reasonable
        assert!(min_lat < 37.7749, "min_lat should be less than center");
        assert!(max_lat > 37.7749, "max_lat should be greater than center");
        assert!(min_lon < -122.4194, "min_lon should be less than center");
        assert!(max_lon > -122.4194, "max_lon should be greater than center");

        // Verify bounds span roughly the right distance
        let lat_span = max_lat - min_lat;
        assert!(
            lat_span > 0.1 && lat_span < 0.3,
            "Latitude span should be reasonable for 10km"
        );
    }

    #[test]
    fn test_haversine_distance_across_equator() {
        // Test across equator
        let distance = haversine_distance(1.0, 0.0, -1.0, 0.0);
        // Should be approximately 222 km
        assert!(
            (distance - 222.0).abs() < 5.0,
            "Distance should be ~222km, got {}",
            distance
        );
    }
}
src/lib.rs (45 lines deleted)
@@ -1,45 +0,0 @@
#[macro_use]
extern crate diesel;

pub mod ai;
pub mod auth;
pub mod cleanup;
pub mod data;
pub mod database;
pub mod error;
pub mod exif;
pub mod file_types;
pub mod files;
pub mod geo;
pub mod memories;
pub mod otel;
pub mod parsers;
pub mod service;
pub mod state;
pub mod tags;
#[cfg(test)]
pub mod testhelpers;
pub mod utils;
pub mod video;

// Re-export commonly used types
pub use data::{Claims, ThumbnailRequest};
pub use database::{connect, schema};
pub use state::AppState;

// Stub functions for modules that reference main.rs
// These are not used by cleanup_files binary
use std::path::Path;
use walkdir::DirEntry;

pub fn create_thumbnails() {
    // Stub - implemented in main.rs
}

pub fn update_media_counts(_media_dir: &Path) {
    // Stub - implemented in main.rs
}

pub fn is_video(entry: &DirEntry) -> bool {
    file_types::direntry_is_video(entry)
}
src/main.rs (1423 lines): file diff suppressed because it is too large
src/memories.rs (1148 lines): file diff suppressed because it is too large
src/otel.rs (112 lines deleted)
@@ -1,112 +0,0 @@
use actix_web::HttpRequest;
use actix_web::http::header::HeaderMap;
use opentelemetry::global::{BoxedSpan, BoxedTracer};
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::trace::{Span, Status, Tracer};
use opentelemetry::{Context, KeyValue, global};
use opentelemetry_appender_log::OpenTelemetryLogBridge;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::{BatchLogProcessor, SdkLoggerProvider};
use opentelemetry_sdk::propagation::TraceContextPropagator;

pub fn global_tracer() -> BoxedTracer {
    global::tracer("image-server")
}

#[allow(dead_code)]
pub fn init_tracing() {
    let resources = Resource::builder()
        .with_attributes([
            KeyValue::new("service.name", "image-server"),
            KeyValue::new("service.version", env!("CARGO_PKG_VERSION")),
        ])
        .build();

    let span_exporter = opentelemetry_otlp::SpanExporter::builder()
        .with_tonic()
        .with_endpoint(std::env::var("OTLP_OTLS_ENDPOINT").unwrap())
        .build()
        .unwrap();

    let tracer_provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
        .with_batch_exporter(span_exporter)
        .with_resource(resources)
        .build();

    global::set_tracer_provider(tracer_provider);
}

#[allow(dead_code)]
pub fn init_logs() {
    let otlp_exporter = opentelemetry_otlp::LogExporter::builder()
        .with_tonic()
        .with_endpoint(std::env::var("OTLP_OTLS_ENDPOINT").unwrap())
        .build()
        .unwrap();

    let exporter = opentelemetry_stdout::LogExporter::default();

    let resources = Resource::builder()
        .with_attributes([
            KeyValue::new("service.name", "image-server"),
            KeyValue::new("service.version", env!("CARGO_PKG_VERSION")),
        ])
        .build();

    let log_provider = SdkLoggerProvider::builder()
        .with_log_processor(BatchLogProcessor::builder(exporter).build())
        .with_log_processor(BatchLogProcessor::builder(otlp_exporter).build())
        .with_resource(resources)
        .build();

    let otel_log_appender = OpenTelemetryLogBridge::new(&log_provider);
    log::set_boxed_logger(Box::new(otel_log_appender)).expect("Unable to set boxed logger");
    //TODO: Still set this with the env? Ideally we still have a clean/simple local logger for local dev
    log::set_max_level(log::LevelFilter::Info);
}

struct HeaderExtractor<'a>(&'a HeaderMap);

impl<'a> opentelemetry::propagation::Extractor for HeaderExtractor<'a> {
    fn get(&self, key: &str) -> Option<&str> {
        self.0.get(key).and_then(|v| v.to_str().ok())
    }

    fn keys(&self) -> Vec<&str> {
        self.0.keys().map(|k| k.as_str()).collect()
    }
}

pub fn extract_context_from_request(req: &HttpRequest) -> Context {
    let propagator = TraceContextPropagator::new();
    propagator.extract(&HeaderExtractor(req.headers()))
}

pub fn trace_db_call<F, O>(
    context: &Context,
    query_type: &str,
    operation: &str,
    func: F,
) -> anyhow::Result<O>
where
    F: FnOnce(&mut BoxedSpan) -> anyhow::Result<O>,
{
    let tracer = global::tracer("db");
    let mut span = tracer
        .span_builder(format!("db.{}.{}", query_type, operation))
        .with_attributes(vec![
            KeyValue::new("db.query_type", query_type.to_string().clone()),
            KeyValue::new("db.operation", operation.to_string().clone()),
        ])
        .start_with_context(&tracer, context);

    let result = func(&mut span);
    match &result {
        Ok(_) => {
            span.set_status(Status::Ok);
        }
        Err(e) => span.set_status(Status::error(e.to_string())),
    }

    result
}
@@ -1,183 +0,0 @@
use anyhow::{Context, Result};
use chrono::NaiveDateTime;
use ical::parser::ical::component::IcalCalendar;
use ical::property::Property;
use std::fs::File;
use std::io::BufReader;

#[derive(Debug, Clone)]
pub struct ParsedCalendarEvent {
    pub event_uid: Option<String>,
    pub summary: String,
    pub description: Option<String>,
    pub location: Option<String>,
    pub start_time: i64,
    pub end_time: i64,
    pub all_day: bool,
    pub organizer: Option<String>,
    pub attendees: Vec<String>,
}

pub fn parse_ics_file(path: &str) -> Result<Vec<ParsedCalendarEvent>> {
    let file = File::open(path).context("Failed to open .ics file")?;
    let reader = BufReader::new(file);

    let parser = ical::IcalParser::new(reader);
    let mut events = Vec::new();

    for calendar_result in parser {
        let calendar: IcalCalendar = calendar_result.context("Failed to parse calendar")?;

        for event in calendar.events {
            // Extract properties
            let mut event_uid = None;
            let mut summary = None;
            let mut description = None;
            let mut location = None;
            let mut start_time = None;
            let mut end_time = None;
            let mut all_day = false;
            let mut organizer = None;
            let mut attendees = Vec::new();

            for property in event.properties {
                match property.name.as_str() {
                    "UID" => {
                        event_uid = property.value;
                    }
                    "SUMMARY" => {
                        summary = property.value;
                    }
                    "DESCRIPTION" => {
                        description = property.value;
                    }
                    "LOCATION" => {
                        location = property.value;
                    }
                    "DTSTART" => {
                        if let Some(ref value) = property.value {
                            start_time = parse_ical_datetime(value, &property)?;
                            // Check if it's an all-day event (no time component)
                            all_day = value.len() == 8; // YYYYMMDD format
                        }
                    }
                    "DTEND" => {
                        if let Some(ref value) = property.value {
                            end_time = parse_ical_datetime(value, &property)?;
                        }
                    }
                    "ORGANIZER" => {
                        organizer = extract_email_from_mailto(property.value.as_deref());
                    }
                    "ATTENDEE" => {
                        if let Some(email) = extract_email_from_mailto(property.value.as_deref()) {
                            attendees.push(email);
                        }
                    }
                    _ => {}
                }
            }

            // Only include events with required fields
            if let (Some(summary_text), Some(start), Some(end)) = (summary, start_time, end_time) {
                events.push(ParsedCalendarEvent {
                    event_uid,
                    summary: summary_text,
                    description,
                    location,
                    start_time: start,
                    end_time: end,
                    all_day,
                    organizer,
                    attendees,
                });
            }
        }
    }

    Ok(events)
}

fn parse_ical_datetime(value: &str, property: &Property) -> Result<Option<i64>> {
    // Check for TZID parameter
    let _tzid = property.params.as_ref().and_then(|params| {
        params
            .iter()
            .find(|(key, _)| key == "TZID")
            .and_then(|(_, values)| values.first())
            .cloned()
    });

    // iCal datetime formats:
    // - 20240815T140000Z (UTC)
    // - 20240815T140000 (local/TZID)
    // - 20240815 (all-day)

    let cleaned = value.replace("Z", "").replace("T", "");

    // All-day event (YYYYMMDD)
    if cleaned.len() == 8 {
        let dt = NaiveDateTime::parse_from_str(&format!("{}000000", cleaned), "%Y%m%d%H%M%S")
            .context("Failed to parse all-day date")?;
        return Ok(Some(dt.and_utc().timestamp()));
    }

    // DateTime event (YYYYMMDDTHHMMSS)
    if cleaned.len() >= 14 {
        let dt = NaiveDateTime::parse_from_str(&cleaned[..14], "%Y%m%d%H%M%S")
            .context("Failed to parse datetime")?;

        // If original had 'Z', it's UTC
        let timestamp = if value.ends_with('Z') {
            dt.and_utc().timestamp()
        } else {
            // Treat as UTC for simplicity (proper TZID handling is complex)
            dt.and_utc().timestamp()
        };

        return Ok(Some(timestamp));
    }

    Ok(None)
}

fn extract_email_from_mailto(value: Option<&str>) -> Option<String> {
    value.map(|v| {
        // ORGANIZER and ATTENDEE often have format: mailto:user@example.com
        if v.starts_with("mailto:") {
            v.trim_start_matches("mailto:").to_string()
        } else {
            v.to_string()
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_ical_datetime() {
        let prop = Property {
            name: "DTSTART".to_string(),
            params: None,
            value: Some("20240815T140000Z".to_string()),
        };

        let timestamp = parse_ical_datetime("20240815T140000Z", &prop).unwrap();
        assert!(timestamp.is_some());
    }

    #[test]
    fn test_extract_email() {
        assert_eq!(
            extract_email_from_mailto(Some("mailto:user@example.com")),
            Some("user@example.com".to_string())
        );

        assert_eq!(
            extract_email_from_mailto(Some("user@example.com")),
            Some("user@example.com".to_string())
        );
    }
}
@@ -1,134 +0,0 @@
use anyhow::{Context, Result};
use chrono::DateTime;
use serde::Deserialize;
use std::fs::File;
use std::io::BufReader;

#[derive(Debug, Clone)]
pub struct ParsedLocationRecord {
    pub timestamp: i64,
    pub latitude: f64,
    pub longitude: f64,
    pub accuracy: Option<i32>,
    pub activity: Option<String>,
    pub activity_confidence: Option<i32>,
}

// Google Takeout Location History JSON structures
#[derive(Debug, Deserialize)]
struct LocationHistory {
    locations: Vec<LocationPoint>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LocationPoint {
    timestamp_ms: Option<String>, // Older format
    timestamp: Option<String>,    // Newer format (ISO8601)
    latitude_e7: Option<i64>,
    longitude_e7: Option<i64>,
    accuracy: Option<i32>,
    activity: Option<Vec<ActivityRecord>>,
}

#[derive(Debug, Deserialize)]
struct ActivityRecord {
    activity: Vec<ActivityType>,
    #[allow(dead_code)] // Part of JSON structure, may be used in future
    timestamp_ms: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ActivityType {
    #[serde(rename = "type")]
    activity_type: String,
    confidence: i32,
}

pub fn parse_location_json(path: &str) -> Result<Vec<ParsedLocationRecord>> {
    let file = File::open(path).context("Failed to open location JSON file")?;
    let reader = BufReader::new(file);

    let history: LocationHistory =
        serde_json::from_reader(reader).context("Failed to parse location history JSON")?;

    let mut records = Vec::new();

    for point in history.locations {
        // Parse timestamp (try both formats)
        let timestamp = if let Some(ts_ms) = point.timestamp_ms {
            // Milliseconds since epoch
            ts_ms
                .parse::<i64>()
                .context("Failed to parse timestamp_ms")?
                / 1000
        } else if let Some(ts_iso) = point.timestamp {
            // ISO8601 format
            DateTime::parse_from_rfc3339(&ts_iso)
                .context("Failed to parse ISO8601 timestamp")?
                .timestamp()
        } else {
            continue; // Skip points without timestamp
        };

        // Convert E7 format to decimal degrees
        let latitude = point.latitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
        let longitude = point.longitude_e7.map(|e7| e7 as f64 / 10_000_000.0);

        // Extract highest-confidence activity
        let (activity, activity_confidence) = point
            .activity
            .as_ref()
            .and_then(|activities| activities.first())
            .and_then(|record| {
                record
                    .activity
                    .iter()
                    .max_by_key(|a| a.confidence)
                    .map(|a| (a.activity_type.clone(), a.confidence))
            })
            .unzip();

        if let (Some(lat), Some(lon)) = (latitude, longitude) {
            records.push(ParsedLocationRecord {
                timestamp,
                latitude: lat,
                longitude: lon,
                accuracy: point.accuracy,
                activity,
                activity_confidence,
            });
        }
    }

    Ok(records)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_e7_conversion() {
        let lat_e7 = 374228300_i64;
        let lat = lat_e7 as f64 / 10_000_000.0;
        assert!((lat - 37.42283).abs() < 0.00001);
    }

    #[test]
    fn test_parse_sample_json() {
        let json = r#"{
            "locations": [
                {
                    "latitudeE7": 374228300,
                    "longitudeE7": -1221086100,
                    "accuracy": 20,
                    "timestampMs": "1692115200000"
                }
            ]
        }"#;

        let history: LocationHistory = serde_json::from_str(json).unwrap();
        assert_eq!(history.locations.len(), 1);
    }
}
@@ -1,7 +0,0 @@
pub mod ical_parser;
pub mod location_json_parser;
pub mod search_html_parser;

pub use ical_parser::{ParsedCalendarEvent, parse_ics_file};
pub use location_json_parser::{ParsedLocationRecord, parse_location_json};
pub use search_html_parser::{ParsedSearchRecord, parse_search_html};
@@ -1,209 +0,0 @@
use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDateTime, Utc};
use scraper::{Html, Selector};
use std::fs;

#[derive(Debug, Clone)]
pub struct ParsedSearchRecord {
    pub timestamp: i64,
    pub query: String,
    pub search_engine: Option<String>,
}

pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
    let html_content =
        fs::read_to_string(path).context("Failed to read search history HTML file")?;

    let document = Html::parse_document(&html_content);
    let mut records = Vec::new();

    // Try multiple selector strategies as Google Takeout format varies

    // Strategy 1: Look for specific cell structure
    if let Ok(cell_selector) = Selector::parse("div.content-cell") {
        for cell in document.select(&cell_selector) {
            if let Some(record) = parse_content_cell(&cell) {
                records.push(record);
            }
        }
    }

    // Strategy 2: Look for outer-cell structure (older format)
    if records.is_empty()
        && let Ok(outer_selector) = Selector::parse("div.outer-cell")
    {
        for cell in document.select(&outer_selector) {
            if let Some(record) = parse_outer_cell(&cell) {
                records.push(record);
            }
        }
    }

    // Strategy 3: Generic approach - look for links and timestamps
    if records.is_empty()
        && let Ok(link_selector) = Selector::parse("a")
    {
        for link in document.select(&link_selector) {
            if let Some(href) = link.value().attr("href") {
                // Check if it's a search URL
                if (href.contains("google.com/search?q=") || href.contains("search?q="))
                    && let Some(query) = extract_query_from_url(href)
                {
                    // Try to find nearby timestamp
                    let timestamp = find_nearby_timestamp(&link);

                    records.push(ParsedSearchRecord {
                        timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
                        query,
                        search_engine: Some("Google".to_string()),
                    });
                }
            }
        }
    }

    Ok(records)
}

fn parse_content_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
    let link_selector = Selector::parse("a").ok()?;

    let link = cell.select(&link_selector).next()?;
    let href = link.value().attr("href")?;
    let query = extract_query_from_url(href)?;

    // Extract timestamp from cell text
    let cell_text = cell.text().collect::<Vec<_>>().join(" ");
    let timestamp = parse_timestamp_from_text(&cell_text);

    Some(ParsedSearchRecord {
        timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
        query,
        search_engine: Some("Google".to_string()),
    })
}

fn parse_outer_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
    let link_selector = Selector::parse("a").ok()?;

    let link = cell.select(&link_selector).next()?;
    let href = link.value().attr("href")?;
    let query = extract_query_from_url(href)?;

    let cell_text = cell.text().collect::<Vec<_>>().join(" ");
    let timestamp = parse_timestamp_from_text(&cell_text);

    Some(ParsedSearchRecord {
        timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
        query,
        search_engine: Some("Google".to_string()),
    })
}

fn extract_query_from_url(url: &str) -> Option<String> {
    // Extract query parameter from URL
    // Example: https://www.google.com/search?q=rust+programming

    if let Some(query_start) = url.find("?q=").or_else(|| url.find("&q=")) {
        let query_part = &url[query_start + 3..];
        let query_end = query_part.find('&').unwrap_or(query_part.len());
        let encoded_query = &query_part[..query_end];

        // URL decode
        urlencoding::decode(encoded_query)
            .ok()
            .map(|s| s.to_string())
    } else {
        None
    }
}

fn find_nearby_timestamp(element: &scraper::ElementRef) -> Option<i64> {
    // Look for timestamp in parent or sibling elements
    if let Some(parent) = element.parent()
        && parent.value().as_element().is_some()
    {
        let parent_ref = scraper::ElementRef::wrap(parent)?;
        let text = parent_ref.text().collect::<Vec<_>>().join(" ");
        return parse_timestamp_from_text(&text);
    }
    None
}

fn parse_timestamp_from_text(text: &str) -> Option<i64> {
    // Google Takeout timestamps often look like:
    // "Aug 15, 2024, 2:34:56 PM PDT"
    // "2024-08-15T14:34:56Z"

    // Try ISO8601 first
    if let Some(iso_match) = text
        .split_whitespace()
        .find(|s| s.contains('T') && s.contains('-'))
        && let Ok(dt) = DateTime::parse_from_rfc3339(iso_match)
    {
        return Some(dt.timestamp());
    }

    // Try common date patterns
    let patterns = [
        "%b %d, %Y, %I:%M:%S %p", // Aug 15, 2024, 2:34:56 PM
        "%Y-%m-%d %H:%M:%S",      // 2024-08-15 14:34:56
        "%m/%d/%Y %H:%M:%S",      // 08/15/2024 14:34:56
    ];

    for pattern in patterns {
        // Extract potential date string
        if let Some(date_part) = extract_date_substring(text)
            && let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern)
        {
            return Some(dt.and_utc().timestamp());
        }
    }

    None
}

fn extract_date_substring(text: &str) -> Option<String> {
    // Try to extract date-like substring from text
    // This is a heuristic approach for varied formats

    // Look for patterns like "Aug 15, 2024, 2:34:56 PM"
    if let Some(pos) = text.find(|c: char| c.is_numeric()) {
        let rest = &text[pos..];
        if let Some(end) =
            rest.find(|c: char| !c.is_alphanumeric() && c != ':' && c != ',' && c != ' ')
        {
            Some(rest[..end].trim().to_string())
        } else {
            Some(rest.trim().to_string())
        }
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_query_from_url() {
        let url = "https://www.google.com/search?q=rust+programming&oq=rust";
        let query = extract_query_from_url(url);
        assert_eq!(query, Some("rust+programming".to_string()));
    }

    #[test]
    fn test_extract_query_with_encoding() {
        let url = "https://www.google.com/search?q=hello%20world";
        let query = extract_query_from_url(url);
        assert_eq!(query, Some("hello world".to_string()));
    }

    #[test]
    fn test_parse_iso_timestamp() {
        let text = "Some text 2024-08-15T14:34:56Z more text";
        let timestamp = parse_timestamp_from_text(text);
        assert!(timestamp.is_some());
    }
}
@@ -1,16 +0,0 @@
use actix_web::App;

pub trait ServiceBuilder<T> {
    fn add_feature<F>(self, f: F) -> App<T>
    where
        F: Fn(App<T>) -> App<T>;
}

impl<T> ServiceBuilder<T> for App<T> {
    fn add_feature<F>(self, create_feature: F) -> App<T>
    where
        F: Fn(App<T>) -> App<T>,
    {
        create_feature(self)
    }
}
src/state.rs (211 lines deleted)
@@ -1,211 +0,0 @@
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
use crate::database::{
    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao,
    SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao,
    SqliteLocationHistoryDao, SqliteSearchHistoryDao,
};
use crate::video::actors::{PlaylistGenerator, StreamActor, VideoPlaylistManager};
use actix::{Actor, Addr};
use std::env;
use std::sync::{Arc, Mutex};

pub struct AppState {
    pub stream_manager: Arc<Addr<StreamActor>>,
    pub playlist_manager: Arc<Addr<VideoPlaylistManager>>,
    pub base_path: String,
    pub thumbnail_path: String,
    pub video_path: String,
    pub gif_path: String,
    pub excluded_dirs: Vec<String>,
    pub ollama: OllamaClient,
    pub sms_client: SmsApiClient,
    pub insight_generator: InsightGenerator,
}

impl AppState {
    pub fn new(
        stream_manager: Arc<Addr<StreamActor>>,
        base_path: String,
        thumbnail_path: String,
        video_path: String,
        gif_path: String,
        excluded_dirs: Vec<String>,
        ollama: OllamaClient,
        sms_client: SmsApiClient,
        insight_generator: InsightGenerator,
    ) -> Self {
        let playlist_generator = PlaylistGenerator::new();
        let video_playlist_manager =
            VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());

        Self {
            stream_manager,
            playlist_manager: Arc::new(video_playlist_manager.start()),
            base_path,
            thumbnail_path,
            video_path,
            gif_path,
            excluded_dirs,
            ollama,
            sms_client,
            insight_generator,
        }
    }

    /// Parse excluded directories from environment variable
    fn parse_excluded_dirs() -> Vec<String> {
        env::var("EXCLUDED_DIRS")
            .unwrap_or_default()
            .split(',')
            .filter(|dir| !dir.trim().is_empty())
            .map(|dir| dir.trim().to_string())
            .collect()
    }
}

impl Default for AppState {
    fn default() -> Self {
        // Initialize AI clients
        let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
            env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
        });
        let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
        let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
            .or_else(|_| env::var("OLLAMA_MODEL"))
            .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
        let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();

        let ollama = OllamaClient::new(
            ollama_primary_url,
            ollama_fallback_url,
            ollama_primary_model,
            ollama_fallback_model,
        );

        let sms_api_url =
            env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
        let sms_api_token = env::var("SMS_API_TOKEN").ok();
        let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);

        // Initialize DAOs
        let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
        let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));

        // Initialize Google Takeout DAOs
        let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
        let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
        let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));

        // Load base path
        let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");

        // Initialize InsightGenerator with all data sources
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            sms_client.clone(),
            insight_dao.clone(),
            exif_dao.clone(),
            daily_summary_dao.clone(),
            calendar_dao.clone(),
            location_dao.clone(),
            search_dao.clone(),
            base_path.clone(),
        );

        Self::new(
            Arc::new(StreamActor {}.start()),
            base_path,
            env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"),
            env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"),
            env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"),
            Self::parse_excluded_dirs(),
            ollama,
            sms_client,
            insight_generator,
        )
    }
}

#[cfg(test)]
impl AppState {
    /// Creates an AppState instance for testing with temporary directories
    pub fn test_state() -> Self {
        use actix::Actor;
        // Create a base temporary directory
        let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
        let base_path = temp_dir.path().to_path_buf();

        // Create subdirectories for thumbnails, videos, and gifs
        let thumbnail_path = create_test_subdir(&base_path, "thumbnails");
        let video_path = create_test_subdir(&base_path, "videos");
        let gif_path = create_test_subdir(&base_path, "gifs");

        // Initialize test AI clients
        let ollama = OllamaClient::new(
            "http://localhost:11434".to_string(),
            None,
            "llama3.2".to_string(),
            None,
        );
        let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None);

        // Initialize test DAOs
        let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
        let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));

        // Initialize test Google Takeout DAOs
        let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
        let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
        let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));

        // Initialize test InsightGenerator with all data sources
        let base_path_str = base_path.to_string_lossy().to_string();
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            sms_client.clone(),
            insight_dao.clone(),
            exif_dao.clone(),
            daily_summary_dao.clone(),
            calendar_dao.clone(),
            location_dao.clone(),
            search_dao.clone(),
            base_path_str.clone(),
        );

        // Create the AppState with the temporary paths
        AppState::new(
            Arc::new(StreamActor {}.start()),
            base_path_str,
            thumbnail_path.to_string_lossy().to_string(),
            video_path.to_string_lossy().to_string(),
            gif_path.to_string_lossy().to_string(),
            Vec::new(), // No excluded directories for test state
            ollama,
            sms_client,
            insight_generator,
        )
    }
}

/// Helper function to create a subdirectory inside the base directory for testing
#[cfg(test)]
fn create_test_subdir(base_path: &std::path::Path, name: &str) -> std::path::PathBuf {
    let dir_path = base_path.join(name);
    std::fs::create_dir_all(&dir_path)
        .unwrap_or_else(|_| panic!("Failed to create {} directory", name));
    dir_path
}
src/tags.rs (1014 lines): file diff suppressed because it is too large
@@ -1,64 +0,0 @@
use actix_web::{
    HttpResponse,
    body::{BoxBody, MessageBody},
};

use crate::database::{UserDao, models::User};
use std::cell::RefCell;
use std::option::Option;

pub struct TestUserDao {
    pub user_map: RefCell<Vec<User>>,
}

impl TestUserDao {
    pub fn new() -> Self {
        Self {
            user_map: RefCell::new(Vec::new()),
        }
    }
}

impl UserDao for TestUserDao {
    fn create_user(&mut self, username: &str, password: &str) -> Option<User> {
        let u = User {
            id: (self.user_map.borrow().len() + 1) as i32,
            username: username.to_string(),
            password: password.to_string(),
        };

        self.user_map.borrow_mut().push(u.clone());

        Some(u)
    }

    fn get_user(&mut self, user: &str, pass: &str) -> Option<User> {
        match self
            .user_map
            .borrow()
            .iter()
            .find(|&u| u.username == user && u.password == pass)
        {
            Some(u) => {
                let copy = (*u).clone();
                Some(copy)
            }
            None => None,
        }
    }

    fn user_exists(&mut self, user: &str) -> bool {
        self.user_map.borrow().iter().any(|u| u.username == user)
    }
}

pub trait BodyReader {
    fn read_to_str(self) -> String;
}

impl BodyReader for HttpResponse<BoxBody> {
    fn read_to_str(self) -> String {
        let body = self.into_body().try_into_bytes().unwrap();
        std::str::from_utf8(&body).unwrap().to_string()
    }
}
src/utils.rs (83 lines deleted)
@@ -1,83 +0,0 @@
/// Normalize a file path to use forward slashes for cross-platform consistency
/// This ensures paths stored in the database always use `/` regardless of OS
///
/// # Examples
/// ```
/// use image_api::utils::normalize_path;
///
/// assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
/// assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
/// ```
pub fn normalize_path(path: &str) -> String {
    path.replace('\\', "/")
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_path_with_backslashes() {
        assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
    }

    #[test]
    fn test_normalize_path_with_forward_slashes() {
        assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
    }

    #[test]
    fn test_normalize_path_mixed() {
        assert_eq!(
            normalize_path("foo\\bar/baz\\qux.jpg"),
            "foo/bar/baz/qux.jpg"
        );
    }

    #[test]
    fn test_normalize_path_empty() {
        assert_eq!(normalize_path(""), "");
    }

    #[test]
    fn test_normalize_path_absolute_windows() {
        assert_eq!(
            normalize_path("C:\\Users\\Photos\\image.jpg"),
            "C:/Users/Photos/image.jpg"
        );
    }

    #[test]
    fn test_normalize_path_unc_path() {
        assert_eq!(
            normalize_path("\\\\server\\share\\folder\\file.jpg"),
            "//server/share/folder/file.jpg"
        );
    }

    #[test]
    fn test_normalize_path_single_filename() {
        assert_eq!(normalize_path("image.jpg"), "image.jpg");
    }

    #[test]
    fn test_normalize_path_trailing_slash() {
        assert_eq!(normalize_path("foo\\bar\\"), "foo/bar/");
    }

    #[test]
    fn test_normalize_path_multiple_consecutive_backslashes() {
        assert_eq!(
            normalize_path("foo\\\\bar\\\\\\baz.jpg"),
            "foo//bar///baz.jpg"
        );
    }

    #[test]
    fn test_normalize_path_deep_nesting() {
        assert_eq!(
            normalize_path("a\\b\\c\\d\\e\\f\\g\\file.jpg"),
            "a/b/c/d/e/f/g/file.jpg"
        );
    }
}
src/video.rs (new file, 49 lines)
@@ -0,0 +1,49 @@
use std::path::Path;
use std::process::Command;

// ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
// ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8

pub fn create_playlist(video_path: &str, playlist_file: &str) {
    if Path::new(playlist_file).exists() {
        println!("Playlist already exists: {}", playlist_file);
        return;
    }

    let result = Command::new("ffmpeg")
        .arg("-i")
        .arg(video_path)
        .arg("-c:v")
        .arg("h264")
        .arg("-crf")
        .arg("23")
        .arg("-preset")
        .arg("veryfast")
        .arg("-hls_time")
        .arg("3")
        .arg("-hls_list_size")
        .arg("100")
        .arg("-vf")
        .arg("scale=1080:-2,setsar=1:1")
        .arg(playlist_file)
        .output()
        .expect("Expected this to work..");

    println!("{:?}", result);
    println!("Status: {}", String::from_utf8(result.stdout).unwrap())
}

pub fn generate_video_thumbnail(path: &Path, destination: &Path) {
    Command::new("ffmpeg")
        .arg("-ss")
        .arg("3")
        .arg("-i")
        .arg(path.to_str().unwrap())
        .arg("-vframes")
        .arg("1")
        .arg("-f")
        .arg("image2")
        .arg(destination)
        .output()
        .expect("Failure to create video frame");
}
@@ -1,449 +0,0 @@
use crate::is_video;
use crate::otel::global_tracer;
use actix::prelude::*;
use futures::TryFutureExt;
use log::{debug, error, info, trace, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use std::io::Result;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, ExitStatus, Stdio};
use std::sync::Arc;
use tokio::sync::Semaphore;
use walkdir::{DirEntry, WalkDir};
// ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
// ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8

pub struct StreamActor;

impl Actor for StreamActor {
    type Context = Context<Self>;
}

pub struct ProcessMessage(pub String, pub Child);

impl Message for ProcessMessage {
    type Result = Result<ExitStatus>;
}

impl Handler<ProcessMessage> for StreamActor {
    type Result = Result<ExitStatus>;

    fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result {
        trace!("Message received");
        let mut process = msg.1;
        let result = process.wait();

        debug!(
            "Finished waiting for: {:?}. Code: {:?}",
            msg.0,
            result
                .as_ref()
                .map_or(-1, |status| status.code().unwrap_or(-1))
        );
        result
    }
}

pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
    if Path::new(playlist_file).exists() {
        debug!("Playlist already exists: {}", playlist_file);
        return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
    }

    let result = Command::new("ffmpeg")
        .arg("-i")
        .arg(video_path)
        .arg("-c:v")
        .arg("h264")
        .arg("-crf")
        .arg("21")
        .arg("-preset")
        .arg("veryfast")
        .arg("-hls_time")
        .arg("3")
        .arg("-hls_list_size")
        .arg("100")
        .arg("-vf")
        .arg("scale=1080:-2,setsar=1:1")
        .arg(playlist_file)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn();

    let start_time = std::time::Instant::now();
    loop {
        actix::clock::sleep(std::time::Duration::from_secs(1)).await;

        if Path::new(playlist_file).exists()
            || std::time::Instant::now() - start_time > std::time::Duration::from_secs(5)
        {
            break;
        }
    }

    result
}

pub fn generate_video_thumbnail(path: &Path, destination: &Path) {
    Command::new("ffmpeg")
        .arg("-ss")
        .arg("3")
        .arg("-i")
        .arg(path.to_str().unwrap())
        .arg("-vframes")
        .arg("1")
        .arg("-f")
        .arg("image2")
        .arg(destination)
        .output()
        .expect("Failure to create video frame");
}

/// Check if a video is already encoded with h264 codec
/// Returns true if the video uses h264, false otherwise or if detection fails
async fn is_h264_encoded(video_path: &str) -> bool {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-show_entries")
        .arg("stream=codec_name")
        .arg("-of")
        .arg("default=noprint_wrappers=1:nokey=1")
        .arg(video_path)
        .output()
        .await;

    match output {
        Ok(output) if output.status.success() => {
            let codec = String::from_utf8_lossy(&output.stdout);
            let codec = codec.trim();
            debug!("Detected codec for {}: {}", video_path, codec);
            codec == "h264"
        }
        Ok(output) => {
            warn!(
                "ffprobe failed for {}: {}",
                video_path,
                String::from_utf8_lossy(&output.stderr)
            );
            false
        }
        Err(e) => {
            warn!("Failed to run ffprobe for {}: {}", video_path, e);
            false
        }
    }
}

/// Check if a video has rotation metadata
/// Returns the rotation angle in degrees (0, 90, 180, 270) or 0 if none detected
async fn get_video_rotation(video_path: &str) -> i32 {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-show_entries")
        .arg("stream_tags=rotate")
        .arg("-of")
        .arg("default=noprint_wrappers=1:nokey=1")
        .arg(video_path)
        .output()
        .await;

    match output {
        Ok(output) if output.status.success() => {
            let rotation_str = String::from_utf8_lossy(&output.stdout);
            let rotation_str = rotation_str.trim();
            if rotation_str.is_empty() {
                0
            } else {
                rotation_str.parse::<i32>().unwrap_or(0)
            }
        }
        _ => 0,
    }
}

pub struct VideoPlaylistManager {
    playlist_dir: PathBuf,
    playlist_generator: Addr<PlaylistGenerator>,
}

impl VideoPlaylistManager {
    pub fn new<P: Into<PathBuf>>(
        playlist_dir: P,
        playlist_generator: Addr<PlaylistGenerator>,
    ) -> Self {
        Self {
            playlist_dir: playlist_dir.into(),
            playlist_generator,
        }
    }
}

impl Actor for VideoPlaylistManager {
    type Context = Context<Self>;
}

impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
    type Result = ResponseFuture<()>;

    fn handle(&mut self, msg: ScanDirectoryMessage, _ctx: &mut Self::Context) -> Self::Result {
        let tracer = global_tracer();
        let mut span = tracer.start("videoplaylistmanager.scan_directory");

        let start = std::time::Instant::now();
        info!(
            "Starting scan directory for video playlist generation: {}",
            msg.directory
        );

        let video_files = WalkDir::new(&msg.directory)
            .into_iter()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_type().is_file())
            .filter(is_video)
            .collect::<Vec<DirEntry>>();

        let scan_dir_name = msg.directory.clone();
        let playlist_output_dir = self.playlist_dir.clone();
        let playlist_generator = self.playlist_generator.clone();

        Box::pin(async move {
            for e in video_files {
                let path = e.path();
                let path_as_str = path.to_str().unwrap();
                debug!(
                    "Sending generate playlist message for path: {}",
                    path_as_str
                );

                match playlist_generator
                    .send(GeneratePlaylistMessage {
                        playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
                        video_path: PathBuf::from(path),
                    })
                    .await
                    .expect("Failed to send generate playlist message")
                {
                    Ok(_) => {
                        span.add_event(
                            "Playlist generated",
                            vec![KeyValue::new("video_path", path_as_str.to_string())],
                        );

                        debug!(
                            "Successfully generated playlist for file: '{}'",
                            path_as_str
                        );
                    }
                    Err(e) => {
                        warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
                    }
                }
            }

            span.add_event(
                "Finished directory scan",
                vec![KeyValue::new("directory", scan_dir_name.to_string())],
            );
            info!(
                "Finished directory scan of '{}' in {:?}",
                scan_dir_name,
                start.elapsed()
            );
        })
    }
}

impl Handler<QueueVideosMessage> for VideoPlaylistManager {
    type Result = ();

    fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result {
        if msg.video_paths.is_empty() {
            return;
        }

        info!(
            "Queueing {} videos for HLS playlist generation",
            msg.video_paths.len()
        );

        let playlist_output_dir = self.playlist_dir.clone();
        let playlist_generator = self.playlist_generator.clone();

        for video_path in msg.video_paths {
            let path_str = video_path.to_string_lossy().to_string();
            debug!("Queueing playlist generation for: {}", path_str);

            playlist_generator.do_send(GeneratePlaylistMessage {
                playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
                video_path,
            });
        }
    }
}

#[derive(Message)]
#[rtype(result = "()")]
pub struct ScanDirectoryMessage {
    pub(crate) directory: String,
}

#[derive(Message)]
#[rtype(result = "()")]
pub struct QueueVideosMessage {
    pub video_paths: Vec<PathBuf>,
}

#[derive(Message)]
#[rtype(result = "Result<()>")]
pub struct GeneratePlaylistMessage {
    pub video_path: PathBuf,
    pub playlist_path: String,
}

pub struct PlaylistGenerator {
    semaphore: Arc<Semaphore>,
}

impl PlaylistGenerator {
    pub(crate) fn new() -> Self {
        PlaylistGenerator {
            semaphore: Arc::new(Semaphore::new(2)),
        }
    }
}

impl Actor for PlaylistGenerator {
    type Context = Context<Self>;
}

impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
    type Result = ResponseFuture<Result<()>>;

    fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result {
        let video_file = msg.video_path.to_str().unwrap().to_owned();
        let playlist_path = msg.playlist_path.as_str().to_owned();
        let semaphore = self.semaphore.clone();

        let playlist_file = format!(
            "{}/{}.m3u8",
            playlist_path,
            msg.video_path.file_name().unwrap().to_str().unwrap()
        );

        let tracer = global_tracer();
        let mut span = tracer
            .span_builder("playlistgenerator.generate_playlist")
            .with_attributes(vec![
                KeyValue::new("video_file", video_file.clone()),
                KeyValue::new("playlist_file", playlist_file.clone()),
            ])
            .start(&tracer);

        Box::pin(async move {
            let wait_start = std::time::Instant::now();
            let permit = semaphore
                .acquire_owned()
                .await
                .expect("Unable to acquire semaphore");

            debug!(
                "Waited for {:?} before starting ffmpeg",
                wait_start.elapsed()
            );
            span.add_event(
                "Waited for FFMPEG semaphore",
                vec![KeyValue::new(
                    "wait_time",
                    wait_start.elapsed().as_secs_f64(),
                )],
            );

            if Path::new(&playlist_file).exists() {
                debug!("Playlist already exists: {}", playlist_file);
                span.set_status(Status::error(format!(
                    "Playlist already exists: {}",
                    playlist_file
                )));
                return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
            }

            // Check if video is already h264 encoded
            let is_h264 = is_h264_encoded(&video_file).await;

            // Check for rotation metadata
            let rotation = get_video_rotation(&video_file).await;
            let has_rotation = rotation != 0;

            let use_copy = is_h264 && !has_rotation;

            if has_rotation {
                info!(
                    "Video {} has rotation metadata ({}°), transcoding to apply rotation",
                    video_file, rotation
                );
                span.add_event(
                    "Transcoding due to rotation",
                    vec![KeyValue::new("rotation_degrees", rotation as i64)],
                );
            } else if use_copy {
                info!("Video {} is already h264, using stream copy", video_file);
                span.add_event("Using stream copy (h264 detected)", vec![]);
            } else {
                info!("Video {} needs transcoding to h264", video_file);
                span.add_event("Transcoding to h264", vec![]);
            }

            tokio::spawn(async move {
                let mut cmd = tokio::process::Command::new("ffmpeg");
                cmd.arg("-i").arg(&video_file);

                if use_copy {
                    // Video is already h264, just copy the stream
                    // Note: rotation metadata will be preserved in the stream
                    cmd.arg("-c:v").arg("copy");
                    cmd.arg("-c:a").arg("aac"); // Still need to ensure audio is compatible
                } else {
                    // Need to transcode - autorotate is enabled by default and will apply rotation
                    cmd.arg("-c:v").arg("h264");
                    cmd.arg("-crf").arg("21");
                    cmd.arg("-preset").arg("veryfast");
                    cmd.arg("-vf").arg("scale=1080:-2,setsar=1:1");
                    cmd.arg("-c:a").arg("aac");
                }

                // Common HLS settings
                cmd.arg("-hls_time").arg("3");
                cmd.arg("-hls_list_size").arg("100");
                cmd.arg(&playlist_file);
                cmd.stdout(Stdio::null());
                cmd.stderr(Stdio::piped());

                let ffmpeg_result = cmd
                    .output()
                    .inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
                    .map_err(|e| std::io::Error::other(e.to_string()))
                    .await;

                // Hang on to the permit until we're done decoding and then explicitly drop
                drop(permit);

                if let Ok(ref res) = ffmpeg_result {
                    debug!("ffmpeg output: {:?}", res);
                }

                span.set_status(Status::Ok);

                ffmpeg_result
            });

            Ok(())
        })
    }
}
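As a rough sketch of how these (now removed) actors were presumably wired together; the directory and playlist locations are hypothetical, and the generator limits itself to two concurrent ffmpeg jobs via its semaphore:

// Assumed wiring inside a running actix System; paths are placeholders,
// not values taken from the repository.
fn start_playlist_actors() -> Addr<VideoPlaylistManager> {
    let playlist_generator = PlaylistGenerator::new().start();
    let manager = VideoPlaylistManager::new("playlists", playlist_generator).start();

    // Queue every video found under the media root for HLS playlist generation.
    manager.do_send(ScanDirectoryMessage {
        directory: String::from("/data/media"),
    });

    manager
}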
@@ -1,185 +0,0 @@
use futures::TryFutureExt;
use log::{debug, error, info, warn};
use std::io::Result;
use std::process::{Output, Stdio};
use std::time::Instant;
use tokio::process::Command;

pub struct Ffmpeg;

pub enum GifType {
    Overview,
    OverviewVideo { duration: u32 },
}

impl Ffmpeg {
    async fn _generate_playlist(&self, input_file: &str, output_file: &str) -> Result<String> {
        let ffmpeg_result: Result<Output> = Command::new("ffmpeg")
            .arg("-i")
            .arg(input_file)
            .arg("-c:v")
            .arg("h264")
            .arg("-crf")
            .arg("21")
            .arg("-preset")
            .arg("veryfast")
            .arg("-hls_time")
            .arg("3")
            .arg("-hls_list_size")
            .arg("100")
            .arg("-vf")
            .arg("scale=1080:-2,setsar=1:1")
            .arg(output_file)
            .stdout(Stdio::null())
            .stderr(Stdio::piped())
            .output()
            .inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
            .map_err(|e| std::io::Error::other(e.to_string()))
            .await;

        if let Ok(ref res) = ffmpeg_result {
            debug!("ffmpeg output: {:?}", res);
        }

        ffmpeg_result.map(|_| output_file.to_string())
    }

    async fn get_video_duration(&self, input_file: &str) -> Result<u32> {
        Command::new("ffprobe")
            .args(["-i", input_file])
            .args(["-show_entries", "format=duration"])
            .args(["-v", "quiet"])
            .args(["-of", "csv=p=0"])
            .output()
            .await
            .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
            .inspect(|duration| debug!("Found video duration: {:?}", duration))
            .and_then(|duration| {
                duration
                    .parse::<f32>()
                    .map(|duration| duration as u32)
                    .map_err(|e| std::io::Error::other(e.to_string()))
            })
            .inspect(|duration| debug!("Found video duration: {:?}", duration))
    }

    pub async fn generate_video_gif(
        &self,
        input_file: &str,
        output_file: &str,
        gif_type: GifType,
    ) -> Result<String> {
        info!("Creating gif for: '{}'", input_file);

        match gif_type {
            GifType::Overview => {
                let temp_dir = tempfile::tempdir()?;
                let temp_path = temp_dir
                    .path()
                    .to_str()
                    .expect("Unable to make temp_dir a string");

                match self
                    .get_video_duration(input_file)
                    .and_then(|duration| {
                        debug!("Creating gif frames for '{}'", input_file);

                        Command::new("ffmpeg")
                            .args(["-i", input_file])
                            .args(["-vf", &format!("fps=20/{}", duration)])
                            .args(["-q:v", "2"])
                            .stderr(Stdio::null())
                            .arg(format!("{}/frame_%03d.jpg", temp_path))
                            .status()
                    })
                    .and_then(|_| {
                        debug!("Generating palette");

                        Command::new("ffmpeg")
                            .args(["-i", &format!("{}/frame_%03d.jpg", temp_path)])
                            .args(["-vf", "palettegen"])
                            .arg(format!("{}/palette.png", temp_path))
                            .stderr(Stdio::null())
                            .status()
                    })
                    .and_then(|_| {
                        debug!("Creating gif for: '{}'", input_file);
                        self.create_gif_from_frames(temp_path, output_file)
                    })
                    .await
                {
                    Ok(exit_code) => {
                        if exit_code == 0 {
                            info!("Created gif for '{}' -> '{}'", input_file, output_file);
                        } else {
                            warn!(
                                "Failed to create gif for '{}' with exit code: {}",
                                input_file, exit_code
                            );
                        }
                    }
                    Err(e) => {
                        error!("Error creating gif for '{}': {:?}", input_file, e);
                    }
                }
            }
            GifType::OverviewVideo { duration } => {
                let start = Instant::now();

                match self
                    .get_video_duration(input_file)
                    .and_then(|input_duration| {
                        Command::new("ffmpeg")
                            .args(["-i", input_file])
                            .args([
                                "-vf",
                                // Grab 1 second of frames equally spaced to create a 'duration' second long video scaled to 720px on longest side
                                &format!(
                                    "select='lt(mod(t,{}),1)',setpts=N/FRAME_RATE/TB,scale='if(gt(iw,ih),720,-2)':'if(gt(ih,iw),720,-2)",
                                    input_duration / duration
                                ),
                            ])
                            .arg("-an")
                            .arg(output_file)
                            .status()
                    })
                    .await
                {
                    Ok(out) => info!("Finished clip '{}' with code {:?} in {:?}", output_file, out.code(), start.elapsed()),
                    Err(e) => error!("Error creating video overview: {}", e),
                }
            }
        }
        Ok(output_file.to_string())
    }

    async fn create_gif_from_frames(&self, frame_base_dir: &str, output_file: &str) -> Result<i32> {
        let output = Command::new("ffmpeg")
            .arg("-y")
            .args(["-framerate", "4"])
            .args(["-i", &format!("{}/frame_%03d.jpg", frame_base_dir)])
            .args(["-i", &format!("{}/palette.png", frame_base_dir)])
            .args([
                "-filter_complex",
                // Scale to 480x480 with a center crop
                "[0:v]scale=480:-1:flags=lanczos,crop='min(in_w,in_h)':'min(in_w,in_h)':(in_w-out_w)/2:(in_h-out_h)/2, paletteuse",
            ])
            .args(["-loop", "0"]) // loop forever
            .args(["-final_delay", "75"])
            .arg(output_file)
            .stderr(Stdio::piped()) // Change this to capture stderr
            .stdout(Stdio::piped()) // Optionally capture stdout too
            .output()
            .await?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            error!("FFmpeg error: {}", stderr);
            let stdout = String::from_utf8_lossy(&output.stdout);
            debug!("FFmpeg stdout: {}", stdout);
        } else {
            debug!("FFmpeg successful with exit code: {}", output.status);
        }

        Ok(output.status.code().unwrap_or(-1))
    }
}
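For context, a minimal sketch of how this (since removed) helper might have been driven; both output styles come from the GifType enum above, and the paths are placeholders:

// Assumed call sites with placeholder paths; errors are propagated with `?`.
async fn make_previews(ffmpeg: &Ffmpeg) -> std::io::Result<()> {
    // Animated GIF summary built from evenly spaced frames plus a generated palette.
    ffmpeg
        .generate_video_gif("/data/media/clip.mp4", "/data/gifs/clip.gif", GifType::Overview)
        .await?;

    // Short, silent overview clip (~10 s) scaled to 720 px on the longest side.
    ffmpeg
        .generate_video_gif(
            "/data/media/clip.mp4",
            "/data/gifs/clip_overview.mp4",
            GifType::OverviewVideo { duration: 10 },
        )
        .await?;

    Ok(())
}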
@@ -1,67 +0,0 @@
use crate::otel::global_tracer;
use crate::video::ffmpeg::{Ffmpeg, GifType};
use crate::{is_video, update_media_counts};
use log::info;
use opentelemetry::trace::Tracer;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

pub mod actors;
pub mod ffmpeg;

#[allow(dead_code)]
pub async fn generate_video_gifs() {
    tokio::spawn(async {
        info!("Starting to make video gifs");

        let start = std::time::Instant::now();
        let tracer = global_tracer();
        tracer.start("creating video gifs");

        let gif_base_path = &dotenv::var("GIFS_DIRECTORY").unwrap_or(String::from("gifs"));
        let gif_directory: &Path = Path::new(gif_base_path);
        fs::create_dir_all(gif_base_path).expect("There was an issue creating directory");

        let files = PathBuf::from(dotenv::var("BASE_PATH").unwrap());

        let ffmpeg = Ffmpeg;
        for file in WalkDir::new(&files)
            .into_iter()
            .filter_map(|entry| entry.ok())
            .filter(|entry| entry.file_type().is_file())
            .filter(is_video)
            .filter(|entry| {
                let path = entry.path();
                let relative_path = &path.strip_prefix(&files).unwrap();
                let thumb_path = Path::new(gif_directory).join(relative_path);
                let gif_path = thumb_path.with_extension("gif");
                !gif_path.exists()
            })
        {
            let path = file.path();
            let relative_path = &path.strip_prefix(&files).unwrap();
            let gif_path = Path::new(gif_directory).join(relative_path);
            let gif_path = gif_path.with_extension("gif");
            if let Some(parent_dir) = gif_path.parent() {
                fs::create_dir_all(parent_dir).unwrap_or_else(|_| {
                    panic!("There was an issue creating gif directory {:?}", gif_path)
                });
            }
            info!("Generating gif for {:?}", path);

            ffmpeg
                .generate_video_gif(
                    path.to_str().unwrap(),
                    gif_path.to_str().unwrap(),
                    GifType::Overview,
                )
                .await
                .expect("There was an issue generating the gif");
        }

        info!("Finished making video gifs in {:?}", start.elapsed());

        update_media_counts(&files);
    });
}