4 Commits

Author SHA1 Message Date
Cameron Cordes
c482912fd8 Wrestling with string interpolation
Some checks failed
Core Repos/ImageApi/pipeline/pr-master There was a failure building this commit
2021-02-02 15:14:50 -05:00
Cameron Cordes
fcc520af1a Try and fix docker image args
Some checks failed
Core Repos/ImageApi/pipeline/pr-master There was a failure building this commit
2021-02-02 15:11:24 -05:00
Cameron Cordes
19dea67e3f Fix archive syntax
Some checks failed
Core Repos/ImageApi/pipeline/pr-master There was a failure building this commit
2021-02-02 15:07:11 -05:00
Cameron Cordes
55725e2b3c First stab at a Jenkinsfile
Some checks failed
Core Repos/ImageApi/pipeline/pr-master There was a failure building this commit
Core Repos/ImageApi/pipeline/head There was a failure building this commit
2021-02-02 15:04:28 -05:00
84 changed files with 1770 additions and 20036 deletions

11
.gitignore vendored
View File

@@ -2,14 +2,3 @@
database/target
*.db
.env
/tmp
# Default ignored files
.idea/shelf/
.idea/workspace.xml
# Datasource local storage ignored files
.idea/dataSources*
.idea/dataSources.local.xml
# Editor-based HTTP Client requests
.idea/httpRequests/
/.claude/settings.local.json

12
.idea/image-api.iml generated
View File

@@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/.idea/dataSources" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/misc.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
</project>

8
.idea/modules.xml generated
View File

@@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/image-api.iml" filepath="$PROJECT_DIR$/.idea/image-api.iml" />
</modules>
</component>
</project>

7
.idea/sqldialects.xml generated
View File

@@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$/migrations/2021-09-02-000740_create_tags/up.sql" dialect="GenericSQL" />
<file url="PROJECT" dialect="SQLite" />
</component>
</project>

6
.idea/vcs.xml generated
View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

289
CLAUDE.md
View File

@@ -1,289 +0,0 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
An Actix-web REST API for serving images and videos from a filesystem with automatic thumbnail generation, EXIF extraction, tag organization, and a memories feature for browsing photos by date. Uses SQLite/Diesel ORM for data persistence and ffmpeg for video processing.
## Development Commands
### Building & Running
```bash
# Build for development
cargo build
# Build for release (uses thin LTO optimization)
cargo build --release
# Run the server (requires .env file with DATABASE_URL, BASE_PATH, THUMBNAILS, VIDEO_PATH, BIND_URL, SECRET_KEY)
cargo run
# Run with specific log level
RUST_LOG=debug cargo run
```
### Testing
```bash
# Run all tests (requires BASE_PATH in .env)
cargo test
# Run specific test
cargo test test_name
# Run tests with output
cargo test -- --nocapture
```
### Database Migrations
```bash
# Install diesel CLI (one-time setup)
cargo install diesel_cli --no-default-features --features sqlite
# Create new migration
diesel migration generate migration_name
# Run migrations (also runs automatically on app startup)
diesel migration run
# Revert last migration
diesel migration revert
# Regenerate schema.rs after manual migration changes
diesel print-schema > src/database/schema.rs
```
### Code Quality
```bash
# Format code
cargo fmt
# Run clippy linter
cargo clippy
# Fix automatically fixable issues
cargo fix
```
### Utility Binaries
```bash
# Two-phase cleanup: resolve missing files and validate file types
cargo run --bin cleanup_files -- --base-path /path/to/media --database-url ./database.db
# Batch extract EXIF for existing files
cargo run --bin migrate_exif
```
## Architecture Overview
### Core Components
**Layered Architecture:**
- **HTTP Layer** (`main.rs`): Route handlers for images, videos, metadata, tags, favorites, memories
- **Auth Layer** (`auth.rs`): JWT token validation, Claims extraction via FromRequest trait
- **Service Layer** (`files.rs`, `exif.rs`, `memories.rs`): Business logic for file operations and EXIF extraction
- **DAO Layer** (`database/mod.rs`): Trait-based data access (ExifDao, UserDao, FavoriteDao, TagDao)
- **Database Layer**: Diesel ORM with SQLite, schema in `database/schema.rs`
**Async Actor System (Actix):**
- `StreamActor`: Manages ffmpeg video processing lifecycle
- `VideoPlaylistManager`: Scans directories and queues videos
- `PlaylistGenerator`: Creates HLS playlists for video streaming
### Database Schema & Patterns
**Tables:**
- `users`: Authentication (id, username, password_hash)
- `favorites`: User-specific favorites (userid, path)
- `tags`: Custom labels with timestamps
- `tagged_photo`: Many-to-many photo-tag relationships
- `image_exif`: Rich metadata (file_path + 16 EXIF fields: camera, GPS, dates, exposure settings)
**DAO Pattern:**
All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifDao`). Connection pooling uses `Arc<Mutex<SqliteConnection>>`. All DB operations are traced with OpenTelemetry in release builds.
**Key DAO Methods:**
- `store_exif()`, `get_exif()`, `get_exif_batch()`: EXIF CRUD operations
- `query_by_exif()`: Complex filtering by camera, GPS bounds, date ranges
- Batch operations minimize DB hits during file watching
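A minimal sketch of the trait shape this implies is below; the trait and struct names come from this document, but the exact signatures and fields are assumptions.
```rust
use std::sync::{Arc, Mutex};

// Illustrative record type; the real model lives in database/ and carries ~16 EXIF fields.
pub struct ImageExif {
    pub file_path: String,
    pub camera_make: Option<String>,
    pub date_taken: Option<i64>,
}

// Assumed signatures; the real trait in database/mod.rs may differ.
pub trait ExifDao: Send {
    fn store_exif(&mut self, exif: &ImageExif) -> anyhow::Result<()>;
    fn get_exif(&mut self, file_path: &str) -> anyhow::Result<Option<ImageExif>>;
    // Batch lookup used during file watching to minimize DB round trips.
    fn get_exif_batch(&mut self, paths: &[String]) -> anyhow::Result<Vec<ImageExif>>;
}

// A SQLite-backed implementation holds the shared connection described above.
pub struct SqliteExifDao {
    conn: Arc<Mutex<diesel::SqliteConnection>>,
}
```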
### File Processing Pipeline
**Thumbnail Generation:**
1. Startup scan: Rayon parallel walk of BASE_PATH
2. Creates 200x200 thumbnails in THUMBNAILS directory (mirrors source structure)
3. Videos: extracts frame at 3-second mark via ffmpeg
4. Images: uses `image` crate for JPEG/PNG processing
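A minimal sketch of the image branch using the `image` crate (paths are simplified; the real code mirrors the source tree under `THUMBNAILS` and handles videos via ffmpeg):
```rust
use std::path::Path;

/// Create a thumbnail bounded by 200x200 for a single image.
/// Sketch only; error handling and path mirroring are simplified.
fn create_image_thumbnail(src: &Path, dst: &Path) -> anyhow::Result<()> {
    let img = image::open(src)?;
    // `thumbnail` preserves aspect ratio within the bounding box.
    img.thumbnail(200, 200).save(dst)?;
    Ok(())
}
```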
**File Watching:**
Runs in a background thread with a two-tier strategy:
- **Quick scan** (default 60s): Recently modified files only
- **Full scan** (default 3600s): Comprehensive directory check
- Batch queries EXIF DB to detect new files
- Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS`
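As a rough sketch of the loop (intervals and scan functions are placeholders, not the real watcher code):
```rust
use std::time::{Duration, Instant};

fn quick_scan() { /* check recently modified files only */ }
fn full_scan() { /* walk all of BASE_PATH */ }

/// Illustrative two-tier watcher loop, run on a background thread.
fn watch_loop(quick_every: Duration, full_every: Duration) {
    let mut last_full = Instant::now();
    loop {
        std::thread::sleep(quick_every);
        quick_scan();
        if last_full.elapsed() >= full_every {
            full_scan();
            last_full = Instant::now();
        }
    }
}
```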
**EXIF Extraction:**
- Uses `kamadak-exif` crate
- Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP
- Extracts: camera make/model, lens, dimensions, GPS coordinates, focal length, aperture, shutter speed, ISO, date taken
- Triggered on upload and during file watching
**File Upload Behavior:**
If the file already exists, a timestamp is appended to the filename (e.g. `photo_1735124234.jpg`) to preserve history without overwrites.
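A hedged sketch of that collision handling (the helper name and details are assumptions):
```rust
use std::path::{Path, PathBuf};

/// If `path` already exists, append a Unix timestamp before the extension,
/// e.g. photo.jpg -> photo_1735124234.jpg. Illustrative only.
fn dedupe_upload_path(path: &Path) -> PathBuf {
    if !path.exists() {
        return path.to_path_buf();
    }
    let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("upload");
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("bin");
    path.with_file_name(format!("{}_{}.{}", stem, chrono::Utc::now().timestamp(), ext))
}
```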
### Authentication Flow
**Login:**
1. POST `/login` with username/password
2. Verify with `bcrypt::verify()` against password_hash
3. Generate JWT with claims: `{ sub: user_id, exp: 5_days_from_now }`
4. Sign with HS256 using `SECRET_KEY` environment variable
**Authorization:**
All protected endpoints extract `Claims` via a `FromRequest` trait implementation. The token is passed in the `Authorization: Bearer <token>` header.
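A minimal sketch of such an extractor (claim fields and error handling are assumptions; the real implementation lives in `auth.rs`):
```rust
use actix_web::{dev::Payload, error::ErrorUnauthorized, Error, FromRequest, HttpRequest};
use jsonwebtoken::{decode, DecodingKey, Validation};
use serde::{Deserialize, Serialize};
use std::future::{ready, Ready};

#[derive(Debug, Serialize, Deserialize)]
pub struct Claims {
    pub sub: i32,   // user id (assumed type)
    pub exp: usize, // expiry timestamp
}

impl FromRequest for Claims {
    type Error = Error;
    type Future = Ready<Result<Self, Self::Error>>;

    fn from_request(req: &HttpRequest, _: &mut Payload) -> Self::Future {
        let secret = std::env::var("SECRET_KEY").unwrap_or_default();
        // Expect "Authorization: Bearer <token>".
        let token = req
            .headers()
            .get("Authorization")
            .and_then(|h| h.to_str().ok())
            .and_then(|h| h.strip_prefix("Bearer "));
        ready(match token {
            Some(t) => decode::<Claims>(
                t,
                &DecodingKey::from_secret(secret.as_bytes()),
                &Validation::default(), // HS256 by default
            )
            .map(|data| data.claims)
            .map_err(|_| ErrorUnauthorized("invalid token")),
            None => Err(ErrorUnauthorized("missing bearer token")),
        })
    }
}
```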
### API Structure
**Key Endpoint Patterns:**
```rust
// Image serving & upload
GET /image?path=...&size=...&format=...
POST /image (multipart file upload)
// Metadata & EXIF
GET /image/metadata?path=...
// Advanced search with filters
GET /photos?path=...&recursive=true&sort=DateTakenDesc&camera_make=Canon&gps_lat=...&gps_lon=...&gps_radius_km=10&date_from=...&date_to=...&tag_ids=1,2,3&media_type=Photo
// Video streaming (HLS)
POST /video/generate (creates .m3u8 playlist + .ts segments)
GET /video/stream?path=... (serves playlist)
// Tags
GET /image/tags/all
POST /image/tags (add tag to file)
DELETE /image/tags (remove tag from file)
POST /image/tags/batch (bulk tag updates)
// Memories (week-based grouping)
GET /memories?path=...&recursive=true
```
**Request Types:**
- `FilesRequest`: Supports complex filtering (tags, EXIF fields, GPS radius, date ranges)
- `SortType`: Shuffle, NameAsc/Desc, TagCountAsc/Desc, DateTakenAsc/Desc
### Important Patterns
**Service Builder Pattern:**
Routes are registered via a composable `ServiceBuilder` trait in `service.rs`, allowing features to be added modularly.
**Path Validation:**
Always use `is_valid_full_path(&base_path, &requested_path, check_exists)` to prevent directory traversal attacks.
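A hedged sketch of what such a check can look like with `path-absolutize` (the real `is_valid_full_path` may differ):
```rust
use path_absolutize::Absolutize;
use std::path::Path;

/// Illustrative only: reject paths that escape `base_path` after resolving
/// `.`/`..` components, optionally requiring the target to exist.
fn is_valid_full_path(base_path: &Path, requested: &Path, check_exists: bool) -> bool {
    let Ok(abs) = requested.absolutize() else {
        return false;
    };
    abs.starts_with(base_path) && (!check_exists || abs.exists())
}
```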
**File Type Detection:**
Centralized in `file_types.rs` with constants `IMAGE_EXTENSIONS` and `VIDEO_EXTENSIONS`. Provides both `Path` and `DirEntry` variants for performance.
**OpenTelemetry Tracing:**
All database operations and HTTP handlers are wrapped in spans. In release builds, spans are exported to the OTLP endpoint configured via `OTLP_OTLS_ENDPOINT`; debug builds use a basic logger.
**Memory Exclusion:**
`PathExcluder` in `memories.rs` filters directories out of the memories API based on the `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns).
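A minimal sketch of that exclusion check (substring matching as described; the real `PathExcluder` may normalize paths differently):
```rust
/// Illustrative sketch of EXCLUDED_DIRS handling for the memories API.
struct PathExcluder {
    patterns: Vec<String>,
}

impl PathExcluder {
    fn from_env() -> Self {
        let patterns = std::env::var("EXCLUDED_DIRS")
            .unwrap_or_default()
            .split(',')
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty())
            .collect();
        Self { patterns }
    }

    /// A path is excluded if it contains any configured pattern as a substring.
    fn is_excluded(&self, path: &str) -> bool {
        self.patterns.iter().any(|p| path.contains(p.as_str()))
    }
}
```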
### Startup Sequence
1. Load `.env` file
2. Run embedded Diesel migrations
3. Spawn file watcher thread
4. Create initial thumbnails (parallel scan)
5. Generate video GIF thumbnails
6. Initialize AppState with Actix actors
7. Set up Prometheus metrics (`imageserver_image_total`, `imageserver_video_total`)
8. Scan directory for videos and queue HLS processing
9. Start HTTP server on `BIND_URL` + localhost:8088
## Testing Patterns
Tests require the `BASE_PATH` environment variable. Many integration tests create temporary directories and files.
When testing database code:
- Use in-memory SQLite: `DATABASE_URL=":memory:"`
- Run migrations in test setup
- Clean up with `DROP TABLE`, or use `#[serial]` from the `serial_test` crate if parallel tests conflict
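A hedged sketch of that setup (the migration-running step is omitted because macro names differ across `diesel_migrations` versions):
```rust
#[cfg(test)]
mod tests {
    use diesel::sqlite::SqliteConnection;
    use diesel::Connection;
    use serial_test::serial;

    /// Open an isolated in-memory SQLite database for a test.
    /// Run the embedded migrations here as in the real test helpers.
    fn test_connection() -> SqliteConnection {
        SqliteConnection::establish(":memory:").expect("in-memory SQLite should open")
    }

    #[test]
    #[serial] // avoid clashes if other tests touch shared on-disk state
    fn opens_in_memory_db() {
        let _conn = test_connection();
    }
}
```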
## Common Gotchas
**EXIF Date Parsing:**
Multiple formats supported (EXIF DateTime, ISO8601, Unix timestamp). Fallback chain attempts multiple parsers.
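A hedged sketch of such a fallback chain with `chrono` (the exact formats tried by the real parser are assumptions):
```rust
use chrono::{DateTime, NaiveDateTime};

/// Try several date representations and return a Unix timestamp.
/// Illustrative only; the real parser may try additional formats.
fn parse_date_taken(raw: &str) -> Option<i64> {
    // EXIF DateTime, e.g. "2024:08:15 14:30:00"
    if let Ok(dt) = NaiveDateTime::parse_from_str(raw, "%Y:%m:%d %H:%M:%S") {
        return Some(dt.and_utc().timestamp());
    }
    // ISO 8601 / RFC 3339, e.g. "2024-08-15T14:30:00Z"
    if let Ok(dt) = DateTime::parse_from_rfc3339(raw) {
        return Some(dt.timestamp());
    }
    // Bare Unix timestamp
    raw.parse::<i64>().ok()
}
```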
**Video Processing:**
ffmpeg processes run asynchronously via actors. Use `StreamActor` to track completion. HLS segments written to `VIDEO_PATH`.
**File Extensions:**
Extension detection is case-insensitive. Use `file_types.rs` helpers rather than manual string matching.
**Migration Workflow:**
After creating a migration, manually edit the SQL, then regenerate `schema.rs` with `diesel print-schema`. Migrations auto-run on startup via `embedded_migrations!()` macro.
**Path Absolutization:**
Use `path-absolutize` crate's `.absolutize()` method when converting user-provided paths to ensure they're within `BASE_PATH`.
## Required Environment Variables
```bash
DATABASE_URL=./database.db # SQLite database path
BASE_PATH=/path/to/media # Root media directory
THUMBNAILS=/path/to/thumbnails # Thumbnail storage
VIDEO_PATH=/path/to/video/hls # HLS playlist output
GIFS_DIRECTORY=/path/to/gifs # Video GIF thumbnails
BIND_URL=0.0.0.0:8080 # Server binding
CORS_ALLOWED_ORIGINS=http://localhost:3000
SECRET_KEY=your-secret-key-here # JWT signing secret
RUST_LOG=info # Log level
EXCLUDED_DIRS=/private,/archive # Comma-separated paths to exclude from memories
```
Optional:
```bash
WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval
WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval
OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds)
# AI Insights Configuration
OLLAMA_PRIMARY_URL=http://desktop:11434 # Primary Ollama server (e.g., desktop)
OLLAMA_FALLBACK_URL=http://server:11434 # Fallback Ollama server (optional, always-on)
OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b # Model for primary server (default: nemotron-3-nano:30b)
OLLAMA_FALLBACK_MODEL=llama3.2:3b # Model for fallback server (optional, uses primary if not set)
SMS_API_URL=http://localhost:8000 # SMS message API endpoint (default: localhost:8000)
SMS_API_TOKEN=your-api-token # SMS API authentication token (optional)
```
**AI Insights Fallback Behavior:**
- Primary server is tried first with its configured model (5-second connection timeout)
- On connection failure, automatically falls back to secondary server with its model (if configured)
- If `OLLAMA_FALLBACK_MODEL` not set, uses same model as primary server on fallback
- Total request timeout is 120 seconds to accommodate slow LLM inference
- Logs indicate which server and model were used (info level) and any failover attempts (warn level)
- Backwards compatible: `OLLAMA_URL` and `OLLAMA_MODEL` still supported as fallbacks
**Model Discovery:**
The `OllamaClient` provides methods to query available models:
- `OllamaClient::list_models(url)` - Returns list of all models on a server
- `OllamaClient::is_model_available(url, model_name)` - Checks if a specific model exists
This allows runtime verification of model availability before generating insights.
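For example, a hedged usage sketch (assumes the methods behave as documented above):
```rust
use crate::ai::OllamaClient;

/// Check that `model` exists on the server at `url` before generating insights.
async fn ensure_model(url: &str, model: &str) -> anyhow::Result<()> {
    if OllamaClient::is_model_available(url, model).await? {
        log::info!("model {} is available on {}", model, url);
    } else {
        let models = OllamaClient::list_models(url).await?;
        log::warn!("model {} missing on {}; available: {:?}", model, url, models);
    }
    Ok(())
}
```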
## Dependencies of Note
- **actix-web**: HTTP framework
- **diesel**: ORM for SQLite
- **jsonwebtoken**: JWT implementation
- **kamadak-exif**: EXIF parsing
- **image**: Thumbnail generation
- **walkdir**: Directory traversal
- **rayon**: Parallel processing
- **opentelemetry**: Distributed tracing
- **bcrypt**: Password hashing
- **infer**: Magic number file type detection

4612
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -1,57 +1,30 @@
[package]
name = "image-api"
version = "0.5.2"
version = "0.1.0"
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
edition = "2024"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
lto = "thin"
[dependencies]
actix = "0.13.1"
actix-web = "4"
actix-rt = "2.6"
tokio = { version = "1.42.0", features = ["default", "process", "sync", "macros", "rt-multi-thread"] }
actix-files = "0.6"
actix-cors = "0.7"
actix-multipart = "0.7.2"
actix-governor = "0.5"
actix-web = "3"
actix-rt = "1"
actix-files = "0.4"
actix-multipart = "0.3.0"
actix-cors="0.5"
futures = "0.3.5"
jsonwebtoken = "9.3.0"
jsonwebtoken = "7.2.0"
serde = "1"
serde_json = "1"
diesel = { version = "2.2.10", features = ["sqlite"] }
libsqlite3-sys = { version = "0.35", features = ["bundled"] }
diesel_migrations = "2.2.0"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
diesel = { version = "1.4.5", features = ["sqlite"] }
hmac = "0.7.1"
sha2 = "0.8.2"
chrono = "0.4.11"
dotenv = "0.15"
bcrypt = "0.17.1"
image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] }
infer = "0.16"
walkdir = "2.4.0"
rayon = "1.5"
path-absolutize = "3.1"
log = "0.4"
env_logger = "0.11.5"
actix-web-prom = "0.9.0"
prometheus = "0.13"
lazy_static = "1.5"
anyhow = "1.0"
rand = "0.8.5"
opentelemetry = { version = "0.31.0", features = ["default", "metrics", "tracing"] }
opentelemetry_sdk = { version = "0.31.0", features = ["default", "rt-tokio-current-thread", "metrics"] }
opentelemetry-otlp = { version = "0.31.0", features = ["default", "metrics", "tracing", "grpc-tonic"] }
opentelemetry-stdout = "0.31.0"
opentelemetry-appender-log = "0.31.0"
tempfile = "3.20.0"
regex = "1.11.1"
exif = { package = "kamadak-exif", version = "0.6.1" }
reqwest = { version = "0.12", features = ["json"] }
urlencoding = "2.1"
zerocopy = "0.8"
ical = "0.11"
scraper = "0.20"
base64 = "0.22"
bcrypt = "0.8.1"
image = "0.23.7"
walkdir = "2"
rayon = "1.3"
notify = "4.0"
tokio = "0.2"
path-absolutize = "3.0.6"

19
Jenkinsfile vendored
View File

@@ -1,30 +1,25 @@
pipeline {
agent {
docker {
image 'rust:1.59'
args '-v "$PWD":/usr/src/image-api'
image 'rust:1.48'
args "-v '$PWD':/usr/src/image-api"
}
}
stages {
stage('build') {
steps {
sh 'cargo build --release'
archiveArtifacts artifacts: '**/target/release/image-api', fingerprint: true
echo $PWD
sh 'cargo build --release'
archiveArtifacts artifacts: '**/target/release/**', fingerprint: true
}
}
stage('test') {
steps {
sh 'echo "BASE_PATH=$PWD" > .env'
sh 'cargo test'
}
post {
always {
sh 'rm -f .env'
}
sh 'cargo test'
}
}
}
}

View File

@@ -2,65 +2,13 @@
This is an Actix-web server for serving images and videos from a filesystem.
Upon first run it will generate thumbnails for all images and videos at `BASE_PATH`.
## Features
- Automatic thumbnail generation for images and videos
- EXIF data extraction and storage for photos
- File watching with NFS support (polling-based)
- Video streaming with HLS
- Tag-based organization
- Memories API for browsing photos by date
- **AI-Powered Photo Insights** - Generate contextual insights from photos using LLMs
- **RAG-based Context Retrieval** - Semantic search over daily conversation summaries
- **Automatic Daily Summaries** - LLM-generated summaries of daily conversations with embeddings
## Environment
A handful of environment variables are required to run the API.
They should be defined in an `.env` file located next to the binary or in a directory above it.
You must have `ffmpeg` installed for streaming video and generating video thumbnails.
- `DATABASE_URL` is a path or url to a database (currently only SQLite is tested)
- `BASE_PATH` is the root from which you want to serve images and videos
- `THUMBNAILS` is a path where generated thumbnails should be stored
- `VIDEO_PATH` is a path where HLS playlists and video parts should be stored
- `BIND_URL` is the url and port to bind to (typically your own IP address)
- `SECRET_KEY` is the *hopefully* random string used to sign tokens
- `RUST_LOG` is one of `off, error, warn, info, debug, trace`, from least to most noisy [error is default]
- `EXCLUDED_DIRS` is a comma-separated list of directories to exclude from the Memories API
- `WATCH_QUICK_INTERVAL_SECONDS` (optional) is the interval in seconds for quick file scans [default: 60]
- `WATCH_FULL_INTERVAL_SECONDS` (optional) is the interval in seconds for full file scans [default: 3600]
### AI Insights Configuration (Optional)
The following environment variables configure AI-powered photo insights and daily conversation summaries:
#### Ollama Configuration
- `OLLAMA_PRIMARY_URL` - Primary Ollama server URL [default: `http://localhost:11434`]
- Example: `http://desktop:11434` (your main/powerful server)
- `OLLAMA_FALLBACK_URL` - Fallback Ollama server URL (optional)
- Example: `http://server:11434` (always-on backup server)
- `OLLAMA_PRIMARY_MODEL` - Model to use on primary server [default: `nemotron-3-nano:30b`]
- Example: `nemotron-3-nano:30b`, `llama3.2:3b`, etc.
- `OLLAMA_FALLBACK_MODEL` - Model to use on fallback server (optional)
- If not set, uses `OLLAMA_PRIMARY_MODEL` on fallback server
**Legacy Variables** (still supported):
- `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` not set
- `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` not set
#### SMS API Configuration
- `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`]
- Used to fetch conversation data for context in insights
- `SMS_API_TOKEN` - Authentication token for SMS API (optional)
#### Fallback Behavior
- Primary server is tried first with 5-second connection timeout
- On failure, automatically falls back to secondary server (if configured)
- Total request timeout is 120 seconds to accommodate LLM inference
- Logs indicate which server/model was used and any failover attempts
#### Daily Summary Generation
Daily conversation summaries are generated automatically on server startup. Configure in `src/main.rs`:
- Date range for summary generation
- Contacts to process
- Model version used for embeddings: `nomic-embed-text:v1.5`

View File

@@ -1,3 +0,0 @@
DROP TABLE tags;
DROP TABLE tagged_photo;

View File

@@ -1,13 +0,0 @@
CREATE TABLE tags (
id INTEGER PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
created_time BIGINT NOT NULL
);
CREATE TABLE tagged_photo (
id INTEGER PRIMARY KEY NOT NULL,
photo_name TEXT NOT NULL,
tag_id INTEGER NOT NULL,
created_time BIGINT NOT NULL,
CONSTRAINT tagid FOREIGN KEY (tag_id) REFERENCES tags (id) ON DELETE CASCADE ON UPDATE CASCADE
);

View File

@@ -1,2 +0,0 @@
DROP INDEX IF EXISTS idx_image_exif_file_path;
DROP TABLE IF EXISTS image_exif;

View File

@@ -1,32 +0,0 @@
CREATE TABLE image_exif (
id INTEGER PRIMARY KEY NOT NULL,
file_path TEXT NOT NULL UNIQUE,
-- Camera Information
camera_make TEXT,
camera_model TEXT,
lens_model TEXT,
-- Image Properties
width INTEGER,
height INTEGER,
orientation INTEGER,
-- GPS Coordinates
gps_latitude REAL,
gps_longitude REAL,
gps_altitude REAL,
-- Capture Settings
focal_length REAL,
aperture REAL,
shutter_speed TEXT,
iso INTEGER,
date_taken BIGINT,
-- Housekeeping
created_time BIGINT NOT NULL,
last_modified BIGINT NOT NULL
);
CREATE INDEX idx_image_exif_file_path ON image_exif(file_path);

View File

@@ -1,9 +0,0 @@
-- Rollback indexes
DROP INDEX IF EXISTS idx_favorites_userid;
DROP INDEX IF EXISTS idx_favorites_path;
DROP INDEX IF EXISTS idx_tags_name;
DROP INDEX IF EXISTS idx_tagged_photo_photo_name;
DROP INDEX IF EXISTS idx_tagged_photo_tag_id;
DROP INDEX IF EXISTS idx_image_exif_camera;
DROP INDEX IF EXISTS idx_image_exif_gps;

View File

@@ -1,17 +0,0 @@
-- Add indexes for improved query performance
-- Favorites table indexes
CREATE INDEX IF NOT EXISTS idx_favorites_userid ON favorites(userid);
CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path);
-- Tags table indexes
CREATE INDEX IF NOT EXISTS idx_tags_name ON tags(name);
-- Tagged photos indexes
CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name);
CREATE INDEX IF NOT EXISTS idx_tagged_photo_tag_id ON tagged_photo(tag_id);
-- EXIF table indexes (date_taken already has index from previous migration)
-- Adding composite index for common EXIF queries
CREATE INDEX IF NOT EXISTS idx_image_exif_camera ON image_exif(camera_make, camera_model);
CREATE INDEX IF NOT EXISTS idx_image_exif_gps ON image_exif(gps_latitude, gps_longitude);

View File

@@ -1,3 +0,0 @@
-- Rollback unique constraint on favorites
DROP INDEX IF EXISTS idx_favorites_unique;

View File

@@ -1,12 +0,0 @@
-- Add unique constraint to prevent duplicate favorites per user
-- First, remove any existing duplicates (keep the oldest one)
DELETE FROM favorites
WHERE rowid NOT IN (
SELECT MIN(rowid)
FROM favorites
GROUP BY userid, path
);
-- Add unique index to enforce constraint
CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, path);

View File

@@ -1,2 +0,0 @@
-- Remove date_taken index
DROP INDEX IF EXISTS idx_image_exif_date_taken;

View File

@@ -1,2 +0,0 @@
-- Add index on date_taken for efficient date range queries
CREATE INDEX IF NOT EXISTS idx_image_exif_date_taken ON image_exif(date_taken);

View File

@@ -1,3 +0,0 @@
-- Rollback AI insights table
DROP INDEX IF EXISTS idx_photo_insights_path;
DROP TABLE IF EXISTS photo_insights;

View File

@@ -1,11 +0,0 @@
-- AI-generated insights for individual photos
CREATE TABLE IF NOT EXISTS photo_insights (
id INTEGER PRIMARY KEY NOT NULL,
file_path TEXT NOT NULL UNIQUE, -- Full path to the photo
title TEXT NOT NULL, -- "At the beach with Sarah"
summary TEXT NOT NULL, -- 2-3 sentence description
generated_at BIGINT NOT NULL,
model_version TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_photo_insights_path ON photo_insights(file_path);

View File

@@ -1 +0,0 @@
DROP TABLE daily_conversation_summaries;

View File

@@ -1,19 +0,0 @@
-- Daily conversation summaries for improved RAG quality
-- Each row = one day's conversation with a contact, summarized by LLM and embedded
CREATE TABLE daily_conversation_summaries (
id INTEGER PRIMARY KEY NOT NULL,
date TEXT NOT NULL, -- ISO date "2024-08-15"
contact TEXT NOT NULL, -- Contact name
summary TEXT NOT NULL, -- LLM-generated 3-5 sentence summary
message_count INTEGER NOT NULL, -- Number of messages in this day
embedding BLOB NOT NULL, -- 768-dim vector of the summary
created_at BIGINT NOT NULL, -- When this summary was generated
model_version TEXT NOT NULL, -- "nomic-embed-text:v1.5"
UNIQUE(date, contact)
);
-- Indexes for efficient querying
CREATE INDEX idx_daily_summaries_date ON daily_conversation_summaries(date);
CREATE INDEX idx_daily_summaries_contact ON daily_conversation_summaries(contact);
CREATE INDEX idx_daily_summaries_date_contact ON daily_conversation_summaries(date, contact);

View File

@@ -1 +0,0 @@
DROP TABLE IF EXISTS calendar_events;

View File

@@ -1,20 +0,0 @@
CREATE TABLE calendar_events (
id INTEGER PRIMARY KEY NOT NULL,
event_uid TEXT,
summary TEXT NOT NULL,
description TEXT,
location TEXT,
start_time BIGINT NOT NULL,
end_time BIGINT NOT NULL,
all_day BOOLEAN NOT NULL DEFAULT 0,
organizer TEXT,
attendees TEXT,
embedding BLOB,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(event_uid, start_time)
);
CREATE INDEX idx_calendar_start_time ON calendar_events(start_time);
CREATE INDEX idx_calendar_end_time ON calendar_events(end_time);
CREATE INDEX idx_calendar_time_range ON calendar_events(start_time, end_time);

View File

@@ -1 +0,0 @@
DROP TABLE IF EXISTS location_history;

View File

@@ -1,19 +0,0 @@
CREATE TABLE location_history (
id INTEGER PRIMARY KEY NOT NULL,
timestamp BIGINT NOT NULL,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
accuracy INTEGER,
activity TEXT,
activity_confidence INTEGER,
place_name TEXT,
place_category TEXT,
embedding BLOB,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(timestamp, latitude, longitude)
);
CREATE INDEX idx_location_timestamp ON location_history(timestamp);
CREATE INDEX idx_location_coords ON location_history(latitude, longitude);
CREATE INDEX idx_location_activity ON location_history(activity);

View File

@@ -1 +0,0 @@
DROP TABLE IF EXISTS search_history;

View File

@@ -1,13 +0,0 @@
CREATE TABLE search_history (
id INTEGER PRIMARY KEY NOT NULL,
timestamp BIGINT NOT NULL,
query TEXT NOT NULL,
search_engine TEXT,
embedding BLOB NOT NULL,
created_at BIGINT NOT NULL,
source_file TEXT,
UNIQUE(timestamp, query)
);
CREATE INDEX idx_search_timestamp ON search_history(timestamp);
CREATE INDEX idx_search_query ON search_history(query);

View File

@@ -1,4 +0,0 @@
-- Revert search performance optimization indexes
DROP INDEX IF EXISTS idx_image_exif_date_path;
DROP INDEX IF EXISTS idx_tagged_photo_count;

View File

@@ -1,15 +0,0 @@
-- Add composite indexes for search performance optimization
-- This migration addresses N+1 query issues and enables database-level sorting
-- Covering index for date-sorted queries (supports ORDER BY + pagination)
-- Enables efficient date-based sorting without loading all files into memory
CREATE INDEX IF NOT EXISTS idx_image_exif_date_path
ON image_exif(date_taken DESC, file_path);
-- Optimize batch tag count queries with GROUP BY
-- Reduces N individual queries to a single batch query
CREATE INDEX IF NOT EXISTS idx_tagged_photo_count
ON tagged_photo(photo_name, tag_id);
-- Update query planner statistics to optimize query execution
ANALYZE;

View File

@@ -1,403 +0,0 @@
use anyhow::Result;
use chrono::{NaiveDate, Utc};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::time::sleep;
use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
use crate::database::{DailySummaryDao, InsertDailySummary};
use crate::otel::global_tracer;
/// Strip boilerplate prefixes and common phrases from summaries before embedding.
/// This improves embedding diversity by removing structural similarity.
pub fn strip_summary_boilerplate(summary: &str) -> String {
let mut text = summary.trim().to_string();
// Remove markdown headers
while text.starts_with('#') {
if let Some(pos) = text.find('\n') {
text = text[pos..].trim_start().to_string();
} else {
// Single line with just headers, try to extract content after #s
text = text.trim_start_matches('#').trim().to_string();
break;
}
}
// Remove "Summary:" prefix variations (with optional markdown bold)
let prefixes = [
"**Summary:**",
"**Summary**:",
"*Summary:*",
"Summary:",
"**summary:**",
"summary:",
];
for prefix in prefixes {
if text.to_lowercase().starts_with(&prefix.to_lowercase()) {
text = text[prefix.len()..].trim_start().to_string();
break;
}
}
// Remove common opening phrases that add no semantic value
let opening_phrases = [
"Today, Melissa and I discussed",
"Today, Amanda and I discussed",
"Today Melissa and I discussed",
"Today Amanda and I discussed",
"Melissa and I discussed",
"Amanda and I discussed",
"Today, I discussed",
"Today I discussed",
"The conversation covered",
"This conversation covered",
"In this conversation,",
"During this conversation,",
];
for phrase in opening_phrases {
if text.to_lowercase().starts_with(&phrase.to_lowercase()) {
text = text[phrase.len()..].trim_start().to_string();
// Remove leading punctuation/articles after stripping phrase
text = text
.trim_start_matches([',', ':', '-'])
.trim_start()
.to_string();
break;
}
}
// Remove any remaining leading markdown bold markers
if text.starts_with("**")
&& let Some(end) = text[2..].find("**")
{
// Keep the content between ** but remove the markers
let bold_content = &text[2..2 + end];
text = format!("{}{}", bold_content, &text[4 + end..]);
}
text.trim().to_string()
}
/// Generate and embed daily conversation summaries for a date range
/// Default: August 2024 ±30 days (July 1 - September 30, 2024)
pub async fn generate_daily_summaries(
contact: &str,
start_date: Option<NaiveDate>,
end_date: Option<NaiveDate>,
ollama: &OllamaClient,
sms_client: &SmsApiClient,
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
) -> Result<()> {
let tracer = global_tracer();
// Get current context (empty in background task) and start span with it
let current_cx = opentelemetry::Context::current();
let mut span = tracer.start_with_context("ai.daily_summary.generate_batch", &current_cx);
span.set_attribute(KeyValue::new("contact", contact.to_string()));
// Create context with this span for child operations
let parent_cx = current_cx.with_span(span);
// Default to August 2024 ±30 days
let start = start_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 7, 1).unwrap());
let end = end_date.unwrap_or_else(|| NaiveDate::from_ymd_opt(2024, 9, 30).unwrap());
parent_cx
.span()
.set_attribute(KeyValue::new("start_date", start.to_string()));
parent_cx
.span()
.set_attribute(KeyValue::new("end_date", end.to_string()));
parent_cx.span().set_attribute(KeyValue::new(
"date_range_days",
(end - start).num_days() + 1,
));
log::info!("========================================");
log::info!("Starting daily summary generation for {}", contact);
log::info!(
"Date range: {} to {} ({} days)",
start,
end,
(end - start).num_days() + 1
);
log::info!("========================================");
// Fetch all messages for the contact in the date range
log::info!("Fetching messages for date range...");
let _start_timestamp = start.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let _end_timestamp = end.and_hms_opt(23, 59, 59).unwrap().and_utc().timestamp();
let all_messages = sms_client.fetch_all_messages_for_contact(contact).await?;
// Filter to date range and group by date
let mut messages_by_date: HashMap<NaiveDate, Vec<SmsMessage>> = HashMap::new();
for msg in all_messages {
let msg_dt = chrono::DateTime::from_timestamp(msg.timestamp, 0);
if let Some(dt) = msg_dt {
let date = dt.date_naive();
if date >= start && date <= end {
messages_by_date.entry(date).or_default().push(msg);
}
}
}
log::info!(
"Grouped messages into {} days with activity",
messages_by_date.len()
);
if messages_by_date.is_empty() {
log::warn!("No messages found in date range");
return Ok(());
}
// Sort dates for ordered processing
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
dates.sort();
let total_days = dates.len();
let mut processed = 0;
let mut skipped = 0;
let mut failed = 0;
log::info!("Processing {} days with messages...", total_days);
for (idx, date) in dates.iter().enumerate() {
let messages = messages_by_date.get(date).unwrap();
let date_str = date.format("%Y-%m-%d").to_string();
// Check if summary already exists
{
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let otel_context = opentelemetry::Context::new();
if dao
.summary_exists(&otel_context, &date_str, contact)
.unwrap_or(false)
{
skipped += 1;
if idx % 10 == 0 {
log::info!(
"Progress: {}/{} ({} processed, {} skipped)",
idx + 1,
total_days,
processed,
skipped
);
}
continue;
}
}
// Generate summary for this day
match generate_and_store_daily_summary(
&parent_cx,
date,
contact,
messages,
ollama,
summary_dao.clone(),
)
.await
{
Ok(_) => {
processed += 1;
log::info!(
"✓ {}/{}: {} ({} messages)",
idx + 1,
total_days,
date_str,
messages.len()
);
}
Err(e) => {
failed += 1;
log::error!("✗ Failed to process {}: {:?}", date_str, e);
}
}
// Rate limiting: sleep 500ms between summaries
if idx < total_days - 1 {
sleep(std::time::Duration::from_millis(500)).await;
}
// Progress logging every 10 days
if idx % 10 == 0 && idx > 0 {
log::info!(
"Progress: {}/{} ({} processed, {} skipped, {} failed)",
idx + 1,
total_days,
processed,
skipped,
failed
);
}
}
log::info!("========================================");
log::info!("Daily summary generation complete!");
log::info!(
"Processed: {}, Skipped: {}, Failed: {}",
processed,
skipped,
failed
);
log::info!("========================================");
// Record final metrics in span
parent_cx
.span()
.set_attribute(KeyValue::new("days_processed", processed as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("days_skipped", skipped as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("days_failed", failed as i64));
parent_cx
.span()
.set_attribute(KeyValue::new("total_days", total_days as i64));
if failed > 0 {
parent_cx
.span()
.set_status(Status::error(format!("{} days failed to process", failed)));
} else {
parent_cx.span().set_status(Status::Ok);
}
Ok(())
}
/// Generate and store a single day's summary
async fn generate_and_store_daily_summary(
parent_cx: &opentelemetry::Context,
date: &NaiveDate,
contact: &str,
messages: &[SmsMessage],
ollama: &OllamaClient,
summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
) -> Result<()> {
let tracer = global_tracer();
let mut span = tracer.start_with_context("ai.daily_summary.generate_single", parent_cx);
span.set_attribute(KeyValue::new("date", date.to_string()));
span.set_attribute(KeyValue::new("contact", contact.to_string()));
span.set_attribute(KeyValue::new("message_count", messages.len() as i64));
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200) // Limit to 200 messages per day to avoid token overflow
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let weekday = date.format("%A");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
contact,
contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
// Generate summary with LLM
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
log::debug!(
"Generated summary for {}: {}",
date,
summary.chars().take(100).collect::<String>()
);
span.set_attribute(KeyValue::new("summary_length", summary.len() as i64));
// Strip boilerplate before embedding to improve vector diversity
let stripped_summary = strip_summary_boilerplate(&summary);
log::debug!(
"Stripped summary for embedding: {}",
stripped_summary.chars().take(100).collect::<String>()
);
// Embed the stripped summary (store original summary in DB)
let embedding = ollama.generate_embedding(&stripped_summary).await?;
span.set_attribute(KeyValue::new(
"embedding_dimensions",
embedding.len() as i64,
));
// Store in database
let insert = InsertDailySummary {
date: date.format("%Y-%m-%d").to_string(),
contact: contact.to_string(),
summary: summary.trim().to_string(),
message_count: messages.len() as i32,
embedding,
created_at: Utc::now().timestamp(),
// model_version: "nomic-embed-text:v1.5".to_string(),
model_version: "mxbai-embed-large:335m".to_string(),
};
// Create context from current span for DB operation
let child_cx = opentelemetry::Context::current_with_span(span);
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let result = dao
.store_summary(&child_cx, insert)
.map_err(|e| anyhow::anyhow!("Failed to store summary: {:?}", e));
match &result {
Ok(_) => child_cx.span().set_status(Status::Ok),
Err(e) => child_cx.span().set_status(Status::error(e.to_string())),
}
result?;
Ok(())
}

View File

@@ -1,263 +0,0 @@
use actix_web::{HttpRequest, HttpResponse, Responder, delete, get, post, web};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use serde::{Deserialize, Serialize};
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
use crate::data::Claims;
use crate::database::InsightDao;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::utils::normalize_path;
#[derive(Debug, Deserialize)]
pub struct GeneratePhotoInsightRequest {
pub file_path: String,
#[serde(default)]
pub model: Option<String>,
#[serde(default)]
pub system_prompt: Option<String>,
#[serde(default)]
pub num_ctx: Option<i32>,
}
#[derive(Debug, Deserialize)]
pub struct GetPhotoInsightQuery {
pub path: String,
}
#[derive(Debug, Serialize)]
pub struct PhotoInsightResponse {
pub id: i32,
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}
#[derive(Debug, Serialize)]
pub struct AvailableModelsResponse {
pub primary: ServerModels,
#[serde(skip_serializing_if = "Option::is_none")]
pub fallback: Option<ServerModels>,
}
#[derive(Debug, Serialize)]
pub struct ServerModels {
pub url: String,
pub models: Vec<ModelCapabilities>,
pub default_model: String,
}
/// POST /insights/generate - Generate insight for a specific photo
#[post("/insights/generate")]
pub async fn generate_insight_handler(
http_request: HttpRequest,
_claims: Claims,
request: web::Json<GeneratePhotoInsightRequest>,
insight_generator: web::Data<InsightGenerator>,
) -> impl Responder {
let parent_context = extract_context_from_request(&http_request);
let tracer = global_tracer();
let mut span = tracer.start_with_context("http.insights.generate", &parent_context);
let normalized_path = normalize_path(&request.file_path);
span.set_attribute(KeyValue::new("file_path", normalized_path.clone()));
if let Some(ref model) = request.model {
span.set_attribute(KeyValue::new("model", model.clone()));
}
if let Some(ref prompt) = request.system_prompt {
span.set_attribute(KeyValue::new("has_custom_prompt", true));
span.set_attribute(KeyValue::new("prompt_length", prompt.len() as i64));
}
if let Some(ctx) = request.num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
}
log::info!(
"Manual insight generation triggered for photo: {} with model: {:?}, custom_prompt: {}, num_ctx: {:?}",
normalized_path,
request.model,
request.system_prompt.is_some(),
request.num_ctx
);
// Generate insight with optional custom model, system prompt, and context size
let result = insight_generator
.generate_insight_for_photo_with_config(
&normalized_path,
request.model.clone(),
request.system_prompt.clone(),
request.num_ctx,
)
.await;
match result {
Ok(()) => {
span.set_status(Status::Ok);
HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Insight generated successfully"
}))
}
Err(e) => {
log::error!("Failed to generate insight: {:?}", e);
span.set_status(Status::error(e.to_string()));
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to generate insight: {:?}", e)
}))
}
}
}
/// GET /insights?path=/path/to/photo.jpg - Fetch insight for specific photo
#[get("/insights")]
pub async fn get_insight_handler(
_claims: Claims,
query: web::Query<GetPhotoInsightQuery>,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
let normalized_path = normalize_path(&query.path);
log::debug!("Fetching insight for {}", normalized_path);
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.get_insight(&otel_context, &normalized_path) {
Ok(Some(insight)) => {
let response = PhotoInsightResponse {
id: insight.id,
file_path: insight.file_path,
title: insight.title,
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
};
HttpResponse::Ok().json(response)
}
Ok(None) => HttpResponse::NotFound().json(serde_json::json!({
"error": "Insight not found"
})),
Err(e) => {
log::error!("Failed to fetch insight ({}): {:?}", &query.path, e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to fetch insight: {:?}", e)
}))
}
}
}
/// DELETE /insights?path=/path/to/photo.jpg - Remove insight (will regenerate on next request)
#[delete("/insights")]
pub async fn delete_insight_handler(
_claims: Claims,
query: web::Query<GetPhotoInsightQuery>,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
let normalized_path = normalize_path(&query.path);
log::info!("Deleting insight for {}", normalized_path);
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.delete_insight(&otel_context, &normalized_path) {
Ok(()) => HttpResponse::Ok().json(serde_json::json!({
"success": true,
"message": "Insight deleted successfully"
})),
Err(e) => {
log::error!("Failed to delete insight: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to delete insight: {:?}", e)
}))
}
}
}
/// GET /insights/all - Get all insights
#[get("/insights/all")]
pub async fn get_all_insights_handler(
_claims: Claims,
insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
log::debug!("Fetching all insights");
let otel_context = opentelemetry::Context::new();
let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
match dao.get_all_insights(&otel_context) {
Ok(insights) => {
let responses: Vec<PhotoInsightResponse> = insights
.into_iter()
.map(|insight| PhotoInsightResponse {
id: insight.id,
file_path: insight.file_path,
title: insight.title,
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
})
.collect();
HttpResponse::Ok().json(responses)
}
Err(e) => {
log::error!("Failed to fetch all insights: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({
"error": format!("Failed to fetch insights: {:?}", e)
}))
}
}
}
/// GET /insights/models - List available models from both servers with capabilities
#[get("/insights/models")]
pub async fn get_available_models_handler(
_claims: Claims,
app_state: web::Data<crate::state::AppState>,
) -> impl Responder {
log::debug!("Fetching available models with capabilities");
let ollama_client = &app_state.ollama;
// Fetch models with capabilities from primary server
let primary_models =
match OllamaClient::list_models_with_capabilities(&ollama_client.primary_url).await {
Ok(models) => models,
Err(e) => {
log::warn!("Failed to fetch models from primary server: {:?}", e);
vec![]
}
};
let primary = ServerModels {
url: ollama_client.primary_url.clone(),
models: primary_models,
default_model: ollama_client.primary_model.clone(),
};
// Fetch models with capabilities from fallback server if configured
let fallback = if let Some(fallback_url) = &ollama_client.fallback_url {
match OllamaClient::list_models_with_capabilities(fallback_url).await {
Ok(models) => Some(ServerModels {
url: fallback_url.clone(),
models,
default_model: ollama_client
.fallback_model
.clone()
.unwrap_or_else(|| ollama_client.primary_model.clone()),
}),
Err(e) => {
log::warn!("Failed to fetch models from fallback server: {:?}", e);
None
}
}
} else {
None
};
let response = AvailableModelsResponse { primary, fallback };
HttpResponse::Ok().json(response)
}

File diff suppressed because it is too large

View File

@@ -1,16 +0,0 @@
pub mod daily_summary_job;
pub mod handlers;
pub mod insight_generator;
pub mod ollama;
pub mod sms_client;
// strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
#[allow(unused_imports)]
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
pub use handlers::{
delete_insight_handler, generate_insight_handler, get_all_insights_handler,
get_available_models_handler, get_insight_handler,
};
pub use insight_generator::InsightGenerator;
pub use ollama::{ModelCapabilities, OllamaClient};
pub use sms_client::{SmsApiClient, SmsMessage};

View File

@@ -1,735 +0,0 @@
use anyhow::Result;
use chrono::NaiveDate;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
// Cache duration: 15 minutes
const CACHE_DURATION_SECS: u64 = 15 * 60;
// Cached entry with timestamp
#[derive(Clone)]
struct CachedEntry<T> {
data: T,
cached_at: Instant,
}
impl<T> CachedEntry<T> {
fn new(data: T) -> Self {
Self {
data,
cached_at: Instant::now(),
}
}
fn is_expired(&self) -> bool {
self.cached_at.elapsed().as_secs() > CACHE_DURATION_SECS
}
}
// Global cache for model lists and capabilities
lazy_static::lazy_static! {
static ref MODEL_LIST_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<String>>>>> =
Arc::new(Mutex::new(HashMap::new()));
static ref MODEL_CAPABILITIES_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<ModelCapabilities>>>>> =
Arc::new(Mutex::new(HashMap::new()));
}
#[derive(Clone)]
pub struct OllamaClient {
client: Client,
pub primary_url: String,
pub fallback_url: Option<String>,
pub primary_model: String,
pub fallback_model: Option<String>,
num_ctx: Option<i32>,
}
impl OllamaClient {
pub fn new(
primary_url: String,
fallback_url: Option<String>,
primary_model: String,
fallback_model: Option<String>,
) -> Self {
Self {
client: Client::builder()
.connect_timeout(Duration::from_secs(5)) // Quick connection timeout
.timeout(Duration::from_secs(120)) // Total request timeout for generation
.build()
.unwrap_or_else(|_| Client::new()),
primary_url,
fallback_url,
primary_model,
fallback_model,
num_ctx: None,
}
}
pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
self.num_ctx = num_ctx;
}
/// List available models on an Ollama server (cached for 15 minutes)
pub async fn list_models(url: &str) -> Result<Vec<String>> {
// Check cache first
{
let cache = MODEL_LIST_CACHE.lock().unwrap();
if let Some(entry) = cache.get(url)
&& !entry.is_expired()
{
log::debug!("Returning cached model list for {}", url);
return Ok(entry.data.clone());
}
}
log::debug!("Fetching fresh model list from {}", url);
let client = Client::builder()
.connect_timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.build()?;
let response = client.get(format!("{}/api/tags", url)).send().await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!("Failed to list models from {}", url));
}
let tags_response: OllamaTagsResponse = response.json().await?;
let models: Vec<String> = tags_response.models.into_iter().map(|m| m.name).collect();
// Store in cache
{
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
cache.insert(url.to_string(), CachedEntry::new(models.clone()));
}
Ok(models)
}
/// Check if a model is available on a server
pub async fn is_model_available(url: &str, model_name: &str) -> Result<bool> {
let models = Self::list_models(url).await?;
Ok(models.iter().any(|m| m == model_name))
}
/// Clear the model list cache for a specific URL or all URLs
pub fn clear_model_cache(url: Option<&str>) {
let mut cache = MODEL_LIST_CACHE.lock().unwrap();
if let Some(url) = url {
cache.remove(url);
log::debug!("Cleared model list cache for {}", url);
} else {
cache.clear();
log::debug!("Cleared all model list cache entries");
}
}
/// Clear the model capabilities cache for a specific URL or all URLs
pub fn clear_capabilities_cache(url: Option<&str>) {
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
if let Some(url) = url {
cache.remove(url);
log::debug!("Cleared model capabilities cache for {}", url);
} else {
cache.clear();
log::debug!("Cleared all model capabilities cache entries");
}
}
/// Check if a model has vision capabilities using the /api/show endpoint
pub async fn check_model_capabilities(
url: &str,
model_name: &str,
) -> Result<ModelCapabilities> {
let client = Client::builder()
.connect_timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.build()?;
#[derive(Serialize)]
struct ShowRequest {
model: String,
}
let response = client
.post(format!("{}/api/show", url))
.json(&ShowRequest {
model: model_name.to_string(),
})
.send()
.await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to get model details for {} from {}",
model_name,
url
));
}
let show_response: OllamaShowResponse = response.json().await?;
// Check if "vision" is in the capabilities array
let has_vision = show_response.capabilities.iter().any(|cap| cap == "vision");
Ok(ModelCapabilities {
name: model_name.to_string(),
has_vision,
})
}
/// List all models with their capabilities from a server (cached for 15 minutes)
pub async fn list_models_with_capabilities(url: &str) -> Result<Vec<ModelCapabilities>> {
// Check cache first
{
let cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
if let Some(entry) = cache.get(url)
&& !entry.is_expired()
{
log::debug!("Returning cached model capabilities for {}", url);
return Ok(entry.data.clone());
}
}
log::debug!("Fetching fresh model capabilities from {}", url);
let models = Self::list_models(url).await?;
let mut capabilities = Vec::new();
for model_name in models {
match Self::check_model_capabilities(url, &model_name).await {
Ok(cap) => capabilities.push(cap),
Err(e) => {
log::warn!("Failed to get capabilities for model {}: {}", model_name, e);
// Fallback: assume no vision if we can't check
capabilities.push(ModelCapabilities {
name: model_name,
has_vision: false,
});
}
}
}
// Store in cache
{
let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
cache.insert(url.to_string(), CachedEntry::new(capabilities.clone()));
}
Ok(capabilities)
}
/// Extract final answer from thinking model output
/// Handles <think>...</think> tags and takes everything after
fn extract_final_answer(&self, response: &str) -> String {
let response = response.trim();
// Look for </think> tag and take everything after it
if let Some(pos) = response.find("</think>") {
let answer = response[pos + 8..].trim();
if !answer.is_empty() {
return answer.to_string();
}
}
// Fallback: return the whole response trimmed
response.to_string()
}
async fn try_generate(
&self,
url: &str,
model: &str,
prompt: &str,
system: Option<&str>,
images: Option<Vec<String>>,
) -> Result<String> {
let request = OllamaRequest {
model: model.to_string(),
prompt: prompt.to_string(),
stream: false,
system: system.map(|s| s.to_string()),
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: ctx }),
images,
};
let response = self
.client
.post(format!("{}/api/generate", url))
.json(&request)
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
return Err(anyhow::anyhow!(
"Ollama request failed: {} - {}",
status,
error_body
));
}
let result: OllamaResponse = response.json().await?;
Ok(result.response)
}
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
self.generate_with_images(prompt, system, None).await
}
pub async fn generate_with_images(
&self,
prompt: &str,
system: Option<&str>,
images: Option<Vec<String>>,
) -> Result<String> {
log::debug!("=== Ollama Request ===");
log::debug!("Primary model: {}", self.primary_model);
if let Some(sys) = system {
log::debug!("System: {}", sys);
}
log::debug!("Prompt:\n{}", prompt);
if let Some(ref imgs) = images {
log::debug!("Images: {} image(s) included", imgs.len());
}
log::debug!("=====================");
// Try primary server first with primary model
log::info!(
"Attempting to generate with primary server: {} (model: {})",
self.primary_url,
self.primary_model
);
let primary_result = self
.try_generate(
&self.primary_url,
&self.primary_model,
prompt,
system,
images.clone(),
)
.await;
let raw_response = match primary_result {
Ok(response) => {
log::info!("Successfully generated response from primary server");
response
}
Err(e) => {
log::warn!("Primary server failed: {}", e);
// Try fallback server if available
if let Some(fallback_url) = &self.fallback_url {
// Use fallback model if specified, otherwise use primary model
let fallback_model =
self.fallback_model.as_ref().unwrap_or(&self.primary_model);
log::info!(
"Attempting to generate with fallback server: {} (model: {})",
fallback_url,
fallback_model
);
match self
.try_generate(fallback_url, fallback_model, prompt, system, images.clone())
.await
{
Ok(response) => {
log::info!("Successfully generated response from fallback server");
response
}
Err(fallback_e) => {
log::error!("Fallback server also failed: {}", fallback_e);
return Err(anyhow::anyhow!(
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
e,
fallback_e
));
}
}
} else {
log::error!("No fallback server configured");
return Err(e);
}
}
};
log::debug!("=== Ollama Response ===");
log::debug!("Raw response: {}", raw_response.trim());
log::debug!("=======================");
// Extract final answer from thinking model output
let cleaned = self.extract_final_answer(&raw_response);
log::debug!("=== Cleaned Response ===");
log::debug!("Final answer: {}", cleaned);
log::debug!("========================");
Ok(cleaned)
}
/// Generate a title for a single photo based on its context
pub async fn generate_photo_title(
&self,
date: NaiveDate,
location: Option<&str>,
contact: Option<&str>,
sms_summary: Option<&str>,
custom_system: Option<&str>,
image_base64: Option<String>,
) -> Result<String> {
let location_str = location.unwrap_or("Unknown location");
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = if image_base64.is_some() {
if let Some(contact_name) = contact {
format!(
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. If limited information is available, use a simple descriptive title based on what you see.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name
)
} else {
format!(
r#"Create a short title (maximum 8 words) about this moment by analyzing the image and context:
Date: {}
Location: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. If limited information is available, use a simple descriptive title based on what you see.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
}
} else if let Some(contact_name) = contact {
format!(
r#"Create a short title (maximum 8 words) about this moment:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Use specific details from the context above. The photo is from a folder for {}, so they are likely related to this moment. If no specific details are available, use a simple descriptive title.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name
)
} else {
format!(
r#"Create a short title (maximum 8 words) about this moment:
Date: {}
Location: {}
Messages: {}
Use specific details from the context above. If no specific details are available, use a simple descriptive title.
Return ONLY the title, nothing else."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
};
let system = custom_system.unwrap_or("You are my long term memory assistant. Use only the information provided. Do not invent details.");
let images = image_base64.map(|img| vec![img]);
let title = self
.generate_with_images(&prompt, Some(system), images)
.await?;
Ok(title.trim().trim_matches('"').to_string())
}
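A minimal usage sketch for the title helper. It assumes the client is exposed as `image_api::ai::OllamaClient` (the same path the importer binaries below use), that an Ollama server is reachable at the URL shown, and that the model name, location, and contact are placeholders:

```rust
use anyhow::Result;
use chrono::NaiveDate;
use image_api::ai::OllamaClient;

#[tokio::main]
async fn main() -> Result<()> {
    let ollama = OllamaClient::new(
        "http://localhost:11434".to_string(), // primary_url (placeholder)
        None,                                 // no fallback server
        "llava:13b".to_string(),              // primary_model (placeholder)
        None,                                 // no fallback model
    );
    let date = NaiveDate::from_ymd_opt(2021, 2, 2).expect("valid date");
    // No image is attached, so the text-only prompt branch is exercised.
    let title = ollama
        .generate_photo_title(date, Some("Columbus, OH"), Some("Sam"), None, None, None)
        .await?;
    println!("title: {}", title);
    Ok(())
}
```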
/// Generate a summary for a single photo based on its context
pub async fn generate_photo_summary(
&self,
date: NaiveDate,
location: Option<&str>,
contact: Option<&str>,
sms_summary: Option<&str>,
custom_system: Option<&str>,
image_base64: Option<String>,
) -> Result<String> {
let location_str = location.unwrap_or("Unknown");
let sms_str = sms_summary.unwrap_or("No messages");
let prompt = if image_base64.is_some() {
if let Some(contact_name) = contact {
format!(
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name,
contact_name
)
} else {
format!(
r#"Write a 1-3 paragraph description of this moment by analyzing the image and the available context:
Date: {}
Location: {}
Messages: {}
Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
}
} else if let Some(contact_name) = contact {
format!(
r#"Write a 1-3 paragraph description of this moment based on the available information:
Date: {}
Location: {}
Person/Contact: {}
Messages: {}
Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
contact_name,
sms_str,
contact_name,
contact_name
)
} else {
format!(
r#"Write a 1-3 paragraph description of this moment based on the available information:
Date: {}
Location: {}
Messages: {}
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown, omit it."#,
date.format("%B %d, %Y"),
location_str,
sms_str
)
};
let system = custom_system.unwrap_or("You are a memory refreshing assistant who is able to provide insights through analyzing past conversations. Use only the information provided. Do not invent details.");
let images = image_base64.map(|img| vec![img]);
self.generate_with_images(&prompt, Some(system), images)
.await
}
/// Generate an embedding vector for text using nomic-embed-text:v1.5
/// Returns a 768-dimensional vector as Vec<f32>
pub async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
let embeddings = self.generate_embeddings(&[text]).await?;
embeddings
.into_iter()
.next()
.ok_or_else(|| anyhow::anyhow!("No embedding returned"))
}
/// Generate embeddings for multiple texts in a single API call (batch mode)
/// Returns a vector of 768-dimensional vectors
/// This is much more efficient than calling generate_embedding multiple times
pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
let embedding_model = "nomic-embed-text:v1.5";
log::debug!("=== Ollama Batch Embedding Request ===");
log::debug!("Model: {}", embedding_model);
log::debug!("Batch size: {} texts", texts.len());
log::debug!("======================================");
// Try primary server first
log::debug!(
"Attempting to generate {} embeddings with primary server: {} (model: {})",
texts.len(),
self.primary_url,
embedding_model
);
let primary_result = self
.try_generate_embeddings(&self.primary_url, embedding_model, texts)
.await;
let embeddings = match primary_result {
Ok(embeddings) => {
log::debug!(
"Successfully generated {} embeddings from primary server",
embeddings.len()
);
embeddings
}
Err(e) => {
log::warn!("Primary server batch embedding failed: {}", e);
// Try fallback server if available
if let Some(fallback_url) = &self.fallback_url {
log::info!(
"Attempting to generate {} embeddings with fallback server: {} (model: {})",
texts.len(),
fallback_url,
embedding_model
);
match self
.try_generate_embeddings(fallback_url, embedding_model, texts)
.await
{
Ok(embeddings) => {
log::info!(
"Successfully generated {} embeddings from fallback server",
embeddings.len()
);
embeddings
}
Err(fallback_e) => {
log::error!(
"Fallback server batch embedding also failed: {}",
fallback_e
);
return Err(anyhow::anyhow!(
"Both primary and fallback servers failed. Primary: {}, Fallback: {}",
e,
fallback_e
));
}
}
} else {
log::error!("No fallback server configured");
return Err(e);
}
}
};
// Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5)
for (i, embedding) in embeddings.iter().enumerate() {
if embedding.len() != 768 {
log::warn!(
"Unexpected embedding dimensions for item {}: {} (expected 768)",
i,
embedding.len()
);
}
}
Ok(embeddings)
}
/// Internal helper to try generating embeddings for multiple texts from a specific server
async fn try_generate_embeddings(
&self,
url: &str,
model: &str,
texts: &[&str],
) -> Result<Vec<Vec<f32>>> {
let request = OllamaBatchEmbedRequest {
model: model.to_string(),
input: texts.iter().map(|s| s.to_string()).collect(),
};
let response = self
.client
.post(format!("{}/api/embed", url))
.json(&request)
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
return Err(anyhow::anyhow!(
"Ollama batch embedding request failed: {} - {}",
status,
error_body
));
}
let result: OllamaEmbedResponse = response.json().await?;
Ok(result.embeddings)
}
}
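For the embedding path, a sketch of the batch call (one HTTP round trip for all texts), under the same assumptions about the import path and a local server with `nomic-embed-text:v1.5` pulled:

```rust
use anyhow::Result;
use image_api::ai::OllamaClient;

#[tokio::main]
async fn main() -> Result<()> {
    let ollama = OllamaClient::new(
        "http://localhost:11434".to_string(),
        None,
        "nomic-embed-text:v1.5".to_string(), // generation model; unused for embeddings
        None,
    );
    // A single request to /api/embed covers every text in the slice.
    let embeddings = ollama
        .generate_embeddings(&["coffee with Alex", "long drive up north"])
        .await?;
    for (i, emb) in embeddings.iter().enumerate() {
        println!("text {}: {} dimensions", i, emb.len()); // expected: 768
    }
    Ok(())
}
```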
#[derive(Serialize)]
struct OllamaRequest {
model: String,
prompt: String,
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
system: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
options: Option<OllamaOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
images: Option<Vec<String>>,
}
#[derive(Serialize)]
struct OllamaOptions {
num_ctx: i32,
}
#[derive(Deserialize)]
struct OllamaResponse {
response: String,
}
#[derive(Deserialize)]
struct OllamaTagsResponse {
models: Vec<OllamaModel>,
}
#[derive(Deserialize)]
struct OllamaModel {
name: String,
}
#[derive(Deserialize)]
struct OllamaShowResponse {
#[serde(default)]
capabilities: Vec<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ModelCapabilities {
pub name: String,
pub has_vision: bool,
}
#[derive(Serialize)]
struct OllamaBatchEmbedRequest {
model: String,
input: Vec<String>,
}
#[derive(Deserialize)]
struct OllamaEmbedResponse {
embeddings: Vec<Vec<f32>>,
}
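These request/response structs map onto Ollama's `/api/embed` endpoint. A hedged raw-HTTP equivalent of `try_generate_embeddings`, built directly with `reqwest` and `serde_json`, looks roughly like this:

```rust
use anyhow::Result;
use serde_json::json;

#[tokio::main]
async fn main() -> Result<()> {
    // Mirrors OllamaBatchEmbedRequest: one model, a list of input strings.
    let body = json!({
        "model": "nomic-embed-text:v1.5",
        "input": ["first text", "second text"],
    });
    let resp: serde_json::Value = reqwest::Client::new()
        .post("http://localhost:11434/api/embed")
        .json(&body)
        .send()
        .await?
        .json()
        .await?;
    // Mirrors OllamaEmbedResponse: "embeddings" holds one float vector per input.
    let count = resp["embeddings"].as_array().map(|a| a.len()).unwrap_or(0);
    println!("{} embeddings returned", count);
    Ok(())
}
```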

View File

@@ -1,316 +0,0 @@
use anyhow::Result;
use reqwest::Client;
use serde::Deserialize;
use super::ollama::OllamaClient;
#[derive(Clone)]
pub struct SmsApiClient {
client: Client,
base_url: String,
token: Option<String>,
}
impl SmsApiClient {
pub fn new(base_url: String, token: Option<String>) -> Self {
Self {
client: Client::new(),
base_url,
token,
}
}
/// Fetch messages for a specific contact within ±4 days of the given timestamp
/// Falls back to all contacts if no messages found for the specific contact
/// Messages are sorted by proximity to the center timestamp
pub async fn fetch_messages_for_contact(
&self,
contact: Option<&str>,
center_timestamp: i64,
) -> Result<Vec<SmsMessage>> {
use chrono::Duration;
// Calculate ±4 days range around the center timestamp
let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
.ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;
let start_dt = center_dt - Duration::days(4);
let end_dt = center_dt + Duration::days(4);
let start_ts = start_dt.timestamp();
let end_ts = end_dt.timestamp();
// If contact specified, try fetching for that contact first
if let Some(contact_name) = contact {
log::info!(
"Fetching SMS for contact: {} (±4 days from {})",
contact_name,
center_dt.format("%Y-%m-%d %H:%M:%S")
);
let messages = self
.fetch_messages(start_ts, end_ts, Some(contact_name), Some(center_timestamp))
.await?;
if !messages.is_empty() {
log::info!(
"Found {} messages for contact {}",
messages.len(),
contact_name
);
return Ok(messages);
}
log::info!(
"No messages found for contact {}, falling back to all contacts",
contact_name
);
}
// Fallback to all contacts
log::info!(
"Fetching all SMS messages (±4 days from {})",
center_dt.format("%Y-%m-%d %H:%M:%S")
);
self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp))
.await
}
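The ±4-day window reduces to a few lines of `chrono`; a standalone sketch of just that calculation (timestamps are Unix seconds):

```rust
use chrono::{DateTime, Duration, Utc};

fn window_around(center_timestamp: i64) -> Option<(i64, i64)> {
    let center: DateTime<Utc> = DateTime::from_timestamp(center_timestamp, 0)?;
    Some((
        (center - Duration::days(4)).timestamp(),
        (center + Duration::days(4)).timestamp(),
    ))
}

fn main() {
    let (start, end) = window_around(1_612_296_000).expect("valid timestamp");
    assert_eq!(end - start, 8 * 24 * 60 * 60); // an 8-day span centered on the photo
    println!("start={start} end={end}");
}
```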
/// Fetch all messages for a specific contact across all time
/// Used for embedding generation - retrieves complete message history
/// Handles pagination automatically if the API returns a limited number of results
pub async fn fetch_all_messages_for_contact(&self, contact: &str) -> Result<Vec<SmsMessage>> {
let start_ts = chrono::DateTime::parse_from_rfc3339("2000-01-01T00:00:00Z")
.unwrap()
.timestamp();
let end_ts = chrono::Utc::now().timestamp();
log::info!("Fetching all historical messages for contact: {}", contact);
let mut all_messages = Vec::new();
let mut offset = 0;
let limit = 1000; // Fetch in batches of 1000
loop {
log::debug!(
"Fetching batch at offset {} for contact {}",
offset,
contact
);
let batch = self
.fetch_messages_paginated(start_ts, end_ts, Some(contact), None, limit, offset)
.await?;
let batch_size = batch.len();
all_messages.extend(batch);
log::debug!(
"Fetched {} messages (total so far: {})",
batch_size,
all_messages.len()
);
// If we got fewer messages than the limit, we've reached the end
if batch_size < limit {
break;
}
offset += limit;
}
log::info!(
"Fetched {} total messages for contact {}",
all_messages.len(),
contact
);
Ok(all_messages)
}
/// Internal method to fetch messages with pagination support
async fn fetch_messages_paginated(
&self,
start_ts: i64,
end_ts: i64,
contact: Option<&str>,
center_timestamp: Option<i64>,
limit: usize,
offset: usize,
) -> Result<Vec<SmsMessage>> {
let mut url = format!(
"{}/api/messages/by-date-range/?start_date={}&end_date={}&limit={}&offset={}",
self.base_url, start_ts, end_ts, limit, offset
);
if let Some(contact_name) = contact {
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
}
if let Some(ts) = center_timestamp {
url.push_str(&format!("&timestamp={}", ts));
}
log::debug!("Fetching SMS messages from: {}", url);
let mut request = self.client.get(&url);
if let Some(token) = &self.token {
request = request.header("Authorization", format!("Bearer {}", token));
}
let response = request.send().await?;
log::debug!("SMS API response status: {}", response.status());
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
log::error!("SMS API request failed: {} - {}", status, error_body);
return Err(anyhow::anyhow!(
"SMS API request failed: {} - {}",
status,
error_body
));
}
let data: SmsApiResponse = response.json().await?;
Ok(data
.messages
.into_iter()
.map(|m| SmsMessage {
contact: m.contact_name,
body: m.body,
timestamp: m.date,
is_sent: m.type_ == 2,
})
.collect())
}
/// Internal method to fetch messages with optional contact filter and timestamp sorting
async fn fetch_messages(
&self,
start_ts: i64,
end_ts: i64,
contact: Option<&str>,
center_timestamp: Option<i64>,
) -> Result<Vec<SmsMessage>> {
// Call Django endpoint
let mut url = format!(
"{}/api/messages/by-date-range/?start_date={}&end_date={}",
self.base_url, start_ts, end_ts
);
// Add contact filter if provided
if let Some(contact_name) = contact {
url.push_str(&format!("&contact={}", urlencoding::encode(contact_name)));
}
// Add timestamp for proximity sorting if provided
if let Some(ts) = center_timestamp {
url.push_str(&format!("&timestamp={}", ts));
}
log::debug!("Fetching SMS messages from: {}", url);
let mut request = self.client.get(&url);
// Add authorization header if token exists
if let Some(token) = &self.token {
request = request.header("Authorization", format!("Bearer {}", token));
}
let response = request.send().await?;
log::debug!("SMS API response status: {}", response.status());
if !response.status().is_success() {
let status = response.status();
let error_body = response.text().await.unwrap_or_default();
log::error!("SMS API request failed: {} - {}", status, error_body);
return Err(anyhow::anyhow!(
"SMS API request failed: {} - {}",
status,
error_body
));
}
let data: SmsApiResponse = response.json().await?;
// Convert to internal format
Ok(data
.messages
.into_iter()
.map(|m| SmsMessage {
contact: m.contact_name,
body: m.body,
timestamp: m.date,
is_sent: m.type_ == 2, // type 2 = sent
})
.collect())
}
pub async fn summarize_context(
&self,
messages: &[SmsMessage],
ollama: &OllamaClient,
) -> Result<String> {
if messages.is_empty() {
return Ok(String::from("No messages on this day"));
}
// Create prompt for Ollama with sender/receiver distinction
let messages_text: String = messages
.iter()
.take(60) // Limit to avoid token overflow
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize these messages in up to 4-5 sentences. Focus on key topics, places, people mentioned, and the overall context of the conversations.
Messages:
{}
Summary:"#,
messages_text
);
ollama
.generate(
&prompt,
// Some("You are a summarizer for the purposes of jogging my memory and highlighting events and situations."),
Some("You are the keeper of memories, ingest the context and give me a casual summary of the moment."),
)
.await
}
}
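The sender attribution used when building the summarization prompt ("Me:" for sent messages, the contact's name otherwise), shown in isolation; the struct here only mirrors the `SmsMessage` type defined just below:

```rust
// Local stand-in for SmsMessage, so the snippet runs on its own.
struct Msg {
    contact: String,
    body: String,
    is_sent: bool,
}

fn to_transcript(messages: &[Msg]) -> String {
    messages
        .iter()
        .map(|m| {
            if m.is_sent {
                format!("Me: {}", m.body)
            } else {
                format!("{}: {}", m.contact, m.body)
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}

fn main() {
    let msgs = vec![
        Msg { contact: "Alex".into(), body: "Lunch today?".into(), is_sent: false },
        Msg { contact: "Alex".into(), body: "Sure, noon works".into(), is_sent: true },
    ];
    assert_eq!(to_transcript(&msgs), "Alex: Lunch today?\nMe: Sure, noon works");
}
```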
#[derive(Debug, Clone)]
pub struct SmsMessage {
pub contact: String,
pub body: String,
pub timestamp: i64,
pub is_sent: bool,
}
#[derive(Deserialize)]
struct SmsApiResponse {
messages: Vec<SmsApiMessage>,
}
#[derive(Deserialize)]
struct SmsApiMessage {
contact_name: String,
body: String,
date: i64,
#[serde(rename = "type")]
type_: i32,
}

View File

@@ -1,144 +0,0 @@
use actix_web::Responder;
use actix_web::{
HttpResponse,
web::{self, Json},
};
use chrono::{Duration, Utc};
use jsonwebtoken::{EncodingKey, Header, encode};
use log::{error, info};
use std::sync::Mutex;
use crate::{
data::{Claims, CreateAccountRequest, LoginRequest, Token, secret_key},
database::UserDao,
};
/// Validate password meets security requirements
fn validate_password(password: &str) -> Result<(), String> {
if password.len() < 12 {
return Err("Password must be at least 12 characters".into());
}
if !password.chars().any(|c| c.is_uppercase()) {
return Err("Password must contain at least one uppercase letter".into());
}
if !password.chars().any(|c| c.is_lowercase()) {
return Err("Password must contain at least one lowercase letter".into());
}
if !password.chars().any(|c| c.is_numeric()) {
return Err("Password must contain at least one number".into());
}
if !password.chars().any(|c| !c.is_alphanumeric()) {
return Err("Password must contain at least one special character".into());
}
Ok(())
}
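A hypothetical unit-test sketch for `validate_password`, assuming it lives in the same module as the function above:

```rust
#[cfg(test)]
mod password_tests {
    use super::validate_password;

    #[test]
    fn strong_password_passes() {
        assert!(validate_password("Correct-Horse-42!").is_ok());
    }

    #[test]
    fn weak_passwords_fail() {
        assert!(validate_password("Short1!").is_err());           // fewer than 12 chars
        assert!(validate_password("alllowercase123!").is_err());  // no uppercase letter
        assert!(validate_password("NoDigitsHere!!").is_err());    // no number
        assert!(validate_password("NoSpecials12345").is_err());   // no special character
    }
}
```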
#[allow(dead_code)]
async fn register<D: UserDao>(
user: Json<CreateAccountRequest>,
user_dao: web::Data<Mutex<D>>,
) -> impl Responder {
// Validate password strength
if let Err(msg) = validate_password(&user.password) {
return HttpResponse::BadRequest().body(msg);
}
if !user.username.is_empty() && user.password == user.confirmation {
let mut dao = user_dao.lock().expect("Unable to get UserDao");
if dao.user_exists(&user.username) {
HttpResponse::BadRequest().finish()
} else if let Some(_user) = dao.create_user(&user.username, &user.password) {
HttpResponse::Ok().finish()
} else {
HttpResponse::InternalServerError().finish()
}
} else {
HttpResponse::BadRequest().finish()
}
}
pub async fn login<D: UserDao>(
creds: Json<LoginRequest>,
user_dao: web::Data<Mutex<D>>,
) -> HttpResponse {
info!("Logging in: {}", creds.username);
let mut user_dao = user_dao.lock().expect("Unable to get UserDao");
if let Some(user) = user_dao.get_user(&creds.username, &creds.password) {
let claims = Claims {
sub: user.id.to_string(),
exp: (Utc::now() + Duration::days(5)).timestamp(),
};
let token = match encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(secret_key().as_bytes()),
) {
Ok(t) => t,
Err(e) => {
error!("Failed to encode JWT: {}", e);
return HttpResponse::InternalServerError().finish();
}
};
HttpResponse::Ok().json(Token { token: &token })
} else {
error!("Failed login attempt for user: '{}'", creds.username);
HttpResponse::NotFound().finish()
}
}
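A hedged sketch of the token round trip with the same `jsonwebtoken` crate; `TokenClaims` only mirrors the shape of the crate's `Claims`, and `dev-only-secret` stands in for `secret_key()`:

```rust
use chrono::{Duration, Utc};
use jsonwebtoken::{decode, encode, Algorithm, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct TokenClaims {
    sub: String, // user id, as set in login()
    exp: i64,    // expiry; login() uses now + 5 days
}

fn main() -> anyhow::Result<()> {
    let secret = "dev-only-secret"; // placeholder for secret_key()
    let claims = TokenClaims {
        sub: "1".to_string(),
        exp: (Utc::now() + Duration::days(5)).timestamp(),
    };
    let token = encode(
        &Header::default(), // HS256, same as the handler above
        &claims,
        &EncodingKey::from_secret(secret.as_bytes()),
    )?;
    let data = decode::<TokenClaims>(
        &token,
        &DecodingKey::from_secret(secret.as_bytes()),
        &Validation::new(Algorithm::HS256), // also checks exp by default
    )?;
    println!("verified sub = {}", data.claims.sub);
    Ok(())
}
```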
#[cfg(test)]
mod tests {
use super::*;
use crate::testhelpers::{BodyReader, TestUserDao};
#[actix_rt::test]
async fn test_login_reports_200_when_user_exists() {
let mut dao = TestUserDao::new();
dao.create_user("user", "pass");
let j = Json(LoginRequest {
username: "user".to_string(),
password: "pass".to_string(),
});
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
assert_eq!(response.status(), 200);
}
#[actix_rt::test]
async fn test_login_returns_token_on_success() {
let mut dao = TestUserDao::new();
dao.create_user("user", "password");
let j = Json(LoginRequest {
username: "user".to_string(),
password: "password".to_string(),
});
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
assert_eq!(response.status(), 200);
let response_text: String = response.read_to_str();
assert!(response_text.contains("\"token\""));
}
#[actix_rt::test]
async fn test_login_reports_404_when_user_does_not_exist() {
let mut dao = TestUserDao::new();
dao.create_user("user", "password");
let j = Json(LoginRequest {
username: "doesnotexist".to_string(),
password: "password".to_string(),
});
let response = login::<TestUserDao>(j, web::Data::new(Mutex::new(dao))).await;
assert_eq!(response.status(), 404);
}
}

View File

@@ -1,143 +0,0 @@
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use clap::Parser;
use image_api::cleanup::{
CleanupConfig, DatabaseUpdater, resolve_missing_files, validate_file_types,
};
use image_api::database::{SqliteExifDao, SqliteFavoriteDao};
use image_api::tags::SqliteTagDao;
#[derive(Parser, Debug)]
#[command(name = "cleanup_files")]
#[command(about = "File cleanup and fix utility for ImageApi", long_about = None)]
struct Args {
#[arg(long, help = "Preview changes without making them")]
dry_run: bool,
#[arg(long, help = "Auto-fix all issues without prompting")]
auto_fix: bool,
#[arg(long, help = "Skip phase 1 (missing file resolution)")]
skip_phase1: bool,
#[arg(long, help = "Skip phase 2 (file type validation)")]
skip_phase2: bool,
}
fn main() -> anyhow::Result<()> {
// Initialize logging
env_logger::init();
// Load environment variables
dotenv::dotenv()?;
// Parse CLI arguments
let args = Args::parse();
// Get base path from environment
let base_path = dotenv::var("BASE_PATH")?;
let base = PathBuf::from(&base_path);
println!("File Cleanup and Fix Utility");
println!("============================");
println!("Base path: {}", base.display());
println!("Dry run: {}", args.dry_run);
println!("Auto fix: {}", args.auto_fix);
println!();
// Pre-flight checks
if !base.exists() {
eprintln!("Error: Base path does not exist: {}", base.display());
std::process::exit(1);
}
if !base.is_dir() {
eprintln!("Error: Base path is not a directory: {}", base.display());
std::process::exit(1);
}
// Create configuration
let config = CleanupConfig {
base_path: base,
dry_run: args.dry_run,
auto_fix: args.auto_fix,
};
// Create DAOs
println!("Connecting to database...");
let tag_dao: Arc<Mutex<dyn image_api::tags::TagDao>> =
Arc::new(Mutex::new(SqliteTagDao::default()));
let exif_dao: Arc<Mutex<dyn image_api::database::ExifDao>> =
Arc::new(Mutex::new(SqliteExifDao::new()));
let favorites_dao: Arc<Mutex<dyn image_api::database::FavoriteDao>> =
Arc::new(Mutex::new(SqliteFavoriteDao::new()));
// Create database updater
let mut db_updater = DatabaseUpdater::new(tag_dao, exif_dao, favorites_dao);
println!("✓ Database connected\n");
// Track overall statistics
let mut total_issues_found = 0;
let mut total_issues_fixed = 0;
let mut total_errors = Vec::new();
// Phase 1: Missing file resolution
if !args.skip_phase1 {
match resolve_missing_files(&config, &mut db_updater) {
Ok(stats) => {
total_issues_found += stats.issues_found;
total_issues_fixed += stats.issues_fixed;
total_errors.extend(stats.errors);
}
Err(e) => {
eprintln!("Phase 1 failed: {:?}", e);
total_errors.push(format!("Phase 1 error: {}", e));
}
}
} else {
println!("Phase 1: Skipped (--skip-phase1)");
}
// Phase 2: File type validation
if !args.skip_phase2 {
match validate_file_types(&config, &mut db_updater) {
Ok(stats) => {
total_issues_found += stats.issues_found;
total_issues_fixed += stats.issues_fixed;
total_errors.extend(stats.errors);
}
Err(e) => {
eprintln!("Phase 2 failed: {:?}", e);
total_errors.push(format!("Phase 2 error: {}", e));
}
}
} else {
println!("\nPhase 2: Skipped (--skip-phase2)");
}
// Final summary
println!("\n============================");
println!("Cleanup Complete!");
println!("============================");
println!("Total issues found: {}", total_issues_found);
if config.dry_run {
println!("Total issues that would be fixed: {}", total_issues_found);
} else {
println!("Total issues fixed: {}", total_issues_fixed);
}
if !total_errors.is_empty() {
println!("\nErrors encountered:");
for (i, error) in total_errors.iter().enumerate() {
println!(" {}. {}", i + 1, error);
}
println!("\nSome operations failed. Review errors above.");
} else {
println!("\n✓ No errors encountered");
}
Ok(())
}

View File

@@ -1,307 +0,0 @@
use anyhow::Result;
use clap::Parser;
use diesel::prelude::*;
use diesel::sql_query;
use diesel::sqlite::SqliteConnection;
use std::env;
#[derive(Parser, Debug)]
#[command(author, version, about = "Diagnose embedding distribution and identify problematic summaries", long_about = None)]
struct Args {
/// Show detailed per-summary statistics
#[arg(short, long, default_value_t = false)]
verbose: bool,
/// Number of top "central" summaries to show (ones that match everything)
#[arg(short, long, default_value_t = 10)]
top: usize,
/// Test a specific query to see what matches
#[arg(short, long)]
query: Option<String>,
}
#[derive(QueryableByName, Debug)]
struct EmbeddingRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
date: String,
#[diesel(sql_type = diesel::sql_types::Text)]
contact: String,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
}
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>> {
if !bytes.len().is_multiple_of(4) {
return Err(anyhow::anyhow!("Invalid embedding byte length"));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
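Going the other way, a hypothetical `serialize_embedding` that produces the same little-endian `f32` layout this deserializer expects the BLOB column to hold:

```rust
fn serialize_embedding(values: &[f32]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(values.len() * 4);
    for v in values {
        bytes.extend_from_slice(&v.to_le_bytes());
    }
    bytes
}

fn main() {
    let emb = vec![0.25_f32, -1.0, 3.5];
    let bytes = serialize_embedding(&emb);
    assert_eq!(bytes.len(), emb.len() * 4); // 4 bytes per f32, little-endian
}
```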
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
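A quick standalone sanity check of the cosine metric (re-declared inline so the snippet runs on its own):

```rust
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
}

fn main() {
    assert!((cosine(&[1.0, 0.0], &[2.0, 0.0]) - 1.0).abs() < 1e-6);  // parallel   -> 1.0
    assert!(cosine(&[1.0, 0.0], &[0.0, 3.0]).abs() < 1e-6);          // orthogonal -> 0.0
    assert!((cosine(&[1.0, 0.0], &[-1.0, 0.0]) + 1.0).abs() < 1e-6); // opposite   -> -1.0
}
```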
fn main() -> Result<()> {
dotenv::dotenv().ok();
let args = Args::parse();
let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string());
println!("Connecting to database: {}", database_url);
let mut conn = SqliteConnection::establish(&database_url)?;
// Load all embeddings
println!("\nLoading embeddings from daily_conversation_summaries...");
let rows: Vec<EmbeddingRow> = sql_query(
"SELECT id, date, contact, summary, embedding FROM daily_conversation_summaries ORDER BY date"
)
.load(&mut conn)?;
println!("Found {} summaries with embeddings\n", rows.len());
if rows.is_empty() {
println!("No summaries found!");
return Ok(());
}
// Parse all embeddings
let mut embeddings: Vec<(i32, String, String, String, Vec<f32>)> = Vec::new();
for row in &rows {
match deserialize_embedding(&row.embedding) {
Ok(emb) => {
embeddings.push((
row.id,
row.date.clone(),
row.contact.clone(),
row.summary.clone(),
emb,
));
}
Err(e) => {
println!(
"Warning: Failed to parse embedding for id {}: {}",
row.id, e
);
}
}
}
println!("Successfully parsed {} embeddings\n", embeddings.len());
// Compute embedding statistics
println!("========================================");
println!("EMBEDDING STATISTICS");
println!("========================================\n");
// Check embedding variance (are values clustered or spread out?)
let first_emb = &embeddings[0].4;
let dim = first_emb.len();
println!("Embedding dimensions: {}", dim);
// Calculate mean and std dev per dimension
let mut dim_means: Vec<f32> = vec![0.0; dim];
let mut dim_vars: Vec<f32> = vec![0.0; dim];
for (_, _, _, _, emb) in &embeddings {
for (i, &val) in emb.iter().enumerate() {
dim_means[i] += val;
}
}
for m in &mut dim_means {
*m /= embeddings.len() as f32;
}
for (_, _, _, _, emb) in &embeddings {
for (i, &val) in emb.iter().enumerate() {
let diff = val - dim_means[i];
dim_vars[i] += diff * diff;
}
}
for v in &mut dim_vars {
*v = (*v / embeddings.len() as f32).sqrt();
}
let avg_std_dev: f32 = dim_vars.iter().sum::<f32>() / dim as f32;
let min_std_dev: f32 = dim_vars.iter().cloned().fold(f32::INFINITY, f32::min);
let max_std_dev: f32 = dim_vars.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
println!("Per-dimension standard deviation:");
println!(" Average: {:.6}", avg_std_dev);
println!(" Min: {:.6}", min_std_dev);
println!(" Max: {:.6}", max_std_dev);
println!();
// Compute pairwise similarities
println!("Computing pairwise similarities (this may take a moment)...\n");
let mut all_similarities: Vec<f32> = Vec::new();
let mut per_embedding_avg: Vec<(usize, f32)> = Vec::new();
for i in 0..embeddings.len() {
let mut sum = 0.0;
let mut count = 0;
for j in 0..embeddings.len() {
if i != j {
let sim = cosine_similarity(&embeddings[i].4, &embeddings[j].4);
all_similarities.push(sim);
sum += sim;
count += 1;
}
}
per_embedding_avg.push((i, sum / count as f32));
}
// Sort similarities for percentile analysis
all_similarities.sort_by(|a, b| a.partial_cmp(b).unwrap());
let min_sim = all_similarities.first().copied().unwrap_or(0.0);
let max_sim = all_similarities.last().copied().unwrap_or(0.0);
let median_sim = all_similarities[all_similarities.len() / 2];
let p25 = all_similarities[all_similarities.len() / 4];
let p75 = all_similarities[3 * all_similarities.len() / 4];
let mean_sim: f32 = all_similarities.iter().sum::<f32>() / all_similarities.len() as f32;
println!("========================================");
println!("PAIRWISE SIMILARITY DISTRIBUTION");
println!("========================================\n");
println!("Total pairs analyzed: {}", all_similarities.len());
println!();
println!("Min similarity: {:.4}", min_sim);
println!("25th percentile: {:.4}", p25);
println!("Median similarity: {:.4}", median_sim);
println!("Mean similarity: {:.4}", mean_sim);
println!("75th percentile: {:.4}", p75);
println!("Max similarity: {:.4}", max_sim);
println!();
// Analyze distribution
let count_above_08 = all_similarities.iter().filter(|&&s| s > 0.8).count();
let count_above_07 = all_similarities.iter().filter(|&&s| s > 0.7).count();
let count_above_06 = all_similarities.iter().filter(|&&s| s > 0.6).count();
let count_above_05 = all_similarities.iter().filter(|&&s| s > 0.5).count();
let count_below_03 = all_similarities.iter().filter(|&&s| s < 0.3).count();
println!("Similarity distribution:");
println!(
" > 0.8: {} ({:.1}%)",
count_above_08,
100.0 * count_above_08 as f32 / all_similarities.len() as f32
);
println!(
" > 0.7: {} ({:.1}%)",
count_above_07,
100.0 * count_above_07 as f32 / all_similarities.len() as f32
);
println!(
" > 0.6: {} ({:.1}%)",
count_above_06,
100.0 * count_above_06 as f32 / all_similarities.len() as f32
);
println!(
" > 0.5: {} ({:.1}%)",
count_above_05,
100.0 * count_above_05 as f32 / all_similarities.len() as f32
);
println!(
" < 0.3: {} ({:.1}%)",
count_below_03,
100.0 * count_below_03 as f32 / all_similarities.len() as f32
);
println!();
// Identify "central" embeddings (high average similarity to all others)
per_embedding_avg.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
println!("========================================");
println!("TOP {} MOST 'CENTRAL' SUMMARIES", args.top);
println!("(These match everything with high similarity)");
println!("========================================\n");
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().take(args.top).enumerate() {
let (id, date, contact, summary, _) = &embeddings[*idx];
let preview: String = summary.chars().take(80).collect();
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
println!(" Date: {}, Contact: {}", date, contact);
println!(" Preview: {}...", preview.replace('\n', " "));
println!();
}
// Also show the least central (most unique)
println!("========================================");
println!("TOP {} MOST UNIQUE SUMMARIES", args.top);
println!("(These are most different from others)");
println!("========================================\n");
for (rank, (idx, avg_sim)) in per_embedding_avg.iter().rev().take(args.top).enumerate() {
let (id, date, contact, summary, _) = &embeddings[*idx];
let preview: String = summary.chars().take(80).collect();
println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim);
println!(" Date: {}, Contact: {}", date, contact);
println!(" Preview: {}...", preview.replace('\n', " "));
println!();
}
// Diagnosis
println!("========================================");
println!("DIAGNOSIS");
println!("========================================\n");
if mean_sim > 0.7 {
println!("⚠️ HIGH AVERAGE SIMILARITY ({:.4})", mean_sim);
println!(" All embeddings are very similar to each other.");
println!(" This explains why the same summaries always match.");
println!();
println!(" Possible causes:");
println!(
" 1. Summaries have similar structure/phrasing (e.g., all start with 'Summary:')"
);
println!(" 2. Embedding model isn't capturing semantic differences well");
println!(" 3. Daily conversations have similar topics (e.g., 'good morning', plans)");
println!();
println!(" Recommendations:");
println!(" 1. Try a different embedding model (mxbai-embed-large, bge-large)");
println!(" 2. Improve summary diversity by varying the prompt");
println!(" 3. Extract and embed only keywords/entities, not full summaries");
} else if mean_sim > 0.5 {
println!("⚡ MODERATE AVERAGE SIMILARITY ({:.4})", mean_sim);
println!(" Some clustering in embeddings, but some differentiation exists.");
println!();
println!(" The 'central' summaries above are likely dominating search results.");
println!(" Consider:");
println!(" 1. Filtering out summaries with very high centrality");
println!(" 2. Adding time-based weighting to prefer recent/relevant dates");
println!(" 3. Increasing the similarity threshold from 0.3 to 0.5");
} else {
println!("✅ GOOD EMBEDDING DIVERSITY ({:.4})", mean_sim);
println!(" Embeddings are well-differentiated.");
println!(" If same results keep appearing, the issue may be elsewhere.");
}
Ok(())
}

View File

@@ -1,166 +0,0 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
use image_api::parsers::ical_parser::parse_ics_file;
use log::{error, info};
use std::sync::{Arc, Mutex};
// Import the trait to use its methods
use image_api::database::CalendarEventDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Calendar data", long_about = None)]
struct Args {
/// Path to the .ics calendar file
#[arg(short, long)]
path: String,
/// Generate embeddings for calendar events (slower but enables semantic search)
#[arg(long, default_value = "false")]
generate_embeddings: bool,
/// Skip events that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for embedding generation
#[arg(long, default_value = "128")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing calendar file: {}", args.path);
let events = parse_ics_file(&args.path).context("Failed to parse .ics file")?;
info!("Found {} calendar events", events.len());
let context = opentelemetry::Context::current();
let ollama = if args.generate_embeddings {
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
.or_else(|_| dotenv::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
Some(OllamaClient::new(
primary_url,
fallback_url,
primary_model,
fallback_model,
))
} else {
None
};
let inserted_count = Arc::new(Mutex::new(0));
let skipped_count = Arc::new(Mutex::new(0));
let error_count = Arc::new(Mutex::new(0));
// Process events sequentially (rayon can't be mixed with async here)
for event in &events {
let mut dao_instance = SqliteCalendarEventDao::new();
// Check if event exists
if args.skip_existing
&& let Ok(exists) = dao_instance.event_exists(
&context,
event.event_uid.as_deref().unwrap_or(""),
event.start_time,
)
&& exists
{
*skipped_count.lock().unwrap() += 1;
continue;
}
// Generate embedding if requested (blocking call)
let embedding = if let Some(ref ollama_client) = ollama {
let text = format!(
"{} {} {}",
event.summary,
event.description.as_deref().unwrap_or(""),
event.location.as_deref().unwrap_or("")
);
match tokio::task::block_in_place(|| {
tokio::runtime::Handle::current()
.block_on(async { ollama_client.generate_embedding(&text).await })
}) {
Ok(emb) => Some(emb),
Err(e) => {
error!(
"Failed to generate embedding for event '{}': {}",
event.summary, e
);
None
}
}
} else {
None
};
// Insert into database
let insert_event = InsertCalendarEvent {
event_uid: event.event_uid.clone(),
summary: event.summary.clone(),
description: event.description.clone(),
location: event.location.clone(),
start_time: event.start_time,
end_time: event.end_time,
all_day: event.all_day,
organizer: event.organizer.clone(),
attendees: if event.attendees.is_empty() {
None
} else {
Some(serde_json::to_string(&event.attendees).unwrap_or_default())
},
embedding,
created_at: Utc::now().timestamp(),
source_file: Some(args.path.clone()),
};
match dao_instance.store_event(&context, insert_event) {
Ok(_) => {
*inserted_count.lock().unwrap() += 1;
if *inserted_count.lock().unwrap() % 100 == 0 {
info!("Imported {} events...", *inserted_count.lock().unwrap());
}
}
Err(e) => {
error!("Failed to store event '{}': {:?}", event.summary, e);
*error_count.lock().unwrap() += 1;
}
}
}
let final_inserted = *inserted_count.lock().unwrap();
let final_skipped = *skipped_count.lock().unwrap();
let final_errors = *error_count.lock().unwrap();
info!("\n=== Import Summary ===");
info!("Total events found: {}", events.len());
info!("Successfully inserted: {}", final_inserted);
info!("Skipped (already exist): {}", final_skipped);
info!("Errors: {}", final_errors);
if args.generate_embeddings {
info!("Embeddings were generated for semantic search");
} else {
info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
}
Ok(())
}

View File

@@ -1,114 +0,0 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::database::location_dao::{InsertLocationRecord, SqliteLocationHistoryDao};
use image_api::parsers::location_json_parser::parse_location_json;
use log::{error, info};
// Import the trait to use its methods
use image_api::database::LocationHistoryDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Location History data", long_about = None)]
struct Args {
/// Path to the Location History JSON file
#[arg(short, long)]
path: String,
/// Skip locations that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for database inserts
#[arg(long, default_value = "1000")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing location history file: {}", args.path);
let locations =
parse_location_json(&args.path).context("Failed to parse location history JSON")?;
info!("Found {} location records", locations.len());
let context = opentelemetry::Context::current();
let mut inserted_count = 0;
let mut skipped_count = 0;
let mut error_count = 0;
let mut dao_instance = SqliteLocationHistoryDao::new();
let created_at = Utc::now().timestamp();
// Process in batches using batch insert for massive speedup
for (batch_idx, chunk) in locations.chunks(args.batch_size).enumerate() {
info!(
"Processing batch {} ({} records)...",
batch_idx + 1,
chunk.len()
);
// Convert to InsertLocationRecord
let mut batch_inserts = Vec::with_capacity(chunk.len());
for location in chunk {
// If requested, check whether this record already exists (this per-row lookup makes the import much slower)
if args.skip_existing
&& let Ok(exists) = dao_instance.location_exists(
&context,
location.timestamp,
location.latitude,
location.longitude,
)
&& exists
{
skipped_count += 1;
continue;
}
batch_inserts.push(InsertLocationRecord {
timestamp: location.timestamp,
latitude: location.latitude,
longitude: location.longitude,
accuracy: location.accuracy,
activity: location.activity.clone(),
activity_confidence: location.activity_confidence,
place_name: None,
place_category: None,
embedding: None,
created_at,
source_file: Some(args.path.clone()),
});
}
// Batch insert entire chunk in single transaction
if !batch_inserts.is_empty() {
match dao_instance.store_locations_batch(&context, batch_inserts) {
Ok(count) => {
inserted_count += count;
info!(
"Imported {} locations (total: {})...",
count, inserted_count
);
}
Err(e) => {
error!("Failed to store batch: {:?}", e);
error_count += chunk.len();
}
}
}
}
info!("\n=== Import Summary ===");
info!("Total locations found: {}", locations.len());
info!("Successfully inserted: {}", inserted_count);
info!("Skipped (already exist): {}", skipped_count);
info!("Errors: {}", error_count);
Ok(())
}

View File

@@ -1,152 +0,0 @@
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use image_api::ai::ollama::OllamaClient;
use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
use image_api::parsers::search_html_parser::parse_search_html;
use log::{error, info, warn};
// Import the trait to use its methods
use image_api::database::SearchHistoryDao;
#[derive(Parser, Debug)]
#[command(author, version, about = "Import Google Takeout Search History data", long_about = None)]
struct Args {
/// Path to the search history HTML file
#[arg(short, long)]
path: String,
/// Skip searches that already exist in the database
#[arg(long, default_value = "true")]
skip_existing: bool,
/// Batch size for embedding generation (max 128 recommended)
#[arg(long, default_value = "64")]
batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
info!("Parsing search history file: {}", args.path);
let searches = parse_search_html(&args.path).context("Failed to parse search history HTML")?;
info!("Found {} search records", searches.len());
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
.or_else(|_| dotenv::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
let context = opentelemetry::Context::current();
let mut inserted_count = 0;
let mut skipped_count = 0;
let mut error_count = 0;
let mut dao_instance = SqliteSearchHistoryDao::new();
let created_at = Utc::now().timestamp();
// Process searches in batches (embeddings are REQUIRED for searches)
for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
info!(
"Processing batch {} ({} searches)...",
batch_idx + 1,
chunk.len()
);
// Generate embeddings for this batch
let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();
let embeddings_result = tokio::task::spawn({
let ollama_client = ollama.clone();
async move {
// Generate an embedding for each query in the batch, one request at a time
let mut embeddings = Vec::new();
for query in &queries {
match ollama_client.generate_embedding(query).await {
Ok(emb) => embeddings.push(Some(emb)),
Err(e) => {
warn!("Failed to generate embedding for query '{}': {}", query, e);
embeddings.push(None);
}
}
}
embeddings
}
})
.await
.context("Failed to generate embeddings for batch")?;
// Build batch of searches with embeddings
let mut batch_inserts = Vec::new();
for (search, embedding_opt) in chunk.iter().zip(embeddings_result.iter()) {
// Check if search exists (optional for speed)
if args.skip_existing
&& let Ok(exists) =
dao_instance.search_exists(&context, search.timestamp, &search.query)
&& exists
{
skipped_count += 1;
continue;
}
// Only insert if we have an embedding
if let Some(embedding) = embedding_opt {
batch_inserts.push(InsertSearchRecord {
timestamp: search.timestamp,
query: search.query.clone(),
search_engine: search.search_engine.clone(),
embedding: embedding.clone(),
created_at,
source_file: Some(args.path.clone()),
});
} else {
error!(
"Skipping search '{}' due to missing embedding",
search.query
);
error_count += 1;
}
}
// Batch insert entire chunk in single transaction
if !batch_inserts.is_empty() {
match dao_instance.store_searches_batch(&context, batch_inserts) {
Ok(count) => {
inserted_count += count;
info!("Imported {} searches (total: {})...", count, inserted_count);
}
Err(e) => {
error!("Failed to store batch: {:?}", e);
error_count += chunk.len();
}
}
}
// Rate limiting between batches
if (batch_idx + 1) * args.batch_size < searches.len() {
info!("Waiting 500ms before next batch...");
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
}
info!("\n=== Import Summary ===");
info!("Total searches found: {}", searches.len());
info!("Successfully inserted: {}", inserted_count);
info!("Skipped (already exist): {}", skipped_count);
info!("Errors: {}", error_count);
info!("All imported searches have embeddings for semantic search");
Ok(())
}

View File

@@ -1,195 +0,0 @@
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use chrono::Utc;
use clap::Parser;
use rayon::prelude::*;
use walkdir::WalkDir;
use image_api::database::models::InsertImageExif;
use image_api::database::{ExifDao, SqliteExifDao};
use image_api::exif;
#[derive(Parser, Debug)]
#[command(name = "migrate_exif")]
#[command(about = "Extract and store EXIF data from images", long_about = None)]
struct Args {
#[arg(long, help = "Skip files that already have EXIF data in database")]
skip_existing: bool,
}
fn main() -> anyhow::Result<()> {
env_logger::init();
dotenv::dotenv()?;
let args = Args::parse();
let base_path = dotenv::var("BASE_PATH")?;
let base = PathBuf::from(&base_path);
println!("EXIF Migration Tool");
println!("===================");
println!("Base path: {}", base.display());
if args.skip_existing {
println!("Mode: Skip existing (incremental)");
} else {
println!("Mode: Upsert (insert new, update existing)");
}
println!();
// Collect all image files that support EXIF
println!("Scanning for images...");
let image_files: Vec<PathBuf> = WalkDir::new(&base)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| exif::supports_exif(e.path()))
.map(|e| e.path().to_path_buf())
.collect();
println!("Found {} images to process", image_files.len());
if image_files.is_empty() {
println!("No EXIF-supporting images found. Exiting.");
return Ok(());
}
println!();
println!("Extracting EXIF data...");
// Create a thread-safe DAO
let dao = Arc::new(Mutex::new(SqliteExifDao::new()));
// Process in parallel using rayon
let results: Vec<_> = image_files
.par_iter()
.map(|path| {
// Create context for this processing iteration
let context = opentelemetry::Context::new();
let relative_path = match path.strip_prefix(&base) {
Ok(p) => p.to_str().unwrap().to_string(),
Err(_) => {
eprintln!(
"Error: Could not create relative path for {}",
path.display()
);
return Err(anyhow::anyhow!("Path error"));
}
};
// Check if EXIF data already exists
let existing = if let Ok(mut dao_lock) = dao.lock() {
dao_lock.get_exif(&context, &relative_path).ok().flatten()
} else {
eprintln!("{} - Failed to acquire database lock", relative_path);
return Err(anyhow::anyhow!("Lock error"));
};
// Skip if exists and skip_existing flag is set
if args.skip_existing && existing.is_some() {
return Ok(("skip".to_string(), relative_path));
}
match exif::extract_exif_from_path(path) {
Ok(exif_data) => {
let timestamp = Utc::now().timestamp();
let insert_exif = InsertImageExif {
file_path: relative_path.clone(),
camera_make: exif_data.camera_make,
camera_model: exif_data.camera_model,
lens_model: exif_data.lens_model,
width: exif_data.width,
height: exif_data.height,
orientation: exif_data.orientation,
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
focal_length: exif_data.focal_length.map(|v| v as f32),
aperture: exif_data.aperture.map(|v| v as f32),
shutter_speed: exif_data.shutter_speed,
iso: exif_data.iso,
date_taken: exif_data.date_taken,
created_time: existing
.as_ref()
.map(|e| e.created_time)
.unwrap_or(timestamp),
last_modified: timestamp,
};
// Store or update in database
if let Ok(mut dao_lock) = dao.lock() {
let result = if existing.is_some() {
// Update existing record
dao_lock
.update_exif(&context, insert_exif)
.map(|_| "update")
} else {
// Insert new record
dao_lock.store_exif(&context, insert_exif).map(|_| "insert")
};
match result {
Ok(action) => {
if action == "update" {
println!("{} (updated)", relative_path);
} else {
println!("{} (inserted)", relative_path);
}
Ok((action.to_string(), relative_path))
}
Err(e) => {
eprintln!("{} - Database error: {:?}", relative_path, e);
Err(anyhow::anyhow!("Database error"))
}
}
} else {
eprintln!("{} - Failed to acquire database lock", relative_path);
Err(anyhow::anyhow!("Lock error"))
}
}
Err(e) => {
eprintln!("{} - No EXIF data: {:?}", relative_path, e);
Err(e)
}
}
})
.collect();
// Count results
let mut success_count = 0;
let mut inserted_count = 0;
let mut updated_count = 0;
let mut skipped_count = 0;
for (action, _) in results.iter().flatten() {
success_count += 1;
match action.as_str() {
"insert" => inserted_count += 1,
"update" => updated_count += 1,
"skip" => skipped_count += 1,
_ => {}
}
}
let error_count = results.iter().filter(|r| r.is_err()).count();
println!();
println!("===================");
println!("Migration complete!");
println!("Total images processed: {}", image_files.len());
if inserted_count > 0 {
println!(" New EXIF records inserted: {}", inserted_count);
}
if updated_count > 0 {
println!(" Existing records updated: {}", updated_count);
}
if skipped_count > 0 {
println!(" Skipped (already exists): {}", skipped_count);
}
if error_count > 0 {
println!(" Errors (no EXIF data or failures): {}", error_count);
}
Ok(())
}

View File

@@ -1,288 +0,0 @@
use anyhow::Result;
use chrono::NaiveDate;
use clap::Parser;
use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
use std::env;
use std::sync::{Arc, Mutex};
#[derive(Parser, Debug)]
#[command(author, version, about = "Test daily summary generation with different models and prompts", long_about = None)]
struct Args {
/// Contact name to generate summaries for
#[arg(short, long)]
contact: String,
/// Start date (YYYY-MM-DD)
#[arg(short, long)]
start: String,
/// End date (YYYY-MM-DD)
#[arg(short, long)]
end: String,
/// Optional: Override the model to use (e.g., "qwen2.5:32b", "llama3.1:30b")
#[arg(short, long)]
model: Option<String>,
/// Test mode: Generate but don't save to database (shows output only)
#[arg(short = 't', long, default_value_t = false)]
test_mode: bool,
/// Show message count and preview
#[arg(short, long, default_value_t = false)]
verbose: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
// Load .env file
dotenv::dotenv().ok();
// Initialize logging
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let args = Args::parse();
// Parse dates
let start_date = NaiveDate::parse_from_str(&args.start, "%Y-%m-%d")
.expect("Invalid start date format. Use YYYY-MM-DD");
let end_date = NaiveDate::parse_from_str(&args.end, "%Y-%m-%d")
.expect("Invalid end date format. Use YYYY-MM-DD");
println!("========================================");
println!("Daily Summary Generation Test Tool");
println!("========================================");
println!("Contact: {}", args.contact);
println!("Date range: {} to {}", start_date, end_date);
println!("Days: {}", (end_date - start_date).num_days() + 1);
if let Some(ref model) = args.model {
println!("Model: {}", model);
} else {
println!(
"Model: {} (from env)",
env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
);
}
if args.test_mode {
println!("⚠ TEST MODE: Results will NOT be saved to database");
}
println!("========================================");
println!();
// Initialize AI clients
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL")
.or_else(|_| env::var("OLLAMA_URL"))
.unwrap_or_else(|_| "http://localhost:11434".to_string());
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
// Use provided model or fallback to env
let model_to_use = args.model.clone().unwrap_or_else(|| {
env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
});
let ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url.clone(),
model_to_use.clone(),
Some(model_to_use), // Use same model for fallback
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
let sms_api_token = env::var("SMS_API_TOKEN").ok();
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
// Initialize DAO
let summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Fetch messages for contact
println!("Fetching messages for {}...", args.contact);
let all_messages = sms_client
.fetch_all_messages_for_contact(&args.contact)
.await?;
println!(
"Found {} total messages for {}",
all_messages.len(),
args.contact
);
println!();
// Filter to date range and group by date
let mut messages_by_date = std::collections::HashMap::new();
for msg in all_messages {
if let Some(dt) = chrono::DateTime::from_timestamp(msg.timestamp, 0) {
let date = dt.date_naive();
if date >= start_date && date <= end_date {
messages_by_date
.entry(date)
.or_insert_with(Vec::new)
.push(msg);
}
}
}
if messages_by_date.is_empty() {
println!("⚠ No messages found in date range");
return Ok(());
}
println!("Found {} days with messages", messages_by_date.len());
println!();
// Sort dates
let mut dates: Vec<NaiveDate> = messages_by_date.keys().cloned().collect();
dates.sort();
// Process each day
for (idx, date) in dates.iter().enumerate() {
let messages = messages_by_date.get(date).unwrap();
let date_str = date.format("%Y-%m-%d").to_string();
let weekday = date.format("%A");
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
println!(
"Day {}/{}: {} ({}) - {} messages",
idx + 1,
dates.len(),
date_str,
weekday,
messages.len()
);
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
if args.verbose {
println!("\nMessage preview:");
for (i, msg) in messages.iter().take(3).enumerate() {
let sender = if msg.is_sent { "Me" } else { &msg.contact };
let preview = msg.body.chars().take(60).collect::<String>();
println!(" {}. {}: {}...", i + 1, sender, preview);
}
if messages.len() > 3 {
println!(" ... and {} more", messages.len() - 3);
}
println!();
}
// Format messages for LLM
let messages_text: String = messages
.iter()
.take(200)
.map(|m| {
if m.is_sent {
format!("Me: {}", m.body)
} else {
format!("{}: {}", m.contact, m.body)
}
})
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
r#"Summarize this day's conversation between me and {}.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
Date: {} ({})
Messages:
{}
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]
Keywords: [specific, unique terms]"#,
args.contact,
args.contact,
date.format("%B %d, %Y"),
weekday,
messages_text
);
println!("Generating summary...");
let summary = ollama
.generate(
&prompt,
Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
)
.await?;
println!("\n📝 GENERATED SUMMARY:");
println!("─────────────────────────────────────────");
println!("{}", summary.trim());
println!("─────────────────────────────────────────");
if !args.test_mode {
println!("\nStripping boilerplate for embedding...");
let stripped = strip_summary_boilerplate(&summary);
println!(
"Stripped: {}...",
stripped.chars().take(80).collect::<String>()
);
println!("\nGenerating embedding...");
let embedding = ollama.generate_embedding(&stripped).await?;
println!("✓ Embedding generated ({} dimensions)", embedding.len());
println!("Saving to database...");
let insert = InsertDailySummary {
date: date_str.clone(),
contact: args.contact.clone(),
summary: summary.trim().to_string(),
message_count: messages.len() as i32,
embedding,
created_at: chrono::Utc::now().timestamp(),
// Must match the embedding model OllamaClient::generate_embedding actually uses
model_version: "nomic-embed-text:v1.5".to_string(),
};
let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
let context = opentelemetry::Context::new();
match dao.store_summary(&context, insert) {
Ok(_) => println!("✓ Saved to database"),
Err(e) => println!("✗ Database error: {:?}", e),
}
} else {
println!("\n⚠ TEST MODE: Not saved to database");
}
println!();
// Rate limiting between days
if idx < dates.len() - 1 {
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
}
}
println!("========================================");
println!("✓ Complete!");
println!("Processed {} days", dates.len());
println!("========================================");
Ok(())
}
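The prompt above pins the model to a `Summary: ... / Keywords: ...` reply format, but the binary stores the raw text as-is. A minimal sketch of how that contract could be parsed downstream; `parse_llm_response` is a hypothetical helper, not part of this crate:

```rust
/// Hypothetical helper: split a reply that follows the "Summary:" /
/// "Keywords:" contract enforced by the prompt above. Falls back to the
/// whole reply when the labels are missing, since models do not always
/// follow format instructions.
fn parse_llm_response(reply: &str) -> (String, Vec<String>) {
    let mut summary = String::new();
    let mut keywords = Vec::new();
    for line in reply.lines() {
        let line = line.trim();
        if let Some(rest) = line.strip_prefix("Summary:") {
            summary = rest.trim().to_string();
        } else if let Some(rest) = line.strip_prefix("Keywords:") {
            keywords = rest
                .split(',')
                .map(|k| k.trim().to_string())
                .filter(|k| !k.is_empty())
                .collect();
        } else if !summary.is_empty() && keywords.is_empty() && !line.is_empty() {
            // Multi-line summaries: keep appending until "Keywords:" shows up.
            summary.push(' ');
            summary.push_str(line);
        }
    }
    if summary.is_empty() {
        summary = reply.trim().to_string();
    }
    (summary, keywords)
}
```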

View File

@@ -1,154 +0,0 @@
use crate::database::{ExifDao, FavoriteDao};
use crate::tags::TagDao;
use anyhow::Result;
use log::{error, info};
use opentelemetry;
use std::sync::{Arc, Mutex};
pub struct DatabaseUpdater {
tag_dao: Arc<Mutex<dyn TagDao>>,
exif_dao: Arc<Mutex<dyn ExifDao>>,
favorites_dao: Arc<Mutex<dyn FavoriteDao>>,
}
impl DatabaseUpdater {
pub fn new(
tag_dao: Arc<Mutex<dyn TagDao>>,
exif_dao: Arc<Mutex<dyn ExifDao>>,
favorites_dao: Arc<Mutex<dyn FavoriteDao>>,
) -> Self {
Self {
tag_dao,
exif_dao,
favorites_dao,
}
}
/// Update file path across all three database tables
    /// Returns Ok(()) if at least one table was updated; per-table failures are only logged, and Err is returned when every table fails
pub fn update_file_path(&mut self, old_path: &str, new_path: &str) -> Result<()> {
let context = opentelemetry::Context::current();
let mut success_count = 0;
let mut error_count = 0;
// Update tagged_photo table
if let Ok(mut dao) = self.tag_dao.lock() {
match dao.update_photo_name(old_path, new_path, &context) {
Ok(_) => {
info!("Updated tagged_photo: {} -> {}", old_path, new_path);
success_count += 1;
}
Err(e) => {
error!("Failed to update tagged_photo for {}: {:?}", old_path, e);
error_count += 1;
}
}
} else {
error!("Failed to acquire lock on TagDao");
error_count += 1;
}
// Update image_exif table
if let Ok(mut dao) = self.exif_dao.lock() {
match dao.update_file_path(&context, old_path, new_path) {
Ok(_) => {
info!("Updated image_exif: {} -> {}", old_path, new_path);
success_count += 1;
}
Err(e) => {
error!("Failed to update image_exif for {}: {:?}", old_path, e);
error_count += 1;
}
}
} else {
error!("Failed to acquire lock on ExifDao");
error_count += 1;
}
// Update favorites table
if let Ok(mut dao) = self.favorites_dao.lock() {
match dao.update_path(old_path, new_path) {
Ok(_) => {
info!("Updated favorites: {} -> {}", old_path, new_path);
success_count += 1;
}
Err(e) => {
error!("Failed to update favorites for {}: {:?}", old_path, e);
error_count += 1;
}
}
} else {
error!("Failed to acquire lock on FavoriteDao");
error_count += 1;
}
if success_count > 0 {
info!(
"Updated {}/{} tables for {} -> {}",
success_count,
success_count + error_count,
old_path,
new_path
);
Ok(())
} else {
Err(anyhow::anyhow!(
"Failed to update any tables for {} -> {}",
old_path,
new_path
))
}
}
/// Get all file paths from all three database tables
pub fn get_all_file_paths(&mut self) -> Result<Vec<String>> {
let context = opentelemetry::Context::current();
let mut all_paths = Vec::new();
// Get from tagged_photo
if let Ok(mut dao) = self.tag_dao.lock() {
match dao.get_all_photo_names(&context) {
Ok(paths) => {
info!("Found {} paths in tagged_photo", paths.len());
all_paths.extend(paths);
}
Err(e) => {
error!("Failed to get paths from tagged_photo: {:?}", e);
}
}
}
// Get from image_exif
if let Ok(mut dao) = self.exif_dao.lock() {
match dao.get_all_file_paths(&context) {
Ok(paths) => {
info!("Found {} paths in image_exif", paths.len());
all_paths.extend(paths);
}
Err(e) => {
error!("Failed to get paths from image_exif: {:?}", e);
}
}
}
// Get from favorites
if let Ok(mut dao) = self.favorites_dao.lock() {
match dao.get_all_paths() {
Ok(paths) => {
info!("Found {} paths in favorites", paths.len());
all_paths.extend(paths);
}
Err(e) => {
error!("Failed to get paths from favorites: {:?}", e);
}
}
}
// Deduplicate
all_paths.sort();
all_paths.dedup();
info!("Total unique paths across all tables: {}", all_paths.len());
Ok(all_paths)
}
}
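A usage sketch for the updater above; it only depends on the three trait objects, so concrete DAOs (or test doubles) can be injected by the caller. The wrapper function and the example paths are illustrative, not code from this repository:

```rust
use std::sync::{Arc, Mutex};

// Assumes the same crate context as above (DatabaseUpdater, TagDao,
// ExifDao, FavoriteDao in scope). The rename succeeds as long as at
// least one table accepted the new path; per-table failures are logged.
fn rename_everywhere(
    tag_dao: Arc<Mutex<dyn TagDao>>,
    exif_dao: Arc<Mutex<dyn ExifDao>>,
    favorites_dao: Arc<Mutex<dyn FavoriteDao>>,
) -> anyhow::Result<()> {
    let mut updater = DatabaseUpdater::new(tag_dao, exif_dao, favorites_dao);
    updater.update_file_path("2021/IMG_0001.jpeg", "2021/IMG_0001.jpg")?;
    Ok(())
}
```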

View File

@@ -1,103 +0,0 @@
use anyhow::{Context, Result};
use std::fs::File;
use std::io::Read;
use std::path::Path;
/// Detect the actual file type by reading the magic number (file header)
/// Returns the canonical extension for the detected type, or None if unknown
pub fn detect_file_type(path: &Path) -> Result<Option<String>> {
let mut file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?;
// Read first 512 bytes for magic number detection
let mut buffer = vec![0; 512];
let bytes_read = file
.read(&mut buffer)
.with_context(|| format!("Failed to read file: {:?}", path))?;
buffer.truncate(bytes_read);
// Detect type using infer crate
let detected_type = infer::get(&buffer);
Ok(detected_type.map(|t| get_canonical_extension(t.mime_type())))
}
/// Map MIME type to canonical file extension
pub fn get_canonical_extension(mime_type: &str) -> String {
match mime_type {
// Images
"image/jpeg" => "jpg",
"image/png" => "png",
"image/webp" => "webp",
"image/tiff" => "tiff",
"image/heif" | "image/heic" => "heic",
"image/avif" => "avif",
// Videos
"video/mp4" => "mp4",
"video/quicktime" => "mov",
// Fallback: use the last part of MIME type
_ => mime_type.split('/').next_back().unwrap_or("unknown"),
}
.to_string()
}
/// Check if a file should be renamed based on current vs detected extension
/// Handles aliases (jpg/jpeg are equivalent)
pub fn should_rename(current_ext: &str, detected_ext: &str) -> bool {
let current = current_ext.to_lowercase();
let detected = detected_ext.to_lowercase();
// Direct match
if current == detected {
return false;
}
// Handle JPEG aliases (jpg and jpeg are equivalent)
if (current == "jpg" || current == "jpeg") && (detected == "jpg" || detected == "jpeg") {
return false;
}
// Handle TIFF aliases (tiff and tif are equivalent)
if (current == "tiff" || current == "tif") && (detected == "tiff" || detected == "tif") {
return false;
}
// Extensions differ and are not aliases
true
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_canonical_extension() {
assert_eq!(get_canonical_extension("image/jpeg"), "jpg");
assert_eq!(get_canonical_extension("image/png"), "png");
assert_eq!(get_canonical_extension("image/webp"), "webp");
assert_eq!(get_canonical_extension("video/mp4"), "mp4");
assert_eq!(get_canonical_extension("video/quicktime"), "mov");
}
#[test]
fn test_should_rename() {
// Same extension - no rename
assert!(!should_rename("jpg", "jpg"));
assert!(!should_rename("png", "png"));
// JPEG aliases - no rename
assert!(!should_rename("jpg", "jpeg"));
assert!(!should_rename("jpeg", "jpg"));
assert!(!should_rename("JPG", "jpeg"));
// TIFF aliases - no rename
assert!(!should_rename("tiff", "tif"));
assert!(!should_rename("tif", "tiff"));
// Different types - should rename
assert!(should_rename("png", "jpg"));
assert!(should_rename("jpg", "png"));
assert!(should_rename("webp", "png"));
}
}
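The two helpers above are meant to be used together; a small sketch (same-module context assumed, `needs_extension_fix` is hypothetical) that turns them into a single "does this file need fixing?" check:

```rust
use std::path::Path;

/// Returns Some(new_extension) when the detected type disagrees with the
/// current extension; jpg/jpeg and tiff/tif aliases never trigger a rename.
fn needs_extension_fix(path: &Path) -> anyhow::Result<Option<String>> {
    let current = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    match detect_file_type(path)? {
        Some(detected) if should_rename(current, &detected) => Ok(Some(detected)),
        _ => Ok(None),
    }
}
```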

View File

@@ -1,11 +0,0 @@
pub mod database_updater;
pub mod file_type_detector;
pub mod phase1;
pub mod phase2;
pub mod types;
pub use database_updater::DatabaseUpdater;
pub use file_type_detector::{detect_file_type, get_canonical_extension, should_rename};
pub use phase1::resolve_missing_files;
pub use phase2::validate_file_types;
pub use types::{CleanupConfig, CleanupStats, FileIssue, IssueType};

View File

@@ -1,147 +0,0 @@
use crate::cleanup::database_updater::DatabaseUpdater;
use crate::cleanup::types::{CleanupConfig, CleanupStats};
use crate::file_types::IMAGE_EXTENSIONS;
use anyhow::Result;
use log::{error, warn};
use std::path::PathBuf;
// All supported image extensions to try
const SUPPORTED_EXTENSIONS: &[&str] = IMAGE_EXTENSIONS;
/// Phase 1: Resolve missing files by searching for alternative extensions
pub fn resolve_missing_files(
config: &CleanupConfig,
db_updater: &mut DatabaseUpdater,
) -> Result<CleanupStats> {
let mut stats = CleanupStats::new();
println!("\nPhase 1: Missing File Resolution");
println!("---------------------------------");
// Get all file paths from database
println!("Scanning database for file references...");
let all_paths = db_updater.get_all_file_paths()?;
println!("Found {} unique file paths\n", all_paths.len());
stats.files_checked = all_paths.len();
println!("Checking file existence...");
let mut missing_count = 0;
let mut resolved_count = 0;
for path_str in all_paths {
let full_path = config.base_path.join(&path_str);
// Check if file exists
if full_path.exists() {
continue;
}
missing_count += 1;
stats.issues_found += 1;
// Try to find the file with different extensions
match find_file_with_alternative_extension(&config.base_path, &path_str) {
Some(new_path_str) => {
println!(
"{} → found as {} {}",
path_str,
new_path_str,
if config.dry_run {
"(dry-run, not updated)"
} else {
""
}
);
if !config.dry_run {
// Update database
match db_updater.update_file_path(&path_str, &new_path_str) {
Ok(_) => {
resolved_count += 1;
stats.issues_fixed += 1;
}
Err(e) => {
error!("Failed to update database for {}: {:?}", path_str, e);
stats.add_error(format!("DB update failed for {}: {}", path_str, e));
}
}
} else {
resolved_count += 1;
}
}
None => {
warn!("✗ {} → not found with any extension", path_str);
}
}
}
println!("\nResults:");
println!("- Files checked: {}", stats.files_checked);
println!("- Missing files: {}", missing_count);
println!("- Resolved: {}", resolved_count);
println!(
"- Still missing: {}",
missing_count - if config.dry_run { 0 } else { resolved_count }
);
if !stats.errors.is_empty() {
println!("- Errors: {}", stats.errors.len());
}
Ok(stats)
}
/// Find a file with an alternative extension
/// Returns the relative path with the new extension if found
fn find_file_with_alternative_extension(
base_path: &PathBuf,
relative_path: &str,
) -> Option<String> {
let full_path = base_path.join(relative_path);
// Get the parent directory and file stem (name without extension)
let parent = full_path.parent()?;
let stem = full_path.file_stem()?.to_str()?;
// Try each supported extension
for ext in SUPPORTED_EXTENSIONS {
let test_path = parent.join(format!("{}.{}", stem, ext));
if test_path.exists() {
// Convert back to relative path
if let Ok(rel) = test_path.strip_prefix(base_path)
&& let Some(rel_str) = rel.to_str()
{
return Some(rel_str.to_string());
}
}
}
None
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn test_find_file_with_alternative_extension() {
let temp_dir = TempDir::new().unwrap();
let base_path = temp_dir.path().to_path_buf();
// Create a test file with .jpeg extension
let test_file = base_path.join("test.jpeg");
fs::write(&test_file, b"test").unwrap();
// Try to find it as .jpg
let result = find_file_with_alternative_extension(&base_path, "test.jpg");
assert!(result.is_some());
assert_eq!(result.unwrap(), "test.jpeg");
// Try to find non-existent file
let result = find_file_with_alternative_extension(&base_path, "nonexistent.jpg");
assert!(result.is_none());
}
}

View File

@@ -1,241 +0,0 @@
use crate::cleanup::database_updater::DatabaseUpdater;
use crate::cleanup::file_type_detector::{detect_file_type, should_rename};
use crate::cleanup::types::{CleanupConfig, CleanupStats};
use anyhow::Result;
use log::{error, warn};
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Phase 2: Validate file types and rename mismatches
pub fn validate_file_types(
config: &CleanupConfig,
db_updater: &mut DatabaseUpdater,
) -> Result<CleanupStats> {
let mut stats = CleanupStats::new();
let mut auto_fix_all = config.auto_fix;
let mut skip_all = false;
println!("\nPhase 2: File Type Validation");
println!("------------------------------");
// Walk the filesystem
println!("Scanning filesystem...");
let files: Vec<PathBuf> = WalkDir::new(&config.base_path)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| is_supported_media_file(e.path()))
.map(|e| e.path().to_path_buf())
.collect();
println!("Files found: {}\n", files.len());
stats.files_checked = files.len();
println!("Detecting file types...");
let mut mismatches_found = 0;
let mut files_renamed = 0;
let mut user_skipped = 0;
for file_path in files {
// Get current extension
let current_ext = match file_path.extension() {
Some(ext) => ext.to_str().unwrap_or(""),
None => continue, // Skip files without extensions
};
// Detect actual file type
match detect_file_type(&file_path) {
Ok(Some(detected_ext)) => {
// Check if we should rename
if should_rename(current_ext, &detected_ext) {
mismatches_found += 1;
stats.issues_found += 1;
// Get relative path for display and database
let relative_path = match file_path.strip_prefix(&config.base_path) {
Ok(rel) => rel.to_str().unwrap_or(""),
Err(_) => {
error!("Failed to get relative path for {:?}", file_path);
continue;
}
};
println!("\nFile type mismatch:");
println!(" Path: {}", relative_path);
println!(" Current: .{}", current_ext);
println!(" Actual: .{}", detected_ext);
// Calculate new path
let new_file_path = file_path.with_extension(&detected_ext);
let new_relative_path = match new_file_path.strip_prefix(&config.base_path) {
Ok(rel) => rel.to_str().unwrap_or(""),
Err(_) => {
error!("Failed to get new relative path for {:?}", new_file_path);
continue;
}
};
// Check if destination already exists
if new_file_path.exists() {
warn!("✗ Destination already exists: {}", new_relative_path);
stats.add_error(format!(
"Destination exists for {}: {}",
relative_path, new_relative_path
));
continue;
}
// Determine if we should proceed
let should_proceed = if config.dry_run {
println!(" (dry-run mode - would rename to {})", new_relative_path);
false
} else if skip_all {
println!(" Skipped (skip all)");
user_skipped += 1;
false
} else if auto_fix_all {
true
} else {
// Interactive prompt
match prompt_for_rename(new_relative_path) {
RenameDecision::Yes => true,
RenameDecision::No => {
user_skipped += 1;
false
}
RenameDecision::All => {
auto_fix_all = true;
true
}
RenameDecision::SkipAll => {
skip_all = true;
user_skipped += 1;
false
}
}
};
if should_proceed {
// Rename the file
match fs::rename(&file_path, &new_file_path) {
Ok(_) => {
println!("✓ Renamed file");
// Update database
match db_updater.update_file_path(relative_path, new_relative_path)
{
Ok(_) => {
files_renamed += 1;
stats.issues_fixed += 1;
}
Err(e) => {
error!(
"File renamed but DB update failed for {}: {:?}",
relative_path, e
);
stats.add_error(format!(
"DB update failed for {}: {}",
relative_path, e
));
}
}
}
Err(e) => {
error!("✗ Failed to rename file: {:?}", e);
stats.add_error(format!(
"Rename failed for {}: {}",
relative_path, e
));
}
}
}
}
}
Ok(None) => {
// Could not detect file type - skip
// This is normal for some RAW formats or corrupted files
}
Err(e) => {
warn!("Failed to detect type for {:?}: {:?}", file_path, e);
}
}
}
println!("\nResults:");
println!("- Files scanned: {}", stats.files_checked);
println!("- Mismatches found: {}", mismatches_found);
if config.dry_run {
println!("- Would rename: {}", mismatches_found);
} else {
println!("- Files renamed: {}", files_renamed);
if user_skipped > 0 {
println!("- User skipped: {}", user_skipped);
}
}
if !stats.errors.is_empty() {
println!("- Errors: {}", stats.errors.len());
}
Ok(stats)
}
/// Check if a file is a supported media file based on extension
fn is_supported_media_file(path: &Path) -> bool {
use crate::file_types::is_media_file;
is_media_file(path)
}
#[derive(Debug)]
enum RenameDecision {
Yes,
No,
All,
SkipAll,
}
/// Prompt the user for rename decision
fn prompt_for_rename(new_path: &str) -> RenameDecision {
println!("\nRename to {}?", new_path);
println!(" [y] Yes");
println!(" [n] No (default)");
println!(" [a] Yes to all");
println!(" [s] Skip all remaining");
print!("Choice: ");
// Force flush stdout
use std::io::{self, Write};
let _ = io::stdout().flush();
let mut input = String::new();
match io::stdin().read_line(&mut input) {
Ok(_) => {
let choice = input.trim().to_lowercase();
match choice.as_str() {
"y" | "yes" => RenameDecision::Yes,
"a" | "all" => RenameDecision::All,
"s" | "skip" => RenameDecision::SkipAll,
_ => RenameDecision::No,
}
}
Err(_) => RenameDecision::No,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_supported_media_file() {
assert!(is_supported_media_file(Path::new("test.jpg")));
assert!(is_supported_media_file(Path::new("test.JPG")));
assert!(is_supported_media_file(Path::new("test.png")));
assert!(is_supported_media_file(Path::new("test.webp")));
assert!(is_supported_media_file(Path::new("test.mp4")));
assert!(is_supported_media_file(Path::new("test.mov")));
assert!(!is_supported_media_file(Path::new("test.txt")));
assert!(!is_supported_media_file(Path::new("test")));
}
}

View File

@@ -1,39 +0,0 @@
use std::path::PathBuf;
#[derive(Debug, Clone)]
pub struct CleanupConfig {
pub base_path: PathBuf,
pub dry_run: bool,
pub auto_fix: bool,
}
#[derive(Debug, Clone)]
pub struct FileIssue {
pub current_path: String,
pub issue_type: IssueType,
pub suggested_path: Option<String>,
}
#[derive(Debug, Clone)]
pub enum IssueType {
MissingFile,
ExtensionMismatch { current: String, actual: String },
}
#[derive(Debug, Clone, Default)]
pub struct CleanupStats {
pub files_checked: usize,
pub issues_found: usize,
pub issues_fixed: usize,
pub errors: Vec<String>,
}
impl CleanupStats {
pub fn new() -> Self {
Self::default()
}
pub fn add_error(&mut self, error: String) {
self.errors.push(error);
}
}
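Putting the pieces together: a sketch of how the re-exported API (CleanupConfig, DatabaseUpdater, resolve_missing_files, validate_file_types) composes into one cleanup run. The `run_cleanup` wrapper and its wiring are illustrative, not code from this repository:

```rust
use std::path::PathBuf;

// Assumes `updater` was built elsewhere with the crate's DAO implementations.
fn run_cleanup(
    base_path: PathBuf,
    updater: &mut DatabaseUpdater,
    dry_run: bool,
) -> anyhow::Result<()> {
    let config = CleanupConfig {
        base_path,
        dry_run,
        auto_fix: false, // prompt interactively for each mismatch
    };
    let phase1 = resolve_missing_files(&config, updater)?;
    let phase2 = validate_file_types(&config, updater)?;
    println!(
        "cleanup: {} issues found, {} fixed",
        phase1.issues_found + phase2.issues_found,
        phase1.issues_fixed + phase2.issues_fixed
    );
    Ok(())
}
```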

View File

@@ -1,15 +1,9 @@
use std::{fs, str::FromStr};
use crate::database::models::ImageExif;
use anyhow::{Context, anyhow};
use chrono::{DateTime, Utc};
use log::error;
use std::str::FromStr;
use actix_web::{dev, Error, FromRequest, http::header, HttpRequest};
use actix_web::error::ErrorUnauthorized;
use actix_web::{Error, FromRequest, HttpRequest, dev, http::header};
use futures::future::{Ready, err, ok};
use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode};
use futures::future::{err, ok, Ready};
use jsonwebtoken::{Algorithm, decode, DecodingKey, Validation};
use serde::{Deserialize, Serialize};
#[derive(Serialize)]
@@ -17,51 +11,30 @@ pub struct Token<'a> {
pub token: &'a str,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
#[derive(Deserialize, Serialize)]
pub struct Claims {
pub sub: String,
pub exp: i64,
}
#[cfg(test)]
pub mod helper {
use super::Claims;
use chrono::{Duration, Utc};
impl Claims {
pub fn valid_user(user_id: String) -> Self {
Claims {
sub: user_id,
exp: (Utc::now() + Duration::minutes(1)).timestamp(),
}
}
}
}
pub fn secret_key() -> String {
if cfg!(test) {
String::from("test_key")
} else {
dotenv::var("SECRET_KEY").expect("SECRET_KEY env not set!")
}
dotenv::var("SECRET_KEY").expect("SECRET_KEY env not set!")
}
impl FromStr for Claims {
type Err = jsonwebtoken::errors::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let token = s.strip_prefix("Bearer ").ok_or_else(|| {
jsonwebtoken::errors::Error::from(jsonwebtoken::errors::ErrorKind::InvalidToken)
})?;
let token = *(s.split("Bearer ").collect::<Vec<_>>().last().unwrap_or(&""));
match decode::<Claims>(
token,
&token,
&DecodingKey::from_secret(secret_key().as_bytes()),
&Validation::new(Algorithm::HS256),
) {
Ok(data) => Ok(data.claims),
Err(other) => {
error!("DecodeError: {}", other);
println!("DecodeError: {}", other);
Err(other)
}
}
@@ -71,139 +44,28 @@ impl FromStr for Claims {
impl FromRequest for Claims {
type Error = Error;
type Future = Ready<Result<Self, Self::Error>>;
type Config = ();
fn from_request(req: &HttpRequest, _payload: &mut dev::Payload) -> Self::Future {
req.headers()
.get(header::AUTHORIZATION)
.map_or_else(
|| Err(anyhow!("No authorization header")),
|header| {
header
.to_str()
.context("Unable to read Authorization header to string")
},
)
.and_then(|header| {
Claims::from_str(header)
.with_context(|| format!("Unable to decode token from: {}", header))
})
.map_or_else(
|e| {
error!("{}", e);
err(ErrorUnauthorized("Bad token"))
},
ok,
)
let claims = match req.headers().get(header::AUTHORIZATION) {
Some(header) => Claims::from_str(header.to_str().unwrap_or_else(|_| "")),
None => Err(jsonwebtoken::errors::Error::from(
jsonwebtoken::errors::ErrorKind::InvalidToken,
)),
};
if let Ok(claims) = claims {
ok(claims)
} else {
err(ErrorUnauthorized("Bad token"))
}
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct PhotosResponse {
pub photos: Vec<String>,
pub dirs: Vec<String>,
// Pagination metadata (only present when limit is set)
#[serde(skip_serializing_if = "Option::is_none")]
pub total_count: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub has_more: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_offset: Option<i64>,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
#[serde(rename_all = "lowercase")]
pub enum SortType {
Shuffle,
NameAsc,
NameDesc,
TagCountAsc,
TagCountDesc,
DateTakenAsc,
DateTakenDesc,
}
#[derive(Deserialize)]
pub struct FilesRequest {
pub path: String,
// comma separated numbers
pub tag_ids: Option<String>,
pub exclude_tag_ids: Option<String>,
pub tag_filter_mode: Option<FilterMode>,
pub recursive: Option<bool>,
pub sort: Option<SortType>,
// EXIF-based search parameters
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
// GPS location search
pub gps_lat: Option<f64>,
pub gps_lon: Option<f64>,
pub gps_radius_km: Option<f64>,
// Date range filtering (Unix timestamps)
pub date_from: Option<i64>,
pub date_to: Option<i64>,
// Media type filtering
pub media_type: Option<MediaType>,
// Pagination parameters (optional - backward compatible)
pub limit: Option<i64>,
pub offset: Option<i64>,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
pub enum FilterMode {
Any,
All,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
#[serde(rename_all = "lowercase")]
pub enum MediaType {
Photo,
Video,
All,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
#[serde(rename_all = "lowercase")]
pub enum PhotoSize {
Full,
Thumb,
}
#[derive(Debug, Deserialize)]
pub struct ThumbnailRequest {
pub(crate) path: String,
#[allow(dead_code)] // Part of API contract, may be used in future
pub(crate) size: Option<PhotoSize>,
#[serde(default)]
#[allow(dead_code)] // Part of API contract, may be used in future
pub(crate) format: Option<ThumbnailFormat>,
#[serde(default)]
pub(crate) shape: Option<ThumbnailShape>,
}
#[derive(Debug, Deserialize, PartialEq)]
pub enum ThumbnailFormat {
#[serde(rename = "gif")]
Gif,
#[serde(rename = "image")]
Image,
}
#[derive(Debug, Deserialize, PartialEq)]
pub enum ThumbnailShape {
#[serde(rename = "circle")]
Circle,
#[serde(rename = "square")]
Square,
pub path: String,
pub size: Option<String>,
}
#[derive(Deserialize)]
@@ -223,200 +85,3 @@ pub struct CreateAccountRequest {
pub struct AddFavoriteRequest {
pub path: String,
}
#[derive(Debug, Serialize)]
pub struct MetadataResponse {
pub created: Option<i64>,
pub modified: Option<i64>,
pub size: u64,
pub exif: Option<ExifMetadata>,
pub filename_date: Option<i64>, // Date extracted from filename
}
impl From<fs::Metadata> for MetadataResponse {
fn from(metadata: fs::Metadata) -> Self {
MetadataResponse {
created: metadata.created().ok().map(|created| {
let utc: DateTime<Utc> = created.into();
utc.timestamp()
}),
modified: metadata.modified().ok().map(|modified| {
let utc: DateTime<Utc> = modified.into();
utc.timestamp()
}),
size: metadata.len(),
exif: None,
filename_date: None, // Will be set in endpoint handler
}
}
}
#[derive(Debug, Serialize)]
pub struct ExifMetadata {
pub camera: Option<CameraInfo>,
pub image_properties: Option<ImageProperties>,
pub gps: Option<GpsCoordinates>,
pub capture_settings: Option<CaptureSettings>,
pub date_taken: Option<i64>,
}
#[derive(Debug, Serialize)]
pub struct CameraInfo {
pub make: Option<String>,
pub model: Option<String>,
pub lens: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct ImageProperties {
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
}
#[derive(Debug, Serialize)]
pub struct GpsCoordinates {
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub altitude: Option<f64>,
}
#[derive(Debug, Serialize)]
pub struct CaptureSettings {
pub focal_length: Option<f64>,
pub aperture: Option<f64>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
}
impl From<ImageExif> for ExifMetadata {
fn from(exif: ImageExif) -> Self {
let has_camera_info =
exif.camera_make.is_some() || exif.camera_model.is_some() || exif.lens_model.is_some();
let has_image_properties =
exif.width.is_some() || exif.height.is_some() || exif.orientation.is_some();
let has_gps = exif.gps_latitude.is_some()
|| exif.gps_longitude.is_some()
|| exif.gps_altitude.is_some();
let has_capture_settings = exif.focal_length.is_some()
|| exif.aperture.is_some()
|| exif.shutter_speed.is_some()
|| exif.iso.is_some();
ExifMetadata {
camera: if has_camera_info {
Some(CameraInfo {
make: exif.camera_make,
model: exif.camera_model,
lens: exif.lens_model,
})
} else {
None
},
image_properties: if has_image_properties {
Some(ImageProperties {
width: exif.width,
height: exif.height,
orientation: exif.orientation,
})
} else {
None
},
gps: if has_gps {
Some(GpsCoordinates {
latitude: exif.gps_latitude.map(|v| v as f64),
longitude: exif.gps_longitude.map(|v| v as f64),
altitude: exif.gps_altitude.map(|v| v as f64),
})
} else {
None
},
capture_settings: if has_capture_settings {
Some(CaptureSettings {
focal_length: exif.focal_length.map(|v| v as f64),
aperture: exif.aperture.map(|v| v as f64),
shutter_speed: exif.shutter_speed,
iso: exif.iso,
})
} else {
None
},
date_taken: exif.date_taken,
}
}
}
#[derive(Debug, Deserialize)]
pub struct AddTagRequest {
pub file_name: String,
pub tag_name: String,
}
#[derive(Deserialize)]
pub struct GetTagsRequest {
pub path: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct GpsPhotoSummary {
pub path: String,
pub lat: f64,
pub lon: f64,
pub date_taken: Option<i64>,
}
#[derive(Debug, Serialize)]
pub struct GpsPhotosResponse {
pub photos: Vec<GpsPhotoSummary>,
pub total: usize,
}
#[cfg(test)]
mod tests {
use super::Claims;
use jsonwebtoken::errors::ErrorKind;
use std::str::FromStr;
#[test]
fn test_token_from_claims() {
let claims = Claims {
exp: 16136164790, // 2481-ish
sub: String::from("9"),
};
let c = Claims::from_str(
"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5IiwiZXhwIjoxNjEzNjE2NDc5MH0.9wwK4l8vhvq55YoueEljMbN_5uVTaAsGLLRPr0AuymE")
.unwrap();
assert_eq!(claims.sub, c.sub);
assert_eq!(claims.exp, c.exp);
}
#[test]
fn test_expired_token() {
let err = Claims::from_str(
"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5IiwiZXhwIjoxNn0.eZnfaNfiD54VMbphIqeBICeG9SzAtwNXntLwtTBihjY",
);
match err.unwrap_err().into_kind() {
ErrorKind::ExpiredSignature => assert!(true),
kind => {
println!("Unexpected error: {:?}", kind);
assert!(false)
}
}
}
#[test]
fn test_junk_token_is_invalid() {
let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ");
match err.unwrap_err().into_kind() {
ErrorKind::InvalidToken => assert!(true),
kind => {
println!("Unexpected error: {:?}", kind);
assert!(false)
}
}
}
}
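Because Claims implements FromRequest, a handler can take it as a parameter and actix-web runs the token check before the handler body executes. A minimal sketch; the /whoami route is hypothetical:

```rust
use actix_web::get;

// Requests without a valid "Bearer <jwt>" Authorization header are rejected
// with 401 "Bad token" by the extractor above, so the body only runs for
// authenticated callers.
#[get("/whoami")]
async fn whoami(claims: Claims) -> String {
    claims.sub
}
```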

View File

@@ -1,554 +0,0 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a calendar event
#[derive(Serialize, Clone, Debug)]
pub struct CalendarEvent {
pub id: i32,
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Option<String>, // JSON string
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new calendar event
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct InsertCalendarEvent {
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Option<String>,
pub embedding: Option<Vec<f32>>, // 768-dim, optional
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait CalendarEventDao: Sync + Send {
/// Store calendar event with optional embedding
fn store_event(
&mut self,
context: &opentelemetry::Context,
event: InsertCalendarEvent,
) -> Result<CalendarEvent, DbError>;
/// Batch insert events (for import efficiency)
fn store_events_batch(
&mut self,
context: &opentelemetry::Context,
events: Vec<InsertCalendarEvent>,
) -> Result<usize, DbError>;
/// Find events in time range (PRIMARY query method)
fn find_events_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Find semantically similar events (SECONDARY - requires embeddings)
fn find_similar_events(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Hybrid: Time-filtered + semantic ranking
/// "Events during photo timestamp ±N days, ranked by similarity to context"
fn find_relevant_events_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError>;
/// Check if event exists (idempotency)
fn event_exists(
&mut self,
context: &opentelemetry::Context,
event_uid: &str,
start_time: i64,
) -> Result<bool, DbError>;
/// Get count of events
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteCalendarEventDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteCalendarEventDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteCalendarEventDao {
pub fn new() -> Self {
SqliteCalendarEventDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
#[derive(QueryableByName)]
struct CalendarEventWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
event_uid: Option<String>,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
description: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
location: Option<String>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
start_time: i64,
#[diesel(sql_type = diesel::sql_types::BigInt)]
end_time: i64,
#[diesel(sql_type = diesel::sql_types::Bool)]
all_day: bool,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
organizer: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
attendees: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
embedding: Option<Vec<u8>>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl CalendarEventWithVectorRow {
fn to_calendar_event(&self) -> CalendarEvent {
CalendarEvent {
id: self.id,
event_uid: self.event_uid.clone(),
summary: self.summary.clone(),
description: self.description.clone(),
location: self.location.clone(),
start_time: self.start_time,
end_time: self.end_time,
all_day: self.all_day,
organizer: self.organizer.clone(),
attendees: self.attendees.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl CalendarEventDao for SqliteCalendarEventDao {
fn store_event(
&mut self,
context: &opentelemetry::Context,
event: InsertCalendarEvent,
) -> Result<CalendarEvent, DbError> {
trace_db_call(context, "insert", "store_event", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
// Validate embedding dimensions if provided
if let Some(ref emb) = event.embedding
&& emb.len() != 768
{
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
emb.len()
));
}
let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));
// INSERT OR REPLACE to handle re-imports
diesel::sql_query(
"INSERT OR REPLACE INTO calendar_events
(event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.event_uid)
.bind::<diesel::sql_types::Text, _>(&event.summary)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.description)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.location)
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
.bind::<diesel::sql_types::Bool, _>(event.all_day)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.organizer)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.attendees)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(CalendarEvent {
id: row_id,
event_uid: event.event_uid,
summary: event.summary,
description: event.description,
location: event.location,
start_time: event.start_time,
end_time: event.end_time,
all_day: event.all_day,
organizer: event.organizer,
attendees: event.attendees,
created_at: event.created_at,
source_file: event.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_events_batch(
&mut self,
context: &opentelemetry::Context,
events: Vec<InsertCalendarEvent>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_events_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for event in events {
// Validate embedding if provided
if let Some(ref emb) = event.embedding
&& emb.len() != 768
{
log::warn!(
"Skipping event with invalid embedding dimensions: {}",
emb.len()
);
continue;
}
let embedding_bytes =
event.embedding.as_ref().map(|e| Self::serialize_vector(e));
diesel::sql_query(
"INSERT OR REPLACE INTO calendar_events
(event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.event_uid,
)
.bind::<diesel::sql_types::Text, _>(&event.summary)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.description,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.location,
)
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
.bind::<diesel::sql_types::Bool, _>(event.all_day)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.organizer,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.attendees,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
&embedding_bytes,
)
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&event.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
inserted += 1;
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_events_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_events_in_range", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, NULL as embedding, created_at, source_file
FROM calendar_events
WHERE start_time >= ?1 AND start_time <= ?2
ORDER BY start_time ASC"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_events(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_similar_events", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all events with embeddings
let results = diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file
FROM calendar_events
WHERE embedding IS NOT NULL"
)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Compute similarities
let mut scored_events: Vec<(f32, CalendarEvent)> = results
.into_iter()
.filter_map(|row| {
if let Some(ref emb_bytes) = row.embedding {
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
let similarity = Self::cosine_similarity(query_embedding, &emb);
Some((similarity, row.to_calendar_event()))
} else {
None
}
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Found {} similar calendar events", scored_events.len());
if !scored_events.is_empty() {
log::info!("Top similarity: {:.4}", scored_events[0].0);
}
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_relevant_events_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<CalendarEvent>, DbError> {
trace_db_call(context, "query", "find_relevant_events_hybrid", |_span| {
let window_seconds = time_window_days * 86400;
let start_ts = center_timestamp - window_seconds;
let end_ts = center_timestamp + window_seconds;
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
// Step 1: Time-based filter (fast, indexed)
let events_in_range = diesel::sql_query(
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
organizer, attendees, embedding, created_at, source_file
FROM calendar_events
WHERE start_time >= ?1 AND start_time <= ?2"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_emb.len()
));
}
let mut scored_events: Vec<(f32, CalendarEvent)> = events_in_range
.into_iter()
.map(|row| {
// Events with embeddings get semantic scoring
let similarity = if let Some(ref emb_bytes) = row.embedding {
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
Self::cosine_similarity(query_emb, &emb)
} else {
0.5 // Neutral score for deserialization errors
}
} else {
0.5 // Neutral score for events without embeddings
};
(similarity, row.to_calendar_event())
})
.collect();
// Sort by similarity descending
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Hybrid query: {} events in time range, ranked by similarity", scored_events.len());
if !scored_events.is_empty() {
log::info!("Top similarity: {:.4}", scored_events[0].0);
}
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
} else {
// No semantic ranking, just return time-sorted (limit applied)
log::info!("Time-only query: {} events in range", events_in_range.len());
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
}
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn event_exists(
&mut self,
context: &opentelemetry::Context,
event_uid: &str,
start_time: i64,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "event_exists", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM calendar_events WHERE event_uid = ?1 AND start_time = ?2"
)
.bind::<diesel::sql_types::Text, _>(event_uid)
.bind::<diesel::sql_types::BigInt, _>(start_time)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_event_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get CalendarEventDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM calendar_events")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
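A usage sketch for the hybrid query above (same-module context assumed): the time window is the primary filter, and the optional 768-dim embedding only re-ranks events inside it. The window size and limit are arbitrary example values:

```rust
fn events_for_photo(
    dao: &mut dyn CalendarEventDao,
    photo_ts: i64,
    context_embedding: Option<&[f32]>,
) -> Result<Vec<CalendarEvent>, DbError> {
    let ctx = opentelemetry::Context::current();
    // Events within ±3 days of the photo, best 5 by similarity (or simply by
    // time when no embedding is supplied).
    dao.find_relevant_events_hybrid(&ctx, photo_ts, 3, context_embedding, 5)
}
```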

View File

@@ -1,489 +0,0 @@
use chrono::NaiveDate;
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a daily conversation summary
#[derive(Serialize, Clone, Debug)]
pub struct DailySummary {
pub id: i32,
pub date: String,
pub contact: String,
pub summary: String,
pub message_count: i32,
pub created_at: i64,
pub model_version: String,
}
/// Data for inserting a new daily summary
#[derive(Clone, Debug)]
pub struct InsertDailySummary {
pub date: String,
pub contact: String,
pub summary: String,
pub message_count: i32,
pub embedding: Vec<f32>,
pub created_at: i64,
pub model_version: String,
}
pub trait DailySummaryDao: Sync + Send {
/// Store a daily summary with its embedding
fn store_summary(
&mut self,
context: &opentelemetry::Context,
summary: InsertDailySummary,
) -> Result<DailySummary, DbError>;
/// Find semantically similar daily summaries using vector similarity
fn find_similar_summaries(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<DailySummary>, DbError>;
/// Find semantically similar daily summaries with time-based weighting
/// Combines cosine similarity with temporal proximity to target_date
/// Final score = similarity * time_weight, where time_weight decays with distance from target_date
fn find_similar_summaries_with_time_weight(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
target_date: &str,
limit: usize,
) -> Result<Vec<DailySummary>, DbError>;
/// Check if a summary exists for a given date and contact
fn summary_exists(
&mut self,
context: &opentelemetry::Context,
date: &str,
contact: &str,
) -> Result<bool, DbError>;
/// Get count of summaries for a contact
fn get_summary_count(
&mut self,
context: &opentelemetry::Context,
contact: &str,
) -> Result<i64, DbError>;
}
pub struct SqliteDailySummaryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteDailySummaryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteDailySummaryDao {
pub fn new() -> Self {
SqliteDailySummaryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
impl DailySummaryDao for SqliteDailySummaryDao {
fn store_summary(
&mut self,
context: &opentelemetry::Context,
summary: InsertDailySummary,
) -> Result<DailySummary, DbError> {
trace_db_call(context, "insert", "store_summary", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
// Validate embedding dimensions
if summary.embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
summary.embedding.len()
));
}
let embedding_bytes = Self::serialize_vector(&summary.embedding);
// INSERT OR REPLACE to handle updates if summary needs regeneration
diesel::sql_query(
"INSERT OR REPLACE INTO daily_conversation_summaries
(date, contact, summary, message_count, embedding, created_at, model_version)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
)
.bind::<diesel::sql_types::Text, _>(&summary.date)
.bind::<diesel::sql_types::Text, _>(&summary.contact)
.bind::<diesel::sql_types::Text, _>(&summary.summary)
.bind::<diesel::sql_types::Integer, _>(summary.message_count)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(summary.created_at)
.bind::<diesel::sql_types::Text, _>(&summary.model_version)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id as i32)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(DailySummary {
id: row_id,
date: summary.date,
contact: summary.contact,
summary: summary.summary,
message_count: summary.message_count,
created_at: summary.created_at,
model_version: summary.model_version,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_similar_summaries(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<DailySummary>, DbError> {
trace_db_call(context, "query", "find_similar_summaries", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all summaries with embeddings
let results = diesel::sql_query(
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
FROM daily_conversation_summaries"
)
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
log::info!("Loaded {} daily summaries for similarity comparison", results.len());
// Compute similarity for each summary
let mut scored_summaries: Vec<(f32, DailySummary)> = results
.into_iter()
.filter_map(|row| {
match Self::deserialize_vector(&row.embedding) {
Ok(embedding) => {
let similarity = Self::cosine_similarity(query_embedding, &embedding);
Some((
similarity,
DailySummary {
id: row.id,
date: row.date,
contact: row.contact,
summary: row.summary,
message_count: row.message_count,
created_at: row.created_at,
model_version: row.model_version,
},
))
}
Err(e) => {
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
None
}
}
})
.collect();
// Sort by similarity (highest first)
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
// Filter out poor matches (similarity < 0.3 is likely noise)
scored_summaries.retain(|(similarity, _)| *similarity >= 0.3);
// Log similarity distribution
if !scored_summaries.is_empty() {
let top_score = scored_summaries.first().map(|(s, _)| *s).unwrap_or(0.0);
let median_score = scored_summaries.get(scored_summaries.len() / 2).map(|(s, _)| *s).unwrap_or(0.0);
log::info!(
"Daily summary similarity - Top: {:.3}, Median: {:.3}, Count: {} (after 0.3 threshold)",
top_score,
median_score,
scored_summaries.len()
);
} else {
log::warn!("No daily summaries met the 0.3 similarity threshold");
}
// Take top N and log matches
let top_results: Vec<DailySummary> = scored_summaries
.into_iter()
.take(limit)
.map(|(similarity, summary)| {
log::info!(
"Summary match: similarity={:.3}, date={}, contact={}, summary=\"{}\"",
similarity,
summary.date,
summary.contact,
summary.summary.chars().take(100).collect::<String>()
);
summary
})
.collect();
Ok(top_results)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_summaries_with_time_weight(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
target_date: &str,
limit: usize,
) -> Result<Vec<DailySummary>, DbError> {
trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Parse target date
let target = NaiveDate::parse_from_str(target_date, "%Y-%m-%d")
.map_err(|e| anyhow::anyhow!("Invalid target date: {}", e))?;
// Load all summaries with embeddings
let results = diesel::sql_query(
"SELECT id, date, contact, summary, message_count, embedding, created_at, model_version
FROM daily_conversation_summaries"
)
.load::<DailySummaryWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
log::info!("Loaded {} daily summaries for time-weighted similarity (target: {})", results.len(), target_date);
// Compute time-weighted similarity for each summary
// Score = cosine_similarity * time_weight
            // time_weight = 0.5^(days_distance / 30): exponential decay with a 30-day half-life
let mut scored_summaries: Vec<(f32, f32, i64, DailySummary)> = results
.into_iter()
.filter_map(|row| {
match Self::deserialize_vector(&row.embedding) {
Ok(embedding) => {
let similarity = Self::cosine_similarity(query_embedding, &embedding);
// Calculate time weight
let summary_date = NaiveDate::parse_from_str(&row.date, "%Y-%m-%d").ok()?;
let days_distance = (target - summary_date).num_days().abs();
// Exponential decay with 30-day half-life
// At 0 days: weight = 1.0
// At 30 days: weight = 0.5
// At 60 days: weight = 0.25
// At 365 days: weight ~= 0.0001
let time_weight = 0.5_f32.powf(days_distance as f32 / 30.0);
// Combined score - but ensure semantic similarity still matters
// We use sqrt to soften the time weight's impact
let combined_score = similarity * time_weight.sqrt();
Some((
combined_score,
similarity,
days_distance,
DailySummary {
id: row.id,
date: row.date,
contact: row.contact,
summary: row.summary,
message_count: row.message_count,
created_at: row.created_at,
model_version: row.model_version,
},
))
}
Err(e) => {
log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e);
None
}
}
})
.collect();
// Sort by combined score (highest first)
scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
// Filter out poor matches (base similarity < 0.5 - stricter than before since we have time weighting)
scored_summaries.retain(|(_, similarity, _, _)| *similarity >= 0.5);
// Log similarity distribution
if !scored_summaries.is_empty() {
let (top_combined, top_sim, top_days, _) = &scored_summaries[0];
log::info!(
"Time-weighted similarity - Top: combined={:.3} (sim={:.3}, days={}), Count: {} matches",
top_combined,
top_sim,
top_days,
scored_summaries.len()
);
} else {
log::warn!("No daily summaries met the 0.5 similarity threshold");
}
// Take top N and log matches
let top_results: Vec<DailySummary> = scored_summaries
.into_iter()
.take(limit)
.map(|(combined, similarity, days, summary)| {
log::info!(
"Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"",
combined,
similarity,
days,
summary.date,
summary.contact,
summary.summary.chars().take(80).collect::<String>()
);
summary
})
.collect();
Ok(top_results)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn summary_exists(
&mut self,
context: &opentelemetry::Context,
date: &str,
contact: &str,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "summary_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
let count = diesel::sql_query(
"SELECT COUNT(*) as count FROM daily_conversation_summaries
WHERE date = ?1 AND contact = ?2",
)
.bind::<diesel::sql_types::Text, _>(date)
.bind::<diesel::sql_types::Text, _>(contact)
.get_result::<CountResult>(conn.deref_mut())
.map(|r| r.count)
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))?;
Ok(count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_summary_count(
&mut self,
context: &opentelemetry::Context,
contact: &str,
) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_summary_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
diesel::sql_query(
"SELECT COUNT(*) as count FROM daily_conversation_summaries WHERE contact = ?1",
)
.bind::<diesel::sql_types::Text, _>(contact)
.get_result::<CountResult>(conn.deref_mut())
.map(|r| r.count)
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
// Helper structs for raw SQL queries
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::BigInt)]
id: i64,
}
#[derive(QueryableByName)]
struct DailySummaryWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
date: String,
#[diesel(sql_type = diesel::sql_types::Text)]
contact: String,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Integer)]
message_count: i32,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Text)]
model_version: String,
}
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
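The scoring used in find_similar_summaries_with_time_weight, pulled out with worked numbers to make the decay concrete (illustrative only):

```rust
/// combined = similarity * sqrt(0.5^(days / 30)): a 30-day half-life on the
/// time weight, softened by sqrt() so raw similarity still dominates.
fn combined_score(similarity: f32, days_distance: i64) -> f32 {
    let time_weight = 0.5_f32.powf(days_distance as f32 / 30.0);
    similarity * time_weight.sqrt()
}

// With similarity 0.8: score is 0.80 at 0 days, ~0.57 at 30 days, and 0.40
// at 60 days, so a summary from two months away needs roughly twice the raw
// similarity of a same-day summary to outrank it.
```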

View File

@@ -1,133 +0,0 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{InsertPhotoInsight, PhotoInsight};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
pub trait InsightDao: Sync + Send {
fn store_insight(
&mut self,
context: &opentelemetry::Context,
insight: InsertPhotoInsight,
) -> Result<PhotoInsight, DbError>;
fn get_insight(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<Option<PhotoInsight>, DbError>;
fn delete_insight(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<(), DbError>;
fn get_all_insights(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<PhotoInsight>, DbError>;
}
pub struct SqliteInsightDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteInsightDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteInsightDao {
pub fn new() -> Self {
SqliteInsightDao {
connection: Arc::new(Mutex::new(connect())),
}
}
}
impl InsightDao for SqliteInsightDao {
fn store_insight(
&mut self,
context: &opentelemetry::Context,
insight: InsertPhotoInsight,
) -> Result<PhotoInsight, DbError> {
trace_db_call(context, "insert", "store_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
// Insert or replace on conflict (UNIQUE constraint on file_path)
diesel::replace_into(photo_insights)
.values(&insight)
.execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Insert error"))?;
// Retrieve the inserted record
photo_insights
.filter(file_path.eq(&insight.file_path))
.first::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn get_insight(
&mut self,
context: &opentelemetry::Context,
path: &str,
) -> Result<Option<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.filter(file_path.eq(path))
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn delete_insight(
&mut self,
context: &opentelemetry::Context,
path: &str,
) -> Result<(), DbError> {
trace_db_call(context, "delete", "delete_insight", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
diesel::delete(photo_insights.filter(file_path.eq(path)))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_all_insights(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<PhotoInsight>, DbError> {
trace_db_call(context, "query", "get_all_insights", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
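
A minimal usage sketch of the replace-on-conflict behaviour above; the title, summary, and model name are placeholder values:

```rust
use crate::database::models::InsertPhotoInsight;
use crate::database::{DbError, InsightDao};

/// Stores (or overwrites) the insight for one file; calling this twice for the same
/// file_path updates the existing row rather than duplicating it, thanks to replace_into.
fn upsert_insight(dao: &mut dyn InsightDao, path: &str) -> Result<(), DbError> {
    let ctx = opentelemetry::Context::current();
    let insight = InsertPhotoInsight {
        file_path: path.to_string(),
        title: "Example title".to_string(),     // placeholder
        summary: "Example summary".to_string(), // placeholder
        generated_at: 1_614_000_000,
        model_version: "example-model-v1".to_string(),
    };
    dao.store_insight(&ctx, insight).map(|_| ())
}
```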

View File

@@ -1,528 +0,0 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a location history record
#[derive(Serialize, Clone, Debug)]
pub struct LocationRecord {
pub id: i32,
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
pub place_name: Option<String>,
pub place_category: Option<String>,
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new location record
#[derive(Clone, Debug)]
pub struct InsertLocationRecord {
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
pub place_name: Option<String>,
pub place_category: Option<String>,
pub embedding: Option<Vec<f32>>, // 768-dim, optional (rarely used)
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait LocationHistoryDao: Sync + Send {
/// Store single location record
fn store_location(
&mut self,
context: &opentelemetry::Context,
location: InsertLocationRecord,
) -> Result<LocationRecord, DbError>;
/// Batch insert locations (Google Takeout has millions of points)
fn store_locations_batch(
&mut self,
context: &opentelemetry::Context,
locations: Vec<InsertLocationRecord>,
) -> Result<usize, DbError>;
/// Find nearest location to timestamp (PRIMARY query)
/// "Where was I at photo timestamp ±N minutes?"
fn find_nearest_location(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
max_time_diff_seconds: i64,
) -> Result<Option<LocationRecord>, DbError>;
/// Find locations in time range
fn find_locations_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<LocationRecord>, DbError>;
/// Find locations near GPS coordinates (for "photos near this place")
/// Uses approximate bounding box for performance
fn find_locations_near_point(
&mut self,
context: &opentelemetry::Context,
latitude: f64,
longitude: f64,
radius_km: f64,
) -> Result<Vec<LocationRecord>, DbError>;
/// Deduplicate: check if location exists
fn location_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
latitude: f64,
longitude: f64,
) -> Result<bool, DbError>;
/// Get count of location records
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteLocationHistoryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteLocationHistoryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteLocationHistoryDao {
pub fn new() -> Self {
SqliteLocationHistoryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
/// Haversine distance calculation (in kilometers)
/// Used for filtering locations by proximity to a point
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
const R: f64 = 6371.0; // Earth radius in km
let d_lat = (lat2 - lat1).to_radians();
let d_lon = (lon2 - lon1).to_radians();
let a = (d_lat / 2.0).sin().powi(2)
+ lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
R * c
}
/// Calculate approximate bounding box for spatial queries
/// Returns (min_lat, max_lat, min_lon, max_lon)
fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
const KM_PER_DEGREE_LAT: f64 = 111.0;
let km_per_degree_lon = 111.0 * lat.to_radians().cos();
let delta_lat = radius_km / KM_PER_DEGREE_LAT;
let delta_lon = radius_km / km_per_degree_lon;
(
lat - delta_lat, // min_lat
lat + delta_lat, // max_lat
lon - delta_lon, // min_lon
lon + delta_lon, // max_lon
)
}
}
#[derive(QueryableByName)]
struct LocationRecordRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::BigInt)]
timestamp: i64,
#[diesel(sql_type = diesel::sql_types::Float)]
latitude: f32,
#[diesel(sql_type = diesel::sql_types::Float)]
longitude: f32,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
accuracy: Option<i32>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
activity: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
activity_confidence: Option<i32>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
place_name: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
place_category: Option<String>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl LocationRecordRow {
fn to_location_record(&self) -> LocationRecord {
LocationRecord {
id: self.id,
timestamp: self.timestamp,
latitude: self.latitude as f64,
longitude: self.longitude as f64,
accuracy: self.accuracy,
activity: self.activity.clone(),
activity_confidence: self.activity_confidence,
place_name: self.place_name.clone(),
place_category: self.place_category.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl LocationHistoryDao for SqliteLocationHistoryDao {
fn store_location(
&mut self,
context: &opentelemetry::Context,
location: InsertLocationRecord,
) -> Result<LocationRecord, DbError> {
trace_db_call(context, "insert", "store_location", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
// Validate embedding dimensions if provided (rare for location data)
if let Some(ref emb) = location.embedding
&& emb.len() != 768
{
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
emb.len()
));
}
let embedding_bytes = location
.embedding
.as_ref()
.map(|e| Self::serialize_vector(e));
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+lat+lon)
diesel::sql_query(
"INSERT OR IGNORE INTO location_history
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(&location.accuracy)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.activity)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.activity_confidence,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.place_name)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_category,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(LocationRecord {
id: row_id,
timestamp: location.timestamp,
latitude: location.latitude,
longitude: location.longitude,
accuracy: location.accuracy,
activity: location.activity,
activity_confidence: location.activity_confidence,
place_name: location.place_name,
place_category: location.place_category,
created_at: location.created_at,
source_file: location.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_locations_batch(
&mut self,
context: &opentelemetry::Context,
locations: Vec<InsertLocationRecord>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_locations_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for location in locations {
// Validate embedding if provided (rare)
if let Some(ref emb) = location.embedding
&& emb.len() != 768
{
log::warn!(
"Skipping location with invalid embedding dimensions: {}",
emb.len()
);
continue;
}
let embedding_bytes = location
.embedding
.as_ref()
.map(|e| Self::serialize_vector(e));
let rows_affected = diesel::sql_query(
"INSERT OR IGNORE INTO location_history
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
)
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.accuracy,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.activity,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
&location.activity_confidence,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_name,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.place_category,
)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
&embedding_bytes,
)
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&location.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
if rows_affected > 0 {
inserted += 1;
}
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_nearest_location(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
max_time_diff_seconds: i64,
) -> Result<Option<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_nearest_location", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
let start_ts = timestamp - max_time_diff_seconds;
let end_ts = timestamp + max_time_diff_seconds;
// Find location closest to target timestamp within window
let results = diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY ABS(timestamp - ?3) ASC
LIMIT 1"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.load::<LocationRecordRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(results.into_iter().next().map(|r| r.to_location_record()))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_locations_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_locations_in_range", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY timestamp ASC"
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<LocationRecordRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_locations_near_point(
&mut self,
context: &opentelemetry::Context,
latitude: f64,
longitude: f64,
radius_km: f64,
) -> Result<Vec<LocationRecord>, DbError> {
trace_db_call(context, "query", "find_locations_near_point", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
// Use bounding box for initial filter (fast, indexed)
let (min_lat, max_lat, min_lon, max_lon) =
Self::bounding_box(latitude, longitude, radius_km);
let results = diesel::sql_query(
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
place_name, place_category, created_at, source_file
FROM location_history
WHERE latitude >= ?1 AND latitude <= ?2
AND longitude >= ?3 AND longitude <= ?4"
)
.bind::<diesel::sql_types::Float, _>(min_lat as f32)
.bind::<diesel::sql_types::Float, _>(max_lat as f32)
.bind::<diesel::sql_types::Float, _>(min_lon as f32)
.bind::<diesel::sql_types::Float, _>(max_lon as f32)
.load::<LocationRecordRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Refine with Haversine distance (in-memory, post-filter)
let filtered: Vec<LocationRecord> = results
.into_iter()
.map(|r| r.to_location_record())
.filter(|loc| {
let distance =
Self::haversine_distance(latitude, longitude, loc.latitude, loc.longitude);
distance <= radius_km
})
.collect();
log::info!(
"Found {} locations within {} km of ({}, {})",
filtered.len(),
radius_km,
latitude,
longitude
);
Ok(filtered)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn location_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
latitude: f64,
longitude: f64,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "location_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM location_history
WHERE timestamp = ?1 AND latitude = ?2 AND longitude = ?3",
)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.bind::<diesel::sql_types::Float, _>(latitude as f32)
.bind::<diesel::sql_types::Float, _>(longitude as f32)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_location_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get LocationHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM location_history")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
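
A minimal sketch of the nearest-fix lookup above, assuming the photo timestamp is already in epoch seconds; the 30-minute window is an arbitrary choice:

```rust
use crate::database::{DbError, LocationHistoryDao};

/// Finds the GPS fix closest to a photo's timestamp, within ±30 minutes.
fn locate_photo(
    dao: &mut dyn LocationHistoryDao,
    photo_taken_at: i64,
) -> Result<Option<(f64, f64)>, DbError> {
    let ctx = opentelemetry::Context::current();
    // The query orders by ABS(timestamp - target), so the closest fix in the window wins.
    dao.find_nearest_location(&ctx, photo_taken_at, 30 * 60)
        .map(|hit| hit.map(|loc| (loc.latitude, loc.longitude)))
}
```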

View File

@@ -1,733 +1,92 @@
use bcrypt::{DEFAULT_COST, hash, verify};
use bcrypt::{hash, verify, DEFAULT_COST};
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use dotenv::dotenv;
use crate::database::models::{
Favorite, ImageExif, InsertFavorite, InsertImageExif, InsertUser, User,
};
use crate::otel::trace_db_call;
use crate::database::models::{Favorite, InsertFavorite, InsertUser, User};
pub mod calendar_dao;
pub mod daily_summary_dao;
pub mod insights_dao;
pub mod location_dao;
pub mod models;
pub mod schema;
pub mod search_dao;
mod models;
mod schema;
pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
pub use insights_dao::{InsightDao, SqliteInsightDao};
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
fn connect() -> SqliteConnection {
dotenv().ok();
pub trait UserDao {
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
fn get_user(&mut self, user: &str, password: &str) -> Option<User>;
fn user_exists(&mut self, user: &str) -> bool;
}
pub struct SqliteUserDao {
connection: SqliteConnection,
}
impl Default for SqliteUserDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteUserDao {
pub fn new() -> Self {
Self {
connection: connect(),
}
}
}
#[cfg(test)]
pub mod test {
use diesel::{Connection, SqliteConnection};
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
pub fn in_memory_db_connection() -> SqliteConnection {
let mut connection = SqliteConnection::establish(":memory:")
.expect("Unable to create in-memory db connection");
connection
.run_pending_migrations(DB_MIGRATIONS)
.expect("Failure running DB migrations");
connection
}
}
impl UserDao for SqliteUserDao {
// TODO: Should probably use Result here
fn create_user(&mut self, user: &str, pass: &str) -> Option<User> {
use schema::users::dsl::*;
let hashed = hash(pass, DEFAULT_COST);
if let Ok(hash) = hashed {
diesel::insert_into(users)
.values(InsertUser {
username: user,
password: &hash,
})
.execute(&mut self.connection)
.unwrap();
users
.filter(username.eq(user))
.load::<User>(&mut self.connection)
.unwrap()
.first()
.cloned()
} else {
None
}
}
fn get_user(&mut self, user: &str, pass: &str) -> Option<User> {
use schema::users::dsl::*;
match users
.filter(username.eq(user))
.load::<User>(&mut self.connection)
.unwrap_or_default()
.first()
{
Some(u) if verify(pass, &u.password).unwrap_or(false) => Some(u.clone()),
_ => None,
}
}
fn user_exists(&mut self, user: &str) -> bool {
use schema::users::dsl::*;
!users
.filter(username.eq(user))
.load::<User>(&mut self.connection)
.unwrap_or_default()
.is_empty()
}
}
pub fn connect() -> SqliteConnection {
let db_url = dotenv::var("DATABASE_URL").expect("DATABASE_URL must be set");
SqliteConnection::establish(&db_url).expect("Error connecting to DB")
}
#[derive(Debug)]
pub struct DbError {
pub kind: DbErrorKind,
}
// TODO: Should probably use Result here
pub fn create_user(user: &str, pass: &str) -> Option<User> {
use schema::users::dsl::*;
impl DbError {
fn new(kind: DbErrorKind) -> Self {
DbError { kind }
}
fn exists() -> Self {
DbError::new(DbErrorKind::AlreadyExists)
}
}
#[derive(Debug, PartialEq)]
pub enum DbErrorKind {
AlreadyExists,
InsertError,
QueryError,
UpdateError,
}
pub trait FavoriteDao: Sync + Send {
fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError>;
fn remove_favorite(&mut self, user_id: i32, favorite_path: String);
fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError>;
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError>;
fn get_all_paths(&mut self) -> Result<Vec<String>, DbError>;
}
pub struct SqliteFavoriteDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteFavoriteDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteFavoriteDao {
pub fn new() -> Self {
SqliteFavoriteDao {
connection: Arc::new(Mutex::new(connect())),
}
}
}
impl FavoriteDao for SqliteFavoriteDao {
fn add_favorite(&mut self, user_id: i32, favorite_path: &str) -> Result<usize, DbError> {
use schema::favorites::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get FavoriteDao");
if favorites
.filter(userid.eq(user_id).and(path.eq(&favorite_path)))
.first::<Favorite>(connection.deref_mut())
.is_err()
{
diesel::insert_into(favorites)
.values(InsertFavorite {
userid: &user_id,
path: favorite_path,
})
.execute(connection.deref_mut())
.map_err(|_| DbError::new(DbErrorKind::InsertError))
} else {
Err(DbError::exists())
}
}
fn remove_favorite(&mut self, user_id: i32, favorite_path: String) {
use schema::favorites::dsl::*;
diesel::delete(favorites)
.filter(userid.eq(user_id).and(path.eq(favorite_path)))
.execute(self.connection.lock().unwrap().deref_mut())
let hashed = hash(pass, DEFAULT_COST);
if let Ok(hash) = hashed {
let connection = connect();
diesel::insert_into(users)
.values(InsertUser {
username: user,
password: &hash,
})
.execute(&connection)
.unwrap();
}
fn get_favorites(&mut self, user_id: i32) -> Result<Vec<Favorite>, DbError> {
use schema::favorites::dsl::*;
favorites
.filter(userid.eq(user_id))
.load::<Favorite>(self.connection.lock().unwrap().deref_mut())
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
use schema::favorites::dsl::*;
diesel::update(favorites.filter(path.eq(old_path)))
.set(path.eq(new_path))
.execute(self.connection.lock().unwrap().deref_mut())
.map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
Ok(())
}
fn get_all_paths(&mut self) -> Result<Vec<String>, DbError> {
use schema::favorites::dsl::*;
favorites
.select(path)
.distinct()
.load(self.connection.lock().unwrap().deref_mut())
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
pub trait ExifDao: Sync + Send {
fn store_exif(
&mut self,
context: &opentelemetry::Context,
exif_data: InsertImageExif,
) -> Result<ImageExif, DbError>;
fn get_exif(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<Option<ImageExif>, DbError>;
fn update_exif(
&mut self,
context: &opentelemetry::Context,
exif_data: InsertImageExif,
) -> Result<ImageExif, DbError>;
fn delete_exif(
&mut self,
context: &opentelemetry::Context,
file_path: &str,
) -> Result<(), DbError>;
fn get_all_with_date_taken(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<(String, i64)>, DbError>;
/// Batch load EXIF data for multiple file paths (single query)
fn get_exif_batch(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
) -> Result<Vec<ImageExif>, DbError>;
/// Query files by EXIF criteria with optional filters
fn query_by_exif(
&mut self,
context: &opentelemetry::Context,
camera_make: Option<&str>,
camera_model: Option<&str>,
lens_model: Option<&str>,
gps_bounds: Option<(f64, f64, f64, f64)>, // (min_lat, max_lat, min_lon, max_lon)
date_from: Option<i64>,
date_to: Option<i64>,
) -> Result<Vec<ImageExif>, DbError>;
/// Get distinct camera makes with counts
fn get_camera_makes(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<(String, i64)>, DbError>;
/// Update file path in EXIF database
fn update_file_path(
&mut self,
context: &opentelemetry::Context,
old_path: &str,
new_path: &str,
) -> Result<(), DbError>;
/// Get all file paths from EXIF database
fn get_all_file_paths(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError>;
/// Get files sorted by date with optional pagination
/// Returns (sorted_file_paths, total_count)
fn get_files_sorted_by_date(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
ascending: bool,
limit: Option<i64>,
offset: i64,
) -> Result<(Vec<String>, i64), DbError>;
/// Get all photos with GPS coordinates
/// Returns Vec<(file_path, latitude, longitude, date_taken)>
fn get_all_with_gps(
&mut self,
context: &opentelemetry::Context,
base_path: &str,
recursive: bool,
) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>;
}
pub struct SqliteExifDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteExifDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteExifDao {
pub fn new() -> Self {
SqliteExifDao {
connection: Arc::new(Mutex::new(connect())),
match users
.filter(username.eq(user))
.load::<User>(&connection)
.unwrap()
.first()
{
Some(u) => Some(u.clone()),
None => None,
}
} else {
None
}
}
impl ExifDao for SqliteExifDao {
fn store_exif(
&mut self,
context: &opentelemetry::Context,
exif_data: InsertImageExif,
) -> Result<ImageExif, DbError> {
trace_db_call(context, "insert", "store_exif", |_span| {
use schema::image_exif::dsl::*;
pub fn get_user(user: &str, pass: &str) -> Option<User> {
use schema::users::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::insert_into(image_exif)
.values(&exif_data)
.execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Insert error"))?;
image_exif
.filter(file_path.eq(&exif_data.file_path))
.first::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn get_exif(
&mut self,
context: &opentelemetry::Context,
path: &str,
) -> Result<Option<ImageExif>, DbError> {
trace_db_call(context, "query", "get_exif", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
// Try both normalized (forward slash) and Windows (backslash) paths
// since the database may contain either format
let normalized = path.replace('\\', "/");
let windows_path = path.replace('/', "\\");
match image_exif
.filter(file_path.eq(&normalized).or(file_path.eq(&windows_path)))
.first::<ImageExif>(connection.deref_mut())
{
Ok(exif) => Ok(Some(exif)),
Err(diesel::result::Error::NotFound) => Ok(None),
Err(_) => Err(anyhow::anyhow!("Query error")),
}
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_exif(
&mut self,
context: &opentelemetry::Context,
exif_data: InsertImageExif,
) -> Result<ImageExif, DbError> {
trace_db_call(context, "update", "update_exif", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(image_exif.filter(file_path.eq(&exif_data.file_path)))
.set((
camera_make.eq(&exif_data.camera_make),
camera_model.eq(&exif_data.camera_model),
lens_model.eq(&exif_data.lens_model),
width.eq(&exif_data.width),
height.eq(&exif_data.height),
orientation.eq(&exif_data.orientation),
gps_latitude.eq(&exif_data.gps_latitude),
gps_longitude.eq(&exif_data.gps_longitude),
gps_altitude.eq(&exif_data.gps_altitude),
focal_length.eq(&exif_data.focal_length),
aperture.eq(&exif_data.aperture),
shutter_speed.eq(&exif_data.shutter_speed),
iso.eq(&exif_data.iso),
date_taken.eq(&exif_data.date_taken),
last_modified.eq(&exif_data.last_modified),
))
.execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Update error"))?;
image_exif
.filter(file_path.eq(&exif_data.file_path))
.first::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn delete_exif(&mut self, context: &opentelemetry::Context, path: &str) -> Result<(), DbError> {
trace_db_call(context, "delete", "delete_exif", |_span| {
use schema::image_exif::dsl::*;
diesel::delete(image_exif.filter(file_path.eq(path)))
.execute(self.connection.lock().unwrap().deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_all_with_date_taken(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.select((file_path, date_taken))
.filter(date_taken.is_not_null())
.load::<(String, Option<i64>)>(connection.deref_mut())
.map(|records| {
records
.into_iter()
.filter_map(|(path, dt)| dt.map(|ts| (path, ts)))
.collect()
})
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_exif_batch(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
) -> Result<Vec<ImageExif>, DbError> {
trace_db_call(context, "query", "get_exif_batch", |_span| {
use schema::image_exif::dsl::*;
if file_paths.is_empty() {
return Ok(Vec::new());
}
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(file_path.eq_any(file_paths))
.load::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn query_by_exif(
&mut self,
context: &opentelemetry::Context,
camera_make_filter: Option<&str>,
camera_model_filter: Option<&str>,
lens_model_filter: Option<&str>,
gps_bounds: Option<(f64, f64, f64, f64)>,
date_from: Option<i64>,
date_to: Option<i64>,
) -> Result<Vec<ImageExif>, DbError> {
trace_db_call(context, "query", "query_by_exif", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
let mut query = image_exif.into_boxed();
// Camera filters (case-insensitive partial match)
if let Some(make) = camera_make_filter {
query = query.filter(camera_make.like(format!("%{}%", make)));
}
if let Some(model) = camera_model_filter {
query = query.filter(camera_model.like(format!("%{}%", model)));
}
if let Some(lens) = lens_model_filter {
query = query.filter(lens_model.like(format!("%{}%", lens)));
}
// GPS bounding box
if let Some((min_lat, max_lat, min_lon, max_lon)) = gps_bounds {
query = query
.filter(gps_latitude.between(min_lat as f32, max_lat as f32))
.filter(gps_longitude.between(min_lon as f32, max_lon as f32))
.filter(gps_latitude.is_not_null())
.filter(gps_longitude.is_not_null());
}
// Date range
if let Some(from) = date_from {
query = query.filter(date_taken.ge(from));
}
if let Some(to) = date_to {
query = query.filter(date_taken.le(to));
}
if date_from.is_some() || date_to.is_some() {
query = query.filter(date_taken.is_not_null());
}
query
.load::<ImageExif>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_camera_makes(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(context, "query", "get_camera_makes", |_span| {
use diesel::dsl::count;
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.filter(camera_make.is_not_null())
.group_by(camera_make)
.select((camera_make, count(id)))
.order(count(id).desc())
.load::<(Option<String>, i64)>(connection.deref_mut())
.map(|records| {
records
.into_iter()
.filter_map(|(make, cnt)| make.map(|m| (m, cnt)))
.collect()
})
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_file_path(
&mut self,
context: &opentelemetry::Context,
old_path: &str,
new_path: &str,
) -> Result<(), DbError> {
trace_db_call(context, "update", "update_file_path", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(image_exif.filter(file_path.eq(old_path)))
.set(file_path.eq(new_path))
.execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Update error"))?;
Ok(())
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn get_all_file_paths(
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "get_all_file_paths", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
.select(file_path)
.load(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_files_sorted_by_date(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
ascending: bool,
limit: Option<i64>,
offset: i64,
) -> Result<(Vec<String>, i64), DbError> {
trace_db_call(context, "query", "get_files_sorted_by_date", |span| {
use diesel::dsl::count_star;
use opentelemetry::KeyValue;
use opentelemetry::trace::Span;
use schema::image_exif::dsl::*;
span.set_attributes(vec![
KeyValue::new("file_count", file_paths.len() as i64),
KeyValue::new("ascending", ascending.to_string()),
KeyValue::new("limit", limit.map(|l| l.to_string()).unwrap_or_default()),
KeyValue::new("offset", offset.to_string()),
]);
if file_paths.is_empty() {
return Ok((Vec::new(), 0));
}
let connection = &mut *self.connection.lock().unwrap();
// Get total count of files that have EXIF data
let total_count: i64 = image_exif
.filter(file_path.eq_any(file_paths))
.select(count_star())
.first(connection)
.map_err(|_| anyhow::anyhow!("Count query error"))?;
// Build sorted query
let mut query = image_exif.filter(file_path.eq_any(file_paths)).into_boxed();
// Apply sorting
// Note: SQLite NULL handling varies - NULLs appear first for ASC, last for DESC by default
if ascending {
query = query.order(date_taken.asc());
} else {
query = query.order(date_taken.desc());
}
// Apply pagination if requested
if let Some(limit_val) = limit {
query = query.limit(limit_val).offset(offset);
}
// Execute and extract file paths
let results: Vec<String> = query
.select(file_path)
.load::<String>(connection)
.map_err(|_| anyhow::anyhow!("Query error"))?;
Ok((results, total_count))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_all_with_gps(
&mut self,
context: &opentelemetry::Context,
base_path: &str,
recursive: bool,
) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError> {
trace_db_call(context, "query", "get_all_with_gps", |span| {
use opentelemetry::KeyValue;
use opentelemetry::trace::Span;
use schema::image_exif::dsl::*;
span.set_attributes(vec![
KeyValue::new("base_path", base_path.to_string()),
KeyValue::new("recursive", recursive.to_string()),
]);
let connection = &mut *self.connection.lock().unwrap();
// Query all photos with non-null GPS coordinates
let mut query = image_exif
.filter(gps_latitude.is_not_null().and(gps_longitude.is_not_null()))
.into_boxed();
// Apply path filtering
// If base_path is empty or "/", return all GPS photos (no filter)
// Otherwise filter by path prefix
if !base_path.is_empty() && base_path != "/" {
// Match base path as prefix (with wildcard)
query = query.filter(file_path.like(format!("{}%", base_path)));
span.set_attribute(KeyValue::new("path_filter_applied", true));
} else {
span.set_attribute(KeyValue::new("path_filter_applied", false));
span.set_attribute(KeyValue::new("returning_all_gps_photos", true));
}
// Load full ImageExif records
let results: Vec<ImageExif> = query
.load::<ImageExif>(connection)
.map_err(|e| anyhow::anyhow!("GPS query error: {}", e))?;
// Convert to tuple format (path, lat, lon, date_taken)
// Filter out any rows where GPS is still None (shouldn't happen due to filter)
// Cast f32 GPS values to f64 for API compatibility
let filtered: Vec<(String, f64, f64, Option<i64>)> = results
.into_iter()
.filter_map(|exif| {
if let (Some(lat_val), Some(lon_val)) = (exif.gps_latitude, exif.gps_longitude)
{
Some((
exif.file_path,
lat_val as f64,
lon_val as f64,
exif.date_taken,
))
} else {
None
}
})
.collect();
span.set_attribute(KeyValue::new("result_count", filtered.len() as i64));
Ok(filtered)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
match users
.filter(username.eq(user))
.load::<User>(&connect())
.unwrap_or_default()
.first()
{
Some(u) if verify(pass, &u.password).unwrap_or(false) => Some(u.clone()),
_ => None,
}
}
pub fn user_exists(name: &str) -> bool {
use schema::users::dsl::*;
users
.filter(username.eq(name))
.load::<User>(&connect())
.unwrap_or_default()
.first()
.is_some()
}
pub fn add_favorite(user_id: i32, favorite_path: String) {
use schema::favorites::dsl::*;
let connection = connect();
diesel::insert_into(favorites)
.values(InsertFavorite {
userid: &user_id,
path: &favorite_path,
})
.execute(&connection)
.unwrap();
}
pub fn get_favorites(user_id: i32) -> Vec<Favorite> {
use schema::favorites::dsl::*;
favorites
.filter(userid.eq(user_id))
.load::<Favorite>(&connect())
.unwrap_or_default()
}
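
A minimal sketch of the duplicate-safe `FavoriteDao::add_favorite` defined in the newer half of this file, which reports repeats through `DbErrorKind::AlreadyExists`:

```rust
use crate::database::{DbErrorKind, FavoriteDao};

/// Adds a favorite once; a repeat call for the same (user, path) is treated as a no-op.
fn favorite_once(dao: &mut dyn FavoriteDao, user_id: i32, path: &str) {
    match dao.add_favorite(user_id, path) {
        Ok(_) => log::info!("favorited {}", path),
        Err(e) if e.kind == DbErrorKind::AlreadyExists => {
            log::debug!("{} already favorited, nothing to do", path)
        }
        Err(e) => log::error!("add_favorite failed: {:?}", e),
    }
}
```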

View File

@@ -1,8 +1,8 @@
use crate::database::schema::{favorites, image_exif, photo_insights, users};
use crate::database::schema::{favorites, users};
use serde::Serialize;
#[derive(Insertable)]
#[diesel(table_name = users)]
#[table_name = "users"]
pub struct InsertUser<'a> {
pub username: &'a str,
pub password: &'a str,
@@ -17,7 +17,7 @@ pub struct User {
}
#[derive(Insertable)]
#[diesel(table_name = favorites)]
#[table_name = "favorites"]
pub struct InsertFavorite<'a> {
pub userid: &'a i32,
pub path: &'a str,
@@ -29,67 +29,3 @@ pub struct Favorite {
pub userid: i32,
pub path: String,
}
#[derive(Insertable)]
#[diesel(table_name = image_exif)]
pub struct InsertImageExif {
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct ImageExif {
pub id: i32,
pub file_path: String,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f32>,
pub gps_longitude: Option<f32>,
pub gps_altitude: Option<f32>,
pub focal_length: Option<f32>,
pub aperture: Option<f32>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
pub created_time: i64,
pub last_modified: i64,
}
#[derive(Insertable)]
#[diesel(table_name = photo_insights)]
pub struct InsertPhotoInsight {
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct PhotoInsight {
pub id: i32,
pub file_path: String,
pub title: String,
pub summary: String,
pub generated_at: i64,
pub model_version: String,
}
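
The paired attribute lines in this diff reflect Diesel's 2.x derive syntax replacing the 1.x string form; a minimal sketch of the newer style, with a hypothetical struct name:

```rust
// Diesel 2.x: table_name is a path inside #[diesel(...)], not a quoted string attribute.
#[derive(diesel::Insertable)]
#[diesel(table_name = crate::database::schema::favorites)]
pub struct NewFavorite<'a> {
    pub userid: &'a i32,
    pub path: &'a str,
}
```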

View File

@@ -1,37 +1,4 @@
// @generated automatically by Diesel CLI.
diesel::table! {
calendar_events (id) {
id -> Integer,
event_uid -> Nullable<Text>,
summary -> Text,
description -> Nullable<Text>,
location -> Nullable<Text>,
start_time -> BigInt,
end_time -> BigInt,
all_day -> Bool,
organizer -> Nullable<Text>,
attendees -> Nullable<Text>,
embedding -> Nullable<Binary>,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
daily_conversation_summaries (id) {
id -> Integer,
date -> Text,
contact -> Text,
summary -> Text,
message_count -> Integer,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
table! {
favorites (id) {
id -> Integer,
userid -> Integer,
@@ -39,112 +6,7 @@ diesel::table! {
}
}
diesel::table! {
image_exif (id) {
id -> Integer,
file_path -> Text,
camera_make -> Nullable<Text>,
camera_model -> Nullable<Text>,
lens_model -> Nullable<Text>,
width -> Nullable<Integer>,
height -> Nullable<Integer>,
orientation -> Nullable<Integer>,
gps_latitude -> Nullable<Float>,
gps_longitude -> Nullable<Float>,
gps_altitude -> Nullable<Float>,
focal_length -> Nullable<Float>,
aperture -> Nullable<Float>,
shutter_speed -> Nullable<Text>,
iso -> Nullable<Integer>,
date_taken -> Nullable<BigInt>,
created_time -> BigInt,
last_modified -> BigInt,
}
}
diesel::table! {
knowledge_embeddings (id) {
id -> Integer,
keyword -> Text,
description -> Text,
category -> Nullable<Text>,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
location_history (id) {
id -> Integer,
timestamp -> BigInt,
latitude -> Float,
longitude -> Float,
accuracy -> Nullable<Integer>,
activity -> Nullable<Text>,
activity_confidence -> Nullable<Integer>,
place_name -> Nullable<Text>,
place_category -> Nullable<Text>,
embedding -> Nullable<Binary>,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
message_embeddings (id) {
id -> Integer,
contact -> Text,
body -> Text,
timestamp -> BigInt,
is_sent -> Bool,
embedding -> Binary,
created_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
photo_insights (id) {
id -> Integer,
file_path -> Text,
title -> Text,
summary -> Text,
generated_at -> BigInt,
model_version -> Text,
}
}
diesel::table! {
search_history (id) {
id -> Integer,
timestamp -> BigInt,
query -> Text,
search_engine -> Nullable<Text>,
embedding -> Binary,
created_at -> BigInt,
source_file -> Nullable<Text>,
}
}
diesel::table! {
tagged_photo (id) {
id -> Integer,
photo_name -> Text,
tag_id -> Integer,
created_time -> BigInt,
}
}
diesel::table! {
tags (id) {
id -> Integer,
name -> Text,
created_time -> BigInt,
}
}
diesel::table! {
table! {
users (id) {
id -> Integer,
username -> Text,
@@ -152,19 +14,4 @@ diesel::table! {
}
}
diesel::joinable!(tagged_photo -> tags (tag_id));
diesel::allow_tables_to_appear_in_same_query!(
calendar_events,
daily_conversation_summaries,
favorites,
image_exif,
knowledge_embeddings,
location_history,
message_embeddings,
photo_insights,
search_history,
tagged_photo,
tags,
users,
);
allow_tables_to_appear_in_same_query!(favorites, users,);

View File

@@ -1,516 +0,0 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use serde::Serialize;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Represents a search history record
#[derive(Serialize, Clone, Debug)]
pub struct SearchRecord {
pub id: i32,
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
pub created_at: i64,
pub source_file: Option<String>,
}
/// Data for inserting a new search record
#[derive(Clone, Debug)]
pub struct InsertSearchRecord {
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
pub embedding: Vec<f32>, // 768-dim, REQUIRED
pub created_at: i64,
pub source_file: Option<String>,
}
pub trait SearchHistoryDao: Sync + Send {
/// Store search with embedding
fn store_search(
&mut self,
context: &opentelemetry::Context,
search: InsertSearchRecord,
) -> Result<SearchRecord, DbError>;
/// Batch insert searches
fn store_searches_batch(
&mut self,
context: &opentelemetry::Context,
searches: Vec<InsertSearchRecord>,
) -> Result<usize, DbError>;
/// Find searches in time range (for temporal context)
fn find_searches_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<SearchRecord>, DbError>;
/// Find semantically similar searches (PRIMARY - embeddings shine here)
fn find_similar_searches(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<SearchRecord>, DbError>;
/// Hybrid: Time window + semantic ranking
fn find_relevant_searches_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<SearchRecord>, DbError>;
/// Deduplication check
fn search_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
query: &str,
) -> Result<bool, DbError>;
/// Get count of search records
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
pub struct SqliteSearchHistoryDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteSearchHistoryDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteSearchHistoryDao {
pub fn new() -> Self {
SqliteSearchHistoryDao {
connection: Arc::new(Mutex::new(connect())),
}
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
use zerocopy::IntoBytes;
vec.as_bytes().to_vec()
}
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
if !bytes.len().is_multiple_of(4) {
return Err(DbError::new(DbErrorKind::QueryError));
}
let count = bytes.len() / 4;
let mut vec = Vec::with_capacity(count);
for chunk in bytes.chunks_exact(4) {
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
vec.push(float);
}
Ok(vec)
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if magnitude_a == 0.0 || magnitude_b == 0.0 {
return 0.0;
}
dot_product / (magnitude_a * magnitude_b)
}
}
#[derive(QueryableByName)]
struct SearchRecordWithVectorRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::BigInt)]
timestamp: i64,
#[diesel(sql_type = diesel::sql_types::Text)]
query: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
search_engine: Option<String>,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
#[diesel(sql_type = diesel::sql_types::BigInt)]
created_at: i64,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
source_file: Option<String>,
}
impl SearchRecordWithVectorRow {
fn to_search_record(&self) -> SearchRecord {
SearchRecord {
id: self.id,
timestamp: self.timestamp,
query: self.query.clone(),
search_engine: self.search_engine.clone(),
created_at: self.created_at,
source_file: self.source_file.clone(),
}
}
}
#[derive(QueryableByName)]
struct LastInsertRowId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
impl SearchHistoryDao for SqliteSearchHistoryDao {
fn store_search(
&mut self,
context: &opentelemetry::Context,
search: InsertSearchRecord,
) -> Result<SearchRecord, DbError> {
trace_db_call(context, "insert", "store_search", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
// Validate embedding dimensions (REQUIRED for searches)
if search.embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)",
search.embedding.len()
));
}
let embedding_bytes = Self::serialize_vector(&search.embedding);
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+query)
diesel::sql_query(
"INSERT OR IGNORE INTO search_history
(timestamp, query, search_engine, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
)
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
.bind::<diesel::sql_types::Text, _>(&search.query)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.search_engine)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.source_file)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
.get_result::<LastInsertRowId>(conn.deref_mut())
.map(|r| r.id)
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
Ok(SearchRecord {
id: row_id,
timestamp: search.timestamp,
query: search.query,
search_engine: search.search_engine,
created_at: search.created_at,
source_file: search.source_file,
})
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn store_searches_batch(
&mut self,
context: &opentelemetry::Context,
searches: Vec<InsertSearchRecord>,
) -> Result<usize, DbError> {
trace_db_call(context, "insert", "store_searches_batch", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
let mut inserted = 0;
conn.transaction::<_, anyhow::Error, _>(|conn| {
for search in searches {
// Validate embedding (REQUIRED)
if search.embedding.len() != 768 {
log::warn!(
"Skipping search with invalid embedding dimensions: {}",
search.embedding.len()
);
continue;
}
let embedding_bytes = Self::serialize_vector(&search.embedding);
let rows_affected = diesel::sql_query(
"INSERT OR IGNORE INTO search_history
(timestamp, query, search_engine, embedding, created_at, source_file)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
)
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
.bind::<diesel::sql_types::Text, _>(&search.query)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&search.search_engine,
)
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
&search.source_file,
)
.execute(conn)
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
if rows_affected > 0 {
inserted += 1;
}
}
Ok(())
})
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
Ok(inserted)
})
.map_err(|_| DbError::new(DbErrorKind::InsertError))
}
fn find_searches_in_range(
&mut self,
context: &opentelemetry::Context,
start_ts: i64,
end_ts: i64,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_searches_in_range", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history
WHERE timestamp >= ?1 AND timestamp <= ?2
ORDER BY timestamp DESC",
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_similar_searches(
&mut self,
context: &opentelemetry::Context,
query_embedding: &[f32],
limit: usize,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_similar_searches", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
if query_embedding.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_embedding.len()
));
}
// Load all searches with embeddings
let results = diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history",
)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Compute similarities
let mut scored_searches: Vec<(f32, SearchRecord)> = results
.into_iter()
.filter_map(|row| {
if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
let similarity = Self::cosine_similarity(query_embedding, &emb);
Some((similarity, row.to_search_record()))
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_searches
.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!("Found {} similar searches", scored_searches.len());
if !scored_searches.is_empty() {
log::info!(
"Top similarity: {:.4} for query: '{}'",
scored_searches[0].0,
scored_searches[0].1.query
);
}
Ok(scored_searches
.into_iter()
.take(limit)
.map(|(_, search)| search)
.collect())
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_relevant_searches_hybrid(
&mut self,
context: &opentelemetry::Context,
center_timestamp: i64,
time_window_days: i64,
query_embedding: Option<&[f32]>,
limit: usize,
) -> Result<Vec<SearchRecord>, DbError> {
trace_db_call(context, "query", "find_relevant_searches_hybrid", |_span| {
let window_seconds = time_window_days * 86400;
let start_ts = center_timestamp - window_seconds;
let end_ts = center_timestamp + window_seconds;
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
// Step 1: Time-based filter (fast, indexed)
let searches_in_range = diesel::sql_query(
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
FROM search_history
WHERE timestamp >= ?1 AND timestamp <= ?2",
)
.bind::<diesel::sql_types::BigInt, _>(start_ts)
.bind::<diesel::sql_types::BigInt, _>(end_ts)
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
// Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 {
return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)",
query_emb.len()
));
}
let mut scored_searches: Vec<(f32, SearchRecord)> = searches_in_range
.into_iter()
.filter_map(|row| {
if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
let similarity = Self::cosine_similarity(query_emb, &emb);
Some((similarity, row.to_search_record()))
} else {
None
}
})
.collect();
// Sort by similarity descending
scored_searches
.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
log::info!(
"Hybrid query: {} searches in time range, ranked by similarity",
scored_searches.len()
);
if !scored_searches.is_empty() {
log::info!(
"Top similarity: {:.4} for '{}'",
scored_searches[0].0,
scored_searches[0].1.query
);
}
Ok(scored_searches
.into_iter()
.take(limit)
.map(|(_, search)| search)
.collect())
} else {
// No semantic ranking, just return time-sorted (most recent first)
log::info!(
"Time-only query: {} searches in range",
searches_in_range.len()
);
Ok(searches_in_range
.into_iter()
.take(limit)
.map(|r| r.to_search_record())
.collect())
}
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn search_exists(
&mut self,
context: &opentelemetry::Context,
timestamp: i64,
query: &str,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "search_exists", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
count: i32,
}
let result: CountResult = diesel::sql_query(
"SELECT COUNT(*) as count FROM search_history WHERE timestamp = ?1 AND query = ?2",
)
.bind::<diesel::sql_types::BigInt, _>(timestamp)
.bind::<diesel::sql_types::Text, _>(query)
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count > 0)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
trace_db_call(context, "query", "get_search_count", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get SearchHistoryDao");
#[derive(QueryableByName)]
struct CountResult {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
let result: CountResult =
diesel::sql_query("SELECT COUNT(*) as count FROM search_history")
.get_result(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
Ok(result.count)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}
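
A toy worked example of the cosine score used for ranking above, shrunk to 3 dimensions (the real embeddings are 768-dimensional):

```rust
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
}

fn main() {
    // Parallel vectors score 1.0 regardless of magnitude; orthogonal vectors score 0.0.
    assert!((cosine(&[1.0, 0.0, 0.0], &[2.0, 0.0, 0.0]) - 1.0).abs() < 1e-6);
    assert!(cosine(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0]).abs() < 1e-6);
}
```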

View File

@@ -1,14 +0,0 @@
use actix_web::{error::InternalError, http::StatusCode};
pub trait IntoHttpError<T> {
fn into_http_internal_err(self) -> Result<T, actix_web::Error>;
}
impl<T> IntoHttpError<T> for Result<T, anyhow::Error> {
fn into_http_internal_err(self) -> Result<T, actix_web::Error> {
self.map_err(|e| {
log::error!("Map to err: {:?}", e);
InternalError::new(e, StatusCode::INTERNAL_SERVER_ERROR).into()
})
}
}
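
A minimal usage sketch of the trait above (not part of the diff): a handler maps any anyhow::Error into a logged 500 response. The module path and the load_settings helper are assumptions.

use actix_web::HttpResponse;
use image_api::error::IntoHttpError; // assumed crate path for the module shown above

// Hypothetical fallible helper returning anyhow::Result.
fn load_settings() -> anyhow::Result<String> {
    Ok("ok".to_string())
}

async fn settings_handler() -> Result<HttpResponse, actix_web::Error> {
    // Any anyhow::Error is logged and converted to a 500 by into_http_internal_err().
    let settings = load_settings().into_http_internal_err()?;
    Ok(HttpResponse::Ok().body(settings))
}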

View File

@@ -1,319 +0,0 @@
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use anyhow::{Result, anyhow};
use exif::{In, Reader, Tag, Value};
use log::debug;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExifData {
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
pub width: Option<i32>,
pub height: Option<i32>,
pub orientation: Option<i32>,
pub gps_latitude: Option<f64>,
pub gps_longitude: Option<f64>,
pub gps_altitude: Option<f64>,
pub focal_length: Option<f64>,
pub aperture: Option<f64>,
pub shutter_speed: Option<String>,
pub iso: Option<i32>,
pub date_taken: Option<i64>,
}
pub fn supports_exif(path: &Path) -> bool {
if let Some(ext) = path.extension() {
let ext_lower = ext.to_string_lossy().to_lowercase();
matches!(
ext_lower.as_str(),
// JPEG formats
"jpg" | "jpeg" |
// TIFF and RAW formats based on TIFF
"tiff" | "tif" | "nef" | "cr2" | "cr3" | "arw" | "dng" | "raf" | "orf" | "rw2" | "pef" | "srw" |
// HEIF and variants
"heif" | "heic" | "avif" |
// PNG
"png" |
// WebP
"webp"
)
} else {
false
}
}
pub fn extract_exif_from_path(path: &Path) -> Result<ExifData> {
debug!("Extracting EXIF from: {:?}", path);
if !supports_exif(path) {
return Err(anyhow!("File type does not support EXIF"));
}
let file = File::open(path)?;
let mut bufreader = BufReader::new(file);
let exifreader = Reader::new();
let exif = exifreader.read_from_container(&mut bufreader)?;
let mut data = ExifData::default();
for field in exif.fields() {
match field.tag {
Tag::Make => {
data.camera_make = get_string_value(field);
}
Tag::Model => {
data.camera_model = get_string_value(field);
}
Tag::LensModel => {
data.lens_model = get_string_value(field);
}
Tag::PixelXDimension | Tag::ImageWidth => {
if data.width.is_none() {
data.width = get_u32_value(field).map(|v| v as i32);
}
}
Tag::PixelYDimension | Tag::ImageLength => {
if data.height.is_none() {
data.height = get_u32_value(field).map(|v| v as i32);
}
}
Tag::Orientation => {
data.orientation = get_u32_value(field).map(|v| v as i32);
}
Tag::FocalLength => {
data.focal_length = get_rational_value(field);
}
Tag::FNumber => {
data.aperture = get_rational_value(field);
}
Tag::ExposureTime => {
data.shutter_speed = get_rational_string(field);
}
Tag::PhotographicSensitivity | Tag::ISOSpeed => {
if data.iso.is_none() {
data.iso = get_u32_value(field).map(|v| v as i32);
}
}
Tag::DateTime | Tag::DateTimeOriginal => {
if data.date_taken.is_none() {
data.date_taken = parse_exif_datetime(field);
}
}
_ => {}
}
}
// Extract GPS coordinates
if let Some(lat) = extract_gps_coordinate(&exif, Tag::GPSLatitude, Tag::GPSLatitudeRef) {
data.gps_latitude = Some(lat);
}
if let Some(lon) = extract_gps_coordinate(&exif, Tag::GPSLongitude, Tag::GPSLongitudeRef) {
data.gps_longitude = Some(lon);
}
if let Some(alt) = extract_gps_altitude(&exif) {
data.gps_altitude = Some(alt);
}
debug!("Extracted EXIF data: {:?}", data);
Ok(data)
}
fn get_string_value(field: &exif::Field) -> Option<String> {
match &field.value {
Value::Ascii(vec) => {
if let Some(bytes) = vec.first() {
String::from_utf8(bytes.to_vec())
.ok()
.map(|s| s.trim_end_matches('\0').to_string())
} else {
None
}
}
_ => {
let display = field.display_value().to_string();
if display.is_empty() {
None
} else {
Some(display)
}
}
}
}
fn get_u32_value(field: &exif::Field) -> Option<u32> {
match &field.value {
Value::Short(vec) => vec.first().map(|&v| v as u32),
Value::Long(vec) => vec.first().copied(),
_ => None,
}
}
fn get_rational_value(field: &exif::Field) -> Option<f64> {
match &field.value {
Value::Rational(vec) => {
if let Some(rational) = vec.first() {
if rational.denom == 0 {
None
} else {
Some(rational.num as f64 / rational.denom as f64)
}
} else {
None
}
}
_ => None,
}
}
fn get_rational_string(field: &exif::Field) -> Option<String> {
match &field.value {
Value::Rational(vec) => {
if let Some(rational) = vec.first() {
if rational.denom == 0 {
None
} else if rational.num < rational.denom {
Some(format!("{}/{}", rational.num, rational.denom))
} else {
let value = rational.num as f64 / rational.denom as f64;
Some(format!("{:.2}", value))
}
} else {
None
}
}
_ => None,
}
}
fn parse_exif_datetime(field: &exif::Field) -> Option<i64> {
if let Some(datetime_str) = get_string_value(field) {
use chrono::NaiveDateTime;
// EXIF datetime format: "YYYY:MM:DD HH:MM:SS"
// Note: EXIF dates are local time without timezone info
// We return the timestamp as if it were UTC, and the client will display it as-is
NaiveDateTime::parse_from_str(&datetime_str, "%Y:%m:%d %H:%M:%S")
.ok()
.map(|dt| dt.and_utc().timestamp())
} else {
None
}
}
fn extract_gps_coordinate(exif: &exif::Exif, coord_tag: Tag, ref_tag: Tag) -> Option<f64> {
let coord_field = exif.get_field(coord_tag, In::PRIMARY)?;
let ref_field = exif.get_field(ref_tag, In::PRIMARY)?;
let coordinates = match &coord_field.value {
Value::Rational(vec) => {
if vec.len() < 3 {
return None;
}
let degrees = vec[0].num as f64 / vec[0].denom as f64;
let minutes = vec[1].num as f64 / vec[1].denom as f64;
let seconds = vec[2].num as f64 / vec[2].denom as f64;
degrees + (minutes / 60.0) + (seconds / 3600.0)
}
_ => return None,
};
let reference = get_string_value(ref_field)?;
let sign = if reference.starts_with('S') || reference.starts_with('W') {
-1.0
} else {
1.0
};
Some(coordinates * sign)
}
fn extract_gps_altitude(exif: &exif::Exif) -> Option<f64> {
let alt_field = exif.get_field(Tag::GPSAltitude, In::PRIMARY)?;
match &alt_field.value {
Value::Rational(vec) => {
if let Some(rational) = vec.first() {
if rational.denom == 0 {
None
} else {
let altitude = rational.num as f64 / rational.denom as f64;
// Check if below sea level
if let Some(ref_field) = exif.get_field(Tag::GPSAltitudeRef, In::PRIMARY)
&& let Some(ref_val) = get_u32_value(ref_field)
&& ref_val == 1
{
return Some(-altitude);
}
Some(altitude)
}
} else {
None
}
}
_ => None,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_supports_exif_jpeg() {
assert!(supports_exif(Path::new("test.jpg")));
assert!(supports_exif(Path::new("test.jpeg")));
assert!(supports_exif(Path::new("test.JPG")));
}
#[test]
fn test_supports_exif_raw_formats() {
assert!(supports_exif(Path::new("test.nef"))); // Nikon
assert!(supports_exif(Path::new("test.NEF")));
assert!(supports_exif(Path::new("test.cr2"))); // Canon
assert!(supports_exif(Path::new("test.cr3"))); // Canon
assert!(supports_exif(Path::new("test.arw"))); // Sony
assert!(supports_exif(Path::new("test.dng"))); // Adobe DNG
}
#[test]
fn test_supports_exif_tiff() {
assert!(supports_exif(Path::new("test.tiff")));
assert!(supports_exif(Path::new("test.tif")));
assert!(supports_exif(Path::new("test.TIFF")));
}
#[test]
fn test_supports_exif_heif() {
assert!(supports_exif(Path::new("test.heif")));
assert!(supports_exif(Path::new("test.heic")));
assert!(supports_exif(Path::new("test.avif")));
}
#[test]
fn test_supports_exif_png_webp() {
assert!(supports_exif(Path::new("test.png")));
assert!(supports_exif(Path::new("test.PNG")));
assert!(supports_exif(Path::new("test.webp")));
assert!(supports_exif(Path::new("test.WEBP")));
}
#[test]
fn test_supports_exif_unsupported() {
assert!(!supports_exif(Path::new("test.mp4")));
assert!(!supports_exif(Path::new("test.mov")));
assert!(!supports_exif(Path::new("test.txt")));
assert!(!supports_exif(Path::new("test.gif")));
}
#[test]
fn test_supports_exif_no_extension() {
assert!(!supports_exif(Path::new("test")));
}
}
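
A small usage sketch of the extractor above (not part of the diff), assuming the image_api crate path: check the extension first, then read the parsed fields. The GPS helpers convert degrees/minutes/seconds into signed decimal degrees, e.g. 37° 46' 29.64" N becomes 37 + 46/60 + 29.64/3600 ≈ 37.7749.

use std::path::Path;
use image_api::exif::{extract_exif_from_path, supports_exif}; // assumed crate path

fn print_exif(path: &str) -> anyhow::Result<()> {
    let p = Path::new(path);
    if !supports_exif(p) {
        println!("{} has no EXIF-capable extension", path);
        return Ok(());
    }
    let data = extract_exif_from_path(p)?;
    // GPS fields arrive as signed decimal degrees (negative for S/W).
    println!(
        "{:?} {:?} at ({:?}, {:?})",
        data.camera_make, data.camera_model, data.gps_latitude, data.gps_longitude
    );
    Ok(())
}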

View File

@@ -1,88 +0,0 @@
use std::path::Path;
use walkdir::DirEntry;
/// Supported image file extensions
pub const IMAGE_EXTENSIONS: &[&str] = &[
"jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef",
];
/// Supported video file extensions
pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];
/// Check if a path has an image extension
pub fn is_image_file(path: &Path) -> bool {
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
let ext_lower = ext.to_lowercase();
IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
} else {
false
}
}
/// Check if a path has a video extension
pub fn is_video_file(path: &Path) -> bool {
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
let ext_lower = ext.to_lowercase();
VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
} else {
false
}
}
/// Check if a path has a supported media extension (image or video)
pub fn is_media_file(path: &Path) -> bool {
is_image_file(path) || is_video_file(path)
}
/// Check if a DirEntry is an image file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_image(entry: &DirEntry) -> bool {
is_image_file(entry.path())
}
/// Check if a DirEntry is a video file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_video(entry: &DirEntry) -> bool {
is_video_file(entry.path())
}
/// Check if a DirEntry is a media file (for walkdir usage)
#[allow(dead_code)]
pub fn direntry_is_media(entry: &DirEntry) -> bool {
is_media_file(entry.path())
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
#[test]
fn test_is_image_file() {
assert!(is_image_file(Path::new("photo.jpg")));
assert!(is_image_file(Path::new("photo.JPG")));
assert!(is_image_file(Path::new("photo.png")));
assert!(is_image_file(Path::new("photo.nef")));
assert!(!is_image_file(Path::new("video.mp4")));
assert!(!is_image_file(Path::new("document.txt")));
}
#[test]
fn test_is_video_file() {
assert!(is_video_file(Path::new("video.mp4")));
assert!(is_video_file(Path::new("video.MP4")));
assert!(is_video_file(Path::new("video.mov")));
assert!(is_video_file(Path::new("video.avi")));
assert!(!is_video_file(Path::new("photo.jpg")));
assert!(!is_video_file(Path::new("document.txt")));
}
#[test]
fn test_is_media_file() {
assert!(is_media_file(Path::new("photo.jpg")));
assert!(is_media_file(Path::new("video.mp4")));
assert!(is_media_file(Path::new("photo.PNG")));
assert!(!is_media_file(Path::new("document.txt")));
assert!(!is_media_file(Path::new("no_extension")));
}
}

File diff suppressed because it is too large

View File

@@ -1,121 +0,0 @@
/// Geographic calculation utilities for GPS-based search
use std::f64;
/// Calculate distance between two GPS coordinates using the Haversine formula.
/// Returns distance in kilometers.
///
/// # Arguments
/// * `lat1` - Latitude of first point in decimal degrees
/// * `lon1` - Longitude of first point in decimal degrees
/// * `lat2` - Latitude of second point in decimal degrees
/// * `lon2` - Longitude of second point in decimal degrees
///
/// # Example
/// ```
/// use image_api::geo::haversine_distance;
/// let distance = haversine_distance(37.7749, -122.4194, 34.0522, -118.2437);
/// // Distance between San Francisco and Los Angeles (~559 km)
/// ```
pub fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
const EARTH_RADIUS_KM: f64 = 6371.0;
let lat1_rad = lat1.to_radians();
let lat2_rad = lat2.to_radians();
let delta_lat = (lat2 - lat1).to_radians();
let delta_lon = (lon2 - lon1).to_radians();
let a = (delta_lat / 2.0).sin().powi(2)
+ lat1_rad.cos() * lat2_rad.cos() * (delta_lon / 2.0).sin().powi(2);
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
EARTH_RADIUS_KM * c
}
/// Calculate bounding box for GPS radius query.
/// Returns (min_lat, max_lat, min_lon, max_lon) that encompasses the search radius.
///
/// This is used as a fast first-pass filter for GPS queries, narrowing down
/// candidates before applying the more expensive Haversine distance calculation.
///
/// # Arguments
/// * `lat` - Center latitude in decimal degrees
/// * `lon` - Center longitude in decimal degrees
/// * `radius_km` - Search radius in kilometers
///
/// # Returns
/// A tuple of (min_lat, max_lat, min_lon, max_lon) in decimal degrees
pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
const EARTH_RADIUS_KM: f64 = 6371.0;
// Calculate latitude delta (same at all latitudes)
let lat_delta = (radius_km / EARTH_RADIUS_KM) * (180.0 / f64::consts::PI);
// Calculate longitude delta (varies with latitude)
let lon_delta = lat_delta / lat.to_radians().cos();
(
lat - lat_delta, // min_lat
lat + lat_delta, // max_lat
lon - lon_delta, // min_lon
lon + lon_delta, // max_lon
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_haversine_distance_sf_to_la() {
// San Francisco to Los Angeles
let distance = haversine_distance(37.7749, -122.4194, 34.0522, -118.2437);
// Should be approximately 559 km
assert!(
(distance - 559.0).abs() < 10.0,
"Distance should be ~559km, got {}",
distance
);
}
#[test]
fn test_haversine_distance_same_point() {
// Same point should have zero distance
let distance = haversine_distance(37.7749, -122.4194, 37.7749, -122.4194);
assert!(
distance < 0.001,
"Same point should have ~0 distance, got {}",
distance
);
}
#[test]
fn test_gps_bounding_box() {
// Test bounding box calculation for 10km radius around San Francisco
let (min_lat, max_lat, min_lon, max_lon) = gps_bounding_box(37.7749, -122.4194, 10.0);
// Verify the bounds are reasonable
assert!(min_lat < 37.7749, "min_lat should be less than center");
assert!(max_lat > 37.7749, "max_lat should be greater than center");
assert!(min_lon < -122.4194, "min_lon should be less than center");
assert!(max_lon > -122.4194, "max_lon should be greater than center");
// Verify bounds span roughly the right distance
let lat_span = max_lat - min_lat;
assert!(
lat_span > 0.1 && lat_span < 0.3,
"Latitude span should be reasonable for 10km"
);
}
#[test]
fn test_haversine_distance_across_equator() {
// Test across equator
let distance = haversine_distance(1.0, 0.0, -1.0, 0.0);
// Should be approximately 222 km
assert!(
(distance - 222.0).abs() < 5.0,
"Distance should be ~222km, got {}",
distance
);
}
}
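
The gps_bounding_box doc comment describes a two-pass radius search: a cheap rectangle filter first, then the exact Haversine check. A minimal sketch of that pattern over in-memory points (not part of the diff); the Photo struct is hypothetical, and in a real DAO the first pass would be an indexed WHERE clause.

use image_api::geo::{gps_bounding_box, haversine_distance}; // assumed crate path

// Hypothetical record carrying GPS coordinates pulled from EXIF.
struct Photo {
    path: String,
    lat: f64,
    lon: f64,
}

fn photos_near(photos: &[Photo], lat: f64, lon: f64, radius_km: f64) -> Vec<String> {
    let (min_lat, max_lat, min_lon, max_lon) = gps_bounding_box(lat, lon, radius_km);
    photos
        .iter()
        // Pass 1: cheap bounding-box test.
        .filter(|p| p.lat >= min_lat && p.lat <= max_lat && p.lon >= min_lon && p.lon <= max_lon)
        // Pass 2: exact great-circle distance.
        .filter(|p| haversine_distance(lat, lon, p.lat, p.lon) <= radius_km)
        .map(|p| p.path.clone())
        .collect()
}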

View File

@@ -1,45 +0,0 @@
#[macro_use]
extern crate diesel;
pub mod ai;
pub mod auth;
pub mod cleanup;
pub mod data;
pub mod database;
pub mod error;
pub mod exif;
pub mod file_types;
pub mod files;
pub mod geo;
pub mod memories;
pub mod otel;
pub mod parsers;
pub mod service;
pub mod state;
pub mod tags;
#[cfg(test)]
pub mod testhelpers;
pub mod utils;
pub mod video;
// Re-export commonly used types
pub use data::{Claims, ThumbnailRequest};
pub use database::{connect, schema};
pub use state::AppState;
// Stub functions for modules that reference main.rs
// These are not used by cleanup_files binary
use std::path::Path;
use walkdir::DirEntry;
pub fn create_thumbnails() {
// Stub - implemented in main.rs
}
pub fn update_media_counts(_media_dir: &Path) {
// Stub - implemented in main.rs
}
pub fn is_video(entry: &DirEntry) -> bool {
file_types::direntry_is_video(entry)
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,112 +0,0 @@
use actix_web::HttpRequest;
use actix_web::http::header::HeaderMap;
use opentelemetry::global::{BoxedSpan, BoxedTracer};
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::trace::{Span, Status, Tracer};
use opentelemetry::{Context, KeyValue, global};
use opentelemetry_appender_log::OpenTelemetryLogBridge;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::{BatchLogProcessor, SdkLoggerProvider};
use opentelemetry_sdk::propagation::TraceContextPropagator;
pub fn global_tracer() -> BoxedTracer {
global::tracer("image-server")
}
#[allow(dead_code)]
pub fn init_tracing() {
let resources = Resource::builder()
.with_attributes([
KeyValue::new("service.name", "image-server"),
KeyValue::new("service.version", env!("CARGO_PKG_VERSION")),
])
.build();
let span_exporter = opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_endpoint(std::env::var("OTLP_OTLS_ENDPOINT").unwrap())
.build()
.unwrap();
let tracer_provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(span_exporter)
.with_resource(resources)
.build();
global::set_tracer_provider(tracer_provider);
}
#[allow(dead_code)]
pub fn init_logs() {
let otlp_exporter = opentelemetry_otlp::LogExporter::builder()
.with_tonic()
.with_endpoint(std::env::var("OTLP_OTLS_ENDPOINT").unwrap())
.build()
.unwrap();
let exporter = opentelemetry_stdout::LogExporter::default();
let resources = Resource::builder()
.with_attributes([
KeyValue::new("service.name", "image-server"),
KeyValue::new("service.version", env!("CARGO_PKG_VERSION")),
])
.build();
let log_provider = SdkLoggerProvider::builder()
.with_log_processor(BatchLogProcessor::builder(exporter).build())
.with_log_processor(BatchLogProcessor::builder(otlp_exporter).build())
.with_resource(resources)
.build();
let otel_log_appender = OpenTelemetryLogBridge::new(&log_provider);
log::set_boxed_logger(Box::new(otel_log_appender)).expect("Unable to set boxed logger");
//TODO: Still set this with the env? Ideally we still have a clean/simple local logger for local dev
log::set_max_level(log::LevelFilter::Info);
}
struct HeaderExtractor<'a>(&'a HeaderMap);
impl<'a> opentelemetry::propagation::Extractor for HeaderExtractor<'a> {
fn get(&self, key: &str) -> Option<&str> {
self.0.get(key).and_then(|v| v.to_str().ok())
}
fn keys(&self) -> Vec<&str> {
self.0.keys().map(|k| k.as_str()).collect()
}
}
pub fn extract_context_from_request(req: &HttpRequest) -> Context {
let propagator = TraceContextPropagator::new();
propagator.extract(&HeaderExtractor(req.headers()))
}
pub fn trace_db_call<F, O>(
context: &Context,
query_type: &str,
operation: &str,
func: F,
) -> anyhow::Result<O>
where
F: FnOnce(&mut BoxedSpan) -> anyhow::Result<O>,
{
let tracer = global::tracer("db");
let mut span = tracer
.span_builder(format!("db.{}.{}", query_type, operation))
.with_attributes(vec![
KeyValue::new("db.query_type", query_type.to_string().clone()),
KeyValue::new("db.operation", operation.to_string().clone()),
])
.start_with_context(&tracer, context);
let result = func(&mut span);
match &result {
Ok(_) => {
span.set_status(Status::Ok);
}
Err(e) => span.set_status(Status::error(e.to_string())),
}
result
}
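
A sketch of the intended call pattern (not part of the diff), mirroring the DAO code earlier in this changeset: a handler extracts the propagated W3C trace context from the request and threads it into trace_db_call so the DB span joins the caller's trace. The handler and the closure body are assumptions.

use actix_web::{HttpRequest, HttpResponse, Responder};
use image_api::otel::{extract_context_from_request, trace_db_call}; // assumed crate path

async fn search_count(req: HttpRequest) -> impl Responder {
    let ctx = extract_context_from_request(&req);
    // Stand-in for a real DAO call; the closure runs inside a "db.query.count" span.
    let count = trace_db_call(&ctx, "query", "count", |_span| Ok(0i64)).unwrap_or(0);
    HttpResponse::Ok().body(count.to_string())
}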

View File

@@ -1,183 +0,0 @@
use anyhow::{Context, Result};
use chrono::NaiveDateTime;
use ical::parser::ical::component::IcalCalendar;
use ical::property::Property;
use std::fs::File;
use std::io::BufReader;
#[derive(Debug, Clone)]
pub struct ParsedCalendarEvent {
pub event_uid: Option<String>,
pub summary: String,
pub description: Option<String>,
pub location: Option<String>,
pub start_time: i64,
pub end_time: i64,
pub all_day: bool,
pub organizer: Option<String>,
pub attendees: Vec<String>,
}
pub fn parse_ics_file(path: &str) -> Result<Vec<ParsedCalendarEvent>> {
let file = File::open(path).context("Failed to open .ics file")?;
let reader = BufReader::new(file);
let parser = ical::IcalParser::new(reader);
let mut events = Vec::new();
for calendar_result in parser {
let calendar: IcalCalendar = calendar_result.context("Failed to parse calendar")?;
for event in calendar.events {
// Extract properties
let mut event_uid = None;
let mut summary = None;
let mut description = None;
let mut location = None;
let mut start_time = None;
let mut end_time = None;
let mut all_day = false;
let mut organizer = None;
let mut attendees = Vec::new();
for property in event.properties {
match property.name.as_str() {
"UID" => {
event_uid = property.value;
}
"SUMMARY" => {
summary = property.value;
}
"DESCRIPTION" => {
description = property.value;
}
"LOCATION" => {
location = property.value;
}
"DTSTART" => {
if let Some(ref value) = property.value {
start_time = parse_ical_datetime(value, &property)?;
// Check if it's an all-day event (no time component)
all_day = value.len() == 8; // YYYYMMDD format
}
}
"DTEND" => {
if let Some(ref value) = property.value {
end_time = parse_ical_datetime(value, &property)?;
}
}
"ORGANIZER" => {
organizer = extract_email_from_mailto(property.value.as_deref());
}
"ATTENDEE" => {
if let Some(email) = extract_email_from_mailto(property.value.as_deref()) {
attendees.push(email);
}
}
_ => {}
}
}
// Only include events with required fields
if let (Some(summary_text), Some(start), Some(end)) = (summary, start_time, end_time) {
events.push(ParsedCalendarEvent {
event_uid,
summary: summary_text,
description,
location,
start_time: start,
end_time: end,
all_day,
organizer,
attendees,
});
}
}
}
Ok(events)
}
fn parse_ical_datetime(value: &str, property: &Property) -> Result<Option<i64>> {
// Check for TZID parameter
let _tzid = property.params.as_ref().and_then(|params| {
params
.iter()
.find(|(key, _)| key == "TZID")
.and_then(|(_, values)| values.first())
.cloned()
});
// iCal datetime formats:
// - 20240815T140000Z (UTC)
// - 20240815T140000 (local/TZID)
// - 20240815 (all-day)
let cleaned = value.replace("Z", "").replace("T", "");
// All-day event (YYYYMMDD)
if cleaned.len() == 8 {
let dt = NaiveDateTime::parse_from_str(&format!("{}000000", cleaned), "%Y%m%d%H%M%S")
.context("Failed to parse all-day date")?;
return Ok(Some(dt.and_utc().timestamp()));
}
// DateTime event (YYYYMMDDTHHMMSS)
if cleaned.len() >= 14 {
let dt = NaiveDateTime::parse_from_str(&cleaned[..14], "%Y%m%d%H%M%S")
.context("Failed to parse datetime")?;
// If original had 'Z', it's UTC
let timestamp = if value.ends_with('Z') {
dt.and_utc().timestamp()
} else {
// Treat as UTC for simplicity (proper TZID handling is complex)
dt.and_utc().timestamp()
};
return Ok(Some(timestamp));
}
Ok(None)
}
fn extract_email_from_mailto(value: Option<&str>) -> Option<String> {
value.map(|v| {
// ORGANIZER and ATTENDEE often have format: mailto:user@example.com
if v.starts_with("mailto:") {
v.trim_start_matches("mailto:").to_string()
} else {
v.to_string()
}
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_ical_datetime() {
let prop = Property {
name: "DTSTART".to_string(),
params: None,
value: Some("20240815T140000Z".to_string()),
};
let timestamp = parse_ical_datetime("20240815T140000Z", &prop).unwrap();
assert!(timestamp.is_some());
}
#[test]
fn test_extract_email() {
assert_eq!(
extract_email_from_mailto(Some("mailto:user@example.com")),
Some("user@example.com".to_string())
);
assert_eq!(
extract_email_from_mailto(Some("user@example.com")),
Some("user@example.com".to_string())
);
}
}

View File

@@ -1,134 +0,0 @@
use anyhow::{Context, Result};
use chrono::DateTime;
use serde::Deserialize;
use std::fs::File;
use std::io::BufReader;
#[derive(Debug, Clone)]
pub struct ParsedLocationRecord {
pub timestamp: i64,
pub latitude: f64,
pub longitude: f64,
pub accuracy: Option<i32>,
pub activity: Option<String>,
pub activity_confidence: Option<i32>,
}
// Google Takeout Location History JSON structures
#[derive(Debug, Deserialize)]
struct LocationHistory {
locations: Vec<LocationPoint>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct LocationPoint {
timestamp_ms: Option<String>, // Older format
timestamp: Option<String>, // Newer format (ISO8601)
latitude_e7: Option<i64>,
longitude_e7: Option<i64>,
accuracy: Option<i32>,
activity: Option<Vec<ActivityRecord>>,
}
#[derive(Debug, Deserialize)]
struct ActivityRecord {
activity: Vec<ActivityType>,
#[allow(dead_code)] // Part of JSON structure, may be used in future
timestamp_ms: Option<String>,
}
#[derive(Debug, Deserialize)]
struct ActivityType {
#[serde(rename = "type")]
activity_type: String,
confidence: i32,
}
pub fn parse_location_json(path: &str) -> Result<Vec<ParsedLocationRecord>> {
let file = File::open(path).context("Failed to open location JSON file")?;
let reader = BufReader::new(file);
let history: LocationHistory =
serde_json::from_reader(reader).context("Failed to parse location history JSON")?;
let mut records = Vec::new();
for point in history.locations {
// Parse timestamp (try both formats)
let timestamp = if let Some(ts_ms) = point.timestamp_ms {
// Milliseconds since epoch
ts_ms
.parse::<i64>()
.context("Failed to parse timestamp_ms")?
/ 1000
} else if let Some(ts_iso) = point.timestamp {
// ISO8601 format
DateTime::parse_from_rfc3339(&ts_iso)
.context("Failed to parse ISO8601 timestamp")?
.timestamp()
} else {
continue; // Skip points without timestamp
};
// Convert E7 format to decimal degrees
let latitude = point.latitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
let longitude = point.longitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
// Extract highest-confidence activity
let (activity, activity_confidence) = point
.activity
.as_ref()
.and_then(|activities| activities.first())
.and_then(|record| {
record
.activity
.iter()
.max_by_key(|a| a.confidence)
.map(|a| (a.activity_type.clone(), a.confidence))
})
.unzip();
if let (Some(lat), Some(lon)) = (latitude, longitude) {
records.push(ParsedLocationRecord {
timestamp,
latitude: lat,
longitude: lon,
accuracy: point.accuracy,
activity,
activity_confidence,
});
}
}
Ok(records)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_e7_conversion() {
let lat_e7 = 374228300_i64;
let lat = lat_e7 as f64 / 10_000_000.0;
assert!((lat - 37.42283).abs() < 0.00001);
}
#[test]
fn test_parse_sample_json() {
let json = r#"{
"locations": [
{
"latitudeE7": 374228300,
"longitudeE7": -1221086100,
"accuracy": 20,
"timestampMs": "1692115200000"
}
]
}"#;
let history: LocationHistory = serde_json::from_str(json).unwrap();
assert_eq!(history.locations.len(), 1);
}
}

View File

@@ -1,7 +0,0 @@
pub mod ical_parser;
pub mod location_json_parser;
pub mod search_html_parser;
pub use ical_parser::{ParsedCalendarEvent, parse_ics_file};
pub use location_json_parser::{ParsedLocationRecord, parse_location_json};
pub use search_html_parser::{ParsedSearchRecord, parse_search_html};

View File

@@ -1,209 +0,0 @@
use anyhow::{Context, Result};
use chrono::{DateTime, NaiveDateTime, Utc};
use scraper::{Html, Selector};
use std::fs;
#[derive(Debug, Clone)]
pub struct ParsedSearchRecord {
pub timestamp: i64,
pub query: String,
pub search_engine: Option<String>,
}
pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
let html_content =
fs::read_to_string(path).context("Failed to read search history HTML file")?;
let document = Html::parse_document(&html_content);
let mut records = Vec::new();
// Try multiple selector strategies as Google Takeout format varies
// Strategy 1: Look for specific cell structure
if let Ok(cell_selector) = Selector::parse("div.content-cell") {
for cell in document.select(&cell_selector) {
if let Some(record) = parse_content_cell(&cell) {
records.push(record);
}
}
}
// Strategy 2: Look for outer-cell structure (older format)
if records.is_empty()
&& let Ok(outer_selector) = Selector::parse("div.outer-cell")
{
for cell in document.select(&outer_selector) {
if let Some(record) = parse_outer_cell(&cell) {
records.push(record);
}
}
}
// Strategy 3: Generic approach - look for links and timestamps
if records.is_empty()
&& let Ok(link_selector) = Selector::parse("a")
{
for link in document.select(&link_selector) {
if let Some(href) = link.value().attr("href") {
// Check if it's a search URL
if (href.contains("google.com/search?q=") || href.contains("search?q="))
&& let Some(query) = extract_query_from_url(href)
{
// Try to find nearby timestamp
let timestamp = find_nearby_timestamp(&link);
records.push(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
});
}
}
}
}
Ok(records)
}
fn parse_content_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
let link_selector = Selector::parse("a").ok()?;
let link = cell.select(&link_selector).next()?;
let href = link.value().attr("href")?;
let query = extract_query_from_url(href)?;
// Extract timestamp from cell text
let cell_text = cell.text().collect::<Vec<_>>().join(" ");
let timestamp = parse_timestamp_from_text(&cell_text);
Some(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
})
}
fn parse_outer_cell(cell: &scraper::ElementRef) -> Option<ParsedSearchRecord> {
let link_selector = Selector::parse("a").ok()?;
let link = cell.select(&link_selector).next()?;
let href = link.value().attr("href")?;
let query = extract_query_from_url(href)?;
let cell_text = cell.text().collect::<Vec<_>>().join(" ");
let timestamp = parse_timestamp_from_text(&cell_text);
Some(ParsedSearchRecord {
timestamp: timestamp.unwrap_or_else(|| Utc::now().timestamp()),
query,
search_engine: Some("Google".to_string()),
})
}
fn extract_query_from_url(url: &str) -> Option<String> {
// Extract query parameter from URL
// Example: https://www.google.com/search?q=rust+programming
if let Some(query_start) = url.find("?q=").or_else(|| url.find("&q=")) {
let query_part = &url[query_start + 3..];
let query_end = query_part.find('&').unwrap_or(query_part.len());
let encoded_query = &query_part[..query_end];
// URL decode
urlencoding::decode(encoded_query)
.ok()
.map(|s| s.to_string())
} else {
None
}
}
fn find_nearby_timestamp(element: &scraper::ElementRef) -> Option<i64> {
// Look for timestamp in parent or sibling elements
if let Some(parent) = element.parent()
&& parent.value().as_element().is_some()
{
let parent_ref = scraper::ElementRef::wrap(parent)?;
let text = parent_ref.text().collect::<Vec<_>>().join(" ");
return parse_timestamp_from_text(&text);
}
None
}
fn parse_timestamp_from_text(text: &str) -> Option<i64> {
// Google Takeout timestamps often look like:
// "Aug 15, 2024, 2:34:56 PM PDT"
// "2024-08-15T14:34:56Z"
// Try ISO8601 first
if let Some(iso_match) = text
.split_whitespace()
.find(|s| s.contains('T') && s.contains('-'))
&& let Ok(dt) = DateTime::parse_from_rfc3339(iso_match)
{
return Some(dt.timestamp());
}
// Try common date patterns
let patterns = [
"%b %d, %Y, %I:%M:%S %p", // Aug 15, 2024, 2:34:56 PM
"%Y-%m-%d %H:%M:%S", // 2024-08-15 14:34:56
"%m/%d/%Y %H:%M:%S", // 08/15/2024 14:34:56
];
for pattern in patterns {
// Extract potential date string
if let Some(date_part) = extract_date_substring(text)
&& let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern)
{
return Some(dt.and_utc().timestamp());
}
}
None
}
fn extract_date_substring(text: &str) -> Option<String> {
// Try to extract date-like substring from text
// This is a heuristic approach for varied formats
// Look for patterns like "Aug 15, 2024, 2:34:56 PM"
if let Some(pos) = text.find(|c: char| c.is_numeric()) {
let rest = &text[pos..];
if let Some(end) =
rest.find(|c: char| !c.is_alphanumeric() && c != ':' && c != ',' && c != ' ')
{
Some(rest[..end].trim().to_string())
} else {
Some(rest.trim().to_string())
}
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_extract_query_from_url() {
let url = "https://www.google.com/search?q=rust+programming&oq=rust";
let query = extract_query_from_url(url);
assert_eq!(query, Some("rust+programming".to_string()));
}
#[test]
fn test_extract_query_with_encoding() {
let url = "https://www.google.com/search?q=hello%20world";
let query = extract_query_from_url(url);
assert_eq!(query, Some("hello world".to_string()));
}
#[test]
fn test_parse_iso_timestamp() {
let text = "Some text 2024-08-15T14:34:56Z more text";
let timestamp = parse_timestamp_from_text(text);
assert!(timestamp.is_some());
}
}

View File

@@ -1,16 +0,0 @@
use actix_web::App;
pub trait ServiceBuilder<T> {
fn add_feature<F>(self, f: F) -> App<T>
where
F: Fn(App<T>) -> App<T>;
}
impl<T> ServiceBuilder<T> for App<T> {
fn add_feature<F>(self, create_feature: F) -> App<T>
where
F: Fn(App<T>) -> App<T>,
{
create_feature(self)
}
}
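
What add_feature buys (a sketch, not part of the diff): each feature is a closure or fn that takes the App and hands it back with its routes attached, so the server setup can chain independent features fluently. The health-check route and the crate path are assumptions.

use actix_web::{web, App, HttpResponse, HttpServer};
use image_api::service::ServiceBuilder; // assumed crate path

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| {
        App::new()
            // Hypothetical feature: mounts a health-check endpoint.
            .add_feature(|app| app.route("/health", web::get().to(|| async { HttpResponse::Ok() })))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}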

View File

@@ -1,211 +0,0 @@
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
use crate::database::{
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao,
SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao,
SqliteLocationHistoryDao, SqliteSearchHistoryDao,
};
use crate::video::actors::{PlaylistGenerator, StreamActor, VideoPlaylistManager};
use actix::{Actor, Addr};
use std::env;
use std::sync::{Arc, Mutex};
pub struct AppState {
pub stream_manager: Arc<Addr<StreamActor>>,
pub playlist_manager: Arc<Addr<VideoPlaylistManager>>,
pub base_path: String,
pub thumbnail_path: String,
pub video_path: String,
pub gif_path: String,
pub excluded_dirs: Vec<String>,
pub ollama: OllamaClient,
pub sms_client: SmsApiClient,
pub insight_generator: InsightGenerator,
}
impl AppState {
pub fn new(
stream_manager: Arc<Addr<StreamActor>>,
base_path: String,
thumbnail_path: String,
video_path: String,
gif_path: String,
excluded_dirs: Vec<String>,
ollama: OllamaClient,
sms_client: SmsApiClient,
insight_generator: InsightGenerator,
) -> Self {
let playlist_generator = PlaylistGenerator::new();
let video_playlist_manager =
VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());
Self {
stream_manager,
playlist_manager: Arc::new(video_playlist_manager.start()),
base_path,
thumbnail_path,
video_path,
gif_path,
excluded_dirs,
ollama,
sms_client,
insight_generator,
}
}
/// Parse excluded directories from environment variable
fn parse_excluded_dirs() -> Vec<String> {
env::var("EXCLUDED_DIRS")
.unwrap_or_default()
.split(',')
.filter(|dir| !dir.trim().is_empty())
.map(|dir| dir.trim().to_string())
.collect()
}
}
impl Default for AppState {
fn default() -> Self {
// Initialize AI clients
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
});
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| env::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
let ollama = OllamaClient::new(
ollama_primary_url,
ollama_fallback_url,
ollama_primary_model,
ollama_fallback_model,
);
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
let sms_api_token = env::var("SMS_API_TOKEN").ok();
let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
// Initialize DAOs
let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Initialize Google Takeout DAOs
let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));
// Load base path
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
// Initialize InsightGenerator with all data sources
let insight_generator = InsightGenerator::new(
ollama.clone(),
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
daily_summary_dao.clone(),
calendar_dao.clone(),
location_dao.clone(),
search_dao.clone(),
base_path.clone(),
);
Self::new(
Arc::new(StreamActor {}.start()),
base_path,
env::var("THUMBNAILS").expect("THUMBNAILS was not set in the env"),
env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env"),
env::var("GIFS_DIRECTORY").expect("GIFS_DIRECTORY was not set in the env"),
Self::parse_excluded_dirs(),
ollama,
sms_client,
insight_generator,
)
}
}
#[cfg(test)]
impl AppState {
/// Creates an AppState instance for testing with temporary directories
pub fn test_state() -> Self {
use actix::Actor;
// Create a base temporary directory
let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
let base_path = temp_dir.path().to_path_buf();
// Create subdirectories for thumbnails, videos, and gifs
let thumbnail_path = create_test_subdir(&base_path, "thumbnails");
let video_path = create_test_subdir(&base_path, "videos");
let gif_path = create_test_subdir(&base_path, "gifs");
// Initialize test AI clients
let ollama = OllamaClient::new(
"http://localhost:11434".to_string(),
None,
"llama3.2".to_string(),
None,
);
let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None);
// Initialize test DAOs
let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new())));
// Initialize test Google Takeout DAOs
let calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>> =
Arc::new(Mutex::new(Box::new(SqliteCalendarEventDao::new())));
let location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteLocationHistoryDao::new())));
let search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>> =
Arc::new(Mutex::new(Box::new(SqliteSearchHistoryDao::new())));
// Initialize test InsightGenerator with all data sources
let base_path_str = base_path.to_string_lossy().to_string();
let insight_generator = InsightGenerator::new(
ollama.clone(),
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
daily_summary_dao.clone(),
calendar_dao.clone(),
location_dao.clone(),
search_dao.clone(),
base_path_str.clone(),
);
// Create the AppState with the temporary paths
AppState::new(
Arc::new(StreamActor {}.start()),
base_path_str,
thumbnail_path.to_string_lossy().to_string(),
video_path.to_string_lossy().to_string(),
gif_path.to_string_lossy().to_string(),
Vec::new(), // No excluded directories for test state
ollama,
sms_client,
insight_generator,
)
}
}
/// Helper function to create a subdirectory inside the base directory for testing
#[cfg(test)]
fn create_test_subdir(base_path: &std::path::Path, name: &str) -> std::path::PathBuf {
let dir_path = base_path.join(name);
std::fs::create_dir_all(&dir_path)
.unwrap_or_else(|_| panic!("Failed to create {} directory", name));
dir_path
}

File diff suppressed because it is too large

View File

@@ -1,64 +0,0 @@
use actix_web::{
HttpResponse,
body::{BoxBody, MessageBody},
};
use crate::database::{UserDao, models::User};
use std::cell::RefCell;
use std::option::Option;
pub struct TestUserDao {
pub user_map: RefCell<Vec<User>>,
}
impl TestUserDao {
pub fn new() -> Self {
Self {
user_map: RefCell::new(Vec::new()),
}
}
}
impl UserDao for TestUserDao {
fn create_user(&mut self, username: &str, password: &str) -> Option<User> {
let u = User {
id: (self.user_map.borrow().len() + 1) as i32,
username: username.to_string(),
password: password.to_string(),
};
self.user_map.borrow_mut().push(u.clone());
Some(u)
}
fn get_user(&mut self, user: &str, pass: &str) -> Option<User> {
match self
.user_map
.borrow()
.iter()
.find(|&u| u.username == user && u.password == pass)
{
Some(u) => {
let copy = (*u).clone();
Some(copy)
}
None => None,
}
}
fn user_exists(&mut self, user: &str) -> bool {
self.user_map.borrow().iter().any(|u| u.username == user)
}
}
pub trait BodyReader {
fn read_to_str(self) -> String;
}
impl BodyReader for HttpResponse<BoxBody> {
fn read_to_str(self) -> String {
let body = self.into_body().try_into_bytes().unwrap();
std::str::from_utf8(&body).unwrap().to_string()
}
}

View File

@@ -1,83 +0,0 @@
/// Normalize a file path to use forward slashes for cross-platform consistency
/// This ensures paths stored in the database always use `/` regardless of OS
///
/// # Examples
/// ```
/// use image_api::utils::normalize_path;
///
/// assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
/// assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
/// ```
pub fn normalize_path(path: &str) -> String {
path.replace('\\', "/")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_normalize_path_with_backslashes() {
assert_eq!(normalize_path("foo\\bar\\baz.jpg"), "foo/bar/baz.jpg");
}
#[test]
fn test_normalize_path_with_forward_slashes() {
assert_eq!(normalize_path("foo/bar/baz.jpg"), "foo/bar/baz.jpg");
}
#[test]
fn test_normalize_path_mixed() {
assert_eq!(
normalize_path("foo\\bar/baz\\qux.jpg"),
"foo/bar/baz/qux.jpg"
);
}
#[test]
fn test_normalize_path_empty() {
assert_eq!(normalize_path(""), "");
}
#[test]
fn test_normalize_path_absolute_windows() {
assert_eq!(
normalize_path("C:\\Users\\Photos\\image.jpg"),
"C:/Users/Photos/image.jpg"
);
}
#[test]
fn test_normalize_path_unc_path() {
assert_eq!(
normalize_path("\\\\server\\share\\folder\\file.jpg"),
"//server/share/folder/file.jpg"
);
}
#[test]
fn test_normalize_path_single_filename() {
assert_eq!(normalize_path("image.jpg"), "image.jpg");
}
#[test]
fn test_normalize_path_trailing_slash() {
assert_eq!(normalize_path("foo\\bar\\"), "foo/bar/");
}
#[test]
fn test_normalize_path_multiple_consecutive_backslashes() {
assert_eq!(
normalize_path("foo\\\\bar\\\\\\baz.jpg"),
"foo//bar///baz.jpg"
);
}
#[test]
fn test_normalize_path_deep_nesting() {
assert_eq!(
normalize_path("a\\b\\c\\d\\e\\f\\g\\file.jpg"),
"a/b/c/d/e/f/g/file.jpg"
);
}
}

49
src/video.rs Normal file
View File

@@ -0,0 +1,49 @@
use std::path::Path;
use std::process::Command;
// ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
// ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8
pub fn create_playlist(video_path: &str, playlist_file: &str) {
if Path::new(playlist_file).exists() {
println!("Playlist already exists: {}", playlist_file);
return;
}
let result = Command::new("ffmpeg")
.arg("-i")
.arg(video_path)
.arg("-c:v")
.arg("h264")
.arg("-crf")
.arg("23")
.arg("-preset")
.arg("veryfast")
.arg("-hls_time")
.arg("3")
.arg("-hls_list_size")
.arg("100")
.arg("-vf")
.arg("scale=1080:-2,setsar=1:1")
.arg(playlist_file)
.output()
.expect("Expected this to work..");
println!("{:?}", result);
println!("Status: {}", String::from_utf8(result.stdout).unwrap())
}
pub fn generate_video_thumbnail(path: &Path, destination: &Path) {
Command::new("ffmpeg")
.arg("-ss")
.arg("3")
.arg("-i")
.arg(path.to_str().unwrap())
.arg("-vframes")
.arg("1")
.arg("-f")
.arg("image2")
.arg(destination)
.output()
.expect("Failure to create video frame");
}

View File

@@ -1,449 +0,0 @@
use crate::is_video;
use crate::otel::global_tracer;
use actix::prelude::*;
use futures::TryFutureExt;
use log::{debug, error, info, trace, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, Tracer};
use std::io::Result;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, ExitStatus, Stdio};
use std::sync::Arc;
use tokio::sync::Semaphore;
use walkdir::{DirEntry, WalkDir};
// ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
// ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8
pub struct StreamActor;
impl Actor for StreamActor {
type Context = Context<Self>;
}
pub struct ProcessMessage(pub String, pub Child);
impl Message for ProcessMessage {
type Result = Result<ExitStatus>;
}
impl Handler<ProcessMessage> for StreamActor {
type Result = Result<ExitStatus>;
fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result {
trace!("Message received");
let mut process = msg.1;
let result = process.wait();
debug!(
"Finished waiting for: {:?}. Code: {:?}",
msg.0,
result
.as_ref()
.map_or(-1, |status| status.code().unwrap_or(-1))
);
result
}
}
pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
if Path::new(playlist_file).exists() {
debug!("Playlist already exists: {}", playlist_file);
return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
}
let result = Command::new("ffmpeg")
.arg("-i")
.arg(video_path)
.arg("-c:v")
.arg("h264")
.arg("-crf")
.arg("21")
.arg("-preset")
.arg("veryfast")
.arg("-hls_time")
.arg("3")
.arg("-hls_list_size")
.arg("100")
.arg("-vf")
.arg("scale=1080:-2,setsar=1:1")
.arg(playlist_file)
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn();
let start_time = std::time::Instant::now();
loop {
actix::clock::sleep(std::time::Duration::from_secs(1)).await;
if Path::new(playlist_file).exists()
|| std::time::Instant::now() - start_time > std::time::Duration::from_secs(5)
{
break;
}
}
result
}
pub fn generate_video_thumbnail(path: &Path, destination: &Path) {
Command::new("ffmpeg")
.arg("-ss")
.arg("3")
.arg("-i")
.arg(path.to_str().unwrap())
.arg("-vframes")
.arg("1")
.arg("-f")
.arg("image2")
.arg(destination)
.output()
.expect("Failure to create video frame");
}
/// Check if a video is already encoded with h264 codec
/// Returns true if the video uses h264, false otherwise or if detection fails
async fn is_h264_encoded(video_path: &str) -> bool {
let output = tokio::process::Command::new("ffprobe")
.arg("-v")
.arg("error")
.arg("-select_streams")
.arg("v:0")
.arg("-show_entries")
.arg("stream=codec_name")
.arg("-of")
.arg("default=noprint_wrappers=1:nokey=1")
.arg(video_path)
.output()
.await;
match output {
Ok(output) if output.status.success() => {
let codec = String::from_utf8_lossy(&output.stdout);
let codec = codec.trim();
debug!("Detected codec for {}: {}", video_path, codec);
codec == "h264"
}
Ok(output) => {
warn!(
"ffprobe failed for {}: {}",
video_path,
String::from_utf8_lossy(&output.stderr)
);
false
}
Err(e) => {
warn!("Failed to run ffprobe for {}: {}", video_path, e);
false
}
}
}
/// Check if a video has rotation metadata
/// Returns the rotation angle in degrees (0, 90, 180, 270) or 0 if none detected
async fn get_video_rotation(video_path: &str) -> i32 {
let output = tokio::process::Command::new("ffprobe")
.arg("-v")
.arg("error")
.arg("-select_streams")
.arg("v:0")
.arg("-show_entries")
.arg("stream_tags=rotate")
.arg("-of")
.arg("default=noprint_wrappers=1:nokey=1")
.arg(video_path)
.output()
.await;
match output {
Ok(output) if output.status.success() => {
let rotation_str = String::from_utf8_lossy(&output.stdout);
let rotation_str = rotation_str.trim();
if rotation_str.is_empty() {
0
} else {
rotation_str.parse::<i32>().unwrap_or(0)
}
}
_ => 0,
}
}
pub struct VideoPlaylistManager {
playlist_dir: PathBuf,
playlist_generator: Addr<PlaylistGenerator>,
}
impl VideoPlaylistManager {
pub fn new<P: Into<PathBuf>>(
playlist_dir: P,
playlist_generator: Addr<PlaylistGenerator>,
) -> Self {
Self {
playlist_dir: playlist_dir.into(),
playlist_generator,
}
}
}
impl Actor for VideoPlaylistManager {
type Context = Context<Self>;
}
impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
type Result = ResponseFuture<()>;
fn handle(&mut self, msg: ScanDirectoryMessage, _ctx: &mut Self::Context) -> Self::Result {
let tracer = global_tracer();
let mut span = tracer.start("videoplaylistmanager.scan_directory");
let start = std::time::Instant::now();
info!(
"Starting scan directory for video playlist generation: {}",
msg.directory
);
let video_files = WalkDir::new(&msg.directory)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(is_video)
.collect::<Vec<DirEntry>>();
let scan_dir_name = msg.directory.clone();
let playlist_output_dir = self.playlist_dir.clone();
let playlist_generator = self.playlist_generator.clone();
Box::pin(async move {
for e in video_files {
let path = e.path();
let path_as_str = path.to_str().unwrap();
debug!(
"Sending generate playlist message for path: {}",
path_as_str
);
match playlist_generator
.send(GeneratePlaylistMessage {
playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
video_path: PathBuf::from(path),
})
.await
.expect("Failed to send generate playlist message")
{
Ok(_) => {
span.add_event(
"Playlist generated",
vec![KeyValue::new("video_path", path_as_str.to_string())],
);
debug!(
"Successfully generated playlist for file: '{}'",
path_as_str
);
}
Err(e) => {
warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
}
}
}
span.add_event(
"Finished directory scan",
vec![KeyValue::new("directory", scan_dir_name.to_string())],
);
info!(
"Finished directory scan of '{}' in {:?}",
scan_dir_name,
start.elapsed()
);
})
}
}
impl Handler<QueueVideosMessage> for VideoPlaylistManager {
type Result = ();
fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result {
if msg.video_paths.is_empty() {
return;
}
info!(
"Queueing {} videos for HLS playlist generation",
msg.video_paths.len()
);
let playlist_output_dir = self.playlist_dir.clone();
let playlist_generator = self.playlist_generator.clone();
for video_path in msg.video_paths {
let path_str = video_path.to_string_lossy().to_string();
debug!("Queueing playlist generation for: {}", path_str);
playlist_generator.do_send(GeneratePlaylistMessage {
playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
video_path,
});
}
}
}
#[derive(Message)]
#[rtype(result = "()")]
pub struct ScanDirectoryMessage {
pub(crate) directory: String,
}
#[derive(Message)]
#[rtype(result = "()")]
pub struct QueueVideosMessage {
pub video_paths: Vec<PathBuf>,
}
#[derive(Message)]
#[rtype(result = "Result<()>")]
pub struct GeneratePlaylistMessage {
pub video_path: PathBuf,
pub playlist_path: String,
}
pub struct PlaylistGenerator {
semaphore: Arc<Semaphore>,
}
impl PlaylistGenerator {
pub(crate) fn new() -> Self {
PlaylistGenerator {
semaphore: Arc::new(Semaphore::new(2)),
}
}
}
impl Actor for PlaylistGenerator {
type Context = Context<Self>;
}
impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
type Result = ResponseFuture<Result<()>>;
fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result {
let video_file = msg.video_path.to_str().unwrap().to_owned();
let playlist_path = msg.playlist_path.as_str().to_owned();
let semaphore = self.semaphore.clone();
let playlist_file = format!(
"{}/{}.m3u8",
playlist_path,
msg.video_path.file_name().unwrap().to_str().unwrap()
);
let tracer = global_tracer();
let mut span = tracer
.span_builder("playlistgenerator.generate_playlist")
.with_attributes(vec![
KeyValue::new("video_file", video_file.clone()),
KeyValue::new("playlist_file", playlist_file.clone()),
])
.start(&tracer);
Box::pin(async move {
let wait_start = std::time::Instant::now();
let permit = semaphore
.acquire_owned()
.await
.expect("Unable to acquire semaphore");
debug!(
"Waited for {:?} before starting ffmpeg",
wait_start.elapsed()
);
span.add_event(
"Waited for FFMPEG semaphore",
vec![KeyValue::new(
"wait_time",
wait_start.elapsed().as_secs_f64(),
)],
);
if Path::new(&playlist_file).exists() {
debug!("Playlist already exists: {}", playlist_file);
span.set_status(Status::error(format!(
"Playlist already exists: {}",
playlist_file
)));
return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
}
// Check if video is already h264 encoded
let is_h264 = is_h264_encoded(&video_file).await;
// Check for rotation metadata
let rotation = get_video_rotation(&video_file).await;
let has_rotation = rotation != 0;
let use_copy = is_h264 && !has_rotation;
if has_rotation {
info!(
"Video {} has rotation metadata ({}°), transcoding to apply rotation",
video_file, rotation
);
span.add_event(
"Transcoding due to rotation",
vec![KeyValue::new("rotation_degrees", rotation as i64)],
);
} else if use_copy {
info!("Video {} is already h264, using stream copy", video_file);
span.add_event("Using stream copy (h264 detected)", vec![]);
} else {
info!("Video {} needs transcoding to h264", video_file);
span.add_event("Transcoding to h264", vec![]);
}
tokio::spawn(async move {
let mut cmd = tokio::process::Command::new("ffmpeg");
cmd.arg("-i").arg(&video_file);
if use_copy {
// Video is already h264, just copy the stream
// Note: rotation metadata will be preserved in the stream
cmd.arg("-c:v").arg("copy");
cmd.arg("-c:a").arg("aac"); // Still need to ensure audio is compatible
} else {
// Need to transcode - autorotate is enabled by default and will apply rotation
cmd.arg("-c:v").arg("h264");
cmd.arg("-crf").arg("21");
cmd.arg("-preset").arg("veryfast");
cmd.arg("-vf").arg("scale=1080:-2,setsar=1:1");
cmd.arg("-c:a").arg("aac");
}
// Common HLS settings
cmd.arg("-hls_time").arg("3");
cmd.arg("-hls_list_size").arg("100");
cmd.arg(&playlist_file);
cmd.stdout(Stdio::null());
cmd.stderr(Stdio::piped());
let ffmpeg_result = cmd
.output()
.inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
.map_err(|e| std::io::Error::other(e.to_string()))
.await;
// Hold the semaphore permit until ffmpeg has finished, then explicitly drop it
drop(permit);
if let Ok(ref res) = ffmpeg_result {
debug!("ffmpeg output: {:?}", res);
}
span.set_status(Status::Ok);
ffmpeg_result
});
Ok(())
})
}
}
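
A sketch of how the rest of the server likely drives these actors (the larger files in this diff are suppressed, so the call site is a guess): on startup, or after new uploads land, crate-internal code sends ScanDirectoryMessage or QueueVideosMessage to the VideoPlaylistManager held in AppState. ScanDirectoryMessage's field is pub(crate), so this only works from inside the crate.

use std::path::PathBuf;
use crate::state::AppState;
use crate::video::actors::{QueueVideosMessage, ScanDirectoryMessage};

// Hypothetical glue code: walk the whole media tree, then queue freshly uploaded files.
fn kick_off_playlists(state: &AppState, uploaded: Vec<PathBuf>) {
    state.playlist_manager.do_send(ScanDirectoryMessage {
        directory: state.base_path.clone(),
    });
    state
        .playlist_manager
        .do_send(QueueVideosMessage { video_paths: uploaded });
}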

View File

@@ -1,185 +0,0 @@
use futures::TryFutureExt;
use log::{debug, error, info, warn};
use std::io::Result;
use std::process::{Output, Stdio};
use std::time::Instant;
use tokio::process::Command;
pub struct Ffmpeg;
pub enum GifType {
Overview,
OverviewVideo { duration: u32 },
}
impl Ffmpeg {
async fn _generate_playlist(&self, input_file: &str, output_file: &str) -> Result<String> {
let ffmpeg_result: Result<Output> = Command::new("ffmpeg")
.arg("-i")
.arg(input_file)
.arg("-c:v")
.arg("h264")
.arg("-crf")
.arg("21")
.arg("-preset")
.arg("veryfast")
.arg("-hls_time")
.arg("3")
.arg("-hls_list_size")
.arg("100")
.arg("-vf")
.arg("scale=1080:-2,setsar=1:1")
.arg(output_file)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.output()
.inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
.map_err(|e| std::io::Error::other(e.to_string()))
.await;
if let Ok(ref res) = ffmpeg_result {
debug!("ffmpeg output: {:?}", res);
}
ffmpeg_result.map(|_| output_file.to_string())
}
async fn get_video_duration(&self, input_file: &str) -> Result<u32> {
Command::new("ffprobe")
.args(["-i", input_file])
.args(["-show_entries", "format=duration"])
.args(["-v", "quiet"])
.args(["-of", "csv=p=0"])
.output()
.await
.map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
.inspect(|duration| debug!("Found video duration: {:?}", duration))
.and_then(|duration| {
duration
.parse::<f32>()
.map(|duration| duration as u32)
.map_err(|e| std::io::Error::other(e.to_string()))
})
.inspect(|duration| debug!("Found video duration: {:?}", duration))
}
pub async fn generate_video_gif(
&self,
input_file: &str,
output_file: &str,
gif_type: GifType,
) -> Result<String> {
info!("Creating gif for: '{}'", input_file);
match gif_type {
GifType::Overview => {
let temp_dir = tempfile::tempdir()?;
let temp_path = temp_dir
.path()
.to_str()
.expect("Unable to make temp_dir a string");
match self
.get_video_duration(input_file)
.and_then(|duration| {
debug!("Creating gif frames for '{}'", input_file);
Command::new("ffmpeg")
.args(["-i", input_file])
.args(["-vf", &format!("fps=20/{}", duration)])
.args(["-q:v", "2"])
.stderr(Stdio::null())
.arg(format!("{}/frame_%03d.jpg", temp_path))
.status()
})
.and_then(|_| {
debug!("Generating palette");
Command::new("ffmpeg")
.args(["-i", &format!("{}/frame_%03d.jpg", temp_path)])
.args(["-vf", "palettegen"])
.arg(format!("{}/palette.png", temp_path))
.stderr(Stdio::null())
.status()
})
.and_then(|_| {
debug!("Creating gif for: '{}'", input_file);
self.create_gif_from_frames(temp_path, output_file)
})
.await
{
Ok(exit_code) => {
if exit_code == 0 {
info!("Created gif for '{}' -> '{}'", input_file, output_file);
} else {
warn!(
"Failed to create gif for '{}' with exit code: {}",
input_file, exit_code
);
}
}
Err(e) => {
error!("Error creating gif for '{}': {:?}", input_file, e);
}
}
}
GifType::OverviewVideo { duration } => {
let start = Instant::now();
match self
.get_video_duration(input_file)
.and_then(|input_duration| {
Command::new("ffmpeg")
.args(["-i", input_file])
.args([
"-vf",
// Grab 1 second of frames equally spaced to create a 'duration' second long video scaled to 720px on longest side
&format!(
"select='lt(mod(t,{}),1)',setpts=N/FRAME_RATE/TB,scale='if(gt(iw,ih),720,-2)':'if(gt(ih,iw),720,-2)",
input_duration / duration
),
])
.arg("-an")
.arg(output_file)
.status()
})
.await
{
Ok(out) => info!("Finished clip '{}' with code {:?} in {:?}", output_file, out.code(), start.elapsed()),
Err(e) => error!("Error creating video overview: {}", e),
}
}
}
Ok(output_file.to_string())
}
async fn create_gif_from_frames(&self, frame_base_dir: &str, output_file: &str) -> Result<i32> {
let output = Command::new("ffmpeg")
.arg("-y")
.args(["-framerate", "4"])
.args(["-i", &format!("{}/frame_%03d.jpg", frame_base_dir)])
.args(["-i", &format!("{}/palette.png", frame_base_dir)])
.args([
"-filter_complex",
// Scale to 480x480 with a center crop
"[0:v]scale=480:-1:flags=lanczos,crop='min(in_w,in_h)':'min(in_w,in_h)':(in_w-out_w)/2:(in_h-out_h)/2, paletteuse",
])
.args(["-loop", "0"]) // loop forever
.args(["-final_delay", "75"])
.arg(output_file)
.stderr(Stdio::piped()) // Change this to capture stderr
.stdout(Stdio::piped()) // Optionally capture stdout too
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
error!("FFmpeg error: {}", stderr);
let stdout = String::from_utf8_lossy(&output.stdout);
debug!("FFmpeg stdout: {}", stdout);
} else {
debug!("FFmpeg successful with exit code: {}", output.status);
}
Ok(output.status.code().unwrap_or(-1))
}
}

View File

@@ -1,67 +0,0 @@
use crate::otel::global_tracer;
use crate::video::ffmpeg::{Ffmpeg, GifType};
use crate::{is_video, update_media_counts};
use log::info;
use opentelemetry::trace::Tracer;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
pub mod actors;
pub mod ffmpeg;
#[allow(dead_code)]
pub async fn generate_video_gifs() {
tokio::spawn(async {
info!("Starting to make video gifs");
let start = std::time::Instant::now();
let tracer = global_tracer();
tracer.start("creating video gifs");
let gif_base_path = &dotenv::var("GIFS_DIRECTORY").unwrap_or(String::from("gifs"));
let gif_directory: &Path = Path::new(gif_base_path);
fs::create_dir_all(gif_base_path).expect("There was an issue creating directory");
let files = PathBuf::from(dotenv::var("BASE_PATH").unwrap());
let ffmpeg = Ffmpeg;
for file in WalkDir::new(&files)
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.filter(is_video)
.filter(|entry| {
let path = entry.path();
let relative_path = &path.strip_prefix(&files).unwrap();
let thumb_path = Path::new(gif_directory).join(relative_path);
let gif_path = thumb_path.with_extension("gif");
!gif_path.exists()
})
{
let path = file.path();
let relative_path = &path.strip_prefix(&files).unwrap();
let gif_path = Path::new(gif_directory).join(relative_path);
let gif_path = gif_path.with_extension("gif");
if let Some(parent_dir) = gif_path.parent() {
fs::create_dir_all(parent_dir).unwrap_or_else(|_| {
panic!("There was an issue creating gif directory {:?}", gif_path)
});
}
info!("Generating gif for {:?}", path);
ffmpeg
.generate_video_gif(
path.to_str().unwrap(),
gif_path.to_str().unwrap(),
GifType::Overview,
)
.await
.expect("There was an issue generating the gif");
}
info!("Finished making video gifs in {:?}", start.elapsed());
update_media_counts(&files);
});
}