use serde::{Deserialize, Serialize}; use sqlx::{FromRow, types::chrono::{DateTime, Utc}}; #[derive(Clone, Debug, Deserialize, Serialize, FromRow)] pub struct Domain { pub id: i32, pub name: String, pub tld: String, pub user_id: Option, pub status: Option, pub created_at: Option>, } impl Domain { pub fn full_domain(&self) -> String { format!("{}.{}", self.name, self.tld) } pub fn gurt_url(&self) -> String { format!("gurt://{}.{}", self.name, self.tld) } } #[derive(Clone, Debug, Deserialize, Serialize, FromRow)] pub struct DnsRecord { pub id: i32, pub domain_id: i32, pub record_type: String, pub name: String, pub value: String, pub ttl: Option, pub priority: Option, pub created_at: Option>, } #[derive(Clone, Debug, Serialize)] pub struct SearchResult { pub url: String, pub title: String, pub preview: String, pub domain: String, pub score: f32, pub indexed_at: DateTime, pub icon: Option, pub description: Option, } #[derive(Clone, Debug, Serialize)] pub struct SearchResponse { pub query: String, pub results: Vec, pub total_results: usize, pub page: usize, pub per_page: usize, } #[derive(Clone, Debug, Serialize)] pub struct IndexStats { pub total_documents: usize, pub total_domains: usize, pub index_size_mb: f64, pub last_updated: DateTime, } #[derive(Clone, Debug)] pub struct CrawledPage { pub url: String, pub domain: String, pub title: Option, pub content: String, pub content_hash: String, pub indexed_at: DateTime, pub icon: Option, pub description: Option, } impl CrawledPage { pub fn generate_preview(&self, max_len: usize) -> String { let text = self.content.trim(); if text.len() <= max_len { text.to_string() } else { let mut preview = text.chars().take(max_len).collect::(); if let Some(last_space) = preview.rfind(' ') { preview.truncate(last_space); } preview.push_str("..."); preview } } } #[derive(Clone)] pub struct DomainRepository { pool: sqlx::PgPool, } impl DomainRepository { pub fn new(pool: sqlx::PgPool) -> Self { Self { pool } } pub async fn get_domains_for_crawling(&self, limit: Option) -> Result, sqlx::Error> { let query = if let Some(limit) = limit { sqlx::query_as::<_, Domain>( "SELECT d.id, d.name, d.tld, d.user_id, d.status, d.created_at FROM domains d LEFT JOIN domain_crawl_status dcs ON d.id = dcs.domain_id WHERE d.status = 'approved' AND (dcs.crawl_status IS NULL OR (dcs.crawl_status = 'completed' AND dcs.next_crawl_at <= NOW()) OR (dcs.crawl_status = 'failed' AND dcs.next_crawl_at <= NOW()) OR (dcs.crawl_status = 'pending' AND dcs.next_crawl_at <= NOW())) ORDER BY COALESCE(dcs.last_crawled_at, '1970-01-01'::timestamptz) ASC LIMIT $1" ) .bind(limit) } else { sqlx::query_as::<_, Domain>( "SELECT d.id, d.name, d.tld, d.user_id, d.status, d.created_at FROM domains d LEFT JOIN domain_crawl_status dcs ON d.id = dcs.domain_id WHERE d.status = 'approved' AND (dcs.crawl_status IS NULL OR (dcs.crawl_status = 'completed' AND dcs.next_crawl_at <= NOW()) OR (dcs.crawl_status = 'failed' AND dcs.next_crawl_at <= NOW()) OR (dcs.crawl_status = 'pending' AND dcs.next_crawl_at <= NOW())) ORDER BY COALESCE(dcs.last_crawled_at, '1970-01-01'::timestamptz) ASC" ) }; query.fetch_all(&self.pool).await } pub async fn update_crawl_status( &self, domain_id: i32, status: &str, error_message: Option<&str>, pages_found: Option, next_crawl_hours: Option ) -> Result<(), sqlx::Error> { let next_crawl_at = next_crawl_hours .map(|hours| chrono::Utc::now() + chrono::Duration::hours(hours)); sqlx::query( "INSERT INTO domain_crawl_status (domain_id, crawl_status, error_message, pages_found, last_crawled_at, next_crawl_at, updated_at) VALUES ($1, $2, $3, $4, CASE WHEN $2 IN ('completed','failed') THEN NOW() ELSE NULL END, $5, NOW()) ON CONFLICT (domain_id) DO UPDATE SET crawl_status = $2, error_message = $3, pages_found = CASE WHEN $2 = 'completed' THEN COALESCE($4, domain_crawl_status.pages_found) ELSE domain_crawl_status.pages_found END, last_crawled_at = CASE WHEN $2 IN ('completed','failed') THEN NOW() ELSE domain_crawl_status.last_crawled_at END, next_crawl_at = COALESCE($5, domain_crawl_status.next_crawl_at), updated_at = NOW()" ) .bind(domain_id) .bind(status) .bind(error_message) .bind(pages_found) .bind(next_crawl_at) .execute(&self.pool) .await?; Ok(()) } } #[derive(Clone, Debug, Deserialize, Serialize, FromRow)] pub struct CrawlStatus { pub domain_id: i32, pub last_crawled_at: Option>, pub next_crawl_at: Option>, pub crawl_status: Option, pub error_message: Option, pub pages_found: Option, pub updated_at: Option>, }