diff --git a/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll b/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll index efaaffc..a2c3d93 100644 Binary files a/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll and b/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll differ diff --git a/protocol/gdextension/Cargo.toml b/protocol/gdextension/Cargo.toml index 845c9de..86e2d03 100644 --- a/protocol/gdextension/Cargo.toml +++ b/protocol/gdextension/Cargo.toml @@ -14,15 +14,15 @@ crate-type = ["cdylib"] [dependencies] gurtlib = { path = "../library" } -godot = "0.1" +godot = { version = "0.1", features = ["experimental-threads"] } tokio = { version = "1.0", features = ["rt"] } url = "2.5" serde_json = "1.0" [profile.release] -opt-level = "z" +opt-level = 3 lto = true codegen-units = 1 +strip = true panic = "abort" -strip = true \ No newline at end of file diff --git a/protocol/gdextension/build.sh b/protocol/gdextension/build.sh index 90f82ca..88b7b58 100644 --- a/protocol/gdextension/build.sh +++ b/protocol/gdextension/build.sh @@ -33,7 +33,7 @@ while [[ $# -gt 0 ]]; do echo "" echo "Options:" echo " -t, --target TARGET Build target (debug|release) [default: release]" - echo " -p, --platform PLATFORM Target platform (windows|linux|macos|current)" + echo " -p, --platform PLATFORM Target platform (windows|linux|macos|macos-intel|current)" echo " -h, --help Show this help message" echo "" exit 0 @@ -82,6 +82,10 @@ case $PLATFORM in LIB_NAME="libgurt_godot.so" ;; macos) + RUST_TARGET="aarch64-apple-darwin" + LIB_NAME="libgurt_godot.dylib" + ;; + macos-intel) RUST_TARGET="x86_64-apple-darwin" LIB_NAME="libgurt_godot.dylib" ;; diff --git a/search-engine/src/crawler.rs b/search-engine/src/crawler.rs index 09c6aa3..fc19e8b 100644 --- a/search-engine/src/crawler.rs +++ b/search-engine/src/crawler.rs @@ -158,7 +158,14 @@ impl DomainCrawler { } fn parse_clanker_txt(&self, content: &str, base_url: &str) -> Result> { - let user_agent = &self.config.search.crawler_user_agent; + Self::parse_clanker_rules( + content, + base_url, + &self.config.search.crawler_user_agent, + ) + } + + fn parse_clanker_rules(content: &str, base_url: &str, user_agent: &str) -> Result> { let mut disallow_all = false; let mut user_agent_matches = false; let mut allowed_urls = Vec::new(); @@ -169,26 +176,31 @@ impl DomainCrawler { continue; } - if let Some(user_agent_value) = line.to_lowercase().strip_prefix("user-agent:") { - let current_user_agent = user_agent_value.trim().to_string(); - user_agent_matches = current_user_agent == "*" || current_user_agent.eq_ignore_ascii_case(user_agent); + let (directive, value) = match line.split_once(':') { + Some((directive, value)) => (directive.trim().to_lowercase(), value.trim()), + None => continue, + }; + + if directive == "user-agent" { + user_agent_matches = + value == "*" || value.eq_ignore_ascii_case(user_agent); continue; } - if user_agent_matches { - if let Some(path_value) = line.to_lowercase().strip_prefix("disallow:") { - let path = path_value.trim(); - if path == "/" { - disallow_all = true; - break; - } - } else if let Some(path_value) = line.to_lowercase().strip_prefix("allow:") { - let path = path_value.trim(); - if !path.is_empty() { - let full_url = Self::normalize_url(format!("{}{}", base_url, path)); - debug!("Added allowed URL from clanker.txt: {}", full_url); - allowed_urls.push(full_url); - } + if !user_agent_matches { + continue; + } + + if directive == "disallow" { + if value == "/" { + disallow_all = true; + break; + } + } else if directive == "allow" { + if !value.is_empty() { + let full_url = Self::normalize_url(format!("{}{}", base_url, value)); + debug!("Added allowed URL from clanker.txt: {}", full_url); + allowed_urls.push(full_url); } } } @@ -719,4 +731,41 @@ impl CrawlStats { duration_seconds: 0, } } +} + +#[cfg(test)] +mod tests { + use super::DomainCrawler; + + #[test] + fn parse_clanker_rules_preserves_case_in_allowed_urls() { + let content = "User-agent: TestBot\nAllow: /getpage?l=Fri,12Sep2025000605_ZzesV.txt\n"; + let result = DomainCrawler::parse_clanker_rules(content, "gurt://wi.ki", "TestBot") + .expect("expected allow list"); + + assert_eq!( + result, + vec!["gurt://wi.ki/getpage?l=Fri,12Sep2025000605_ZzesV.txt".to_string()] + ); + } + + #[test] + fn parse_clanker_rules_handles_case_insensitive_directives() { + let content = "user-Agent: AnotherBot\nAlLoW: /MiXeD/Path.HTML\n"; + let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "AnotherBot") + .expect("expected allow list"); + + assert_eq!( + result, + vec!["gurt://example/MiXeD/Path.HTML".to_string()] + ); + } + + #[test] + fn parse_clanker_rules_respects_disallow_all() { + let content = "User-agent: Bot\nDisallow: /\n"; + let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "Bot"); + + assert!(result.is_err()); + } } \ No newline at end of file