Merge branch 'main' of https://github.com/outpoot/gurted

2025-09-25 21:56:56 +03:00
parent e9f8500503 aa533322a7
commit df144c7627
4 changed files with 75 additions and 22 deletions
--- a/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll
+++ b/flumi/addons/gurt-protocol/bin/windows/gurt_godot.dll
--- a/protocol/gdextension/Cargo.toml
+++ b/protocol/gdextension/Cargo.toml
@@ -14,15 +14,15 @@ crate-type = ["cdylib"]
 [dependencies]
 gurtlib = { path = "../library" }
-godot = "0.1"
+godot = { version = "0.1", features = ["experimental-threads"] }
 tokio = { version = "1.0", features = ["rt"] }
 url = "2.5"
 serde_json = "1.0"
 [profile.release]
-opt-level = "z"
+opt-level = 3
 lto = true
 codegen-units = 1
 strip = true
 panic = "abort"
 strip = true
--- a/protocol/gdextension/build.sh
+++ b/protocol/gdextension/build.sh
@@ -33,7 +33,7 @@ while [[ $# -gt 0 ]]; do
            echo ""
            echo "Options:"
            echo "  -t, --target TARGET      Build target (debug|release) [default: release]"
-            echo "  -p, --platform PLATFORM Target platform (windows|linux|macos|current)"
+            echo "  -p, --platform PLATFORM Target platform (windows|linux|macos|macos-intel|current)"
            echo "  -h, --help              Show this help message"
            echo ""
            exit 0
@@ -82,6 +82,10 @@ case $PLATFORM in
        LIB_NAME="libgurt_godot.so"
        ;;
    macos)
        RUST_TARGET="aarch64-apple-darwin"
        LIB_NAME="libgurt_godot.dylib"
        ;;
    macos-intel)
        RUST_TARGET="x86_64-apple-darwin"
        LIB_NAME="libgurt_godot.dylib"
        ;;
--- a/search-engine/src/crawler.rs
+++ b/search-engine/src/crawler.rs
@@ -158,7 +158,14 @@ impl DomainCrawler {
    }
    /// Parses the clanker.txt `content` fetched for `base_url`.
    ///
    /// After this change the method is a thin wrapper: it forwards `content`
    /// and `base_url` to `Self::parse_clanker_rules`, supplying the user agent
    /// from `self.config.search.crawler_user_agent`, and returns that result.
    fn parse_clanker_txt(&self, content: &str, base_url: &str) -> Result<Vec<String>> {
-        let user_agent = &self.config.search.crawler_user_agent;
+        Self::parse_clanker_rules(
            content,
            base_url,
            &self.config.search.crawler_user_agent,
        )
    }
    fn parse_clanker_rules(content: &str, base_url: &str, user_agent: &str) -> Result<Vec<String>> {
        let mut disallow_all = false;
        let mut user_agent_matches = false;
        let mut allowed_urls = Vec::new();
@@ -169,26 +176,31 @@ impl DomainCrawler {
                continue;
            }
-            if let Some(user_agent_value) = line.to_lowercase().strip_prefix("user-agent:") {
+            let (directive, value) = match line.split_once(':') {
-                let current_user_agent = user_agent_value.trim().to_string();
+                Some((directive, value)) => (directive.trim().to_lowercase(), value.trim()),
-                user_agent_matches = current_user_agent == "*" || current_user_agent.eq_ignore_ascii_case(user_agent);
+                None => continue,
            };
            if directive == "user-agent" {
                user_agent_matches =
                    value == "*" || value.eq_ignore_ascii_case(user_agent);
                continue;
            }
-            if user_agent_matches {
+            if !user_agent_matches {
-                if let Some(path_value) = line.to_lowercase().strip_prefix("disallow:") {
+                continue;
-                    let path = path_value.trim();
+            }
-                    if path == "/" {
+
-                        disallow_all = true;
+            if directive == "disallow" {
-                        break;
+                if value == "/" {
-                    }
+                    disallow_all = true;
-                } else if let Some(path_value) = line.to_lowercase().strip_prefix("allow:") {
+                    break;
-                    let path = path_value.trim();
+                }
-                    if !path.is_empty() {
+            } else if directive == "allow" {
-                        let full_url = Self::normalize_url(format!("{}{}", base_url, path));
+                if !value.is_empty() {
-                        debug!("Added allowed URL from clanker.txt: {}", full_url);
+                    let full_url = Self::normalize_url(format!("{}{}", base_url, value));
-                        allowed_urls.push(full_url);
+                    debug!("Added allowed URL from clanker.txt: {}", full_url);
-                    }
+                    allowed_urls.push(full_url);
                }
            }
        }
@@ -719,4 +731,41 @@ impl CrawlStats {
            duration_seconds: 0,
        }
    }
 }
 // Unit tests for `DomainCrawler::parse_clanker_rules`, called directly with
 // an explicit user agent so no crawler instance or config is needed.
 #[cfg(test)]
 mod tests {
    use super::DomainCrawler;
    // An `Allow` value containing upper-case characters must be appended to
    // the base URL with its original casing intact.
    #[test]
    fn parse_clanker_rules_preserves_case_in_allowed_urls() {
        let content = "User-agent: TestBot\nAllow: /getpage?l=Fri,12Sep2025000605_ZzesV.txt\n";
        let result = DomainCrawler::parse_clanker_rules(content, "gurt://wi.ki", "TestBot")
            .expect("expected allow list");
        assert_eq!(
            result,
            vec!["gurt://wi.ki/getpage?l=Fri,12Sep2025000605_ZzesV.txt".to_string()]
        );
    }
    // Directive names written in mixed case ("user-Agent", "AlLoW") must still
    // be recognised, while the path value keeps its own casing.
    #[test]
    fn parse_clanker_rules_handles_case_insensitive_directives() {
        let content = "user-Agent: AnotherBot\nAlLoW: /MiXeD/Path.HTML\n";
        let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "AnotherBot")
            .expect("expected allow list");
        assert_eq!(
            result,
            vec!["gurt://example/MiXeD/Path.HTML".to_string()]
        );
    }
    // A `Disallow: /` rule under a matching user agent is expected to make the
    // parser return an error rather than an allow list.
    #[test]
    fn parse_clanker_rules_respects_disallow_all() {
        let content = "User-agent: Bot\nDisallow: /\n";
        let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "Bot");
        assert!(result.is_err());
    }
 }