update
Some checks failed
Build Gurty / Build Gurty (, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 1m33s
Build GurtCA / Build GurtCA (, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 11m20s
Build GDExtension / Build GDExtension (libgurt_godot.so, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 16m9s
Build Flumi / Build Flumi (Linux, 4.4.1, ubuntu-latest, linux) (push) Failing after 2h10m11s
Build Flumi / Build Flumi (Windows Desktop, 4.4.1, windows-latest, windows) (push) Has been cancelled
Build GDExtension / Build GDExtension (gurt_godot.dll, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
Build GurtCA / Build GurtCA (.exe, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
Build Gurty / Build Gurty (.exe, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
Some checks failed
Build Gurty / Build Gurty (, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 1m33s
Build GurtCA / Build GurtCA (, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 11m20s
Build GDExtension / Build GDExtension (libgurt_godot.so, ubuntu-latest, linux, x86_64-unknown-linux-gnu) (push) Failing after 16m9s
Build Flumi / Build Flumi (Linux, 4.4.1, ubuntu-latest, linux) (push) Failing after 2h10m11s
Build Flumi / Build Flumi (Windows Desktop, 4.4.1, windows-latest, windows) (push) Has been cancelled
Build GDExtension / Build GDExtension (gurt_godot.dll, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
Build GurtCA / Build GurtCA (.exe, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
Build Gurty / Build Gurty (.exe, windows-latest, windows, x86_64-pc-windows-msvc) (push) Has been cancelled
This commit is contained in:
38
search-engine/Cargo.lock
generated
38
search-engine/Cargo.lock
generated
@@ -967,24 +967,6 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "gurt"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"rustls 0.23.31",
|
||||
"rustls-native-certs",
|
||||
"rustls-pemfile 2.2.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gurted-search-engine"
|
||||
version = "0.1.0"
|
||||
@@ -995,7 +977,7 @@ dependencies = [
|
||||
"clap",
|
||||
"futures",
|
||||
"glob",
|
||||
"gurt",
|
||||
"gurtlib",
|
||||
"lol_html",
|
||||
"mime",
|
||||
"regex",
|
||||
@@ -1016,6 +998,24 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gurtlib"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"rustls 0.23.31",
|
||||
"rustls-native-certs",
|
||||
"rustls-pemfile 2.2.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.27"
|
||||
|
||||
@@ -76,7 +76,7 @@ impl Config {
|
||||
}
|
||||
|
||||
pub fn gurt_protocol_url(&self) -> String {
|
||||
format!("gurt://{}:{}", self.server.address, self.server.port)
|
||||
format!("lw://{}:{}", self.server.address, self.server.port)
|
||||
}
|
||||
|
||||
pub fn is_allowed_extension(&self, extension: &str) -> bool {
|
||||
|
||||
@@ -418,7 +418,7 @@ impl DomainCrawler {
|
||||
let url_str = Self::normalize_url(absolute_url.to_string());
|
||||
|
||||
// Only include GURT protocol URLs for the same domain
|
||||
if url_str.starts_with("gurt://") {
|
||||
if url_str.starts_with("lw://") {
|
||||
if let Ok(parsed) = Url::parse(&url_str) {
|
||||
if let Some(host) = parsed.host_str() {
|
||||
if let Ok(base_parsed) = Url::parse(base_url) {
|
||||
@@ -742,31 +742,31 @@ mod tests {
|
||||
#[test]
|
||||
fn parse_clanker_rules_preserves_case_in_allowed_urls() {
|
||||
let content = "User-agent: TestBot\nAllow: /getpage?l=Fri,12Sep2025000605_ZzesV.txt\n";
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "gurt://wi.ki", "TestBot")
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "lw://wi.ki", "TestBot")
|
||||
.expect("expected allow list");
|
||||
|
||||
assert_eq!(
|
||||
result,
|
||||
vec!["gurt://wi.ki/getpage?l=Fri,12Sep2025000605_ZzesV.txt".to_string()]
|
||||
vec!["lw://wi.ki/getpage?l=Fri,12Sep2025000605_ZzesV.txt".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_clanker_rules_handles_case_insensitive_directives() {
|
||||
let content = "user-Agent: AnotherBot\nAlLoW: /MiXeD/Path.HTML\n";
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "AnotherBot")
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "lw://example", "AnotherBot")
|
||||
.expect("expected allow list");
|
||||
|
||||
assert_eq!(
|
||||
result,
|
||||
vec!["gurt://example/MiXeD/Path.HTML".to_string()]
|
||||
vec!["lw://example/MiXeD/Path.HTML".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_clanker_rules_respects_disallow_all() {
|
||||
let content = "User-agent: Bot\nDisallow: /\n";
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "gurt://example", "Bot");
|
||||
let result = DomainCrawler::parse_clanker_rules(content, "lw://example", "Bot");
|
||||
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ impl Domain {
|
||||
}
|
||||
|
||||
pub fn gurt_url(&self) -> String {
|
||||
format!("gurt://{}.{}", self.name, self.tld)
|
||||
format!("lw://{}.{}", self.name, self.tld)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user