Fix text-related tag events, remove warnings, and make the search engine fetch / instead of fetching /index.html separately
This commit is contained in:
@@ -80,12 +80,11 @@ static func unescape_html_entities(text: String) -> String:
|
||||
static func preprocess_html_entities(html: String) -> String:
|
||||
var result = ""
|
||||
var i = 0
|
||||
var in_tag = false
|
||||
|
||||
while i < html.length():
|
||||
var char = html[i]
|
||||
var character = html[i]
|
||||
|
||||
if char == "<":
|
||||
if character == "<":
|
||||
# Check if this starts a valid HTML tag
|
||||
var tag_end = html.find(">", i)
|
||||
if tag_end != -1:
|
||||
@@ -97,11 +96,11 @@ static func preprocess_html_entities(html: String) -> String:
|
||||
continue
|
||||
# If not a valid tag, escape it
|
||||
result += "<"
|
||||
elif char == ">":
|
||||
elif character == ">":
|
||||
# Escape standalone > that's not part of a tag
|
||||
result += ">"
|
||||
else:
|
||||
result += char
|
||||
result += character
|
||||
|
||||
i += 1
|
||||
|
||||
@@ -431,7 +430,7 @@ func get_all_images() -> Array[String]:
|
||||
# Collects script sources by asking get_attribute_values for the "src"
# attribute of "script" tags and returning its result unchanged.
# NOTE(review): presumably one entry per <script src="..."> element in the
# parsed document - confirm against get_attribute_values.
func get_all_scripts() -> Array[String]:
|
||||
return get_attribute_values("script", "src")
|
||||
|
||||
func process_scripts(lua_api: LuaAPI, lua_vm) -> void:
|
||||
func process_scripts(lua_api: LuaAPI, _lua_vm) -> void:
|
||||
if not lua_api:
|
||||
print("Warning: Lua API not available for script processing")
|
||||
return
|
||||
@@ -447,9 +446,9 @@ func process_scripts(lua_api: LuaAPI, lua_vm) -> void:
|
||||
parse_result.external_scripts = []
|
||||
parse_result.external_scripts.append(src)
|
||||
elif not inline_code.is_empty():
|
||||
lua_api.execute_lua_script(inline_code, lua_vm)
|
||||
lua_api.execute_lua_script(inline_code)
|
||||
|
||||
func process_external_scripts(lua_api: LuaAPI, lua_vm, base_url: String = "") -> void:
|
||||
func process_external_scripts(lua_api: LuaAPI, _lua_vm, base_url: String = "") -> void:
|
||||
if not lua_api or not parse_result.external_scripts or parse_result.external_scripts.is_empty():
|
||||
return
|
||||
|
||||
@@ -458,7 +457,7 @@ func process_external_scripts(lua_api: LuaAPI, lua_vm, base_url: String = "") ->
|
||||
for script_url in parse_result.external_scripts:
|
||||
var script_content = await Network.fetch_external_resource(script_url, base_url)
|
||||
if not script_content.is_empty():
|
||||
lua_api.execute_lua_script(script_content, lua_vm)
|
||||
lua_api.execute_lua_script(script_content)
|
||||
|
||||
func get_all_stylesheets() -> Array[String]:
|
||||
return get_attribute_values("style", "src")
|
||||
@@ -470,7 +469,7 @@ func apply_element_styles(node: Control, element: HTMLElement, parser: HTMLParse
|
||||
var text = HTMLParser.get_bbcode_with_styles(element, styles, parser, [])
|
||||
label.text = text
|
||||
|
||||
static func apply_element_bbcode_formatting(element: HTMLElement, styles: Dictionary, content: String, parser: HTMLParser = null) -> String:
|
||||
static func apply_element_bbcode_formatting(element: HTMLElement, styles: Dictionary, content: String) -> String:
|
||||
# Apply general styling first (color, font-weight) for all elements
|
||||
var formatted_content = content
|
||||
|
||||
@@ -553,10 +552,10 @@ static func get_bbcode_with_styles(element: HTMLElement, styles: Dictionary, par
|
||||
if parser != null:
|
||||
child_styles = parser.get_element_styles_with_inheritance(child, "", new_visited)
|
||||
var child_content = HTMLParser.get_bbcode_with_styles(child, child_styles, parser, new_visited)
|
||||
child_content = apply_element_bbcode_formatting(child, child_styles, child_content, parser)
|
||||
child_content = apply_element_bbcode_formatting(child, child_styles, child_content)
|
||||
text += child_content
|
||||
|
||||
# Apply formatting to the current element itself
|
||||
text = apply_element_bbcode_formatting(element, styles, text, parser)
|
||||
text = apply_element_bbcode_formatting(element, styles, text)
|
||||
|
||||
return text
|
||||
|
||||
@@ -155,7 +155,7 @@ func _gurt_clear_interval_handler(vm: LuauVM) -> int:
|
||||
return timeout_manager.clear_interval_handler(vm)
|
||||
|
||||
# Location API handlers
|
||||
func _gurt_location_reload_handler(vm: LuauVM) -> int:
|
||||
func _gurt_location_reload_handler(_vm: LuauVM) -> int:
|
||||
call_deferred("_reload_current_page")
|
||||
return 0
|
||||
|
||||
@@ -634,7 +634,7 @@ func get_dom_node(node: Node, purpose: String = "general") -> Node:
|
||||
return node
|
||||
|
||||
# Main execution function
|
||||
func execute_lua_script(code: String, vm: LuauVM):
|
||||
func execute_lua_script(code: String):
|
||||
if not threaded_vm.lua_thread or not threaded_vm.lua_thread.is_alive():
|
||||
# Start the thread if it's not running
|
||||
threaded_vm.start_lua_thread(dom_parser, self)
|
||||
@@ -642,8 +642,8 @@ func execute_lua_script(code: String, vm: LuauVM):
|
||||
script_start_time = Time.get_ticks_msec() / 1000.0
|
||||
threaded_vm.execute_script_async(code)
|
||||
|
||||
func _on_threaded_script_completed(result: Dictionary):
|
||||
var execution_time = (Time.get_ticks_msec() / 1000.0) - script_start_time
|
||||
func _on_threaded_script_completed(_result: Dictionary):
|
||||
pass
|
||||
|
||||
func _on_print_output(message: String):
|
||||
LuaPrintUtils.lua_print_direct(message)
|
||||
@@ -684,7 +684,7 @@ func _handle_dom_operation(operation: Dictionary):
|
||||
"insert_before":
|
||||
LuaDOMUtils.handle_insert_before(operation, dom_parser, self)
|
||||
"insert_after":
|
||||
LuaDOMUtils.handle_insert_after(operation, dom_parser, self)
|
||||
LuaDOMUtils.handle_insert_after(operation, dom_parser)
|
||||
"replace_child":
|
||||
LuaDOMUtils.handle_replace_child(operation, dom_parser, self)
|
||||
"focus_element":
|
||||
|
||||
@@ -87,7 +87,6 @@ func fetch_text(url: String) -> String:
|
||||
|
||||
var result = response[0] # HTTPClient.Result
|
||||
var response_code = response[1] # int
|
||||
var headers = response[2] # PackedStringArray
|
||||
var body = response[3] # PackedByteArray
|
||||
|
||||
http_request.queue_free()
|
||||
@@ -104,7 +103,7 @@ func fetch_external_resource(url: String, base_url: String = "") -> String:
|
||||
if resolved_url.begins_with("http://") or resolved_url.begins_with("https://"):
|
||||
return await fetch_text(resolved_url)
|
||||
elif resolved_url.begins_with("gurt://"):
|
||||
return await fetch_gurt_resource(resolved_url)
|
||||
return fetch_gurt_resource(resolved_url)
|
||||
else:
|
||||
return ""
|
||||
|
||||
|
||||
@@ -118,7 +118,7 @@ func load_audio_async(src: String) -> void:
|
||||
http_request.queue_free()
|
||||
return
|
||||
|
||||
func _on_audio_download_completed(result: int, response_code: int, headers: PackedStringArray, body: PackedByteArray):
|
||||
func _on_audio_download_completed(_result: int, response_code: int, headers: PackedStringArray, body: PackedByteArray):
|
||||
var http_request = get_children().filter(func(child): return child is HTTPRequest)[0]
|
||||
http_request.queue_free()
|
||||
|
||||
|
||||
@@ -62,6 +62,8 @@ func create_styled_label(text: String, element, parser: HTMLParser) -> RichTextL
|
||||
|
||||
add_child(label)
|
||||
|
||||
parser.register_dom_node(element, label)
|
||||
|
||||
var styles = parser.get_element_styles_with_inheritance(element, "", [])
|
||||
StyleManager.apply_styles_to_label(label, styles, element, parser, text)
|
||||
|
||||
@@ -169,5 +171,9 @@ func create_label(text: String) -> RichTextLabel:
|
||||
label.size_flags_vertical = Control.SIZE_SHRINK_CENTER
|
||||
|
||||
add_child(label)
|
||||
|
||||
if _element and _parser:
|
||||
_parser.register_dom_node(_element, label)
|
||||
|
||||
call_deferred("_apply_auto_resize_to_label", label)
|
||||
return label
|
||||
|
||||
@@ -227,7 +227,7 @@ static func _on_child_mouse_entered(panel: PanelContainer):
|
||||
_on_panel_mouse_entered(panel)
|
||||
|
||||
static func _on_child_mouse_exited(panel: PanelContainer):
|
||||
panel.get_tree().create_timer(0.01).timeout.connect(func(): _check_panel_hover(panel))
|
||||
_create_panel_check_timer(panel)
|
||||
|
||||
static func _on_panel_mouse_entered(panel: PanelContainer):
|
||||
panel.set_meta("is_hovering", true)
|
||||
@@ -241,7 +241,19 @@ static func _on_panel_mouse_entered(panel: PanelContainer):
|
||||
StyleManager.apply_transform_properties_direct(transform_target, hover_styles)
|
||||
|
||||
static func _on_panel_mouse_exited_with_delay(panel: PanelContainer):
|
||||
panel.get_tree().create_timer(0.01).timeout.connect(func(): _check_panel_hover(panel))
|
||||
_create_panel_check_timer(panel)
|
||||
|
||||
# Schedules a deferred hover re-check for `panel` 0.01 seconds from now.
#
# The timer callback only holds a weak reference to the panel, so a panel that
# is freed before the timer fires is skipped instead of being dereferenced.
# Nothing is scheduled when the panel is already invalid or is no longer part
# of the scene tree (get_tree() would return null there and the create_timer
# call would fail).
static func _create_panel_check_timer(panel: PanelContainer):
	if not is_instance_valid(panel) or not panel.is_inside_tree():
		return

	var panel_ref = weakref(panel)
	var timer = panel.get_tree().create_timer(0.01)
	timer.timeout.connect(func(): _check_panel_hover_safe(panel_ref))
|
||||
|
||||
# Timer callback used by the delayed hover check.
# Resolves the weak reference and forwards to _check_panel_hover only when the
# referenced panel still exists; otherwise does nothing.
static func _check_panel_hover_safe(panel_ref: WeakRef):
	var live_panel = panel_ref.get_ref()
	if not live_panel:
		return
	_check_panel_hover(live_panel)
|
||||
|
||||
static func _check_panel_hover(panel: PanelContainer):
|
||||
if not panel or not is_instance_valid(panel):
|
||||
|
||||
@@ -205,7 +205,6 @@ static func load_all_crumbs(domain: String) -> Dictionary:
|
||||
return {}
|
||||
|
||||
var crumbs = {}
|
||||
var current_time = Time.get_ticks_msec() / 1000.0
|
||||
var changed = false
|
||||
|
||||
for crumb_name in crumbs_data:
|
||||
|
||||
@@ -127,7 +127,7 @@ static func handle_element_append(operation: Dictionary, dom_parser: HTMLParser,
|
||||
|
||||
if parent_dom_node:
|
||||
# Render the appended element
|
||||
render_new_element.call_deferred(child_element, parent_dom_node, dom_parser, lua_api)
|
||||
render_new_element.call_deferred(child_element, parent_dom_node, dom_parser)
|
||||
|
||||
static func handle_element_remove(operation: Dictionary, dom_parser: HTMLParser) -> void:
|
||||
var element_id: String = operation.element_id
|
||||
@@ -190,9 +190,9 @@ static func handle_insert_before(operation: Dictionary, dom_parser: HTMLParser,
|
||||
parent_dom_node = dom_parser.parse_result.dom_nodes.get(parent_id, null)
|
||||
|
||||
if parent_dom_node:
|
||||
handle_visual_insertion_by_reference(parent_id, new_child_element, reference_child_id, true, dom_parser, lua_api)
|
||||
handle_visual_insertion_by_reference(parent_id, new_child_element, reference_child_id, true, dom_parser)
|
||||
|
||||
static func handle_insert_after(operation: Dictionary, dom_parser: HTMLParser, lua_api) -> void:
|
||||
static func handle_insert_after(operation: Dictionary, dom_parser: HTMLParser) -> void:
|
||||
var parent_id: String = operation.parent_id
|
||||
var new_child_id: String = operation.new_child_id
|
||||
var reference_child_id: String = operation.reference_child_id
|
||||
@@ -229,7 +229,7 @@ static func handle_insert_after(operation: Dictionary, dom_parser: HTMLParser, l
|
||||
parent_dom_node = dom_parser.parse_result.dom_nodes.get(parent_id, null)
|
||||
|
||||
if parent_dom_node:
|
||||
handle_visual_insertion_by_reference(parent_id, new_child_element, reference_child_id, false, dom_parser, lua_api)
|
||||
handle_visual_insertion_by_reference(parent_id, new_child_element, reference_child_id, false, dom_parser)
|
||||
|
||||
static func handle_replace_child(operation: Dictionary, dom_parser: HTMLParser, lua_api) -> void:
|
||||
var parent_id: String = operation.parent_id
|
||||
@@ -262,13 +262,12 @@ static func handle_replace_child(operation: Dictionary, dom_parser: HTMLParser,
|
||||
# Handle visual rendering
|
||||
handle_visual_replacement(old_child_id, new_child_element, parent_id, dom_parser, lua_api)
|
||||
|
||||
static func render_new_element(element: HTMLParser.HTMLElement, parent_node: Node, dom_parser: HTMLParser, lua_api) -> void:
|
||||
static func render_new_element(element: HTMLParser.HTMLElement, parent_node: Node, dom_parser: HTMLParser) -> void:
|
||||
# Get reference to main scene for rendering
|
||||
var main_scene = Engine.get_main_loop().current_scene
|
||||
if not main_scene:
|
||||
return
|
||||
|
||||
var element_id = element.get_attribute("id")
|
||||
|
||||
# Create the visual node for the element
|
||||
var element_node = await main_scene.create_element_node(element, dom_parser)
|
||||
@@ -341,7 +340,7 @@ static func _find_input_control_with_file_info(node: Node) -> Node:
|
||||
|
||||
return null
|
||||
|
||||
static func _get_select_value(element: HTMLParser.HTMLElement, dom_node: Node) -> String:
|
||||
static func _get_select_value(_element: HTMLParser.HTMLElement, dom_node: Node) -> String:
|
||||
if dom_node is OptionButton:
|
||||
var option_button = dom_node as OptionButton
|
||||
var selected_index = option_button.selected
|
||||
@@ -353,7 +352,7 @@ static func _get_select_value(element: HTMLParser.HTMLElement, dom_node: Node) -
|
||||
return option_button.get_item_text(selected_index)
|
||||
return ""
|
||||
|
||||
static func _set_select_value(element: HTMLParser.HTMLElement, dom_node: Node, value: Variant) -> void:
|
||||
static func _set_select_value(_element: HTMLParser.HTMLElement, dom_node: Node, value: Variant) -> void:
|
||||
if dom_node is OptionButton:
|
||||
var option_button = dom_node as OptionButton
|
||||
var target_value = str(value)
|
||||
@@ -433,7 +432,7 @@ static func clone_element(element: HTMLParser.HTMLElement, deep: bool) -> HTMLPa
|
||||
return cloned
|
||||
|
||||
|
||||
static func handle_visual_insertion_by_reference(parent_element_id: String, new_child_element: HTMLParser.HTMLElement, reference_element_id: String, insert_before: bool, dom_parser: HTMLParser, lua_api) -> void:
|
||||
static func handle_visual_insertion_by_reference(parent_element_id: String, new_child_element: HTMLParser.HTMLElement, reference_element_id: String, insert_before: bool, dom_parser: HTMLParser) -> void:
|
||||
var parent_dom_node: Node = null
|
||||
if parent_element_id == "body":
|
||||
var main_scene = Engine.get_main_loop().current_scene
|
||||
@@ -646,7 +645,7 @@ static func add_element_methods(vm: LuauVM, lua_api: LuaAPI) -> void:
|
||||
vm.lua_pushcallable(LuaDOMUtils._element_unfocus_wrapper, "element.unfocus")
|
||||
vm.lua_setfield(-2, "unfocus")
|
||||
|
||||
_add_classlist_support(vm, lua_api)
|
||||
add_classlist_support(vm)
|
||||
|
||||
vm.lua_newtable()
|
||||
vm.lua_pushcallable(LuaDOMUtils._element_index_wrapper, "element.__index")
|
||||
@@ -876,7 +875,7 @@ static func _element_clone_wrapper(vm: LuauVM) -> int:
|
||||
var cloned_element = clone_element(element, deep)
|
||||
|
||||
# Assign new ID to cloned element
|
||||
var new_id = lua_api.get_or_assign_element_id(cloned_element)
|
||||
lua_api.get_or_assign_element_id(cloned_element)
|
||||
|
||||
# Add to parser's element collection
|
||||
lua_api.dom_parser.parse_result.all_elements.append(cloned_element)
|
||||
@@ -1074,7 +1073,7 @@ static func _element_index_wrapper(vm: LuauVM) -> int:
|
||||
vm.lua_remove(-2)
|
||||
return 1
|
||||
|
||||
static func _add_classlist_support(vm: LuauVM, lua_api: LuaAPI) -> void:
|
||||
static func add_classlist_support(vm: LuauVM) -> void:
|
||||
vm.lua_newtable()
|
||||
|
||||
vm.lua_getfield(-2, "_element_id")
|
||||
@@ -1173,7 +1172,6 @@ static func _classlist_toggle_wrapper(vm: LuauVM) -> int:
|
||||
return 0
|
||||
|
||||
static func _classlist_contains_wrapper(vm: LuauVM) -> int:
|
||||
var start_time = Time.get_ticks_msec()
|
||||
|
||||
var lua_api = vm.get_meta("lua_api") as LuaAPI
|
||||
if not lua_api:
|
||||
|
||||
@@ -220,13 +220,13 @@ static func disconnect_subscription(subscription, lua_api) -> void:
|
||||
match subscription.connected_signal:
|
||||
"pressed":
|
||||
if target_node.has_signal("pressed"):
|
||||
if subscription.has("wrapper_func") and subscription.wrapper_func:
|
||||
if subscription.wrapper_func:
|
||||
target_node.pressed.disconnect(subscription.wrapper_func)
|
||||
else:
|
||||
target_node.pressed.disconnect(lua_api._on_event_triggered.bind(subscription))
|
||||
"gui_input":
|
||||
if target_node.has_signal("gui_input"):
|
||||
if subscription.has("wrapper_func") and subscription.wrapper_func:
|
||||
if subscription.wrapper_func:
|
||||
target_node.gui_input.disconnect(subscription.wrapper_func)
|
||||
else:
|
||||
target_node.gui_input.disconnect(lua_api._on_gui_input_click.bind(subscription))
|
||||
|
||||
@@ -30,8 +30,7 @@ static func _lua_json_parse_handler(vm: LuauVM) -> int:
|
||||
static func _lua_json_stringify_handler(vm: LuauVM) -> int:
|
||||
var value = vm.lua_tovariant(1)
|
||||
|
||||
var json = JSON.new()
|
||||
var json_string = json.stringify(value)
|
||||
var json_string = JSON.stringify(value)
|
||||
|
||||
vm.lua_pushstring(json_string)
|
||||
return 1
|
||||
|
||||
@@ -253,7 +253,6 @@ func _print_handler(vm: LuauVM) -> int:
|
||||
message_parts.append(arg_str)
|
||||
|
||||
var final_message = "\t".join(message_parts)
|
||||
var current_time = Time.get_ticks_msec() / 1000.0
|
||||
|
||||
call_deferred("_emit_print_output", final_message)
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ class WebSocketWrapper:
|
||||
var vm: LuauVM
|
||||
var url: String
|
||||
var websocket: WebSocketPeer
|
||||
var is_connected: bool = false
|
||||
var connection_status: bool = false
|
||||
var event_handlers: Dictionary = {}
|
||||
var timer: Timer
|
||||
var last_state: int = -1
|
||||
@@ -20,7 +20,7 @@ class WebSocketWrapper:
|
||||
websocket = WebSocketPeer.new()
|
||||
|
||||
func connect_to_url():
|
||||
if is_connected:
|
||||
if connection_status:
|
||||
return
|
||||
|
||||
var error = websocket.connect_to_url(url)
|
||||
@@ -54,8 +54,8 @@ class WebSocketWrapper:
|
||||
|
||||
match state:
|
||||
WebSocketPeer.STATE_OPEN:
|
||||
if not is_connected:
|
||||
is_connected = true
|
||||
if not connection_status:
|
||||
connection_status = true
|
||||
trigger_event("open", {})
|
||||
|
||||
# Check for messages
|
||||
@@ -65,8 +65,8 @@ class WebSocketWrapper:
|
||||
trigger_event("message", {"data": message})
|
||||
|
||||
WebSocketPeer.STATE_CLOSED:
|
||||
if is_connected:
|
||||
is_connected = false
|
||||
if connection_status:
|
||||
connection_status = false
|
||||
trigger_event("close", {})
|
||||
|
||||
# Clean up timer
|
||||
@@ -80,26 +80,26 @@ class WebSocketWrapper:
|
||||
|
||||
WebSocketPeer.STATE_CLOSING:
|
||||
# Connection is closing
|
||||
if is_connected:
|
||||
is_connected = false
|
||||
if connection_status:
|
||||
connection_status = false
|
||||
|
||||
_:
|
||||
# Unknown state or connection failed
|
||||
if is_connected:
|
||||
is_connected = false
|
||||
if connection_status:
|
||||
connection_status = false
|
||||
trigger_event("close", {})
|
||||
elif not is_connected:
|
||||
elif not connection_status:
|
||||
# This might be a connection failure
|
||||
trigger_event("error", {"message": "Connection failed or was rejected by server"})
|
||||
|
||||
func send_message(message: String):
|
||||
if is_connected and websocket:
|
||||
if connection_status and websocket:
|
||||
websocket.send_text(message)
|
||||
|
||||
func close_connection():
|
||||
if websocket:
|
||||
websocket.close()
|
||||
is_connected = false
|
||||
connection_status = false
|
||||
|
||||
if timer:
|
||||
timer.queue_free()
|
||||
@@ -222,7 +222,7 @@ static func _websocket_send(vm: LuauVM) -> int:
|
||||
|
||||
# Get wrapper instance
|
||||
var wrapper: WebSocketWrapper = websocket_instances.get(instance_id)
|
||||
if wrapper and wrapper.is_connected:
|
||||
if wrapper and wrapper.connection_status:
|
||||
wrapper.send_message(message)
|
||||
else:
|
||||
vm.luaL_error("WebSocket is not connected")
|
||||
|
||||
@@ -7,13 +7,22 @@ local stats = gurt.select('#stats')
|
||||
|
||||
local function showLoading()
|
||||
loading.classList:remove('hidden')
|
||||
results.text = ''
|
||||
|
||||
local children = results.children
|
||||
for i = #children, 1, -1 do
|
||||
children[i]:remove()
|
||||
end
|
||||
|
||||
stats.text = ''
|
||||
end
|
||||
|
||||
local function displayResults(data)
|
||||
loading.classList:add('hidden')
|
||||
results.text = ''
|
||||
|
||||
local children = results.children
|
||||
for i = #children, 1, -1 do
|
||||
children[i]:remove()
|
||||
end
|
||||
|
||||
if not data.results or #data.results == 0 then
|
||||
local noResultsItem = gurt.create('div', {
|
||||
@@ -94,7 +103,13 @@ local function performSearch(query)
|
||||
displayResults(data)
|
||||
else
|
||||
loading.classList:add('hidden')
|
||||
results.text = ''
|
||||
|
||||
-- Clear all existing children from results
|
||||
local children = results.children
|
||||
for i = #children, 1, -1 do
|
||||
children[i]:remove()
|
||||
end
|
||||
|
||||
stats.text = 'Search failed: ' .. response.status .. ' ' .. response.statusText
|
||||
end
|
||||
end
|
||||
@@ -117,12 +132,22 @@ local function performLuckySearch()
|
||||
gurt.location.goto(randomResult.url)
|
||||
else
|
||||
loading.classList:add('hidden')
|
||||
results.text = ''
|
||||
|
||||
local children = results.children
|
||||
for i = #children, 1, -1 do
|
||||
children[i]:remove()
|
||||
end
|
||||
|
||||
stats.text = 'No sites available for lucky search'
|
||||
end
|
||||
else
|
||||
loading.classList:add('hidden')
|
||||
results.text = ''
|
||||
|
||||
local children = results.children
|
||||
for i = #children, 1, -1 do
|
||||
children[i]:remove()
|
||||
end
|
||||
|
||||
stats.text = 'Lucky search failed'
|
||||
end
|
||||
end
|
||||
|
||||
@@ -185,7 +185,7 @@ impl DomainCrawler {
|
||||
} else if let Some(path_value) = line.to_lowercase().strip_prefix("allow:") {
|
||||
let path = path_value.trim();
|
||||
if !path.is_empty() {
|
||||
let full_url = format!("{}{}", base_url, path);
|
||||
let full_url = Self::normalize_url(format!("{}{}", base_url, path));
|
||||
debug!("Added allowed URL from clanker.txt: {}", full_url);
|
||||
allowed_urls.push(full_url);
|
||||
}
|
||||
@@ -222,19 +222,21 @@ impl DomainCrawler {
|
||||
}
|
||||
|
||||
// Start with the root URL
|
||||
let normalized_base_url = Self::normalize_url(base_url.clone());
|
||||
queue.push_back(CrawlItem {
|
||||
url: base_url.clone(),
|
||||
url: normalized_base_url,
|
||||
depth: 0,
|
||||
});
|
||||
|
||||
// Add all URLs from clanker.txt to the queue
|
||||
for url in clanker_urls {
|
||||
if !visited_urls.contains(&url) {
|
||||
let normalized_url = Self::normalize_url(url);
|
||||
if !visited_urls.contains(&normalized_url) {
|
||||
queue.push_back(CrawlItem {
|
||||
url: url.clone(),
|
||||
url: normalized_url.clone(),
|
||||
depth: 0, // Treat clanker.txt URLs as root level
|
||||
});
|
||||
debug!("Added clanker.txt URL to queue: {}", url);
|
||||
debug!("Added clanker.txt URL to queue: {}", normalized_url);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -268,10 +270,11 @@ impl DomainCrawler {
|
||||
if let Ok(links) = self.extract_links(&page_with_html.original_html, &base_url).await {
|
||||
debug!("Found {} links on {}", links.len(), item.url);
|
||||
for link in links {
|
||||
if self.should_crawl_url(&link, domain) {
|
||||
debug!("Adding link to crawl queue: {}", link);
|
||||
let normalized_link = Self::normalize_url(link);
|
||||
if self.should_crawl_url(&normalized_link, domain) && !visited_urls.contains(&normalized_link) {
|
||||
debug!("Adding link to crawl queue: {}", normalized_link);
|
||||
queue.push_back(CrawlItem {
|
||||
url: link,
|
||||
url: normalized_link,
|
||||
depth: item.depth + 1,
|
||||
});
|
||||
}
|
||||
@@ -358,7 +361,7 @@ impl DomainCrawler {
|
||||
|
||||
let page = CrawledPageWithHtml {
|
||||
crawled_page: CrawledPage {
|
||||
url: url.to_string(),
|
||||
url: Self::normalize_url(url.to_string()),
|
||||
domain: domain.full_domain(),
|
||||
title,
|
||||
content: cleaned_content.clone(),
|
||||
@@ -398,7 +401,7 @@ impl DomainCrawler {
|
||||
// Resolve relative URLs
|
||||
match base.join(href) {
|
||||
Ok(absolute_url) => {
|
||||
let url_str = absolute_url.to_string();
|
||||
let url_str = Self::normalize_url(absolute_url.to_string());
|
||||
|
||||
// Only include GURT protocol URLs for the same domain
|
||||
if url_str.starts_with("gurt://") {
|
||||
@@ -601,6 +604,19 @@ impl DomainCrawler {
|
||||
false
|
||||
}
|
||||
|
||||
/// Collapses a URL ending in `/index.html` to its directory form.
///
/// `gurt://site/index.html` becomes `gurt://site/`, so that a directory and
/// its index document are treated as the same URL. The result always ends
/// with exactly the slash that preceded `index.html` (or a freshly appended
/// one when the remaining prefix has no trailing slash).
///
/// Only an exact `/index.html` suffix is matched; any other URL, including
/// one where `index.html` is followed by a query string or fragment, is
/// returned unchanged.
fn normalize_url(url: String) -> String {
    // `strip_suffix` keeps the suffix literal in one place instead of
    // pairing `ends_with` with a hand-counted slice length.
    if let Some(directory) = url.strip_suffix("/index.html") {
        return if directory.ends_with('/') {
            directory.to_string()
        } else {
            format!("{}/", directory)
        };
    }
    url
}
|
||||
|
||||
fn calculate_content_hash(content: &str) -> String {
|
||||
use sha2::{Sha256, Digest};
|
||||
let mut hasher = Sha256::new();
|
||||
|
||||
@@ -50,7 +50,7 @@
|
||||
if #logMessages > 20 then
|
||||
table.remove(logMessages, 1)
|
||||
end
|
||||
logArea.text = table.concat(logMessages, '\\n')
|
||||
logArea.text = table.concat(logMessages, '\n')
|
||||
end
|
||||
|
||||
-- Function to update status
|
||||
|
||||
Reference in New Issue
Block a user