feat: html parser
This commit is contained in:
169
Scripts/B9/HTMLParser.gd
Normal file
169
Scripts/B9/HTMLParser.gd
Normal file
@@ -0,0 +1,169 @@
|
||||
class_name HTMLParser
|
||||
extends Node
|
||||
|
||||
class HTMLElement:
|
||||
var tag_name: String
|
||||
var attributes: Dictionary = {}
|
||||
var text_content: String = ""
|
||||
var children: Array[HTMLElement] = []
|
||||
var parent: HTMLElement = null
|
||||
var is_self_closing: bool = false
|
||||
|
||||
func _init(tag: String = ""):
|
||||
tag_name = tag
|
||||
|
||||
func get_attribute(name_: String) -> String:
|
||||
return attributes.get(name_, "")
|
||||
|
||||
func has_attribute(name_: String) -> bool:
|
||||
return attributes.has(name_)
|
||||
|
||||
func get_class_name() -> String:
|
||||
return get_attribute("class")
|
||||
|
||||
func get_id() -> String:
|
||||
return get_attribute("id")
|
||||
|
||||
class ParseResult:
|
||||
var root: HTMLElement
|
||||
var all_elements: Array[HTMLElement] = []
|
||||
var errors: Array[String] = []
|
||||
|
||||
func _init():
|
||||
root = HTMLElement.new("document")
|
||||
|
||||
# Properties
|
||||
var xml_parser: XMLParser
|
||||
var bitcode: PackedByteArray
|
||||
var parse_result: ParseResult
|
||||
|
||||
func _init(data: PackedByteArray):
|
||||
bitcode = data
|
||||
xml_parser = XMLParser.new()
|
||||
parse_result = ParseResult.new()
|
||||
|
||||
# Main parsing function
|
||||
func parse() -> ParseResult:
|
||||
xml_parser.open_buffer(bitcode)
|
||||
var element_stack: Array[HTMLElement] = [parse_result.root]
|
||||
|
||||
while xml_parser.read() != ERR_FILE_EOF:
|
||||
match xml_parser.get_node_type():
|
||||
XMLParser.NODE_ELEMENT:
|
||||
var element = create_element()
|
||||
var current_parent = element_stack.back()
|
||||
element.parent = current_parent
|
||||
current_parent.children.append(element)
|
||||
parse_result.all_elements.append(element)
|
||||
|
||||
if not element.is_self_closing:
|
||||
element_stack.append(element)
|
||||
|
||||
XMLParser.NODE_ELEMENT_END:
|
||||
if element_stack.size() > 1:
|
||||
element_stack.pop_back()
|
||||
|
||||
XMLParser.NODE_TEXT:
|
||||
var text = xml_parser.get_node_data().strip_edges()
|
||||
if text.length() > 0 and element_stack.size() > 0:
|
||||
element_stack.back().text_content += text
|
||||
|
||||
return parse_result
|
||||
|
||||
# Creates element from CURRENT xml parser node
|
||||
func create_element() -> HTMLElement:
|
||||
var element = HTMLElement.new(xml_parser.get_node_name())
|
||||
element.is_self_closing = xml_parser.is_empty()
|
||||
|
||||
# Parse attributes
|
||||
for i in range(xml_parser.get_attribute_count()):
|
||||
var attr_name = xml_parser.get_attribute_name(i)
|
||||
var attr_value = xml_parser.get_attribute_value(i)
|
||||
element.attributes[attr_name] = attr_value
|
||||
|
||||
return element
|
||||
|
||||
# Utility functions
|
||||
func find_all(tag: String, attribute: String = "") -> Array[HTMLElement]:
|
||||
if parse_result.all_elements.is_empty():
|
||||
parse()
|
||||
|
||||
var results: Array[HTMLElement] = []
|
||||
for element in parse_result.all_elements:
|
||||
if element.tag_name == tag:
|
||||
if attribute.is_empty() or element.has_attribute(attribute):
|
||||
results.append(element)
|
||||
|
||||
return results
|
||||
|
||||
func find_all_by_class(tag: String, the_class_name: String) -> Array[HTMLElement]:
|
||||
if parse_result.all_elements.is_empty():
|
||||
parse()
|
||||
|
||||
var results: Array[HTMLElement] = []
|
||||
for element in parse_result.all_elements:
|
||||
if element.tag_name == tag and element.get_class() == the_class_name:
|
||||
results.append(element)
|
||||
|
||||
return results
|
||||
|
||||
func find_by_id(element_id: String) -> HTMLElement:
|
||||
if parse_result.all_elements.is_empty():
|
||||
parse()
|
||||
|
||||
for element in parse_result.all_elements:
|
||||
if element.get_id() == element_id:
|
||||
return element
|
||||
|
||||
return null
|
||||
|
||||
func find_first(tag: String, attribute: String = "") -> HTMLElement:
|
||||
var results = find_all(tag, attribute)
|
||||
return results[0] if results.size() > 0 else null
|
||||
|
||||
# Extract attribute values
|
||||
func get_attribute_values(tag: String, attribute: String) -> Array[String]:
|
||||
var elements = find_all(tag, attribute)
|
||||
var values: Array[String] = []
|
||||
|
||||
for element in elements:
|
||||
var value = element.get_attribute(attribute)
|
||||
if value.length() > 0:
|
||||
values.append(value)
|
||||
|
||||
return values
|
||||
|
||||
func get_attribute_values_by_class(tag: String, the_class_name: String, attribute: String) -> Array[String]:
|
||||
var elements = find_all_by_class(tag, the_class_name)
|
||||
var values: Array[String] = []
|
||||
|
||||
for element in elements:
|
||||
var value = element.get_attribute(attribute)
|
||||
if value.length() > 0:
|
||||
values.append(value)
|
||||
|
||||
return values
|
||||
|
||||
# Misc
|
||||
func get_title() -> String:
|
||||
var title_element = find_first("title")
|
||||
return title_element.text_content if title_element != null else ""
|
||||
|
||||
func get_meta_content(name_: String) -> String:
|
||||
var meta_elements = find_all("meta", "name")
|
||||
for element in meta_elements:
|
||||
if element.get_attribute("name") == name_:
|
||||
return element.get_attribute("content")
|
||||
return ""
|
||||
|
||||
func get_all_links() -> Array[String]:
|
||||
return get_attribute_values("a", "href")
|
||||
|
||||
func get_all_images() -> Array[String]:
|
||||
return get_attribute_values("img", "src")
|
||||
|
||||
func get_all_scripts() -> Array[String]:
|
||||
return get_attribute_values("script", "src")
|
||||
|
||||
func get_all_stylesheets() -> Array[String]:
|
||||
return get_attribute_values("style", "src")
|
||||
Reference in New Issue
Block a user