diff --git a/Cargo.lock b/Cargo.lock index 6c64fb6..a0eedfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -147,7 +147,9 @@ dependencies = [ name = "htmlextract" version = "0.1.0" dependencies = [ + "html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)", "kuchiki 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/Cargo.toml b/Cargo.toml index ffecce2..f197a11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,5 @@ opt-level = 'z' [dependencies] kuchiki = "0.7.2" structopt = "0.2.15" +html5ever = "0.22.5" +markup5ever = "0.7.5" diff --git a/src/main.rs b/src/main.rs index 29f1fde..6bed137 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,9 @@ use structopt::StructOpt; mod opts; +use html5ever::serialize::{serialize, SerializeOpts}; +use kuchiki::NodeRef; +use markup5ever::serialize::Serialize as MSerialize; use opts::Opts; use std::error::Error; use std::fs::File; @@ -20,6 +23,13 @@ fn read_html(source: &mut I) -> io::Result { Ok(html) } +fn serialize_node(node: &N) -> Result> { + let mut bytes = Vec::new(); + serialize(&mut bytes, node, SerializeOpts::default())?; + let s = String::from_utf8(bytes)?; + Ok(s) +} + fn main() -> Result<(), Box> { let stdin = stdin(); let opt = Opts::from_args(); @@ -58,6 +68,7 @@ fn main() -> Result<(), Box> { //let text = text_node.as_text().unwrap().borrow(); if let Some(child) = as_node.first_child() { + dbg!(serialize_node(&child)); //TODO: Convert Nodes to String, as this only works for Nodes containing plain text if let Some(text) = child.as_text() { // Prints: