use structopt::StructOpt; mod opts; use html5ever::serialize::{serialize, SerializeOpts, TraversalScope}; use markup5ever::serialize::Serialize as MSerialize; use opts::Opts; use std::error::Error; use std::fs::File; use std::io; use std::io::Write; use std::io::{stdin, stdout, Read}; use kuchiki::traits::*; fn read_html(source: &mut I) -> io::Result { let mut html = String::new(); source.read_to_string(&mut html)?; Ok(html) } fn serialize_node(node: &N) -> Result> { let mut bytes = Vec::new(); serialize( &mut bytes, node, SerializeOpts { scripting_enabled: true, create_missing_parent: false, traversal_scope: TraversalScope::IncludeNode, }, )?; let s = String::from_utf8(bytes)?; Ok(s) } fn main() -> Result<(), Box> { let stdin = stdin(); let opt = Opts::from_args(); let html = if let Some(path) = opt.input { let mut file = File::open(&path)?; read_html(&mut file)? } else { let mut handle = stdin.lock(); read_html(&mut handle)? }; let mut out: Box = if let Some(path) = opt.output { let file = File::create(&path)?; Box::new(file) } else { Box::new(stdout()) }; let css_selector = opt.selector; let document = kuchiki::parse_html().one(html); for css_match in document.select(&css_selector).unwrap() { // css_match is a NodeDataRef, but most of the interesting methods are // on NodeRef. Let's get the underlying NodeRef. let as_node = css_match.as_node(); // In this example, as_node represents an HTML node like // //

Hello world!

" // // Which is distinct from just 'Hello world!'. To get rid of that

// tag, we're going to get each element's first child, which will be // a "text" node. // // There are other kinds of nodes, of course. The possibilities are all // listed in the `NodeData` enum in this crate. //let text_node = as_node.first_child().unwrap(); // Let's get the actual text in this text node. A text node wraps around // a RefCell, so we need to call borrow() to get a &str out. //let text = text_node.as_text().unwrap().borrow(); out.write_all(serialize_node(as_node)?.as_bytes())?; /*if let Some(child) = as_node.first_child() { //TODO: Convert Nodes to String, as this only works for Nodes containing plain text if let Some(text) = child.as_text() { // Prints: // // "Hello, world!" // "I love HTML" println!("{:?}", text.borrow()); } }*/ } Ok(()) }