// Create DOM from URL or file
$html = SimpleHtmlDom::initDomFromFile('http://www.google.com/');
// Find all images
foreach ($html->find('img') as $element)
    Logger::info($element->src);
// Find all links
foreach ($html->find('a') as $element)
    Logger::info($element->href);
// Create a DOM object from a string
$html = SimpleHtmlDom::initDomFromString('<html><body>Hello!</body></html>');
// Create a DOM object from a URL
$html = SimpleHtmlDom::initDomFromFile('http://www.google.com/');
// Create a DOM object from an HTML file
$html = SimpleHtmlDom::initDomFromFile('test.htm');
Object-oriented way
// Create a DOM object
$html = new SimpleHtmlDom();
// Load HTML from a string
$html->load('<html><body>Hello!</body></html>');
// Load HTML from a URL
$html->loadFile('http://www.google.com/');
// Load HTML from an HTML file
$html->loadFile('test.htm');
How to find HTML elements?
Basics
// Find all anchors, returns an array of element objects
$ret = $html->find('a');
// Find (N)th anchor, returns element object or null if not found (zero based)
$ret = $html->find('a', 0);
// Find the last anchor, returns element object or null if not found (zero based)
$ret = $html->find('a', -1);
// Find all <div> with the id attribute
$ret = $html->find('div[id]');
// Find all <div> whose attribute id=foo
$ret = $html->find('div[id=foo]');
Advanced
// Find all elements whose id=foo
$ret = $html->find('#foo');
// Find all elements whose class=foo
$ret = $html->find('.foo');
// Find all elements that have the id attribute
$ret = $html->find('*[id]');
// Find all anchors and images
$ret = $html->find('a, img');
// Find all anchors and images with the "title" attribute
$ret = $html->find('a[title], img[title]');
Descendant selectors
// Find all <li> in <ul>
$es = $html->find('ul li');
// Find nested <div> tags
$es = $html->find('div div div');
// Find all <td> in <table> whose class=hello
$es = $html->find('table.hello td');
// Find all td tags with attribute align=center in table tags
$es = $html->find('table td[align=center]');
Nested selectors
// Find all <li> in <ul>
foreach ($html->find('ul') as $ul)
{
    foreach ($ul->find('li') as $li)
    {
        // do something...
    }
}
// Find first <li> in first <ul>
$e = $html->find('ul', 0)->find('li', 0);
Attribute Filters
Supports these operators in attribute selectors:
Filter
Description
[attribute]
Matches elements that have the specified attribute.
[!attribute]
Matches elements that don't have the specified attribute.
[attribute=value]
Matches elements that have the specified attribute with a certain value.
[attribute!=value]
Matches elements that don't have the specified attribute with a certain value.
[attribute^=value]
Matches elements that have the specified attribute and it starts with a certain value.
[attribute$=value]
Matches elements that have the specified attribute and it ends with a certain value.
[attribute*=value]
Matches elements that have the specified attribute and it contains a certain value.
Text & Comments
// Find all text blocks
$es = $html->find('text');
// Find all comment (<!--...-->) blocks
$es = $html->find('comment');
How to access the HTML element's attributes?
Get, Set and Remove attributes
// Get an attribute (if the attribute is a non-value attribute (e.g. checked, selected...), it will return true or false)
$value = $e->href;
// Set an attribute (if the attribute is a non-value attribute (e.g. checked, selected...), set its value to true or false)
$e->href = 'my link';
// Remove an attribute by setting its value to null
$e->href = null;
// Determine whether an attribute exists
if (isset($e->href))
    Logger::info('href exist!');
// Extract contents from HTML
Logger::info($html->plaintext);
// Wrap an element
$e->outerText = '<div class="wrap">' . $e->outerText . '</div>';
// Remove an element by setting its outerText to an empty string
$e->outerText = '';
// Append an element
$e->outerText = $e->outerText . '<div>foo</div>';
// Insert an element
$e->outerText = '<div>foo</div>' . $e->outerText;
How to traverse the DOM tree?
Background Knowledge
// If you are not so familiar with HTML DOM, check this link to learn more...
// Example
Logger::info($html->find("#div1", 0)->children(1)->children(1)->children(2)->id);
// or
Logger::info($html->getElementById("div1")->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id'));
Traverse the DOM tree
Type
Method
Description
mixed
$e->children([int $index])
Returns the Nth child object if index is set, otherwise return an array of children.
element
$e->parent()
Returns the parent of element.
element
$e->firstChild()
Returns the first child of element, or null if not found.
element
$e->lastChild()
Returns the last child of element, or null if not found.
element
$e->nextSibling()
Returns the next sibling of element, or null if not found.
element
$e->prevSibling()
Returns the previous sibling of element, or null if not found.
How to dump contents of DOM object?
Quick way
// Dumps the internal DOM tree back into a string
$str = $html;
// Print it!
Logger::info($html);
Object-oriented way
// Dumps the internal DOM tree back into a string
$str = $html->save();
// Dumps the internal DOM tree back into a file
$html->save('result.htm');
How to customize the parsing behavior?
Callback function
// Write a function with parameter "$element"
public function myCallback(SimpleHtmlDomNode $element) {
    // Hide all <b> tags
    if ($element->tag == 'b')
        $element->outerText = '';
}
// Register the callback function with its function name
$html->setCallback([$this, "myCallback"]);
// Callback function will be invoked while dumping
Logger::info($html);