napoleon / crawl
HTML Crawler
v0.1.7
2019-10-16 08:53 UTC
Requires
- php: ^7.1
Requires (Dev)
- mockery/mockery: ^1.2
- phpunit/phpunit: 5.7
- symfony/var-dumper: ^3.4
This package is auto-updated.
Last update: 2025-03-18 18:34:04 UTC
README
Quick example
Consider this HTML to be the content served at $url:
<!DOCTYPE html> <html> <head> <title>I am a Page Title</title> </head> <body> <section> <h3> I am a row title </h3> <ul> <li> <a class="anchor -success" href="/redirect/now/1"> Link 1 </a> </li> <li> <a class="anchor" href="/redirect/now/2"> Link 2 </a> </li> </ul> </section> </body> </html>
use Napoleon\Crawler\DOMDocument; $url = 'https://www.example.com'; $document = new DOMDocument($url); print_r($document->html()->get());
Result of the code above:
array:1 [ 0 => array:4 [ "tagName" => "html" "attributes" => null "content" => "" "children" => array:2 [ 0 => array:4 [ "tagName" => "head" "attributes" => null "content" => "" "children" => array:1 [ 0 => array:4 [ "tagName" => "title" "attributes" => null "content" => "I am a Page Title" "children" => null ] ] ] 1 => array:4 [ "tagName" => "body" "attributes" => null "content" => "" "children" => array:1 [ 0 => array:4 [ "tagName" => "section" "attributes" => null "content" => "" "children" => array:2 [ 0 => array:4 [ "tagName" => "h3" "attributes" => null "content" => "I am a row title" "children" => null ] 1 => array:4 [ "tagName" => "ul" "attributes" => null "content" => "" "children" => array:2 [ 0 => array:4 [ "tagName" => "li" "attributes" => null "content" => "" "children" => array:1 [ 0 => array:4 [ "tagName" => "a" "attributes" => array:2 [ "class" => "anchor -success" "href" => "/redirect/now/1" ] "content" => "Link 1" "children" => null ] ] ] 1 => array:4 [ "tagName" => "li" "attributes" => null "content" => "" "children" => array:1 [ 0 => array:4 [ "tagName" => "a" "attributes" => array:2 [ "class" => "anchor" "href" => "/redirect/now/2" ] "content" => "Link 2" "children" => null ] ] ] ] ] ] ] ] ] ] ] ] ...
Search by specific class name of a tag
Consider this to be the HTML served at https://www.example.com:
<!DOCTYPE html> <html> <head> <title>I am a Page Title</title> </head> <body> <section> <h3> I am a row title </h3> <ul> <li> <a class="anchor -success" href="/redirect/now/1"> Link 1 </a> </li> <li> <a class="anchor" href="/redirect/now/2"> Link 2 </a> </li> </ul> </section> </body> </html>
use Napoleon\Crawler\DOMDocument; $url = 'https://www.example.com'; $document = new DOMDocument($url); print_r($document->findByClass('anchor')->get());
Result of the code above:
array:2 [ 0 => array:4 [ "tagName" => "a" "attributes" => array:2 [ "class" => "anchor -success" "href" => "/redirect/now/1" ] "content" => "Link 1" "children" => null ] 1 => array:4 [ "tagName" => "a" "attributes" => array:2 [ "class" => "anchor" "href" => "/redirect/now/2" ] "content" => "Link 2" "children" => null ] ]