-
-
Save bjornjohansen/4905c93f9bd44e6084ec to your computer and use it in GitHub Desktop.
| #!/usr/bin/php | |
| <?php | |
| /** | |
| * @license http://www.wtfpl.net/txt/copying/ WTFPL | |
| */ | |
// Use UTC as the default timezone for this script.
date_default_timezone_set( 'UTC' );

// Sitemaps (sitemap index files are fine too) whose URLs will be requested.
$sitemaps = [ 'https://bjornjohansen.no/sitemap_index.xml' ];

// Collect every URL from the sitemaps, then request them all.
( new BJ_Crawler( $sitemaps ) )->run();
/**
 * Sitemap-driven crawler.
 *
 * Collects page URLs from XML sitemaps (sitemap index files are followed
 * recursively) and then requests every collected URL, five at a time,
 * discarding the responses.
 */
class BJ_Crawler {

    /**
     * Sitemap URLs that have already been queued, used to avoid fetching
     * the same sitemap twice.
     *
     * @var string[]
     */
    protected $_sitemaps = array();

    /**
     * Unique page URLs collected from the sitemaps, in discovery order.
     *
     * @var string[]
     */
    protected $_urls = array();

    /**
     * Constructor
     *
     * @param array|string|null $sitemaps A string with a URL to an XML sitemap, or an array with URLs to XML sitemaps. Sitemap index files work well too.
     */
    public function __construct( $sitemaps = null ) {
        $this->_sitemaps = array();
        $this->_urls     = array();

        // The (array) cast turns null into an empty list and a single string
        // into a one-element list, so one loop covers every accepted input.
        foreach ( (array) $sitemaps as $sitemap ) {
            $this->add_sitemap( $sitemap );
        }
    }

    /**
     * Add a sitemap URL to our crawl stack. Sitemap index files work too.
     *
     * The sitemap is fetched immediately. Entries of a sitemap index are
     * added recursively (failures of child sitemaps are ignored); entries
     * of a URL set are handed to add_url().
     *
     * @param string $sitemapurl URL to an XML sitemap or sitemap index
     *
     * @return false|null False when the sitemap could not be fetched or parsed, null otherwise.
     */
    public function add_sitemap( $sitemapurl ) {
        if ( in_array( $sitemapurl, $this->_sitemaps, true ) ) {
            return;
        }
        $this->_sitemaps[] = $sitemapurl;

        $ch = curl_init();
        curl_setopt( $ch, CURLOPT_URL, $sitemapurl );
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
        $content   = curl_exec( $ch );
        $http_code = (int) curl_getinfo( $ch, CURLINFO_HTTP_CODE );
        // Release the handle now instead of keeping it alive while we
        // recurse into child sitemaps below.
        unset( $ch );

        if ( 200 !== $http_code || ! is_string( $content ) || '' === trim( $content ) ) {
            return false;
        }

        // SimpleXMLElement's constructor throws on unparsable input rather
        // than returning false, so a broken sitemap must be caught here or
        // it would abort the whole crawl.
        try {
            $xml = new SimpleXMLElement( $content, LIBXML_NOBLANKS );
        } catch ( Exception $e ) {
            return false;
        }

        switch ( $xml->getName() ) {
            case 'sitemapindex':
                foreach ( $xml->sitemap as $sitemap ) {
                    $loc = trim( (string) $sitemap->loc );
                    if ( '' !== $loc ) {
                        $this->add_sitemap( $loc );
                    }
                }
                break;

            case 'urlset':
                foreach ( $xml->url as $url ) {
                    $loc = trim( (string) $url->loc );
                    if ( '' !== $loc ) {
                        $this->add_url( $loc );
                    }
                }
                break;

            default:
                // Unknown root element: nothing to collect.
                break;
        }
    }

    /**
     * Add a URL to our crawl stack
     *
     * URLs that are already on the stack are ignored.
     *
     * @param string $url URL to check
     */
    public function add_url( $url ) {
        // Strict comparison: loose comparison would treat numeric-looking
        // strings such as '1000' and '1e3' as the same entry.
        if ( ! in_array( $url, $this->_urls, true ) ) {
            $this->_urls[] = $url;
        }
    }

    /**
     * Run the crawl
     *
     * Requests every collected URL. Responses are fetched and discarded.
     */
    public function run() {
        // Split our URLs into chunks of 5 URLs to use with curl multi
        foreach ( array_chunk( $this->_urls, 5 ) as $chunk ) {
            $mh      = curl_multi_init();
            $handles = array();

            foreach ( $chunk as $url ) {
                $ch = curl_init();
                curl_setopt( $ch, CURLOPT_URL, $url );
                curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
                curl_multi_add_handle( $mh, $ch );
                $handles[] = $ch;
            }

            $active = 0;
            do {
                $status = curl_multi_exec( $mh, $active );

                if ( CURLM_CALL_MULTI_PERFORM === $status ) {
                    // Old libcurl versions ask to be called again right away.
                    continue;
                }

                // Wait for socket activity. curl_multi_select() may return -1
                // without waiting; back off briefly in that case so the loop
                // does not spin at full CPU, then call curl_multi_exec() again.
                if ( $active && CURLM_OK === $status && -1 === curl_multi_select( $mh ) ) {
                    usleep( 100000 );
                }
            } while ( CURLM_CALL_MULTI_PERFORM === $status || ( $active && CURLM_OK === $status ) );

            // Detach the easy handles and close the multi handle before
            // moving on to the next chunk.
            foreach ( $handles as $handle ) {
                curl_multi_remove_handle( $mh, $handle );
            }
            curl_multi_close( $mh );
        }
    }
}
Hi..my sitemap is:
mysite.com/index.php?xml_sitemap=params=
Can I use this script or do I have to edit it?
Thanks
Hello, does it work with WordPress version 5.4.2 and the LiteSpeed Cache plugin? Please reply, thank you.
Can I add my sub-sitemap?
@bjornjohansen I'm going to create a Composer package for cache warmup and would like to use parts of your code. Is this okay for you? Of course I'd add a source link to this gist file :)
@bjornjohansen I'm going to create a Composer package for cache warmup and would like to use parts of your code. Is this okay for you? Of course I'd add a source link to this gist file :)
Yes, of course @eliashaeussler
You don’t need to link back here. I’m hereby granting you a WTFPL license :-)
@bjornjohansen I'm going to create a Composer package for cache warmup and would like to use parts of your code. Is this okay for you? Of course I'd add a source link to this gist file :)
Yes, of course @eliashaeussler
You don’t need to link back here. I’m hereby granting you a WTFPL license :-)
Wow, that's very nice of you @bjornjohansen – thanks!
Here we go: https://packagist.org/packages/eliashaeussler/cache-warmup 🎉
Thank you, this is exactly what I was looking for!