摘要:但是假设我们的网站经常更新,那么是不是每次都要手动更新sitemap呢?
由于工作的原因,最近需要生成网站的sitemap.xml,谷歌、百度了很多地方,并没有发现合适可用的代码,三思之后还是决定自己写吧!虽然可能写得有所缺陷,但毕竟是认认真真写的,希望对一些后来者有所帮助......
1、为什么要自己写脚本生成sitemap.xml?很多人会说,网上有现成的工具,扫一下就可以了,没有必要自己写。是的,的确是这样的。但是假设我们的网站经常更新,那么是不是每次都要手动更新sitemap呢?我很懒,那么,有没有更好的方案呢?肯定是有的:我可以起一个定时任务,每天晚上更新一次,此时脚本就有用武之地了。
2、文档目录:配置文件 - config/config.ini.php;sitemap主文件 - SiteMap.class.php
3、主文件代码
<?php
/**
 * Command-line crawler that walks a web site and writes its sitemap.xml.
 *
 * NOTE(review): this listing was recovered from a copy in which backslashes
 * and HTML-like text had been stripped. The "\n" line endings, the quote
 * characters trimmed in _formatUrl() and both regular expressions in
 * _parseContent() are reconstructions - confirm against the original source.
 *
 * @version 1.0
 */
// NOTE(review): presumably "Maweibinguo\SiteMap" before backslashes were lost - confirm.
namespace MaweibinguoSiteMap;

class SiteMap
{
    const SCHEMA = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /**
     * Every URL that has already been queued for crawling.
     *
     * @var array
     * @access public
     */
    public $webUrlList = array();

    /**
     * Collected sitemap entries (keys: Url, Title, Priority, ChangeFreq).
     *
     * @var array
     * @access public
     */
    public $siteMapList = array();

    /**
     * Not read by this class; kept for backward compatibility.
     *
     * @var bool
     * @access public
     */
    public $isUseCookie = false;

    /**
     * Not read by this class; kept for backward compatibility.
     *
     * @var string
     * @access public
     */
    public $cookieFilePath = "";

    /**
     * @var \XMLWriter
     * @access private
     */
    private $_xmlWriter = null;

    /**
     * Variables defined by config/config.ini.php, loaded on first use.
     *
     * @var array|null
     * @access private
     */
    private $_configCache = null;

    /**
     * Create the XML writer and make sure the script runs from the CLI.
     *
     * @access public
     */
    public function __construct()
    {
        // Leading backslash: inside a namespace a bare XMLWriter would be
        // resolved relative to that namespace and not be found.
        $this->_xmlWriter = new \XMLWriter();
        $this->_enviromentTest();
    }

    /**
     * Abort unless the script is executed through the command line SAPI.
     *
     * @access private
     */
    private function _enviromentTest()
    {
        if (strtolower(php_sapi_name()) != "cli") {
            echo " The Script Must Run In Command Lines ", "\n";
            exit();
        }
    }

    /**
     * Return the value of one variable defined in config/config.ini.php.
     *
     * The config file is included once per instance and cached, because this
     * method is called repeatedly from the recursive crawl.
     * Prints a message and exits when the config file does not exist.
     *
     * @param string $configName name of the config variable (without "$")
     * @return mixed the configured value, or false when it is not defined
     * @access public
     */
    public function loadConfig($configName)
    {
        if ($this->_configCache === null) {
            $configPath = __DIR__ . "/config/config.ini.php";
            if (!file_exists($configPath)) {
                echo "Can not find config file", "\n";
                exit();
            }
            $this->_configCache = $this->_readConfigFile($configPath);
        }

        if (!array_key_exists($configName, $this->_configCache)) {
            return false;
        }

        return $this->_configCache[$configName];
    }

    /**
     * Include the config file in an otherwise empty scope and return the
     * variables it defines.
     *
     * @param string $__configPath
     * @return array
     * @access private
     */
    private function _readConfigFile($__configPath)
    {
        require $__configPath;
        unset($__configPath);

        return get_defined_vars();
    }

    /**
     * Write the given entries to the file configured as SITEMAPPATH.
     *
     * Prints a message and exits when the list is empty or the target file
     * is not writable.
     *
     * @param array $siteMapList entries with Url, Title and optionally
     *                           ChangeFreq and Priority
     * @return bool true once the document has been written, false when the
     *              target could not be opened
     * @access public
     */
    public function generateSiteMapXml($siteMapList)
    {
        if (!is_array($siteMapList) || count($siteMapList) <= 0) {
            echo "The SiteMap Cotent Is Empty", "\n";
            exit();
        }

        $siteMapPath = $this->loadConfig("SITEMAPPATH");
        if (!file_exists($siteMapPath)) {
            // touch() instead of exec("touch ..."): no shell, no quoting issues.
            touch($siteMapPath);
        }
        if (!is_writable($siteMapPath)) {
            echo "Is Not Writeable", "\n";
            exit();
        }

        if (!$this->_xmlWriter->openURI($siteMapPath)) {
            return false;
        }
        $this->_xmlWriter->startDocument("1.0", "UTF-8");
        $this->_xmlWriter->setIndent(true);
        $this->_xmlWriter->startElement("urlset");
        $this->_xmlWriter->writeAttribute("xmlns", self::SCHEMA);

        foreach ($siteMapList as $siteMapItem) {
            $changefreq = !empty($siteMapItem["ChangeFreq"]) ? $siteMapItem["ChangeFreq"] : "Daily";
            $priority = !empty($siteMapItem["Priority"]) ? $siteMapItem["Priority"] : 0.5;

            $this->_xmlWriter->startElement("url");
            $this->_xmlWriter->writeElement("loc", $siteMapItem["Url"]);
            // NOTE(review): <title> is not defined by the sitemap 0.9 protocol;
            // kept because the original output contained it.
            $this->_xmlWriter->writeElement("title", $siteMapItem["Title"]);
            // The protocol only allows lower-case changefreq values.
            $this->_xmlWriter->writeElement("changefreq", strtolower($changefreq));
            $this->_xmlWriter->writeElement("priority", (string) $priority);
            $this->_xmlWriter->endElement();
        }

        $this->_xmlWriter->endElement();
        // Close the document and push the buffer to disk explicitly.
        $this->_xmlWriter->endDocument();
        $this->_xmlWriter->flush();

        return true;
    }

    /**
     * Fetch a URL with cURL and return the response body.
     *
     * @param string $url
     * @return mixed the body as string, or false when the URL is invalid, a
     *               timeout setting is missing, or the status is not 200
     * @access public
     */
    public function sendRequest($url)
    {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        $connectTimeOut = $this->loadConfig("CURLOPT_CONNECTTIMEOUT");
        if ($connectTimeOut === false) {
            return false;
        }
        $timeOut = $this->loadConfig("CURLOPT_TIMEOUT");
        if ($timeOut === false) {
            return false;
        }

        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $url);
        curl_setopt($handle, CURLOPT_HEADER, false);
        curl_setopt($handle, CURLOPT_AUTOREFERER, true);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $connectTimeOut);
        curl_setopt($handle, CURLOPT_TIMEOUT, $timeOut);
        curl_setopt($handle, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        curl_setopt($handle, CURLOPT_HTTPHEADER, array(
            "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Connection: Keep-Alive",
        ));
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);

        $cookieList = $this->loadConfig("COOKIELIST");
        if (is_array($cookieList) && !empty($cookieList["IsUseCookie"]) && !empty($cookieList["CookiePath"])) {
            $cookieFilePath = $cookieList["CookiePath"];
            if (!file_exists($cookieFilePath)) {
                touch($cookieFilePath);
            }
            curl_setopt($handle, CURLOPT_COOKIEFILE, $cookieFilePath);
            curl_setopt($handle, CURLOPT_COOKIEJAR, $cookieFilePath);
        }

        $responseData = curl_exec($handle);
        if (curl_getinfo($handle, CURLINFO_HTTP_CODE) != 200) {
            $responseData = false;
        }
        curl_close($handle);

        return $responseData;
    }

    /**
     * Crawl $url, record its sitemap entry and recurse into every same-site
     * link that has not been queued yet and matches no SKIP_URLLIST keyword.
     *
     * @param string $url
     * @access public
     */
    public function generateSiteMapList($url)
    {
        // Mark the page itself as seen so a link back to the start page does
        // not get it crawled and listed a second time.
        if (!in_array($url, $this->webUrlList, true)) {
            $this->webUrlList[] = $url;
        }

        $content = $this->sendRequest($url);
        if ($content === false) {
            return;
        }

        $tagsList = $this->_parseContent($content, $url);
        $urlItem = isset($tagsList["UrlItem"]) ? $tagsList["UrlItem"] : array();
        $title = isset($tagsList["Title"]) ? $tagsList["Title"] : "";

        $pageUrl = trim($url);
        $this->siteMapList[] = array(
            "Url" => $pageUrl,
            "Title" => trim($title),
            "Priority" => $this->_calculatePriority($pageUrl),
            "ChangeFreq" => $this->_calculateChangefreq($pageUrl),
        );

        // Loop-invariant, so read it once per page instead of once per link.
        $skipUrlList = $this->loadConfig("SKIP_URLLIST");
        if (!is_array($skipUrlList)) {
            $skipUrlList = array();
        }

        foreach ($urlItem as $nextUrl) {
            if (in_array($nextUrl, $this->webUrlList, true)) {
                continue;
            }
            foreach ($skipUrlList as $keyWords) {
                if (stripos($nextUrl, $keyWords) !== false) {
                    continue 2;
                }
            }
            $this->webUrlList[] = $nextUrl;
            echo $nextUrl, "\n";
            $this->generateSiteMapList($nextUrl);
        }
    }

    /**
     * Return the sitemap entries collected so far.
     *
     * @access public
     * @return array $siteMapList
     */
    public function getSiteMapList()
    {
        return $this->siteMapList;
    }

    /**
     * Priority for a URL: the PRIORITYLIST value of the first keyword found
     * in the URL, 0.5 otherwise.
     *
     * @param string $targetUrl
     * @return mixed $priority
     * @access private
     */
    private function _calculatePriority($targetUrl)
    {
        return $this->_matchByKeyword($targetUrl, "PRIORITYLIST", 0.5);
    }

    /**
     * Change frequency for a URL: the CHANGEFREQLIST value of the first
     * keyword found in the URL, "Daily" otherwise.
     *
     * @param string $targetUrl
     * @return string $changefreq
     * @access private
     */
    private function _calculateChangefreq($targetUrl)
    {
        return $this->_matchByKeyword($targetUrl, "CHANGEFREQLIST", "Daily");
    }

    /**
     * Look up a keyword => value config map and return the value of the
     * first keyword contained (case-insensitively) in a valid URL.
     *
     * @param string $targetUrl
     * @param string $configName name of the config map
     * @param mixed  $default    returned when nothing matches
     * @return mixed
     * @access private
     */
    private function _matchByKeyword($targetUrl, $configName, $default)
    {
        if (!filter_var($targetUrl, FILTER_VALIDATE_URL)) {
            return $default;
        }

        $keywordMap = $this->loadConfig($configName);
        if (!is_array($keywordMap)) {
            return $default;
        }

        foreach ($keywordMap as $keyword => $value) {
            if (stripos($targetUrl, (string) $keyword) !== false) {
                return $value;
            }
        }

        return $default;
    }

    /**
     * Turn an href value found on $originUrl into an absolute URL.
     *
     * @param string $url       raw href value
     * @param string $originUrl URL of the page the link was found on
     * @access private
     * @return string absolute URL, or "" when the link should be ignored
     */
    private function _formatUrl($url, $originUrl)
    {
        if (empty($url) || empty($originUrl)) {
            return "";
        }

        $badUrlItem = array("", "/", "javascript", "javascript:;");

        // NOTE(review): the trimmed quote characters are reconstructed.
        $formatUrl = trim(trim($url), "\"'");

        // Drop the fragment: "#top" is the same page, not a relative path.
        $hashPos = strpos($formatUrl, "#");
        if ($hashPos !== false) {
            $formatUrl = substr($formatUrl, 0, $hashPos);
        }

        if (in_array($formatUrl, $badUrlItem, true)) {
            return "";
        }

        // Already absolute.
        if (preg_match('#^https?://#i', $formatUrl)) {
            return $formatUrl;
        }

        // Scheme-relative ("//host/path"): borrow the scheme of the page.
        if (strpos($formatUrl, "//") === 0) {
            $scheme = parse_url($originUrl, PHP_URL_SCHEME);
            return ($scheme ? $scheme : "http") . ":" . $formatUrl;
        }

        // Any other scheme (mailto:, tel:, javascript:...) is not crawlable.
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $formatUrl)) {
            return "";
        }

        // Root-relative: resolve against the configured site root.
        if (strpos($formatUrl, "/") === 0) {
            $domainName = $this->loadConfig("DOMAIN_NAME");
            return rtrim((string) $domainName, "/") . "/" . trim($formatUrl, "/");
        }

        // Document-relative: resolve against the directory of the page. When
        // the page URL has no path, its last "/" belongs to "://", so the
        // whole URL is the base.
        $schemeEnd = strpos($originUrl, "://");
        $pathStart = ($schemeEnd === false) ? 0 : $schemeEnd + 3;
        $lastSlash = strrpos($originUrl, "/");
        if ($lastSlash === false || $lastSlash < $pathStart) {
            $baseUrl = $originUrl;
        } else {
            $baseUrl = substr($originUrl, 0, $lastSlash);
        }

        return $baseUrl . "/" . $formatUrl;
    }

    /**
     * Tell whether a URL belongs to the configured DOMAIN_NAME.
     *
     * The URL has to start with DOMAIN_NAME (trailing slash ignored); merely
     * containing it somewhere, e.g. in a query string, is not enough.
     *
     * @param string $url
     * @return bool
     * @access private
     */
    private function _checkDomain($url)
    {
        if (!$url) {
            return false;
        }

        $domainName = $this->loadConfig("DOMAIN_NAME");
        if (!is_string($domainName) || $domainName === "") {
            return false;
        }

        $prefix = rtrim($domainName, "/");
        $prefixLength = strlen($prefix);
        if (strncasecmp($url, $prefix, $prefixLength) !== 0) {
            return false;
        }

        // The prefix must end at a URL boundary so that "http://a.com" does
        // not match "http://a.com.example.org".
        $next = substr($url, $prefixLength, 1);

        return $next === "" || $next === false || in_array($next, array("/", "?", "#"), true);
    }

    /**
     * Extract the same-site links and the page title from an HTML document.
     *
     * @param string $content   HTML of the page
     * @param string $originUrl URL the HTML was fetched from
     * @return array empty when an argument is empty, otherwise
     *               array("UrlItem" => absolute URLs, "Title" => string)
     * @access public
     */
    public function _parseContent($content, $originUrl)
    {
        $tagsList = array();
        if (empty($content) || empty($originUrl)) {
            return $tagsList;
        }

        // NOTE(review): the original pattern was lost; this one captures the
        // quoted href value of <a> tags - confirm against the original.
        $urlItem = array();
        $regStrForTagA = '#<a\s[^>]*?href\s*=\s*(["\'])(.*?)\1#is';
        if (preg_match_all($regStrForTagA, $content, $matches)) {
            $urlItem = $matches[2];
            foreach ($urlItem as $urlKey => $url) {
                $formatUrl = $this->_formatUrl($url, $originUrl);
                if (empty($formatUrl) || $this->_checkDomain($formatUrl) === false) {
                    unset($urlItem[$urlKey]);
                    continue;
                }
                $urlItem[$urlKey] = $formatUrl;
            }
        }
        $tagsList["UrlItem"] = $urlItem;

        // NOTE(review): the <title> tags of the original pattern were lost;
        // reconstructed - confirm against the original.
        $title = "";
        $regStrForTitle = '#<title[^>]*>(.*?)</title>#is';
        if (preg_match($regStrForTitle, $content, $matches)) {
            $title = $matches[1];
        }
        $tagsList["Title"] = $title;

        return $tagsList;
    }
}

/* here is a example */
$startTime = microtime(true);
echo "/***********************************************************************/", "\n";
echo "/* start to run {$startTime} */", "\n";
echo "/***********************************************************************/", "\n";

$siteMap = new SiteMap();
$domain = $siteMap->loadConfig("DOMAIN_NAME");
$siteMap->generateSiteMapList($domain);
$siteMapList = $siteMap->getSiteMapList();
$siteMap->generateSiteMapXml($siteMapList);

$endTime = microtime(true);
$takeTime = $endTime - $startTime;
echo "/***********************************************************************/", "\n";
echo "/* Had Done, it total take {$takeTime} */", "\n";
echo "/***********************************************************************/", "\n";
?>
true, "CookiePath" => "/tmp/sitemapcookie" ); //sitemap文件的保存地址 $SITEMAPPATH = "./sitemap.xml"; //根据连接关键字设置priority $PRIORITYLIST = array( "product" => "0.8", "device" => "0.6", "intelligent" => "0.4", "course" => "0.2" ); //根据连接关键字设置CHANGEFREQ $CHANGEFREQLIST = array( "product" => "Always", "device" => "Hourly", "intelligent" => "Daily", "course" => "Weekly", "login" => "Monthly", "about" => "Yearly" ); ?>5、获取源码包
单击下载源代码 (提取码:fc1c)
文章版权归作者所有,未经允许请勿转载,若此文章存在违规行为,您可以联系管理员删除。
转载请注明本文地址:https://www.ucloud.cn/yun/21739.html
摘要:输出类似强行删除某插件此方法用于卸载插件失败时的替补方法,老高一般将此方法写入插件的方法里,这样刷新以下后台,出问题的插件就被卸载了。比如老高的插件,就用此方法为系统添加了一个的路由。 此文原本发表于我的博客 老高的技术博客 ,欢迎和老高交流! Helper类为我们封装了很多与插件有关的操作,并且全部是公共静态方法,比如获取系统配置、添加路由、添加面板等功能,是开发插件必不可少的工...
摘要:而我本人需要完成的任务是定时访问一个文件链接去生成,所以访问就不能用去完成,而是要用。本站的这篇下执行定时任务命令详解写的非常详细,建议看一下。 crontab -e 新建/编辑一个任务crontab -l 列出所有任务 crontab 格式: 基本格式 :分钟 小时 日 月 星期 命令第1列表示分钟1~59 每分钟用*或者 */1表示第2列表示小时1~23(0表示0点...
showImg(https://segmentfault.com/img/remote/1460000018808058?w=900&h=500); 简介 SEO、sitemap、搜索引擎优化、简单教程 在暧昧期和暗恋期时心里总是悬挂着: ta 为什么还不和我表白? ta 是不是对我没感觉? ta 是不是只是把我当备胎? ta 是不是对谁都这样? 解决问题最简单的方式就是直接 问问对方...
阅读 2649·2023-04-26 00:42
阅读 2796·2021-09-24 10:34
阅读 3808·2021-09-24 09:48
阅读 4142·2021-09-03 10:28
阅读 2574·2019-08-30 15:56
阅读 2768·2019-08-30 15:55
阅读 3251·2019-08-29 12:46
阅读 2241·2019-08-28 17:52