做网站最重要的环节之一肯定是收录:页面没有收录,其他都是空谈,更不会有搜索流量。由于每个行业的网站众多,如何让搜索引擎第一时间发现你的网站页面并且收录呢?百度的主动推送操作不能丢。每当你在百度站长平台提交网站后,百度也会提示你去主动推送URL,让蜘蛛第一时间去抓取收录;收录上去了,SEO才有希望。对于海洋CMS这种专门做影视的程序,百度自动推送URL的功能并不完善,所以做一下教程来实现。
1、在根目录新建一个map目录;
2、在map目录里新建一个index.php,文件代码内容如下:
<?php
/**
 * XML sitemap endpoint for a SeaCMS site (intended location: /map/index.php).
 *
 * Renders the template /map/channel.html through the channel-page template
 * pipeline and sends the result with an XML content type.
 *
 * Request parameters: a category id ("tid") and a page number, taken either
 * from the raw query string ("<tid>-<page>" or "<tid>") or from the request
 * variables named by $cfg_paramid / $cfg_parampage, depending on
 * $cfg_runmode / $cfg_paramset.
 */
require_once(dirname(__FILE__)."/../include/common.php");

// Pre-redirect for mobile visitors (start).
$cs=$_SERVER["REQUEST_URI"];
// Stop after sending the Location header; without exit the script would go
// on to render the whole page for a client that is being redirected anyway.
if($GLOBALS['cfg_mskin']==3 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost$cs"); exit;}
if($GLOBALS['cfg_mskin']==4 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost"); exit;}
// Pre-redirect (end).

require_once(sea_INC."/main.class.php");
header('Content-Type:text/xml;charset=UTF-8');

if($GLOBALS['cfg_runmode']==2||$GLOBALS['cfg_paramset']==0){
    // Parameters are packed into the query string: "<tid>-<page>" or "<tid>".
    $paras=str_replace(getfileSuffix(),'',$_SERVER['QUERY_STRING']);
    if(strpos($paras,"-")>0){
        $parasArray=explode("-",$paras);
        $tid=$parasArray[0];
        $page=$parasArray[1];
    }else{
        $tid=intval($paras);
        $page=1;
    }
}else{
    // Parameters live in request variables whose NAMES are configurable.
    // Resolve the name first: "$$GLOBALS['x']" is parsed as
    // "($$GLOBALS)['x']" since PHP 7 and would no longer read the variable.
    $tidVarName=$GLOBALS['cfg_paramid'];
    $pageVarName=$GLOBALS['cfg_parampage'];
    $tid=isset($$tidVarName) ? $$tidVarName : 0;
    $page=isset($$pageVarName) ? $$pageVarName : 1;
}
// Anything non-numeric falls back to category 0 / page 1.
$tid = isset($tid) && is_numeric($tid) ? intval($tid) : 0;
$page = isset($page) && is_numeric($page) ? intval($page) : 1;

// NOTE: a missing (zero) tid is not rejected; the template is rendered anyway.
// The key is quoted: a bare tid is an undefined constant (Error in PHP 8).
$GLOBALS['tid']=$tid;
echoChannel($tid);

/**
 * Render the sitemap template for one category and echo it.
 *
 * Reads the globals $dsql (database handle), $cfg_iscache, $mainClassObj,
 * $page, $t1 and $cfg_basehost.
 *
 * @param int $typeId Category id; the caller has already passed it through
 *                    intval(), which is what keeps the SQL below safe.
 * @return void Output is written with echo.
 */
function echoChannel($typeId)
{
    global $dsql,$cfg_iscache,$mainClassObj,$page,$t1,$cfg_user,$cfg_basehost;

    $channelTmpName=getTypeTemplate($typeId);
    $channelTmpName=empty($channelTmpName) ? "channel.html" : $channelTmpName;
    // The same template is used for desktop and mobile skins.
    $channelTemplatePath = "/map/channel.html";
    // NOTE: no hidden-category or member-permission check is performed here.

    $pSize = getPageSizeOnCache($channelTemplatePath,"channel",$channelTmpName);
    if (empty($pSize)) $pSize=12;

    $typeIds = getTypeId($typeId);
    $typename=getTypeName($typeId);

    // Count the rows filed under the category ids or listing it as an extra type.
    if($typeId!="") $extrasql = " or FIND_IN_SET('".$typeId."',v_extratype)<>0 ";
    else $extrasql = "";
    $sql="select count(*) as dd from sea_data where (tid in (".$typeIds.") ".$extrasql.")";
    $row = $dsql->GetOne($sql);
    $TotalResult = is_array($row) ? $row['dd'] : 0;
    $pCount = ceil($TotalResult/$pSize);

    $currentTypeId = $typeId;
    $cacheName = "parse_channel_".$currentTypeId.$GLOBALS['cfg_mskin'].$GLOBALS['isMobile'];
    if($cfg_iscache && chkFileCache($cacheName)){
        $content = getFileCache($cacheName);
    }else{
        $content = parseChannelPart($channelTemplatePath,$currentTypeId);
        $content = str_replace("{channelpage:typename}",$typename,$content);
        $content = str_replace("{channelpage:typeid}",$currentTypeId,$content);
        if($cfg_iscache){
            setFileCache($cacheName,$content);
        }
    }

    // Page-dependent tags are filled in after the cache step.
    $content = str_replace("{channelpage:page}",$page,$content);
    $content=$mainClassObj->ParsePageList($content,$typeIds,$page,$pCount,$TotalResult,"channel",$currentTypeId);
    $content=$mainClassObj->parseIf($content);
    $content=str_replace("{seacms:member}",front_member(),$content);

    // One "{channelpage:order-<key>-link}" tag per sort order.
    $orderKeys = array('hit','hitasc','id','idasc','time','timeasc','commend','commendasc','score','scoreasc');
    foreach($orderKeys as $orderKey){
        $content = str_replace(
            "{channelpage:order-".$orderKey."-link}",
            $cfg_basehost."/search.php?page=1&searchtype=5&order=".$orderKey."&tid=".$typeId,
            $content
        );
    }

    echo str_replace("{seacms:runinfo}",getRunTime($t1),$content) ;
}

/**
 * Load a template file and expand the tags that do not depend on the page number.
 *
 * @param string $templatePath  Template path relative to sea_ROOT.
 * @param int    $currentTypeId Category id substituted into the template.
 * @return string The partially rendered template.
 */
function parseChannelPart($templatePath,$currentTypeId)
{
    global $mainClassObj;

    $content=loadFile(sea_ROOT.$templatePath);
    $content=$mainClassObj->parseTopAndFoot($content);
    $content = str_replace("{seacms:currenttypeid}",$currentTypeId,$content);
    $content=$mainClassObj->parseSelf($content);
    $content=$mainClassObj->parseHistory($content);
    $content=$mainClassObj->parseGlobal($content);
    $content=$mainClassObj->parseMenuList($content,"",$currentTypeId);
    $content=$mainClassObj->parseAreaList($content);
    $content=$mainClassObj->parseVideoList($content,$currentTypeId);
    $content=$mainClassObj->parseNewsList($content,$currentTypeId);
    $content=$mainClassObj->parseTopicList($content);
    $content = str_replace("{channelpage:typetext}",getTypeText($currentTypeId),$content);
    $content = str_replace("{channelpage:keywords}",getTypeKeywords($currentTypeId),$content);
    $content = str_replace("{channelpage:description}",getTypeDescription($currentTypeId),$content);
    $content = str_replace("{channelpage:title}",getTypeTitle($currentTypeId),$content);
    return $content;
}
?>
3、在map目录下新建一个channel.html文件,代码内容如下:
<?xml version="1.0" encoding="utf-8"?>
<!-- Sitemap template: one url entry per item of the channel list, newest first, at most 2000 items. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{seacms:channellist size=2000 order=time}
<url>
<loc>{seacms:siteurl}[channellist:link]</loc>
<lastmod>[channellist:time style=yyyy-mm-dd]</lastmod>
<changefreq>daily</changefreq>
<priority>0.8</priority>
</url>
{/seacms:channellist}
</urlset>
4、从xml文件取数据并做百度主动推送,代码如下:
#coding:utf-8 import requests,time,re,os import sys reload(sys) sys.setdefaultencoding('utf-8') def main(): # 删掉yesterday文件 # if os.path.exists('yesterday.txt'): # os.remove('yesterday.txt') #把xml中的数据拿下来,并和现有的数据去重后,留下的数据单独放到一个文件,并且追加到所有的url txt里 url = 'http://yp.jd.com/00/00_0.xml' r = requests.get(url) zhishi_url = re.findall(r'<loc>(.*?)</loc>',r.content) has_push_list = [url.strip() for url in open('all_url.txt')] f = open('all_url.txt',r'a+')#所有的url f_ytd = open('yesterday_0.txt',r'w+')#昨天发布的文章url f_ytd_m = open('yesterday_m_0.txt',r'w+')#昨天发布的文章url(m) num = 0 txt_index = 0 for link in zhishi_url:#多 if link in has_push_list: pass else: f.write(link+'n')#追加到所有的url txt里 f_ytd.write(link+'n')#把还未推送的url放到单独的文件内 f_ytd_m.write(link.replace('www','m')+'n')#把还未推送的url放到单独的文件内(m) if num%2000 == 1999: f_ytd.close() txt_index += 1 f_ytd = open('yesterday_%s.txt'%txt_index,r'w+') f_ytd_m = open('yesterday_m_%s.txt'%txt_index,r'w+') num += 1 f.close() f_ytd.close() f_ytd_m.close() print 'yesterday has %s'%num print 'crawl done' time.sleep(5) #开始推送 print 'push begin' for i in range(0,txt_index+1): try: headers = {'Content-Type':'text/plain'} url = 'http://data.zz.baidu.com/urls' params = {'site':'www.jd.com','token':'00'}#,'type':'original' r = requests.post(url,params=params,headers=headers,data=open('yesterday_%s.txt'%i,r'rb').read()) #m params_m = {'site':'m.jd.com','token':'00'}#,'type':'original' r_m = requests.post(url,params=params_m,headers=headers,data=open('yesterday_m_ %s.txt'%i,r'rb').read()) print 'PC:'+r.content+','+'M:'+r_m.content except Exception,e: print e continue print 'Finish!!!' if __name__ == '__main__': while True: current_time = time.localtime(time.time()) if((current_time.tm_hour == 18) and (current_time.tm_min == 0) and (current_time.tm_sec == 0)): main()
百度主动推送的好处就是主动,而sitemap的操作是被动,需要搜索引擎蜘蛛主动去抓。但是需要注意的一点是,主动推送不要重复去推同一个URL,否则会影响蜘蛛抓取,浪费蜘蛛资源。除了要推送,还有个要注意的地方:海洋CMS的演员链接是动态的URL,并不利于SEO,所以把海洋CMS的伪静态设置好尤其重要,请参考:海洋seacms演员名称伪静态设置:自动布局长尾关键词
发表评论
还没有评论,快来抢沙发吧!