最新公告
  • 欢迎您光临站盟网(原知事网),一个优质的网站源码基地、精品网站模板和插件。欢迎加入永久SVIP
  • 海洋CMS自动百度推送 轻轻松松实现十万收录

    正文概述 知事网   2020-08-15 11:08   930

    做网站最重要的一环肯定是收录:页面没有被收录,其他都是空谈,更不会有搜索流量。由于每个行业的网站众多,如何让搜索引擎第一时间发现你的网站页面并且收录呢?百度的主动推送操作不能丢。每当你在百度站长平台提交网站后,百度也会提示你去主动推送URL,让蜘蛛第一时间去抓取收录;收录上去了,SEO才有希望。对于海洋CMS这类专门做影视站的程序,百度主动推送URL的功能并不完善,所以写一篇教程来实现。

    1、在根目录新建一个map目录;

    2、在map目录里新建一个index.php,文件代码内容如下:

    <?php
    /**
     * Entry point for /map/index.php: resolves the requested type id and page
     * number, then renders the XML channel listing via echoChannel().
     *
     * Request parameters are taken either from the raw query string
     * ("<tid>" or "<tid>-<page>", with the configured file suffix removed) or
     * from the global variables whose NAMES are stored in $cfg_paramid and
     * $cfg_parampage, depending on $cfg_runmode / $cfg_paramset.
     */
    require_once(dirname(__FILE__)."/../include/common.php");

    // Pre-dispatch redirect (start): mobile visitors are sent to the mobile
    // host for skin modes 3 and 4. Stop right after the Location header so the
    // XML body is not generated and sent along with the redirect.
    $cs=$_SERVER["REQUEST_URI"];
    if($GLOBALS['cfg_mskin']==3 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost$cs");exit;}
    if($GLOBALS['cfg_mskin']==4 AND $GLOBALS['isMobile']==1){header("location:$cfg_mhost");exit;}
    // Pre-dispatch redirect (end)
    require_once(sea_INC."/main.class.php");

    header('Content-Type:text/xml;charset=UTF-8');

    if($GLOBALS['cfg_runmode']==2||$GLOBALS['cfg_paramset']==0){
        // Query string carries "<tid>" or "<tid>-<page>".
        $paras=str_replace(getfileSuffix(),'',$_SERVER['QUERY_STRING']);
        if(strpos($paras,"-")>0){
            $parasArray=explode("-",$paras);
            $tid=$parasArray[0];
            $page=$parasArray[1];
        }else{
            $tid=intval($paras);
            $page=1;
        }
    }else{
        // The config values hold variable NAMES. The original
        // `$$GLOBALS['cfg_paramid']` is parsed as `($$GLOBALS)['cfg_paramid']`
        // on PHP 7+, so the lookup is spelled out explicitly here.
        // NOTE(review): assumes common.php has already exposed the request
        // parameters as global variables - confirm.
        $tidVar=$GLOBALS['cfg_paramid'];
        $pageVar=$GLOBALS['cfg_parampage'];
        $tid=isset($$tidVar) ? $$tidVar : 0;
        $page=isset($$pageVar) ? $$pageVar : 1;
    }
    // Anything non-numeric falls back to type 0 / page 1.
    $tid=intval(is_numeric($tid) ? $tid : 0);
    $page=intval(is_numeric($page) ? $page : 1);
    // NOTE: a "missing parameter" guard for $tid==0 (showmsg + exit) existed
    // here only as commented-out code; a zero type id is passed through.
    $GLOBALS['tid']=$tid; // quoted key: a bare `tid` constant is an Error on PHP 8
    echoChannel($tid);
    
    /**
     * Render the /map/channel.html template for one type (category) and echo it.
     *
     * Counts the rows of sea_data that belong to the type (directly, or through
     * v_extratype), builds the page from the template - using the file cache
     * when $cfg_iscache is on - and fills in the paging, member and
     * order-link placeholders before printing the result.
     *
     * @param int|string $typeId Type id to list; callers pass an integer.
     * @return void The rendered content is written with echo.
     */
    function echoChannel($typeId)
    {
        global $dsql,$cfg_iscache,$mainClassObj,$page,$t1,$cfg_basehost;

        $channelTmpName=getTypeTemplate($typeId);
        $channelTmpName=empty($channelTmpName) ? "channel.html" : $channelTmpName;
        // The same template serves every skin/device combination; the former
        // mobile-only branch assigned this identical path and was removed.
        $channelTemplatePath = "/map/channel.html";
        // NOTE: the hidden-type check and the member-permission check are not
        // performed here (they existed only as commented-out code).

        $pSize = getPageSizeOnCache($channelTemplatePath,"channel",$channelTmpName);
        if (empty($pSize)) $pSize=12; // fallback page size; also avoids dividing by zero below

        $typeIds = getTypeId($typeId);
        $typename=getTypeName($typeId);

        // The id is cast before it is embedded in the SQL text so this function
        // stays safe even if a caller forgets to sanitise it.
        $extrasql = ($typeId!="")
            ? " or FIND_IN_SET('".intval($typeId)."',v_extratype)<>0 "
            : "";
        $sql="select count(*) as dd from sea_data where (tid in (".$typeIds.") ".$extrasql.")";
        $row = $dsql->GetOne($sql);
        $TotalResult = is_array($row) ? $row['dd'] : 0;
        $pCount = ceil($TotalResult/$pSize);

        $currentTypeId = $typeId;
        $cacheName = "parse_channel_".$currentTypeId.$GLOBALS['cfg_mskin'].$GLOBALS['isMobile'];
        if($cfg_iscache && chkFileCache($cacheName)){
            $content = getFileCache($cacheName);
        }else{
            // Cache miss or caching disabled: render once, store only if caching is on.
            $content = parseChannelPart($channelTemplatePath,$currentTypeId);
            $content = str_replace("{channelpage:typename}",$typename,$content);
            $content = str_replace("{channelpage:typeid}",$currentTypeId,$content);
            if($cfg_iscache){
                setFileCache($cacheName,$content);
            }
        }

        // Per-request placeholders are filled after the cache so they are never cached.
        $content = str_replace("{channelpage:page}",$page,$content);
        $content=$mainClassObj->ParsePageList($content,$typeIds,$page,$pCount,$TotalResult,"channel",$currentTypeId);
        $content=$mainClassObj->parseIf($content);
        $content=str_replace("{seacms:member}",front_member(),$content);

        // {channelpage:order-<key>-link} and {channelpage:order-<key>asc-link}
        foreach (array('hit','id','time','commend','score') as $orderKey) {
            foreach (array('','asc') as $direction) {
                $order = $orderKey.$direction;
                $content = str_replace(
                    "{channelpage:order-".$order."-link}",
                    $cfg_basehost."/search.php?page=1&searchtype=5&order=".$order."&tid=".$typeId,
                    $content
                );
            }
        }

        echo str_replace("{seacms:runinfo}",getRunTime($t1),$content) ;
    }
    
    /**
     * Load a channel template and expand its tags for the given type.
     *
     * The template is read from sea_ROOT.$templatePath and passed, in order,
     * through the $mainClassObj parsers; the type-specific text, keywords,
     * description and title placeholders are substituted last.
     *
     * @param string     $templatePath  Template path relative to sea_ROOT.
     * @param int|string $currentTypeId Type id the page is rendered for.
     * @return string The expanded template.
     */
    function parseChannelPart($templatePath,$currentTypeId)
    {
        global $mainClassObj;

        $html = loadFile(sea_ROOT.$templatePath);
        $html = $mainClassObj->parseTopAndFoot($html);
        // The current type id has to be in place before the remaining parsers run.
        $html = str_replace("{seacms:currenttypeid}",$currentTypeId,$html);

        $html = $mainClassObj->parseSelf($html);
        $html = $mainClassObj->parseHistory($html);
        $html = $mainClassObj->parseGlobal($html);
        $html = $mainClassObj->parseMenuList($html,"",$currentTypeId);
        $html = $mainClassObj->parseAreaList($html);
        $html = $mainClassObj->parseVideoList($html,$currentTypeId);
        $html = $mainClassObj->parseNewsList($html,$currentTypeId);
        $html = $mainClassObj->parseTopicList($html);

        // placeholder => name of the helper that supplies its value
        $typeTags = array(
            "{channelpage:typetext}"    => 'getTypeText',
            "{channelpage:keywords}"    => 'getTypeKeywords',
            "{channelpage:description}" => 'getTypeDescription',
            "{channelpage:title}"       => 'getTypeTitle',
        );
        foreach ($typeTags as $placeholder => $lookup) {
            $html = str_replace($placeholder,$lookup($currentTypeId),$html);
        }

        return $html;
    }
    ?>
    

    3、在map目录下新建一个channel.html文件,代码内容如下:

    <?xml version="1.0" encoding="utf-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    <!-- Sitemap feed template: the channellist loop below emits one url entry per item. The size and order attributes presumably cap the list at 2000 items sorted by time; confirm against the SeaCMS tag reference. -->
    {seacms:channellist size=2000 order=time}
    <url>
    <loc>{seacms:siteurl}[channellist:link]</loc>
    <lastmod>[channellist:time style=yyyy-mm-dd]</lastmod>
    <changefreq>daily</changefreq>
    <priority>0.8</priority>
    </url>
    {/seacms:channellist}
    </urlset>
    

    4、从xml文件取数据并做百度主动推送,代码如下:

    #coding:utf-8
    # NOTE: this script is written for Python 2 (see the reload(sys) call below).
    import requests,time,re,os
    import sys
    # Python 2-only idiom: reload(sys) makes sys.setdefaultencoding available
    # again so the implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    def main():
        """Fetch the sitemap, record URLs not seen before, and push them to Baidu.

        Steps:
          1. Download the sitemap XML and extract every <loc> value.
          2. Drop URLs already listed in all_url.txt (the history file).
          3. Append the new URLs to all_url.txt and write them, in batches, to
             yesterday_<n>.txt (desktop) and yesterday_m_<n>.txt (mobile host).
          4. POST each batch file to the Baidu push endpoint for both sites.

        Written for Python 2, like the rest of this script.

        NOTE(review): URLs are added to all_url.txt before the push happens, so
        a failed push is not retried on the next run - same as the original.
        """
        sitemap_url = 'http://yp.jd.com/00/00_0.xml'
        push_url = 'http://data.zz.baidu.com/urls'
        # Batch size taken from the original code; presumably the per-request
        # limit of the push API - confirm against the Baidu documentation.
        batch_size = 2000

        # Timeout so a stalled server cannot hang the scheduler forever.
        r = requests.get(sitemap_url, timeout=30)
        zhishi_url = re.findall(r'<loc>(.*?)</loc>', r.content)

        # History of everything recorded so far. A set gives O(1) membership
        # tests; a missing file simply means this is the first run.
        if os.path.exists('all_url.txt'):
            with open('all_url.txt') as history:
                has_push = set(line.strip() for line in history)
        else:
            has_push = set()

        # Keep sitemap order, skip known URLs and duplicates within the sitemap.
        new_links = []
        for link in zhishi_url:
            if link in has_push:
                continue
            has_push.add(link)
            new_links.append(link)

        with open('all_url.txt', 'a') as f:
            f.writelines(link + '\n' for link in new_links)

        # Always produce batch 0 (possibly empty) so stale files from an
        # earlier run are truncated, as the original did.
        batches = [new_links[start:start + batch_size]
                   for start in range(0, len(new_links), batch_size)] or [[]]
        for index, batch in enumerate(batches):
            with open('yesterday_%s.txt' % index, 'w') as f_ytd:
                f_ytd.writelines(link + '\n' for link in batch)
            with open('yesterday_m_%s.txt' % index, 'w') as f_ytd_m:
                # Only the first "www" (the host part) is rewritten, so a
                # "www" appearing later in the path is left alone.
                f_ytd_m.writelines(link.replace('www', 'm', 1) + '\n'
                                   for link in batch)

        print('yesterday has %s' % len(new_links))
        print('crawl done')

        time.sleep(5)

        # Push phase
        print('push begin')
        headers = {'Content-Type': 'text/plain'}
        params = {'site': 'www.jd.com', 'token': '00'}  # ,'type':'original'
        params_m = {'site': 'm.jd.com', 'token': '00'}  # ,'type':'original'
        for index, batch in enumerate(batches):
            if not batch:
                # Nothing new in this batch; do not send an empty request.
                continue
            try:
                with open('yesterday_%s.txt' % index, 'rb') as fh:
                    payload = fh.read()
                with open('yesterday_m_%s.txt' % index, 'rb') as fh:
                    payload_m = fh.read()

                r = requests.post(push_url, params=params, headers=headers,
                                  data=payload, timeout=30)
                # Mobile site
                r_m = requests.post(push_url, params=params_m, headers=headers,
                                    data=payload_m, timeout=30)
                print('PC:' + r.content + ',' + 'M:' + r_m.content)
            except (requests.RequestException, IOError) as e:
                # Best effort: report the failure and go on with the next batch.
                print(e)
                continue
        print('Finish!!!')
    
    if __name__ == '__main__':
        # Simple daily scheduler: run main() once per day at 18:00 local time.
        # The loop sleeps between checks instead of spinning, and remembers
        # the day of the last run so the job fires exactly once even though
        # the 18:00 minute is sampled several times.
        last_run_day = None
        while True:
            current_time = time.localtime(time.time())
            today = (current_time.tm_year, current_time.tm_yday)
            if (current_time.tm_hour == 18 and current_time.tm_min == 0
                    and today != last_run_day):
                last_run_day = today
                main()
            time.sleep(20)
    

    百度主动推送的好处就是主动,而sitemap的操作是被动的,需要等搜索引擎蜘蛛自己来抓取。但是需要注意的一点是,同一个URL不要重复推送,否则会影响蜘蛛抓取,浪费蜘蛛资源。除了要推送,还有个要注意的地方:海洋CMS的演员链接是动态的URL,并不利于SEO,所以把海洋CMS的伪静态设置好尤其重要,请参考:海洋seacms演员名称伪静态设置:自动布局长尾关键词


    站盟网 » 海洋CMS自动百度推送 轻轻松松实现十万收录

    发表评论

    还没有评论,快来抢沙发吧!

    如需帝国cms功能定制以及二次开发请联系我们

    联系作者
    请选择支付方式
    ×
    支付宝支付
    微信支付
    余额支付
    ×
    微信扫码支付 0 元