[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/lib/exe/ -> indexer.php (source)

   1  <?php
   2  /**
   3   * DokuWiki indexer
   4   *
   5   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6   * @author     Andreas Gohr <andi@splitbrain.org>
   7   */
   8  if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
   9  define('DOKU_DISABLE_GZIP_OUTPUT',1);
  10  require_once (DOKU_INC.'inc/init.php');
  11  require_once (DOKU_INC.'inc/auth.php');
  12  require_once (DOKU_INC.'inc/events.php');
  13  session_write_close();  //close session
  14  if(!defined('NL')) define('NL',"\n");
  15  
  16  // Version tag used to force rebuild on upgrade
  17  define('INDEXER_VERSION', 2);
  18  
  19  // keep running after browser closes connection
  20  @ignore_user_abort(true);
  21  
  22  // check if user abort worked, if yes send output early
  23  if(@ignore_user_abort() && !$conf['broken_iua']){
  24      sendGIF(); // send gif
  25      $defer = false;
  26  }else{
  27      $defer = true;
  28  }
  29  
  30  $ID = cleanID($_REQUEST['id']);
  31  
  32  // Catch any possible output (e.g. errors)
  33  if(!$_REQUEST['debug']) ob_start();
  34  
  35  // run one of the jobs
  36  $tmp = array();
  37  $evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
  38  if ($evt->advise_before()) {
  39    runIndexer() or
  40    metaUpdate() or
  41    runSitemapper() or
  42    runTrimRecentChanges() or
  43    $evt->advise_after();
  44  }
  45  if($defer) sendGIF();
  46  
  47  if(!$_REQUEST['debug']) ob_end_clean();
  48  exit;
  49  
  50  // --------------------------------------------------------------------
  51  
  52  /**
  53   * Trims the recent changes cache (or imports the old changelog) as needed.
  54   *
  55   * @author Ben Coburn <btcoburn@silicodon.net>
  56   */
  57  function runTrimRecentChanges() {
  58      global $conf;
  59  
  60      // Import old changelog (if needed)
  61      // Uses the imporoldchangelog plugin to upgrade the changelog automaticaly.
  62      // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
  63      if (isset($conf['changelog_old']) &&
  64          @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
  65          !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
  66              $tmp = array(); // no event data
  67              trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
  68              return true;
  69      }
  70  
  71      // Trim the Recent Changes
  72      // Trims the recent changes cache to the last $conf['changes_days'] recent
  73      // changes or $conf['recent'] items, which ever is larger.
  74      // The trimming is only done once a day.
  75      if (@file_exists($conf['changelog']) &&
  76          (filectime($conf['changelog'])+86400)<time() &&
  77          !@file_exists($conf['changelog'].'_tmp')) {
  78              io_lock($conf['changelog']);
  79              $lines = file($conf['changelog']);
  80              if (count($lines)<=$conf['recent']) {
  81                  // nothing to trim
  82                  io_unlock($conf['changelog']);
  83                  return false;
  84              }
  85  
  86              io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
  87              $trim_time = time() - $conf['recent_days']*86400;
  88              $out_lines = array();
  89  
  90              for ($i=0; $i<count($lines); $i++) {
  91                $log = parseChangelogLine($lines[$i]);
  92                if ($log === false) continue;                      // discard junk
  93                if ($log['date'] < $trim_time) {
  94                  $old_lines[$log['date'].".$i"] = $lines[$i];     // keep old lines for now (append .$i to prevent key collisions)
  95                } else {
  96                  $out_lines[$log['date'].".$i"] = $lines[$i];     // definitely keep these lines
  97                }
  98              }
  99  
 100              // sort the final result, it shouldn't be necessary,
 101              //   however the extra robustness in making the changelog cache self-correcting is worth it
 102              ksort($out_lines);
 103              $extra = $conf['recent'] - count($out_lines);        // do we need extra lines do bring us up to minimum
 104              if ($extra > 0) {
 105                ksort($old_lines);
 106                $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
 107              }
 108  
 109              // save trimmed changelog
 110              io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
 111              @unlink($conf['changelog']);
 112              if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
 113                  // rename failed so try another way...
 114                  io_unlock($conf['changelog']);
 115                  io_saveFile($conf['changelog'], implode('', $out_lines));
 116                  @unlink($conf['changelog'].'_tmp');
 117              } else {
 118                  io_unlock($conf['changelog']);
 119              }
 120              return true;
 121      }
 122  
 123      // nothing done
 124      return false;
 125  }
 126  
 127  /**
 128   * Runs the indexer for the current page
 129   *
 130   * @author Andreas Gohr <andi@splitbrain.org>
 131   */
 132  function runIndexer(){
 133      global $ID;
 134      global $conf;
 135      print "runIndexer(): started".NL;
 136  
 137      // Move index files (if needed)
 138      // Uses the importoldindex plugin to upgrade the index automatically.
 139      // FIXME: Remove this from runIndexer when it is no longer needed.
 140      if (@file_exists($conf['cachedir'].'/page.idx') &&
 141          (!@file_exists($conf['indexdir'].'/page.idx') ||
 142           !filesize($conf['indexdir'].'/page.idx'))  &&
 143          !@file_exists($conf['indexdir'].'/index_importing')) {
 144          echo "trigger TEMPORARY_INDEX_UPGRADE_EVENT\n";
 145          $tmp = array(); // no event data
 146          trigger_event('TEMPORARY_INDEX_UPGRADE_EVENT', $tmp);
 147      }
 148  
 149      if(!$ID) return false;
 150  
 151      // check if indexing needed
 152      $idxtag = metaFN($ID,'.indexed');
 153      if(@file_exists($idxtag)){
 154          if(io_readFile($idxtag) >= INDEXER_VERSION){
 155              $last = @filemtime($idxtag);
 156              if($last > @filemtime(wikiFN($ID))){
 157                  print "runIndexer(): index for $ID up to date".NL;
 158                  return false;
 159              }
 160          }
 161      }
 162  
 163      // try to aquire a lock
 164      $lock = $conf['lockdir'].'/_indexer.lock';
 165      while(!@mkdir($lock,$conf['dmode'])){
 166          usleep(50);
 167          if(time()-@filemtime($lock) > 60*5){
 168              // looks like a stale lock - remove it
 169              @rmdir($lock);
 170              print "runIndexer(): stale lock removed".NL;
 171          }else{
 172              print "runIndexer(): indexer locked".NL;
 173              return false;
 174          }
 175      }
 176      if($conf['dperm']) chmod($lock, $conf['dperm']);
 177  
 178      require_once (DOKU_INC.'inc/indexer.php');
 179  
 180      // upgrade to version 2
 181      if (!@file_exists($conf['indexdir'].'/pageword.idx'))
 182          idx_upgradePageWords();
 183  
 184      // do the work
 185      idx_addPage($ID);
 186  
 187      // we're finished - save and free lock
 188      io_saveFile(metaFN($ID,'.indexed'),INDEXER_VERSION);
 189      @rmdir($lock);
 190      print "runIndexer(): finished".NL;
 191      return true;
 192  }
 193  
 194  /**
 195   * Will render the metadata for the page if not exists yet
 196   *
 197   * This makes sure pages which are created from outside DokuWiki will
 198   * gain their data when viewed for the first time.
 199   */
 200  function metaUpdate(){
 201      global $ID;
 202      print "metaUpdate(): started".NL;
 203  
 204      if(!$ID) return false;
 205      $file = metaFN($ID, '.meta');
 206      echo "meta file: $file".NL;
 207  
 208      // rendering needed?
 209      if (@file_exists($file)) return false;
 210      if (!@file_exists(wikiFN($ID))) return false;
 211  
 212      require_once (DOKU_INC.'inc/common.php');
 213      require_once (DOKU_INC.'inc/parserutils.php');
 214      global $conf;
 215  
 216  
 217      // gather some additional info from changelog
 218      $info = io_grep($conf['changelog'],
 219                      '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
 220                      0,true);
 221  
 222      $meta = array();
 223      if(!empty($info)){
 224          $meta['date']['created'] = $info[0][1];
 225          foreach($info as $item){
 226              if($item[4] != '*'){
 227                  $meta['date']['modified'] = $item[1];
 228                  if($item[3]){
 229                      $meta['contributor'][$item[3]] = $item[3];
 230                  }
 231              }
 232          }
 233      }
 234  
 235      $meta = p_render_metadata($ID, $meta);
 236      io_saveFile($file, serialize($meta));
 237  
 238      echo "metaUpdate(): finished".NL;
 239      return true;
 240  }
 241  
 242  /**
 243   * Builds a Google Sitemap of all public pages known to the indexer
 244   *
 245   * The map is placed in the root directory named sitemap.xml.gz - This
 246   * file needs to be writable!
 247   *
 248   * @author Andreas Gohr
 249   * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 250   */
 251  function runSitemapper(){
 252      global $conf;
 253      print "runSitemapper(): started".NL;
 254      if(!$conf['sitemap']) return false;
 255  
 256      if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
 257          $sitemap = 'sitemap.xml.gz';
 258      }else{
 259          $sitemap = 'sitemap.xml';
 260      }
 261      print "runSitemapper(): using $sitemap".NL;
 262  
 263      if(@file_exists(DOKU_INC.$sitemap)){
 264          if(!is_writable(DOKU_INC.$sitemap)) return false;
 265      }else{
 266          if(!is_writable(DOKU_INC)) return false;
 267      }
 268  
 269      if(@filesize(DOKU_INC.$sitemap) &&
 270         @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
 271         print 'runSitemapper(): Sitemap up to date'.NL;
 272         return false;
 273      }
 274  
 275      $pages = file($conf['indexdir'].'/page.idx');
 276      print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;
 277  
 278      // build the sitemap
 279      ob_start();
 280      print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
 281      print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
 282      foreach($pages as $id){
 283          $id = trim($id);
 284          $file = wikiFN($id);
 285  
 286          //skip hidden, non existing and restricted files
 287          if(isHiddenPage($id)) continue;
 288          $date = @filemtime($file);
 289          if(!$date) continue;
 290          if(auth_aclcheck($id,'','') < AUTH_READ) continue;
 291  
 292          print '  <url>'.NL;
 293          print '    <loc>'.wl($id,'',true).'</loc>'.NL;
 294          print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
 295          print '  </url>'.NL;
 296      }
 297      print '</urlset>'.NL;
 298      $data = ob_get_contents();
 299      ob_end_clean();
 300  
 301      //save the new sitemap
 302      io_saveFile(DOKU_INC.$sitemap,$data);
 303  
 304      //ping search engines...
 305      $http = new DokuHTTPClient();
 306      $http->timeout = 8;
 307  
 308      //ping google
 309      print 'runSitemapper(): pinging google'.NL;
 310      $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
 311      $url .= urlencode(DOKU_URL.$sitemap);
 312      $resp = $http->get($url);
 313      if($http->error) print 'runSitemapper(): '.$http->error.NL;
 314      print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
 315  
 316      //ping yahoo
 317      print 'runSitemapper(): pinging yahoo'.NL;
 318      $url  = 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=';
 319      $url .= urlencode(DOKU_URL.$sitemap);
 320      $resp = $http->get($url);
 321      if($http->error) print 'runSitemapper(): '.$http->error.NL;
 322      print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
 323  
 324      //ping microsoft
 325      print 'runSitemapper(): pinging microsoft'.NL;
 326      $url  = 'http://webmaster.live.com/webmaster/ping.aspx?sitemap=';
 327      $url .= urlencode(DOKU_URL.$sitemap);
 328      $resp = $http->get($url);
 329      if($http->error) print 'runSitemapper(): '.$http->error.NL;
 330      print 'runSitemapper(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)).NL;
 331  
 332      print 'runSitemapper(): finished'.NL;
 333      return true;
 334  }
 335  
 336  /**
 337   * Formats a timestamp as ISO 8601 date
 338   *
 339   * @author <ungu at terong dot com>
 340   * @link http://www.php.net/manual/en/function.date.php#54072
 341   */
 342  function date_iso8601($int_date) {
 343     //$int_date: current date in UNIX timestamp
 344     $date_mod = date('Y-m-d\TH:i:s', $int_date);
 345     $pre_timezone = date('O', $int_date);
 346     $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
 347     $date_mod .= $time_zone;
 348     return $date_mod;
 349  }
 350  
 351  /**
 352   * Just send a 1x1 pixel blank gif to the browser
 353   *
 354   * @author Andreas Gohr <andi@splitbrain.org>
 355   * @author Harry Fuecks <fuecks@gmail.com>
 356   */
 357  function sendGIF(){
 358      if($_REQUEST['debug']){
 359          header('Content-Type: text/plain');
 360          return;
 361      }
 362      $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
 363      header('Content-Type: image/gif');
 364      header('Content-Length: '.strlen($img));
 365      header('Connection: Close');
 366      print $img;
 367      flush();
 368      // Browser should drop connection after this
 369      // Thinks it's got the whole image
 370  }
 371  
 372  //Setup VIM: ex: et ts=4 enc=utf-8 :
 373  // No trailing PHP closing tag - no output please!
 374  // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php


Generated: Tue Dec 2 01:30:01 2008 Cross-referenced by PHPXref 0.7