[ Index ]

PHP Cross Reference of DokuWiki

title

Body

[close]

/inc/ -> search.php (source)

   1  <?php
   2  /**
   3   * DokuWiki search functions
   4   *
   5   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6   * @author     Andreas Gohr <andi@splitbrain.org>
   7   */
   8  
   9    if(!defined('DOKU_INC')) define('DOKU_INC',fullpath(dirname(__FILE__).'/../').'/');
  10    require_once (DOKU_INC.'inc/common.php');
  11  
  12  /**
  13   * recurse direcory
  14   *
  15   * This function recurses into a given base directory
  16   * and calls the supplied function for each file and directory
  17   *
  18   * @param   array ref $data The results of the search are stored here
  19   * @param   string    $base Where to start the search
  20   * @param   callback  $func Callback (function name or arayy with object,method)
  21   * @param   string    $dir  Current directory beyond $base
  22   * @param   int       $lvl  Recursion Level
  23   * @author  Andreas Gohr <andi@splitbrain.org>
  24   */
  25  function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  26    $dirs   = array();
  27    $files  = array();
  28  
  29    //read in directories and files
  30    $dh = @opendir($base.'/'.$dir);
  31    if(!$dh) return;
  32    while(($file = readdir($dh)) !== false){
  33      if(preg_match('/^[\._]/',$file)) continue; //skip hidden files and upper dirs
  34      if(is_dir($base.'/'.$dir.'/'.$file)){
  35        $dirs[] = $dir.'/'.$file;
  36        continue;
  37      }
  38      $files[] = $dir.'/'.$file;
  39    }
  40    closedir($dh);
  41    sort($files);
  42    sort($dirs);
  43  
  44    //give directories to userfunction then recurse
  45    foreach($dirs as $dir){
  46      if (call_user_func_array($func, array(&$data,$base,$dir,'d',$lvl,$opts))){
  47        search($data,$base,$func,$opts,$dir,$lvl+1);
  48      }
  49    }
  50    //now handle the files
  51    foreach($files as $file){
  52      call_user_func_array($func, array(&$data,$base,$file,'f',$lvl,$opts));
  53    }
  54  }
  55  
  56  /**
  57   * Wrapper around call_user_func_array.
  58   * 
  59   * @deprecated
  60   */
  61  function search_callback($func,&$data,$base,$file,$type,$lvl,$opts){
  62    return call_user_func_array($func, array(&$data,$base,$file,$type,$lvl,$opts));
  63  }
  64  
  65  /**
  66   * The following functions are userfunctions to use with the search
  67   * function above. This function is called for every found file or
  68   * directory. When a directory is given to the function it has to
  69   * decide if this directory should be traversed (true) or not (false)
  70   * The function has to accept the following parameters:
  71   *
  72   * &$data - Reference to the result data structure
  73   * $base  - Base usually $conf['datadir']
  74   * $file  - current file or directory relative to $base
  75   * $type  - Type either 'd' for directory or 'f' for file
  76   * $lvl   - Current recursion depth
  77   * $opts  - option array as given to search()
  78   *
  79   * return values for files are ignored
  80   *
  81   * All functions should check the ACL for document READ rights.
  82   * Namespaces (directories) are NOT checked, as this would break
  83   * the recursion (you can have a nonreadable dir above a readable
  84   * one nested deeper). Also make sure to check the file type (for example
  85   * in case of lockfiles).
  86   */
  87  
  88  /**
  89   * Searches for pages beginning with the given query
  90   *
  91   * @author Andreas Gohr <andi@splitbrain.org>
  92   */
  93  function search_qsearch(&$data,$base,$file,$type,$lvl,$opts){
  94    $item = array();
  95  
  96    if($type == 'd'){
  97      return false; //no handling yet
  98    }
  99  
 100    //only search txt files
 101    if(substr($file,-4) != '.txt') return false;
 102  
 103    //get id
 104    $id = pathID($file);
 105  
 106    //check if it matches the query
 107    if(!preg_match('/^'.preg_quote($opts['query'],'/').'/u',$id)){
 108      return false;
 109    }
 110  
 111    //check ACL
 112    if(auth_quickaclcheck($id) < AUTH_READ){
 113      return false;
 114    }
 115  
 116    $data[]=array( 'id'    => $id,
 117                   'type'  => $type,
 118                   'level' => 1,
 119                   'open'  => true);
 120    return true;
 121  }
 122  
 123  /**
 124   * Build the browsable index of pages
 125   *
 126   * $opts['ns'] is the current namespace
 127   *
 128   * @author  Andreas Gohr <andi@splitbrain.org>
 129   */
 130  function search_index(&$data,$base,$file,$type,$lvl,$opts){
 131    global $conf;
 132    $return = true;
 133  
 134    $item = array();
 135  
 136    if($type == 'd' && !preg_match('#^'.$file.'(/|$)#','/'.$opts['ns'])){
 137      //add but don't recurse
 138      $return = false;
 139    }elseif($type == 'f' && ($opts['nofiles'] || substr($file,-4) != '.txt')){
 140      //don't add
 141      return false;
 142    }
 143  
 144    $id = pathID($file);
 145  
 146    if($type=='d' && $conf['sneaky_index'] && auth_quickaclcheck($id.':') < AUTH_READ){
 147      return false;
 148    }
 149  
 150    //check hidden
 151    if(isHiddenPage($id)){
 152      return false;
 153    }
 154  
 155    //check ACL
 156    if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
 157      return false;
 158    }
 159  
 160    $data[]=array( 'id'    => $id,
 161                   'type'  => $type,
 162                   'level' => $lvl,
 163                   'open'  => $return );
 164    return $return;
 165  }
 166  
 167  /**
 168   * List all namespaces
 169   *
 170   * @author  Andreas Gohr <andi@splitbrain.org>
 171   */
 172  function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
 173    if($type == 'f') return true; //nothing to do on files
 174  
 175    $id = pathID($file);
 176    $data[]=array( 'id'    => $id,
 177                   'type'  => $type,
 178                   'level' => $lvl );
 179    return true;
 180  }
 181  
 182  /**
 183   * List all mediafiles in a namespace
 184   *
 185   * @author  Andreas Gohr <andi@splitbrain.org>
 186   */
 187  function search_media(&$data,$base,$file,$type,$lvl,$opts){
 188    //we do nothing with directories
 189    if($type == 'd') {
 190        return ($opts['recursive']);
 191    } 
 192  
 193    $info         = array();
 194    $info['id']   = pathID($file,true);
 195    if($info['id'] != cleanID($info['id'])){
 196      if($opts['showmsg'])
 197        msg(hsc($info['id']).' is not a valid file name for DokuWiki - skipped',-1);
 198      return false; // skip non-valid files
 199    }
 200  
 201    //check ACL for namespace (we have no ACL for mediafiles)
 202    if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
 203      return false;
 204    }
 205  
 206    $info['file'] = basename($file);
 207    $info['size'] = filesize($base.'/'.$file);
 208    $info['mtime'] = filemtime($base.'/'.$file);
 209    $info['writable'] = is_writable($base.'/'.$file);
 210    if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
 211      $info['isimg'] = true;
 212      require_once (DOKU_INC.'inc/JpegMeta.php');
 213      $info['meta']  = new JpegMeta($base.'/'.$file);
 214    }else{
 215      $info['isimg'] = false;
 216    }
 217    $data[] = $info;
 218  
 219    return false;
 220  }
 221  
 222  /**
 223   * This function just lists documents (for RSS namespace export)
 224   *
 225   * @author  Andreas Gohr <andi@splitbrain.org>
 226   */
 227  function search_list(&$data,$base,$file,$type,$lvl,$opts){
 228    //we do nothing with directories
 229    if($type == 'd') return false;
 230    //only search txt files
 231    if(substr($file,-4) == '.txt'){
 232      //check ACL
 233      $id = pathID($file);
 234      if(auth_quickaclcheck($id) < AUTH_READ){
 235        return false;
 236      }
 237      $data[]['id'] = $id;
 238    }
 239    return false;
 240  }
 241  
 242  /**
 243   * Quicksearch for searching matching pagenames
 244   *
 245   * $opts['query'] is the search query
 246   *
 247   * @author  Andreas Gohr <andi@splitbrain.org>
 248   */
 249  function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
 250    //we do nothing with directories
 251    if($type == 'd') return true;
 252    //only search txt files
 253    if(substr($file,-4) != '.txt') return true;
 254  
 255    //simple stringmatching
 256    if (!empty($opts['query'])){
 257      if(strpos($file,$opts['query']) !== false){
 258        //check ACL
 259        $id = pathID($file);
 260        if(auth_quickaclcheck($id) < AUTH_READ){
 261          return false;
 262        }
 263        $data[]['id'] = $id;
 264      }
 265    }
 266    return true;
 267  }
 268  
 269  /**
 270   * Just lists all documents
 271   *
 272   * @author  Andreas Gohr <andi@splitbrain.org>
 273   */
 274  function search_allpages(&$data,$base,$file,$type,$lvl,$opts){
 275    //we do nothing with directories
 276    if($type == 'd') return true;
 277    //only search txt files
 278    if(substr($file,-4) != '.txt') return true;
 279  
 280    $data[]['id'] = pathID($file);
 281    return true;
 282  }
 283  
 284  /**
 285   * Search for backlinks to a given page
 286   *
 287   * $opts['ns']    namespace of the page
 288   * $opts['name']  name of the page without namespace
 289   *
 290   * @author  Andreas Gohr <andi@splitbrain.org>
 291   * @deprecated Replaced by ft_backlinks()
 292   */
 293  function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
 294    //we do nothing with directories
 295    if($type == 'd') return true;
 296    //only search txt files
 297    if(substr($file,-4) != '.txt') return true;
 298  
 299    //absolute search id
 300    $sid = cleanID($opts['ns'].':'.$opts['name']);
 301  
 302    //current id and namespace
 303    $cid = pathID($file);
 304    $cns = getNS($cid);
 305  
 306    //check ACL
 307    if(auth_quickaclcheck($cid) < AUTH_READ){
 308      return false;
 309    }
 310  
 311    //fetch instructions
 312    require_once (DOKU_INC.'inc/parserutils.php');
 313    $instructions = p_cached_instructions($base.$file,true);
 314    if(is_null($instructions)) return false;
 315  
 316    //check all links for match
 317    foreach($instructions as $ins){
 318      if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink') ){
 319        $mid = $ins[1][0];
 320        resolve_pageid($cns,$mid,$exists); //exists is not used
 321        if($mid == $sid){
 322          //we have a match - finish
 323          $data[]['id'] = $cid;
 324          break;
 325        }
 326      }
 327    }
 328  
 329    return false;
 330  }
 331  
 332  /**
 333   * Fulltextsearch
 334   *
 335   * $opts['query'] is the search query
 336   *
 337   * @author  Andreas Gohr <andi@splitbrain.org>
 338   * @deprecated - fulltext indexer is used instead
 339   */
 340  function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
 341    //we do nothing with directories
 342    if($type == 'd') return true;
 343    //only search txt files
 344    if(substr($file,-4) != '.txt') return true;
 345  
 346    //check ACL
 347    $id = pathID($file);
 348    if(auth_quickaclcheck($id) < AUTH_READ){
 349      return false;
 350    }
 351  
 352    //create regexp from queries
 353    $poswords = array();
 354    $negwords = array();
 355    $qpreg = preg_split('/\s+/',$opts['query']);
 356  
 357    foreach($qpreg as $word){
 358      switch(substr($word,0,1)){
 359        case '-':
 360          if(strlen($word) > 1){  // catch single '-'
 361            array_push($negwords,preg_quote(substr($word,1),'#'));
 362          }
 363          break;
 364        case '+':
 365          if(strlen($word) > 1){  // catch single '+'
 366            array_push($poswords,preg_quote(substr($word,1),'#'));
 367          }
 368          break;
 369        default:
 370          array_push($poswords,preg_quote($word,'#'));
 371          break;
 372      }
 373    }
 374  
 375    // a search without any posword is useless
 376    if (!count($poswords)) return true;
 377  
 378    $reg  = '^(?=.*?'.join(')(?=.*?',$poswords).')';
 379    $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$';
 380    search_regex($data,$base,$file,$reg,$poswords);
 381    return true;
 382  }
 383  
 384  /**
 385   * Reference search
 386   * This fuction searches for existing references to a given media file
 387   * and returns an array with the found pages. It doesn't pay any
 388   * attention to ACL permissions to find every reference. The caller
 389   * must check if the user has the appropriate rights to see the found
 390   * page and eventually have to prevent the result from displaying.
 391   *
 392   * @param array  $data Reference to the result data structure
 393   * @param string $base Base usually $conf['datadir']
 394   * @param string $file current file or directory relative to $base
 395   * @param char   $type Type either 'd' for directory or 'f' for file
 396   * @param int    $lvl  Current recursion depht
 397   * @param mixed  $opts option array as given to search()
 398   *
 399   * $opts['query'] is the demanded media file name
 400   *
 401   * @author  Andreas Gohr <andi@splitbrain.org>
 402   * @author  Matthias Grimm <matthiasgrimm@users.sourceforge.net>
 403   */
 404  function search_reference(&$data,$base,$file,$type,$lvl,$opts){
 405    global $conf;
 406  
 407    //we do nothing with directories
 408    if($type == 'd') return true;
 409  
 410    //only search txt files
 411    if(substr($file,-4) != '.txt') return true;
 412  
 413    //we finish after 'cnt' references found. The return value
 414    //'false' will skip subdirectories to speed search up.
 415    $cnt = $conf['refshow'] > 0 ? $conf['refshow'] : 1;
 416    if(count($data) >= $cnt) return false;
 417  
 418    $reg = '\{\{ *\:?'.$opts['query'].' *(\|.*)?\}\}';
 419    search_regex($data,$base,$file,$reg,array($opts['query']));
 420    return true;
 421  }
 422  
 423  /* ------------- helper functions below -------------- */
 424  
 425  /**
 426   * fulltext search helper
 427   * searches a text file with a given regular expression
 428   * no ACL checks are performed. This have to be done by
 429   * the caller if necessary.
 430   *
 431   * @param array  $data  reference to array for results
 432   * @param string $base  base directory
 433   * @param string $file  file name to search in
 434   * @param string $reg   regular expression to search for
 435   * @param array  $words words that should be marked in the results
 436   *
 437   * @author  Andreas Gohr <andi@splitbrain.org>
 438   * @author  Matthias Grimm <matthiasgrimm@users.sourceforge.net>
 439   *
 440   * @deprecated - fulltext indexer is used instead
 441   */
 442  function search_regex(&$data,$base,$file,$reg,$words){
 443  
 444    //get text
 445    $text = io_readfile($base.'/'.$file);
 446    //lowercase text (u modifier does not help with case)
 447    $lctext = utf8_strtolower($text);
 448  
 449    //do the fulltext search
 450    $matches = array();
 451    if($cnt = preg_match_all('#'.$reg.'#usi',$lctext,$matches)){
 452      //this is not the best way for snippet generation but the fastest I could find
 453      $q = $words[0];  //use first word for snippet creation
 454      $p = utf8_strpos($lctext,$q);
 455      $f = $p - 100;
 456      $l = utf8_strlen($q) + 200;
 457      if($f < 0) $f = 0;
 458      $snippet = '<span class="search_sep"> ... </span>'.
 459                 htmlspecialchars(utf8_substr($text,$f,$l)).
 460                 '<span class="search_sep"> ... </span>';
 461      $mark    = '('.join('|', $words).')';
 462      $snippet = preg_replace('#'.$mark.'#si','<strong class="search_hit">\\1</strong>',$snippet);
 463  
 464      $data[] = array(
 465        'id'       => pathID($file),
 466        'count'    => preg_match_all('#'.$mark.'#usi',$lctext,$matches),
 467        'poswords' => join(' ',$words),
 468        'snippet'  => $snippet,
 469      );
 470    }
 471  
 472    return true;
 473  }
 474  
 475  
 476  /**
 477   * fulltext sort
 478   *
 479   * Callback sort function for use with usort to sort the data
 480   * structure created by search_fulltext. Sorts descending by count
 481   *
 482   * @author  Andreas Gohr <andi@splitbrain.org>
 483   */
 484  function sort_search_fulltext($a,$b){
 485    if($a['count'] > $b['count']){
 486      return -1;
 487    }elseif($a['count'] < $b['count']){
 488      return 1;
 489    }else{
 490      return strcmp($a['id'],$b['id']);
 491    }
 492  }
 493  
 494  /**
 495   * translates a document path to an ID
 496   *
 497   * @author  Andreas Gohr <andi@splitbrain.org>
 498   * @todo    move to pageutils
 499   */
 500  function pathID($path,$keeptxt=false){
 501    $id = utf8_decodeFN($path);
 502    $id = str_replace('/',':',$id);
 503    if(!$keeptxt) $id = preg_replace('#\.txt$#','',$id);
 504    $id = preg_replace('#^:+#','',$id);
 505    $id = preg_replace('#:+$#','',$id);
 506    return $id;
 507  }
 508  
 509  
 510  //Setup VIM: ex: et ts=2 enc=utf-8 :


Generated: Tue Dec 2 01:30:01 2008 Cross-referenced by PHPXref 0.7