topical media & game development 
  
 
 
 
 
  
    
    
  
 professional-sql-04-indexer.php / php
  #! /usr/bin/php
  <?php
  // NOTE(review): this listing appears to have been damaged during extraction.
  // Variable sigils are missing (e.g. `query`, `ch`, `GLOBALS['DB']`), several
  // statements are fused or truncated, and parentheses/braces are unbalanced,
  // so the file does not parse as PHP as shown. The notes below describe only
  // what the surviving text shows; restore the script from its original source
  // before running or refactoring it.
  //
  // Purpose, as far as the surviving comments and calls show: a command-line
  // indexer that truncates the search-term table, loads a stop-word list,
  // downloads documents with cURL, stores a title/description row per
  // document and writes an index record for each word.
  //
  // NOTE(review): the mysql_* functions used throughout belong to the original
  // MySQL extension, which is not available in PHP 7 and later -- confirm the
  // target PHP version before reuse.
  //
  // NOTE(review): every format string contains `\%s`. In a single-quoted PHP
  // string the backslash is kept literally, so sprintf() would emit a backslash
  // in front of the substituted prefix. Presumably an extraction artifact --
  // confirm against the original.

  // include shared code
  include 'lib/common.php';
  include 'lib/db.php';
  
  // clear index tables
  // NOTE(review): only the SEARCH_TERM truncate survives here, and the
  // mysql_query() call below has lost its query argument.
  query, query = sprintf('TRUNCATE TABLE \%sSEARCH_TERM', DB_TBL_PREFIX);
  mysql_query(GLOBALS['DB']);
  
  // load the stop-word list
  // NOTE(review): the query execution and the row-fetch condition of the loop
  // are missing from the next three lines.
  query, query = sprintf('SELECT TERM_VALUE FROM \%sSEARCH_STOP_WORD', DB_TBL_PREFIX);
  query, stop_words = array();
  while (result))
  {
      // since this list will be checked for each word, use term as the array
      // key -- presumably so membership can be tested with isset() on the key
      // instead of in_array(<term>, stop_words); the original comment is
      // truncated at this point, so confirm the intended wording.
      row['TERM_VALUE']] = true;
  }
  // NOTE(review): the next line looks like two fused statements (freeing the
  // result set and creating the cURL handle) -- confirm.
  mysql_free_result(ch = curl_init();
  
  // set curl options
  // (response headers excluded from output; custom user-agent string)
  curl_setopt(ch, CURLOPT_HEADER, false);
  curl_setopt(ch, CURLOPT_USERAGENT, 'Search Engine Indexer');
  
  // fetch list of documents to index
  // NOTE(review): the SELECT that builds this query is not present in the
  // listing, so the source table of document URLs cannot be determined here.
  result = mysql_query(GLOBALS['DB']);
  while (result))
  {
      // NOTE(review): the next line fuses several statements: the progress
      // echo, setting CURLOPT_URL, curl_exec(), tidy_repair_string(),
      // simplexml_load_string() (once with @ error suppression) and a test on
      // html->head->title. The individual statements are not recoverable from
      // this text.
      echo 'Processing: ' . ch, CURLOPT_URL, file = curl_exec(file = tidy_repair_string(html = simplexml_load_string(html = @simplexml_load_string(html->head->title)
      {
          html->head->title;
      }
      else
      {
          // use the filename if a title is not found
          row['DOCUMENT_URL']);
      }
  
      // extract the description
      // NOTE(review): the loop over html->head->meta and the start of the
      // INSERT statement are fused on the next line. The three escaped values
      // correspond to DOCUMENT_URL, DOCUMENT_TITLE and DESCRIPTION in the
      // column list, but their variable arguments have been lost.
      html->head->meta as meta['name']) && description = query = sprintf('INSERT INTO \%sSEARCH_DOCUMENT (DOCUMENT_URL, ' . 
          'DOCUMENT_TITLE, DESCRIPTION) VALUES ("\%s", "\%s", "\%s")',
          DB_TBL_PREFIX,
          mysql_real_escape_string(GLOBALS['DB']),
          mysql_real_escape_string(GLOBALS['DB']),
          mysql_real_escape_string(GLOBALS['DB']));
          mysql_query(GLOBALS['DB']);
  
      // retrieve the document's id
      // NOTE(review): the call and assignment are truncated; only the
      // connection argument remains.
      GLOBALS['DB']);
  
      // strip HTML tags out from the content
      // NOTE(review): call truncated; only the trailing `file` argument remains.
      file);
  
      // break content into individual words
      // NOTE(review): the foreach header, the lower-casing of each word, the
      // stop-word check and the term lookup query are fused on the next line.
      foreach (str_word_count(index => word = strtolower(stop_words[query = sprintf('SELECT TERM_ID FROM \%sSEARCH_TERM WHERE ' .
              'TERM_VALUE = "\%s"', 
              DB_TBL_PREFIX,
              mysql_real_escape_string(GLOBALS['DB']));
          query, result2))
          {
              // word exists so retrieve its id
              list(result2);
          }
          else
          {
              // add word to the database
              // NOTE(review): the INSERT for the new term is missing; only the
              // mysql_insert_id() fragment survives, and the closing brace of
              // this else-branch is absent.
              word, query, word_id = mysql_insert_id(result2);
  
           // add the index record
           // NOTE(review): the statement building this query is truncated to
           // its last two arguments (doc_id, index).
           doc_id,
               index);
           mysql_query(GLOBALS['DB']);
      }
  }
  
  // NOTE(review): `ch` is the cURL handle elsewhere in this listing, so passing
  // it to mysql_free_result() looks like two fused statements (freeing the
  // result set and closing the cURL handle) -- confirm.
  mysql_free_result(ch);
  echo 'Indexing complete.' . "\n";
  ?>
  
  
  
(C) Æliens 
20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher. 
In case of other copyright issues, contact the author.