<?php
// $Id: apachesolr_rdf.index.inc,v 1.1.2.2 2009/07/24 23:47:19 drunkenmonkey Exp $

/**
 * Index the specified resources for the given context.
 *
 * For every URI the triples are fetched with rdf_query(), turned into an
 * Apache_Solr_Document by the schema's "create_document" function and sent to
 * Solr in chunks. Each resource that was sent is afterwards flagged with
 * changed = 0 in {apachesolr_rdf_resources}.
 *
 * @param $uris
 *   Array of URIs of the resources to index.
 * @param $context
 *   The context to index the resources for. It is added to the options passed
 *   to rdf_query(), handed to the create_document function and used to select
 *   the rows in {apachesolr_rdf_resources}.
 * @param $options
 *   Associative array of settings. The keys read here are 'solr_host',
 *   'solr_port', 'solr_path' and 'schema'. The whole array (with 'context'
 *   added) is also passed to rdf_query() and the create_document function.
 *
 * @return
 *   FALSE if the Solr server could not be reached, if no create_document
 *   function is available for the schema, or if an exception was thrown while
 *   indexing. In all other cases nothing (NULL) is returned.
 */
function apachesolr_rdf_index_resources($uris, $context, $options) {
  // Read the needed settings explicitly. Using extract() here would allow
  // keys like 'uris', 'context' or 'options' inside $options to silently
  // overwrite this function's own parameters.
  $solr_host = isset($options['solr_host']) ? $options['solr_host'] : NULL;
  $solr_port = isset($options['solr_port']) ? $options['solr_port'] : NULL;
  $solr_path = isset($options['solr_path']) ? $options['solr_path'] : NULL;
  $schema = isset($options['schema']) ? $options['schema'] : NULL;

  try {
    // Get the $solr object
    $solr = apachesolr_get_solr($solr_host, $solr_port, $solr_path);
    // If there is no server available, don't continue.
    if (!$solr->ping(variable_get('apachesolr_ping_timeout', 4))) {
      throw new Exception(t('Solr instance not available during indexing: ' .
          '@url', array('@url' => "$solr_host:$solr_port$solr_path")));
    }
  }
  catch (Exception $e) {
    watchdog('Apache Solr RDF', nl2br(check_plain($e->getMessage())), NULL,
        WATCHDOG_ERROR);
    return FALSE;
  }

  // Prepare create_document method
  $create_document = apachesolr_rdf_get_schema_function($schema,
      'create_document');
  if (!$create_document) {
    return FALSE;
  }

  // Prepare documents
  $documents = array();
  $options['context'] = $context;
  foreach ($uris as $uri) {
    $triples = rdf_query($uri, NULL, NULL, $options);
    $triples = rdf_normalize($triples);
    // A resource without any triples has no entry in the normalized array;
    // hand an empty predicate list to the schema function in that case.
    $predicates = isset($triples[$uri]) ? $triples[$uri] : array();
    $document = $create_document($uri, $predicates, $context, $options);
    if ($document instanceof Apache_Solr_Document) {
      $documents[$uri] = $document;
    }
  }

  // Index documents
  if (count($documents)) {
    try {
      watchdog('Apache Solr RDF', 'Indexing @count resources.',
          array('@count' => count($documents)));
      // Chunk the adds by 20s, preserving the URI keys so the matching
      // database rows can be updated afterwards.
      $docs_chunk = array_chunk($documents, 20, TRUE);
      foreach ($docs_chunk as $docs) {
        $solr->addDocuments($docs);

        // Flag the resources of this chunk as not changed anymore.
        foreach ($docs as $uri => $doc) {
          db_query('UPDATE {apachesolr_rdf_resources} SET changed = 0 ' .
              "WHERE uri = '%s' AND context = '%s'", $uri, $context);
        }
      }
      $solr->commit();
    }
    catch (Exception $e) {
      watchdog('Apache Solr RDF', nl2br(check_plain($e->getMessage())), NULL,
          WATCHDOG_ERROR);
      return FALSE;
    }
  }
}

/**
 * Builds the identifier under which a resource is indexed.
 *
 * The ID is the resource's URI and the context, joined by an "@" character.
 */
function apachesolr_rdf_create_id($uri, $context) {
  return $uri . '@' . $context;
}

/**
 * Determines the string value and the kind of a triple's object, as returned
 * by rdf_query().
 *
 * @param $object
 *   A plain string, an RDF_URIRef or an RDF_Literal.
 *
 * @return
 *   An array with the keys 'type' (one of 'string', 'uri' or 'literal') and
 *   'string' (the extracted value), or FALSE for any other kind of object.
 */
function apachesolr_rdf_extract_object_string($object) {
  $extracted = FALSE;

  if (is_string($object)) {
    $extracted = array('type' => 'string', 'string' => $object);
  }
  elseif ($object instanceof RDF_URIRef) {
    $extracted = array('type' => 'uri', 'string' => $object->uri);
  }
  elseif ($object instanceof RDF_Literal) {
    $extracted = array('type' => 'literal', 'string' => $object->value);
  }

  return $extracted;
}

//
// Schema functions
//
// - dynamic fields

/**
 * Creates an Apache_Solr_Document from the specified resource, using the
 * approach with a dynamic field for each predicate.
 *
 * Every object is added to the multi-valued field "property_object" as
 * "<predicate> <value>" and to a field named after the predicate, suffixed
 * with "_s" for URI objects and "_t" for everything else.
 *
 * @param $uri
 *   The URI of the resource.
 * @param $predicates
 *   The resource's data as an array of the form predicate => array of objects.
 * @param $context
 *   The context the resource is indexed for.
 *
 * @return
 *   The Apache_Solr_Document for the resource.
 */
function apachesolr_rdf_create_document_dynamic($uri, $predicates, $context) {
  $doc = new Apache_Solr_Document;

  $doc->uri_context = apachesolr_rdf_create_id($uri, $context);
  $doc->uri = $uri;
  $doc->context = $context;

  // Tolerate resources for which no triples were found.
  if (empty($predicates)) {
    $predicates = array();
  }

  foreach ($predicates as $predicate => $objects) {
    foreach ($objects as $object) {
      $extracted = apachesolr_rdf_extract_object_string($object);
      if ($extracted === FALSE) {
        // Unsupported kind of object; indexing it would only add empty
        // values to the document.
        continue;
      }
      $string = $extracted['string'];
      $doc->setMultiValue('property_object', "$predicate $string");
      $suffix = ($extracted['type'] === 'uri') ? '_s' : '_t';
      $doc->setMultiValue($predicate . $suffix, $string);
    }
  }

  return $doc;
}

//
// - text data
//

/**
 * Creates an Apache_Solr_Document from the specified resource, using the
 * text data approach.
 *
 * Non-URI objects are added to the multi-valued fields "property" (the
 * predicate) and "text" (the value). URI objects are added to
 * "object_property" (the predicate) and "object_label" (the label text of the
 * referenced resource, or its URI if no label text is found). URI objects of
 * the APACHESOLR_RDF_TYPE predicate are additionally added to "type".
 *
 * @param $uri
 *   The URI of the resource.
 * @param $predicates
 *   The resource's data as an array of the form predicate => array of objects.
 * @param $context
 *   The context the resource is indexed for.
 *
 * @return
 *   The Apache_Solr_Document for the resource.
 */
function apachesolr_rdf_create_document_text_data($uri, $predicates, $context) {
  $doc = new Apache_Solr_Document;

  $doc->uri_context = apachesolr_rdf_create_id($uri, $context);
  $doc->uri = $uri;
  $doc->context = $context;

  // Tolerate resources for which no triples were found.
  if (empty($predicates)) {
    $predicates = array();
  }

  foreach ($predicates as $predicate => $objects) {
    foreach ($objects as $object) {
      $extracted = apachesolr_rdf_extract_object_string($object);
      if ($extracted === FALSE) {
        // Unsupported kind of object; there is nothing sensible to index.
        continue;
      }
      $string = $extracted['string'];

      if ($extracted['type'] !== 'uri') {
        $doc->setMultiValue('property', $predicate);
        $doc->setMultiValue('text', $string);
        continue;
      }

      if ($predicate == APACHESOLR_RDF_TYPE) {
        $doc->setMultiValue('type', $string);
      }
      // For resource objects, index their label. If there is no text data on
      // the object, fall back to indexing the URI.
      // TODO: Is this a good idea?
      $label = _apachesolr_rdf_get_object_label($string);
      $doc->setMultiValue('object_property', $predicate);
      $doc->setMultiValue('object_label', $label !== '' ? $label : $string);
    }
  }

  return $doc;
}

/**
 * Helper function: collects the label text stored for a resource.
 *
 * @param $uri
 *   The URI of the resource whose APACHESOLR_RDF_LABEL objects are queried.
 *
 * @return
 *   All non-empty label strings found, trimmed and joined by single spaces,
 *   or an empty string if there are none.
 */
function _apachesolr_rdf_get_object_label($uri) {
  $triples = rdf_query($uri, APACHESOLR_RDF_LABEL);
  $triples = $triples->to_array();
  $triples = rdf_normalize($triples);
  if (empty($triples[$uri])) {
    return '';
  }

  $labels = array();
  foreach ($triples[$uri] as $objects) {
    // Normalized triples map each predicate to a list of objects (the same
    // shape the document builders iterate over). Wrap a lone object so both
    // shapes are handled.
    if (!is_array($objects)) {
      $objects = array($objects);
    }
    foreach ($objects as $object) {
      $extracted = apachesolr_rdf_extract_object_string($object);
      if ($extracted === FALSE) {
        continue;
      }
      $text = trim($extracted['string']);
      if ($text !== '') {
        $labels[] = $text;
      }
    }
  }

  return implode(' ', $labels);
}
