diff options
-rwxr-xr-x | Apache/Solr/Service.php~ | 1197 | ||||
-rwxr-xr-x | solr_search.module | 6 |
2 files changed, 3 insertions, 1200 deletions
diff --git a/Apache/Solr/Service.php~ b/Apache/Solr/Service.php~ deleted file mode 100755 index 8352dd8..0000000 --- a/Apache/Solr/Service.php~ +++ /dev/null @@ -1,1197 +0,0 @@ -<?php -/** - * Copyright (c) 2007-2011, Servigistics, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - Neither the name of Servigistics, Inc. nor the names of - * its contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) - * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD - * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $ - * - * @package Apache - * @subpackage Solr - * @author Donovan Jimenez <djimenez@conduit-it.com> - */ - -// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1) -// Doesn't follow typical include path conventions, but is more convenient for users -require_once(dirname(__FILE__) . '/Exception.php'); -require_once(dirname(__FILE__) . '/HttpTransportException.php'); -require_once(dirname(__FILE__) . '/InvalidArgumentException.php'); - -require_once(dirname(__FILE__) . '/Document.php'); -require_once(dirname(__FILE__) . '/Response.php'); - -require_once(dirname(__FILE__) . '/HttpTransport/Interface.php'); - -/** - * Starting point for the Solr API. Represents a Solr server resource and has - * methods for pinging, adding, deleting, committing, optimizing and searching. - * - * Example Usage: - * <code> - * ... - * $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr') - * - * if ($solr->ping()) - * { - * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :) - * - * $document = new Apache_Solr_Document(); - * $document->id = uniqid(); //or something else suitably unique - * - * $document->title = 'Some Title'; - * $document->content = 'Some content for this wonderful document. Blah blah blah.'; - * - * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments - * //with an array of documents is faster - * - * $solr->commit(); //commit to see the deletes and the document - * $solr->optimize(); //merges multiple segments into one - * - * //and the one we all care about, search! - * //any other common or custom parameters to the request handler can go in the - * //optional 4th array argument. - * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc')); - * } - * ... - * </code> - * - * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance - * improvements when dealing with multiple requests by using HTTP's keep alive functionality - */ -class Apache_Solr_Service -{ - /** - * SVN Revision meta data for this class - */ - const SVN_REVISION = '$Revision: 59 $'; - - /** - * SVN ID meta data for this class - */ - const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $'; - - /** - * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning - */ - const SOLR_WRITER = 'json'; - - /** - * NamedList Treatment constants - */ - const NAMED_LIST_FLAT = 'flat'; - const NAMED_LIST_MAP = 'map'; - - /** - * Search HTTP Methods - */ - const METHOD_GET = 'GET'; - const METHOD_POST = 'POST'; - - /** - * Servlet mappings - */ - const PING_SERVLET = 'admin/ping'; - const UPDATE_SERVLET = 'update'; - const SEARCH_SERVLET = 'select'; - const THREADS_SERVLET = 'admin/threads'; - const EXTRACT_SERVLET = 'update/extract'; - - /** - * Server identification strings - * - * @var string - */ - protected $_host, $_port, $_path; - - /** - * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in - * the returned parsed data - * - * @var boolean - */ - protected $_createDocuments = true; - - /** - * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value - * collapsed to appear as a single value would. - * - * @var boolean - */ - protected $_collapseSingleValueArrays = true; - - /** - * How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values - * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}. - * - * @var string - */ - protected $_namedListTreatment = self::NAMED_LIST_MAP; - - /** - * Query delimiters. Someone might want to be able to change - * these (to use & instead of & for example), so I've provided them. - * - * @var string - */ - protected $_queryDelimiter = '?', $_queryStringDelimiter = '&', $_queryBracketsEscaped = true; - - /** - * Constructed servlet full path URLs - * - * @var string - */ - protected $_pingUrl, $_updateUrl, $_searchUrl, $_threadsUrl; - - /** - * Keep track of whether our URLs have been constructed - * - * @var boolean - */ - protected $_urlsInited = false; - - /** - * HTTP Transport implementation (pluggable) - * - * @var Apache_Solr_HttpTransport_Interface - */ - protected $_httpTransport = false; - - /** - * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. - * - * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead - * - * @param string $value - * @return string - */ - static public function escape($value) - { - //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters - $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/'; - $replace = '\\\$1'; - - return preg_replace($pattern, $replace, $value); - } - - /** - * Escape a value meant to be contained in a phrase for special query characters - * - * @param string $value - * @return string - */ - static public function escapePhrase($value) - { - $pattern = '/("|\\\)/'; - $replace = '\\\$1'; - - return preg_replace($pattern, $replace, $value); - } - - /** - * Convenience function for creating phrase syntax from a value - * - * @param string $value - * @return string - */ - static public function phrase($value) - { - return '"' . self::escapePhrase($value) . '"'; - } - - /** - * Constructor. All parameters are optional and will take on default values - * if not specified. - * - * @param string $host - * @param string $port - * @param string $path - * @param Apache_Solr_HttpTransport_Interface $httpTransport - */ - public function __construct($host = 'localhost', $port = 8180, $path = '/solr/', $httpTransport = false) - { - $this->setHost($host); - $this->setPort($port); - $this->setPath($path); - - $this->_initUrls(); - - if ($httpTransport) - { - $this->setHttpTransport($httpTransport); - } - - // check that our php version is >= 5.1.3 so we can correct for http_build_query behavior later - $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>='); - } - - /** - * Return a valid http URL given this server's host, port and path and a provided servlet name - * - * @param string $servlet - * @return string - */ - protected function _constructUrl($servlet, $params = array()) - { - if (count($params)) - { - //escape all parameters appropriately for inclusion in the query string - $escapedParams = array(); - - foreach ($params as $key => $value) - { - $escapedParams[] = urlencode($key) . '=' . urlencode($value); - } - - $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams); - } - else - { - $queryString = ''; - } - - return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString; - } - - /** - * Construct the Full URLs for the three servlets we reference - */ - protected function _initUrls() - { - //Initialize our full servlet URLs now that we have server information - $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET); - $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET); - $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET); - $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER )); - $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER )); - - $this->_urlsInited = true; - } - - protected function _generateQueryString($params) - { - // use http_build_query to encode our arguments because its faster - // than urlencoding all the parts ourselves in a loop - // - // because http_build_query treats arrays differently than we want to, correct the query - // string by changing foo[#]=bar (# being an actual number) parameter strings to just - // multiple foo=bar strings. This regex should always work since '=' will be urlencoded - // anywhere else the regex isn't expecting it - // - // NOTE: before php 5.1.3 brackets were not url encoded by http_build query - we've checked - // the php version in the constructor and put the results in the instance variable. Also, before - // 5.1.2 the arg_separator parameter was not available, so don't use it - if ($this->_queryBracketsEscaped) - { - $queryString = http_build_query($params, null, $this->_queryStringDelimiter); - return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString); - } - else - { - $queryString = http_build_query($params); - return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString); - } - } - - /** - * Central method for making a get operation against this Solr Server - * - * @param string $url - * @param float $timeout Read timeout in seconds - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned - */ - protected function _sendRawGet($url, $timeout = FALSE) - { - $httpTransport = $this->getHttpTransport(); - - $httpResponse = $httpTransport->performGetRequest($url, $timeout); - $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); - - if ($solrResponse->getHttpStatus() != 200) - { - throw new Apache_Solr_HttpTransportException($solrResponse); - } - - return $solrResponse; - } - - /** - * Central method for making a post operation against this Solr Server - * - * @param string $url - * @param string $rawPost - * @param float $timeout Read timeout in seconds - * @param string $contentType - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned - */ - protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') - { - $httpTransport = $this->getHttpTransport(); - - $httpResponse = $httpTransport->performPostRequest($url, $rawPost, $contentType, $timeout); - $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); - - if ($solrResponse->getHttpStatus() != 200) - { - throw new Apache_Solr_HttpTransportException($solrResponse); - } - - return $solrResponse; - } - - /** - * Returns the set host - * - * @return string - */ - public function getHost() - { - return $this->_host; - } - - /** - * Set the host used. If empty will fallback to constants - * - * @param string $host - * - * @throws Apache_Solr_InvalidArgumentException If the host parameter is empty - */ - public function setHost($host) - { - //Use the provided host or use the default - if (empty($host)) - { - throw new Apache_Solr_InvalidArgumentException('Host parameter is empty'); - } - else - { - $this->_host = $host; - } - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Get the set port - * - * @return integer - */ - public function getPort() - { - return $this->_port; - } - - /** - * Set the port used. If empty will fallback to constants - * - * @param integer $port - * - * @throws Apache_Solr_InvalidArgumentException If the port parameter is empty - */ - public function setPort($port) - { - //Use the provided port or use the default - $port = (int) $port; - - if ($port <= 0) - { - throw new Apache_Solr_InvalidArgumentException('Port is not a valid port number'); - } - else - { - $this->_port = $port; - } - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Get the set path. - * - * @return string - */ - public function getPath() - { - return $this->_path; - } - - /** - * Set the path used. If empty will fallback to constants - * - * @param string $path - */ - public function setPath($path) - { - $path = trim($path, '/'); - - $this->_path = '/' . $path . '/'; - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Get the current configured HTTP Transport - * - * @return HttpTransportInterface - */ - public function getHttpTransport() - { - // lazy load a default if one has not be set - if ($this->_httpTransport === false) - { - require_once(dirname(__FILE__) . '/HttpTransport/FileGetContents.php'); - - $this->_httpTransport = new Apache_Solr_HttpTransport_FileGetContents(); - } - - return $this->_httpTransport; - } - - /** - * Set the HTTP Transport implemenation that will be used for all HTTP requests - * - * @param Apache_Solr_HttpTransport_Interface - */ - public function setHttpTransport(Apache_Solr_HttpTransport_Interface $httpTransport) - { - $this->_httpTransport = $httpTransport; - } - - /** - * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will - * parse the response and create {@link Apache_Solr_Document} instances in place. - * - * @param boolean $createDocuments - */ - public function setCreateDocuments($createDocuments) - { - $this->_createDocuments = (bool) $createDocuments; - } - - /** - * Get the current state of teh create documents flag. - * - * @return boolean - */ - public function getCreateDocuments() - { - return $this->_createDocuments; - } - - /** - * Set the collapse single value arrays flag. - * - * @param boolean $collapseSingleValueArrays - */ - public function setCollapseSingleValueArrays($collapseSingleValueArrays) - { - $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; - } - - /** - * Get the current state of the collapse single value arrays flag. - * - * @return boolean - */ - public function getCollapseSingleValueArrays() - { - return $this->_collapseSingleValueArrays; - } - - /** - * Get the current default timeout setting (initially the default_socket_timeout ini setting) - * in seconds - * - * @return float - * - * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation - */ - public function getDefaultTimeout() - { - return $this->getHttpTransport()->getDefaultTimeout(); - } - - /** - * Set the default timeout for all calls that aren't passed a specific timeout - * - * @param float $timeout Timeout value in seconds - * - * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation - */ - public function setDefaultTimeout($timeout) - { - $this->getHttpTransport()->setDefaultTimeout($timeout); - } - - /** - * Set how NamedLists should be formatted in the response data. This mainly effects - * the facet counts format. - * - * @param string $namedListTreatment - * @throws Apache_Solr_InvalidArgumentException If invalid option is set - */ - public function setNamedListTreatment($namedListTreatment) - { - switch ((string) $namedListTreatment) - { - case Apache_Solr_Service::NAMED_LIST_FLAT: - $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT; - break; - - case Apache_Solr_Service::NAMED_LIST_MAP: - $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP; - break; - - default: - throw new Apache_Solr_InvalidArgumentException('Not a valid named list treatement option'); - } - } - - /** - * Get the current setting for named list treatment. - * - * @return string - */ - public function getNamedListTreatment() - { - return $this->_namedListTreatment; - } - - /** - * Set the string used to separate the path form the query string. - * Defaulted to '?' - * - * @param string $queryDelimiter - */ - public function setQueryDelimiter($queryDelimiter) - { - $this->_queryDelimiter = $queryDelimiter; - } - - /** - * Set the string used to separate the parameters in thequery string - * Defaulted to '&' - * - * @param string $queryStringDelimiter - */ - public function setQueryStringDelimiter($queryStringDelimiter) - { - $this->_queryStringDelimiter = $queryStringDelimiter; - } - - /** - * Call the /admin/ping servlet, can be used to quickly tell if a connection to the - * server is able to be made. - * - * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2) - * @return float Actual time taken to ping the server, FALSE if timeout or HTTP error status occurs - */ - public function ping($timeout = 2) - { - $start = microtime(true); - - $httpTransport = $this->getHttpTransport(); - - $httpResponse = $httpTransport->performHeadRequest($this->_pingUrl, $timeout); - $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); - - if ($solrResponse->getHttpStatus() == 200) - { - return microtime(true) - $start; - } - else - { - return false; - } - } - - /** - * Call the /admin/threads servlet and retrieve information about all threads in the - * Solr servlet's thread group. Useful for diagnostics. - * - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function threads() - { - return $this->_sendRawGet($this->_threadsUrl); - } - - /** - * Raw Add Method. Takes a raw post body and sends it to the update service. Post body - * should be a complete and well formed "add" xml document. - * - * @param string $rawPost - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function add($rawPost) - { - return $this->_sendRawPost($this->_updateUrl, $rawPost); - } - - /** - * Add a Solr Document to the index - * - * @param Apache_Solr_Document $document - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request. - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) - { - $dupValue = $allowDups ? 'true' : 'false'; - $pendingValue = $overwritePending ? 'true' : 'false'; - $committedValue = $overwriteCommitted ? 'true' : 'false'; - - $commitWithin = (int) $commitWithin; - $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : ''; - - $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>"; - $rawPost .= $this->_documentToXmlFragment($document); - $rawPost .= '</add>'; - - return $this->add($rawPost); - } - - /** - * Add an array of Solr Documents to the index all at once - * - * @param array $documents Should be an array of Apache_Solr_Document instances - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request. - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) - { - $dupValue = $allowDups ? 'true' : 'false'; - $pendingValue = $overwritePending ? 'true' : 'false'; - $committedValue = $overwriteCommitted ? 'true' : 'false'; - - $commitWithin = (int) $commitWithin; - $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : ''; - - $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>"; - - foreach ($documents as $document) - { - if ($document instanceof Apache_Solr_Document) - { - $rawPost .= $this->_documentToXmlFragment($document); - } - } - - $rawPost .= '</add>'; - - return $this->add($rawPost); - } - - /** - * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call - * - * @return string - */ - protected function _documentToXmlFragment(Apache_Solr_Document $document) - { - $xml = '<doc'; - - if ($document->getBoost() !== false) - { - $xml .= ' boost="' . $document->getBoost() . '"'; - } - - $xml .= '>'; - - foreach ($document as $key => $value) - { - $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8'); - $fieldBoost = $document->getFieldBoost($key); - - if (is_array($value)) - { - foreach ($value as $multivalue) - { - $xml .= '<field name="' . $key . '"'; - - if ($fieldBoost !== false) - { - $xml .= ' boost="' . $fieldBoost . '"'; - - // only set the boost for the first field in the set - $fieldBoost = false; - } - - $multivalue = htmlspecialchars($multivalue, ENT_NOQUOTES, 'UTF-8'); - - $xml .= '>' . $multivalue . '</field>'; - } - } - else - { - $xml .= '<field name="' . $key . '"'; - - if ($fieldBoost !== false) - { - $xml .= ' boost="' . $fieldBoost . '"'; - } - - $value = htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8'); - - $xml .= '>' . $value . '</field>'; - } - } - - $xml .= '</doc>'; - - // replace any control characters to avoid Solr XML parser exception - return $this->_stripCtrlChars($xml); - } - - /** - * Replace control (non-printable) characters from string that are invalid to Solr's XML parser with a space. - * - * @param string $string - * @return string - */ - protected function _stripCtrlChars($string) - { - // See: http://w3.org/International/questions/qa-forms-utf-8.html - // Printable utf-8 does not include any of these chars below x7F - return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string); - } - - /** - * Send a commit command. Will be synchronous unless both wait parameters are set to false. - * - * @param boolean $expungeDeletes Defaults to false, merge segments with deletes away - * @param boolean $waitFlush Defaults to true, block until index changes are flushed to disk - * @param boolean $waitSearcher Defaults to true, block until a new searcher is opened and registered as the main query searcher, making the changes visible - * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - - - /* - - public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $expungeValue = $expungeDeletes ? 'true' : 'false'; - $flushValue = $waitFlush ? 'true' : 'false'; - $searcherValue = $waitSearcher ? 'true' : 'false'; - - $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />'; - - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - */ - - public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $expungeValue = $expungeDeletes ? 'true' : 'false'; - $searcherValue = $waitSearcher ? 'true' : 'false'; - - $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitSearcher="' . $searcherValue . '" />'; - - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - - - /** - * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be - * a complete and well formed "delete" xml document - * - * @param string $rawPost Expected to be utf-8 encoded xml document - * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function delete($rawPost, $timeout = 3600) - { - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - - /** - * Create a delete document based on document ID - * - * @param string $id Expected to be utf-8 encoded - * @param boolean $fromPending - * @param boolean $fromCommitted - * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600) - { - $pendingValue = $fromPending ? 'true' : 'false'; - $committedValue = $fromCommitted ? 'true' : 'false'; - - //escape special xml characters - $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); - - $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><id>' . $id . '</id></delete>'; - - return $this->delete($rawPost, $timeout); - } - - /** - * Create and post a delete document based on multiple document IDs. - * - * @param array $ids Expected to be utf-8 encoded strings - * @param boolean $fromPending - * @param boolean $fromCommitted - * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600) - { - $pendingValue = $fromPending ? 'true' : 'false'; - $committedValue = $fromCommitted ? 'true' : 'false'; - - $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '">'; - - foreach ($ids as $id) - { - //escape special xml characters - $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); - - $rawPost .= '<id>' . $id . '</id>'; - } - - $rawPost .= '</delete>'; - - return $this->delete($rawPost, $timeout); - } - - /** - * Create a delete document based on a query and submit it - * - * @param string $rawQuery Expected to be utf-8 encoded - * @param boolean $fromPending - * @param boolean $fromCommitted - * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600) - { - $pendingValue = $fromPending ? 'true' : 'false'; - $committedValue = $fromCommitted ? 'true' : 'false'; - - // escape special xml characters - $rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8'); - - $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><query>' . $rawQuery . '</query></delete>'; - - return $this->delete($rawPost, $timeout); - } - - /** - * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how - * to use Solr Cell and what parameters are available. - * - * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." - * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value - * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also - * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). - * - * @param string $file Path to file to extract data from - * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) - * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) - * @param string $mimetype optional mimetype specification (for the file being extracted) - * - * @return Apache_Solr_Response - * - * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. - */ - public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream') - { - // check if $params is an array (allow null for default empty array) - if (!is_null($params)) - { - if (!is_array($params)) - { - throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); - } - } - else - { - $params = array(); - } - - // if $file is an http request, defer to extractFromUrl instead - if (substr($file, 0, 7) == 'http://' || substr($file, 0, 8) == 'https://') - { - return $this->extractFromUrl($file, $params, $document, $mimetype); - } - - // read the contents of the file - $contents = @file_get_contents($file); - - if ($contents !== false) - { - // add the resource.name parameter if not specified - if (!isset($params['resource.name'])) - { - $params['resource.name'] = basename($file); - } - - // delegate the rest to extractFromString - return $this->extractFromString($contents, $params, $document, $mimetype); - } - else - { - throw new Apache_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read"); - } - } - - /** - * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how - * to use Solr Cell and what parameters are available. - * - * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." - * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value - * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also - * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). - * - * @param string $data Data that will be passed to Solr Cell - * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) - * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) - * @param string $mimetype optional mimetype specification (for the file being extracted) - * - * @return Apache_Solr_Response - * - * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. - * - * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs revisisted. - */ - public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream') - { - // check if $params is an array (allow null for default empty array) - if (!is_null($params)) - { - if (!is_array($params)) - { - throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); - } - } - else - { - $params = array(); - } - - // make sure we receive our response in JSON and have proper name list treatment - $params['wt'] = self::SOLR_WRITER; - $params['json.nl'] = $this->_namedListTreatment; - - // check if $document is an Apache_Solr_Document instance - if (!is_null($document) && $document instanceof Apache_Solr_Document) - { - // iterate document, adding literal.* and boost.* fields to $params as appropriate - foreach ($document as $field => $fieldValue) - { - // check if we need to add a boost.* parameters - $fieldBoost = $document->getFieldBoost($field); - - if ($fieldBoost !== false) - { - $params["boost.{$field}"] = $fieldBoost; - } - - // add the literal.* parameter - $params["literal.{$field}"] = $fieldValue; - } - } - - // params will be sent to SOLR in the QUERY STRING - $queryString = $this->_generateQueryString($params); - - // the file contents will be sent to SOLR as the POST BODY - we use application/octect-stream as default mimetype - return $this->_sendRawPost($this->_extractUrl . $this->_queryDelimiter . $queryString, $data, false, $mimetype); - } - - /** - * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how - * to use Solr Cell and what parameters are available. - * - * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." - * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value - * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also - * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). - * - * @param string $url URL - * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) - * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) - * @param string $mimetype optional mimetype specification (for the file being extracted) - * - * @return Apache_Solr_Response - * - * @throws Apache_Solr_InvalidArgumentException if $url, $params, or $document are invalid. - */ - public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream') - { - // check if $params is an array (allow null for default empty array) - if (!is_null($params)) - { - if (!is_array($params)) - { - throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); - } - } - else - { - $params = array(); - } - - $httpTransport = $this->getHttpTransport(); - - // read the contents of the URL using our configured Http Transport and default timeout - $httpResponse = $httpTransport->performGetRequest($url); - - // check that its a 200 response - if ($httpResponse->getStatusCode() == 200) - { - // add the resource.name parameter if not specified - if (!isset($params['resource.name'])) - { - $params['resource.name'] = $url; - } - - // delegate the rest to extractFromString - return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype); - } - else - { - throw new Apache_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code"); - } - } - - /** - * Send an optimize command. Will be synchronous unless both wait parameters are set - * to false. - * - * @param boolean $waitFlush - * @param boolean $waitSearcher - * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - */ - public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $flushValue = $waitFlush ? 'true' : 'false'; - $searcherValue = $waitSearcher ? 'true' : 'false'; - - $rawPost = '<optimize waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />'; - - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - - /** - * Simple Search interface - * - * @param string $query The raw query string - * @param int $offset The starting offset for result documents - * @param int $limit The maximum number of result documents to return - * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) - * @param string $method The HTTP method (Apache_Solr_Service::METHOD_GET or Apache_Solr_Service::METHOD::POST) - * @return Apache_Solr_Response - * - * @throws Apache_Solr_HttpTransportException If an error occurs during the service call - * @throws Apache_Solr_InvalidArgumentException If an invalid HTTP method is used - */ - public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) - { - // ensure params is an array - if (!is_null($params)) - { - if (!is_array($params)) - { - // params was specified but was not an array - invalid - throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); - } - } - else - { - $params = array(); - } - - // construct our full parameters - - // common parameters in this interface - $params['wt'] = self::SOLR_WRITER; - $params['json.nl'] = $this->_namedListTreatment; - - $params['q'] = $query; - $params['start'] = $offset; - $params['rows'] = $limit; - - $queryString = $this->_generateQueryString($params); - - if ($method == self::METHOD_GET) - { - return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString); - } - else if ($method == self::METHOD_POST) - { - return $this->_sendRawPost($this->_searchUrl, $queryString, FALSE, 'application/x-www-form-urlencoded; charset=UTF-8'); - } - else - { - throw new Apache_Solr_InvalidArgumentException("Unsupported method '$method', please use the Apache_Solr_Service::METHOD_* constants"); - } - } -} diff --git a/solr_search.module b/solr_search.module index 963350c..7574924 100755 --- a/solr_search.module +++ b/solr_search.module @@ -208,9 +208,9 @@ function ajax_solr_search_result_callback($form, $form_state) $sno++; $output .= '<li>'; $output .= '<p><b>Book: </b>' . $doc->title . ' (' . $doc->author . ')</p>'; - $output .= '<p><b>Chapter: </b>' . $doc->chapter . '</p>'; - $output .= '<p><b>Example: </b>' . $doc->example . '</p>'; - $output .= '<p><b>Links: </b> <a href="' . base_path() . 'textbook_companion/generate_book/' . $doc->book_id . '"><span style="font-weight:bold;font-size:16px;">⤵</span> Download entire book</a> <a href="https://rcloud.fossee.in/index?eid=' . $doc->id . '" target="_blank"><span style="font-weight:bold;font-size:20px;">»</span> View this example</a></p>'; + $output .= '<p><b>Chapter: </b>' . $doc->chapter[0] . '</p>'; + $output .= '<p><b>Example: </b>' . $doc->example[0] . '</p>'; + $output .= '<p><b>Links: </b> <a href="' . base_path() . 'textbook-companion/download/book/' . $doc->book_id[0]. '"><span style="font-weight:bold;font-size:16px;">⤵</span> Download entire book</a> <a href="127.0.0.1/index?eid=' . $doc->id . '" target="_blank"><span style="font-weight:bold;font-size:20px;">»</span> View this example</a></p>'; $output .= '</li>'; } $output .= '<ol></div>'; |