diff options
25 files changed, 4476 insertions, 1 deletions
@@ -37,3 +37,10 @@ xmlrpc.php /profiles /scripts /themes + +# Ignore vim temp. files +*.swo +*.swp +*~ +*.*.bkp +*.*~ diff --git a/Apache/Solr/Document.php b/Apache/Solr/Document.php new file mode 100755 index 0000000..7b9784a --- /dev/null +++ b/Apache/Solr/Document.php @@ -0,0 +1,367 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: Document.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +/** + * Holds Key / Value pairs that represent a Solr Document along with any associated boost + * values. Field values can be accessed by direct dereferencing such as: + * <code> + * ... + * $document->title = 'Something'; + * echo $document->title; + * ... + * </code> + * + * Additionally, the field values can be iterated with foreach + * + * <code> + * foreach ($document as $fieldName => $fieldValue) + * { + * ... + * } + * </code> + */ +class Apache_Solr_Document implements IteratorAggregate +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: Document.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; + + /** + * Document boost value + * + * @var float + */ + protected $_documentBoost = false; + + /** + * Document field values, indexed by name + * + * @var array + */ + protected $_fields = array(); + + /** + * Document field boost values, indexed by name + * + * @var array array of floats + */ + protected $_fieldBoosts = array(); + + /** + * Clear all boosts and fields from this document + */ + public function clear() + { + $this->_documentBoost = false; + + $this->_fields = array(); + $this->_fieldBoosts = array(); + } + + /** + * Get current document boost + * + * @return mixed will be false for default, or else a float + */ + public function getBoost() + { + return $this->_documentBoost; + } + + /** + * Set document boost factor + * + * @param mixed $boost Use false for default boost, else cast to float that should be > 0 or will be treated as false + */ + public function setBoost($boost) + { + $boost = (float) $boost; + + if ($boost > 0.0) + { + $this->_documentBoost = $boost; + } + else + { + $this->_documentBoost = false; + } + } + + /** + * Add a value to a multi-valued field + * + * NOTE: the solr XML format allows you to specify boosts + * PER value even though the underlying Lucene implementation + * only allows a boost per field. To remedy this, the final + * field boost value will be the product of all specified boosts + * on field values - this is similar to SolrJ's functionality. + * + * <code> + * $doc = new Apache_Solr_Document(); + * + * $doc->addField('foo', 'bar', 2.0); + * $doc->addField('foo', 'baz', 3.0); + * + * // resultant field boost will be 6! + * echo $doc->getFieldBoost('foo'); + * </code> + * + * @param string $key + * @param mixed $value + * @param mixed $boost Use false for default boost, else cast to float that should be > 0 or will be treated as false + */ + public function addField($key, $value, $boost = false) + { + if (!isset($this->_fields[$key])) + { + // create holding array if this is the first value + $this->_fields[$key] = array(); + } + else if (!is_array($this->_fields[$key])) + { + // move existing value into array if it is not already an array + $this->_fields[$key] = array($this->_fields[$key]); + } + + if ($this->getFieldBoost($key) === false) + { + // boost not already set, set it now + $this->setFieldBoost($key, $boost); + } + else if ((float) $boost > 0.0) + { + // multiply passed boost with current field boost - similar to SolrJ implementation + $this->_fieldBoosts[$key] *= (float) $boost; + } + + // add value to array + $this->_fields[$key][] = $value; + } + + /** + * Handle the array manipulation for a multi-valued field + * + * @param string $key + * @param string $value + * @param mixed $boost Use false for default boost, else cast to float that should be > 0 or will be treated as false + * + * @deprecated Use addField(...) instead + */ + public function setMultiValue($key, $value, $boost = false) + { + $this->addField($key, $value, $boost); + } + + /** + * Get field information + * + * @param string $key + * @return mixed associative array of info if field exists, false otherwise + */ + public function getField($key) + { + if (isset($this->_fields[$key])) + { + return array( + 'name' => $key, + 'value' => $this->_fields[$key], + 'boost' => $this->getFieldBoost($key) + ); + } + + return false; + } + + /** + * Set a field value. Multi-valued fields should be set as arrays + * or instead use the addField(...) function which will automatically + * make sure the field is an array. + * + * @param string $key + * @param mixed $value + * @param mixed $boost Use false for default boost, else cast to float that should be > 0 or will be treated as false + */ + public function setField($key, $value, $boost = false) + { + $this->_fields[$key] = $value; + $this->setFieldBoost($key, $boost); + } + + /** + * Get the currently set field boost for a document field + * + * @param string $key + * @return float currently set field boost, false if one is not set + */ + public function getFieldBoost($key) + { + return isset($this->_fieldBoosts[$key]) ? $this->_fieldBoosts[$key] : false; + } + + /** + * Set the field boost for a document field + * + * @param string $key field name for the boost + * @param mixed $boost Use false for default boost, else cast to float that should be > 0 or will be treated as false + */ + public function setFieldBoost($key, $boost) + { + $boost = (float) $boost; + + if ($boost > 0.0) + { + $this->_fieldBoosts[$key] = $boost; + } + else + { + $this->_fieldBoosts[$key] = false; + } + } + + /** + * Return current field boosts, indexed by field name + * + * @return array + */ + public function getFieldBoosts() + { + return $this->_fieldBoosts; + } + + /** + * Get the names of all fields in this document + * + * @return array + */ + public function getFieldNames() + { + return array_keys($this->_fields); + } + + /** + * Get the values of all fields in this document + * + * @return array + */ + public function getFieldValues() + { + return array_values($this->_fields); + } + + /** + * IteratorAggregate implementation function. Allows usage: + * + * <code> + * foreach ($document as $key => $value) + * { + * ... + * } + * </code> + */ + public function getIterator() + { + $arrayObject = new ArrayObject($this->_fields); + + return $arrayObject->getIterator(); + } + + /** + * Magic get for field values + * + * @param string $key + * @return mixed + */ + public function __get($key) + { + if (isset($this->_fields[$key])) + { + return $this->_fields[$key]; + } + + return null; + } + + /** + * Magic set for field values. Multi-valued fields should be set as arrays + * or instead use the addField(...) function which will automatically + * make sure the field is an array. + * + * @param string $key + * @param mixed $value + */ + public function __set($key, $value) + { + $this->setField($key, $value); + } + + /** + * Magic isset for fields values. Do not call directly. Allows usage: + * + * <code> + * isset($document->some_field); + * </code> + * + * @param string $key + * @return boolean + */ + public function __isset($key) + { + return isset($this->_fields[$key]); + } + + /** + * Magic unset for field values. Do not call directly. Allows usage: + * + * <code> + * unset($document->some_field); + * </code> + * + * @param string $key + */ + public function __unset($key) + { + unset($this->_fields[$key]); + unset($this->_fieldBoosts[$key]); + } +}
\ No newline at end of file diff --git a/Apache/Solr/Exception.php b/Apache/Solr/Exception.php new file mode 100755 index 0000000..e6bc4f4 --- /dev/null +++ b/Apache/Solr/Exception.php @@ -0,0 +1,50 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: Exception.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +class Apache_Solr_Exception extends Exception +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: Exception.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/Abstract.php b/Apache/Solr/HttpTransport/Abstract.php new file mode 100755 index 0000000..cf9f76d --- /dev/null +++ b/Apache/Solr/HttpTransport/Abstract.php @@ -0,0 +1,89 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Timo Schmidt <timo.schmidt@aoemedia.de>, Donovan Jimenez <djimenez@conduit-it.com> + */ + +/** + * Convenience class that implements the transport implementation. Can be extended by + * real implementations to do some of the common book keeping + */ +abstract class Apache_Solr_HttpTransport_Abstract implements Apache_Solr_HttpTransport_Interface +{ + /** + * Our default timeout value for requests that don't specify a timeout + * + * @var float + */ + private $_defaultTimeout = false; + + /** + * Get the current default timeout setting (initially the default_socket_timeout ini setting) + * in seconds + * + * @return float + */ + public function getDefaultTimeout() + { + // lazy load the default timeout from the ini settings + if ($this->_defaultTimeout === false) + { + $this->_defaultTimeout = (int) ini_get('default_socket_timeout'); + + // double check we didn't get 0 for a timeout + if ($this->_defaultTimeout <= 0) + { + $this->_defaultTimeout = 60; + } + } + + return $this->_defaultTimeout; + } + + /** + * Set the current default timeout for all HTTP requests + * + * @param float $timeout + */ + public function setDefaultTimeout($timeout) + { + $timeout = (float) $timeout; + + if ($timeout >= 0) + { + $this->_defaultTimeout = $timeout; + } + } +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/Curl.php b/Apache/Solr/HttpTransport/Curl.php new file mode 100755 index 0000000..7cb7743 --- /dev/null +++ b/Apache/Solr/HttpTransport/Curl.php @@ -0,0 +1,198 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Timo Schmidt <timo.schmidt@aoemedia.de>, Donovan Jimenez <djimenez@conduit-it.com> + */ + +// Require Apache_Solr_HttpTransport_Abstract +require_once(dirname(__FILE__) . '/Abstract.php'); + +/** + * A Curl based HTTP transport. Uses a single curl session for all requests. + */ +class Apache_Solr_HttpTransport_Curl extends Apache_Solr_HttpTransport_Abstract +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision:$'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id:$'; + + /** + * Curl Session Handle + * + * @var resource + */ + private $_curl; + + /** + * Initializes a curl session + */ + public function __construct() + { + // initialize a CURL session + $this->_curl = curl_init(); + + // set common options that will not be changed during the session + curl_setopt_array($this->_curl, array( + // return the response body from curl_exec + CURLOPT_RETURNTRANSFER => true, + + // get the output as binary data + CURLOPT_BINARYTRANSFER => true, + + // we do not need the headers in the output, we get everything we need from curl_getinfo + CURLOPT_HEADER => false + )); + } + + /** + * Closes a curl session + */ + function __destruct() + { + // close our curl session + curl_close($this->_curl); + } + + public function performGetRequest($url, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + // set curl GET options + curl_setopt_array($this->_curl, array( + // make sure we're returning the body + CURLOPT_NOBODY => false, + + // make sure we're GET + CURLOPT_HTTPGET => true, + + // set the URL + CURLOPT_URL => $url, + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($this->_curl); + + // get info from the transfer + $statusCode = curl_getinfo($this->_curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($this->_curl, CURLINFO_CONTENT_TYPE); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } + + public function performHeadRequest($url, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + // set curl HEAD options + curl_setopt_array($this->_curl, array( + // this both sets the method to HEAD and says not to return a body + CURLOPT_NOBODY => true, + + // set the URL + CURLOPT_URL => $url, + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($this->_curl); + + // get info from the transfer + $statusCode = curl_getinfo($this->_curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($this->_curl, CURLINFO_CONTENT_TYPE); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } + + public function performPostRequest($url, $postData, $contentType, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + // set curl POST options + curl_setopt_array($this->_curl, array( + // make sure we're returning the body + CURLOPT_NOBODY => false, + + // make sure we're POST + CURLOPT_POST => true, + + // set the URL + CURLOPT_URL => $url, + + // set the post data + CURLOPT_POSTFIELDS => $postData, + + // set the content type + CURLOPT_HTTPHEADER => array("Content-Type: {$contentType}"), + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($this->_curl); + + // get info from the transfer + $statusCode = curl_getinfo($this->_curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($this->_curl, CURLINFO_CONTENT_TYPE); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/CurlNoReuse.php b/Apache/Solr/HttpTransport/CurlNoReuse.php new file mode 100755 index 0000000..1454958 --- /dev/null +++ b/Apache/Solr/HttpTransport/CurlNoReuse.php @@ -0,0 +1,196 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Timo Schmidt <timo.schmidt@aoemedia.de>, Donovan Jimenez <djimenez@conduit-it.com> + */ + +// Require Apache_Solr_HttpTransport_Abstract +require_once(dirname(__FILE__) . '/Abstract.php'); + +/** + * An alternative Curl HTTP transport that opens and closes a curl session for + * every request. This isn't the recommended way to use curl, but some version of + * PHP have memory issues. + */ +class Apache_Solr_HttpTransport_CurlNoReuse extends Apache_Solr_HttpTransport_Abstract +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision:$'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id:$'; + + public function performGetRequest($url, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + $curl = curl_init(); + + // set curl GET options + curl_setopt_array($curl, array( + // return the response body from curl_exec + CURLOPT_RETURNTRANSFER => true, + + // get the output as binary data + CURLOPT_BINARYTRANSFER => true, + + // we do not need the headers in the output, we get everything we need from curl_getinfo + CURLOPT_HEADER => false, + + // set the URL + CURLOPT_URL => $url, + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($curl); + + // get info from the transfer + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($curl, CURLINFO_CONTENT_TYPE); + + // close our curl session - we're done with it + curl_close($curl); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } + + public function performHeadRequest($url, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + $curl = curl_init(); + + // set curl HEAD options + curl_setopt_array($curl, array( + // return the response body from curl_exec + CURLOPT_RETURNTRANSFER => true, + + // get the output as binary data + CURLOPT_BINARYTRANSFER => true, + + // we do not need the headers in the output, we get everything we need from curl_getinfo + CURLOPT_HEADER => false, + + // this both sets the method to HEAD and says not to return a body + CURLOPT_NOBODY => true, + + // set the URL + CURLOPT_URL => $url, + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($curl); + + // get info from the transfer + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($curl, CURLINFO_CONTENT_TYPE); + + // close our curl session - we're done with it + curl_close($curl); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } + + public function performPostRequest($url, $postData, $contentType, $timeout = false) + { + // check the timeout value + if ($timeout === false || $timeout <= 0.0) + { + // use the default timeout + $timeout = $this->getDefaultTimeout(); + } + + $curl = curl_init(); + + // set curl POST options + curl_setopt_array($curl, array( + // return the response body from curl_exec + CURLOPT_RETURNTRANSFER => true, + + // get the output as binary data + CURLOPT_BINARYTRANSFER => true, + + // we do not need the headers in the output, we get everything we need from curl_getinfo + CURLOPT_HEADER => false, + + // make sure we're POST + CURLOPT_POST => true, + + // set the URL + CURLOPT_URL => $url, + + // set the post data + CURLOPT_POSTFIELDS => $postData, + + // set the content type + CURLOPT_HTTPHEADER => array("Content-Type: {$contentType}"), + + // set the timeout + CURLOPT_TIMEOUT => $timeout + )); + + // make the request + $responseBody = curl_exec($curl); + + // get info from the transfer + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($curl, CURLINFO_CONTENT_TYPE); + + // close our curl session - we're done with it + curl_close($curl); + + return new Apache_Solr_HttpTransport_Response($statusCode, $contentType, $responseBody); + } +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/FileGetContents.php b/Apache/Solr/HttpTransport/FileGetContents.php new file mode 100755 index 0000000..5e01775 --- /dev/null +++ b/Apache/Solr/HttpTransport/FileGetContents.php @@ -0,0 +1,216 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +// Require Apache_Solr_HttpTransport_Abstract +require_once(dirname(__FILE__) . '/Abstract.php'); + +/** + * HTTP Transport implemenation that uses the builtin http URL wrappers and file_get_contents + */ +class Apache_Solr_HttpTransport_FileGetContents extends Apache_Solr_HttpTransport_Abstract +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: $'; + + /** + * Reusable stream context resources for GET and POST operations + * + * @var resource + */ + private $_getContext, $_headContext, $_postContext; + + /** + * Initializes our reuseable get and post stream contexts + */ + public function __construct() + { + $this->_getContext = stream_context_create(); + $this->_headContext = stream_context_create(); + $this->_postContext = stream_context_create(); + } + + public function performGetRequest($url, $timeout = false) + { + // set the timeout if specified + if ($timeout !== FALSE && $timeout > 0.0) + { + // timeouts with file_get_contents seem to need + // to be halved to work as expected + $timeout = (float) $timeout / 2; + + stream_context_set_option($this->_getContext, 'http', 'timeout', $timeout); + } + else + { + // use the default timeout pulled from default_socket_timeout otherwise + stream_context_set_option($this->_getContext, 'http', 'timeout', $this->getDefaultTimeout()); + } + + // $http_response_headers will be updated by the call to file_get_contents later + // see http://us.php.net/manual/en/wrappers.http.php for documentation + // Unfortunately, it will still create a notice in analyzers if we don't set it here + $http_response_header = null; + $responseBody = @file_get_contents($url, false, $this->_getContext); + + return $this->_getResponseFromParts($responseBody, $http_response_header); + } + + public function performHeadRequest($url, $timeout = false) + { + stream_context_set_option($this->_headContext, array( + 'http' => array( + // set HTTP method + 'method' => 'HEAD', + + // default timeout + 'timeout' => $this->getDefaultTimeout() + ) + ) + ); + + // set the timeout if specified + if ($timeout !== FALSE && $timeout > 0.0) + { + // timeouts with file_get_contents seem to need + // to be halved to work as expected + $timeout = (float) $timeout / 2; + + stream_context_set_option($this->_headContext, 'http', 'timeout', $timeout); + } + + // $http_response_headers will be updated by the call to file_get_contents later + // see http://us.php.net/manual/en/wrappers.http.php for documentation + // Unfortunately, it will still create a notice in analyzers if we don't set it here + $http_response_header = null; + $responseBody = @file_get_contents($url, false, $this->_headContext); + + return $this->_getResponseFromParts($responseBody, $http_response_header); + } + + public function performPostRequest($url, $rawPost, $contentType, $timeout = false) + { + stream_context_set_option($this->_postContext, array( + 'http' => array( + // set HTTP method + 'method' => 'POST', + + // Add our posted content type + 'header' => "Content-Type: $contentType", + + // the posted content + 'content' => $rawPost, + + // default timeout + 'timeout' => $this->getDefaultTimeout() + ) + ) + ); + + // set the timeout if specified + if ($timeout !== FALSE && $timeout > 0.0) + { + // timeouts with file_get_contents seem to need + // to be halved to work as expected + $timeout = (float) $timeout / 2; + + stream_context_set_option($this->_postContext, 'http', 'timeout', $timeout); + } + + // $http_response_header will be updated by the call to file_get_contents later + // see http://us.php.net/manual/en/wrappers.http.php for documentation + // Unfortunately, it will still create a notice in analyzers if we don't set it here + $http_response_header = null; + $responseBody = @file_get_contents($url, false, $this->_postContext); + + // reset content of post context to reclaim memory + stream_context_set_option($this->_postContext, 'http', 'content', ''); + + return $this->_getResponseFromParts($responseBody, $http_response_header); + } + + private function _getResponseFromParts($rawResponse, $httpHeaders) + { + //Assume 0, false as defaults + $status = 0; + $contentType = false; + + //iterate through headers for real status, type, and encoding + if (is_array($httpHeaders) && count($httpHeaders) > 0) + { + //look at the first headers for the HTTP status code + //and message (errors are usually returned this way) + // + //HTTP 100 Continue response can also be returned before + //the REAL status header, so we need look until we find + //the last header starting with HTTP + // + //the spec: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1 + // + //Thanks to Daniel Andersson for pointing out this oversight + while (isset($httpHeaders[0]) && substr($httpHeaders[0], 0, 4) == 'HTTP') + { + // we can do a intval on status line without the "HTTP/1.X " to get the code + $status = intval(substr($httpHeaders[0], 9)); + + // remove this from the headers so we can check for more + array_shift($httpHeaders); + } + + //Look for the Content-Type response header and determine type + //and encoding from it (if possible - such as 'Content-Type: text/plain; charset=UTF-8') + foreach ($httpHeaders as $header) + { + // look for the header that starts appropriately + if (strncasecmp($header, 'Content-Type:', 13) == 0) + { + $contentType = substr($header, 13); + break; + } + } + } + + return new Apache_Solr_HttpTransport_Response($status, $contentType, $rawResponse); + } +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/Interface.php b/Apache/Solr/HttpTransport/Interface.php new file mode 100755 index 0000000..090fc27 --- /dev/null +++ b/Apache/Solr/HttpTransport/Interface.php @@ -0,0 +1,94 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Timo Schmidt <timo.schmidt@aoemedia.de>, Donovan Jimenez <djimenez@conduit-it.com> + */ + +// require Apache_Solr_HttpTransport_Response +require_once(dirname(__FILE__) . '/Response.php'); + +/** + * Interface that all Transport (HTTP Requester) implementations must implement. These + * Implementations can then be plugged into the Service instance in order to user their + * the desired method for making HTTP requests + */ +interface Apache_Solr_HttpTransport_Interface +{ + /** + * Get the current default timeout for all HTTP requests + * + * @return float + */ + public function getDefaultTimeout(); + + /** + * Set the current default timeout for all HTTP requests + * + * @param float $timeout + */ + public function setDefaultTimeout($timeout); + + /** + * Perform a GET HTTP operation with an optional timeout and return the response + * contents, use getLastResponseHeaders to retrieve HTTP headers + * + * @param string $url + * @param float $timeout + * @return Apache_Solr_HttpTransport_Response HTTP response + */ + public function performGetRequest($url, $timeout = false); + + /** + * Perform a HEAD HTTP operation with an optional timeout and return the response + * headers - NOTE: head requests have no response body + * + * @param string $url + * @param float $timeout + * @return Apache_Solr_HttpTransport_Response HTTP response + */ + public function performHeadRequest($url, $timeout = false); + + /** + * Perform a POST HTTP operation with an optional timeout and return the response + * contents, use getLastResponseHeaders to retrieve HTTP headers + * + * @param string $url + * @param string $rawPost + * @param string $contentType + * @param float $timeout + * @return Apache_Solr_HttpTransport_Response HTTP response + */ + public function performPostRequest($url, $rawPost, $contentType, $timeout = false); +}
\ No newline at end of file diff --git a/Apache/Solr/HttpTransport/Response.php b/Apache/Solr/HttpTransport/Response.php new file mode 100755 index 0000000..3f113ae --- /dev/null +++ b/Apache/Solr/HttpTransport/Response.php @@ -0,0 +1,255 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +/** + * Represents the required pieces of an HTTP response provided by HTTP transport + * implementations and then consumed by the Apache_Solr_Response class which provides + * decoding + */ +class Apache_Solr_HttpTransport_Response +{ + /** + * Status Messages indexed by Status Code + * Obtained from: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html + * + * @var array + */ + static private $_defaultStatusMessages = array( + // Specific to PHP Solr Client + 0 => "Communication Error", + + // Informational 1XX + 100 => "Continue", + 101 => "Switching Protocols", + + // Successful 2XX + 200 => "OK", + 201 => "Created", + 202 => "Accepted", + 203 => "Non-Authoritative Information", + 204 => "No Content", + 205 => "Reset Content", + 206 => "Partial Content", + + // Redirection 3XX + 300 => "Multiple Choices", + 301 => "Moved Permanently", + 302 => "Found", + 303 => "See Other", + 304 => "Not Modified", + 305 => "Use Proxy", + 307 => "Temporary Redirect", + + // Client Error 4XX + 400 => "Bad Request", + 401 => "Unauthorized", + 402 => "Payment Required", + 403 => "Forbidden", + 404 => "Not Found", + 405 => "Method Not Allowed", + 406 => "Not Acceptable", + 407 => "Proxy Authentication Required", + 408 => "Request Timeout", + 409 => "Conflict", + 410 => "Gone", + 411 => "Length Required", + 412 => "Precondition Failed", + 413 => "Request Entity Too Large", + 414 => "Request-URI Too Long", + 415 => "Unsupported Media Type", + 416 => "Request Range Not Satisfiable", + 417 => "Expectation Failed", + + // Server Error 5XX + 500 => "Internal Server Error", + 501 => "Not Implemented", + 502 => "Bad Gateway", + 503 => "Service Unavailable", + 504 => "Gateway Timeout", + 505 => "HTTP Version Not Supported" + ); + + /** + * Get the HTTP status message based on status code + * + * @return string + */ + public static function getDefaultStatusMessage($statusCode) + { + $statusCode = (int) $statusCode; + + if (isset(self::$_defaultStatusMessages[$statusCode])) + { + return self::$_defaultStatusMessages[$statusCode]; + } + + return "Unknown Status"; + } + + /** + * The response's HTTP status code + * + * @var integer + */ + private $_statusCode; + + /** + * The response's HTTP status message + * + * @var string + */ + private $_statusMessage; + + /** + * The response's mime type + * + * @var string + */ + private $_mimeType; + + /** + * The response's character encoding + * + * @var string + */ + private $_encoding; + + /** + * The response's data + * + * @var string + */ + private $_responseBody; + + /** + * Construct a HTTP transport response + * + * @param integer $statusCode The HTTP status code + * @param string $contentType The VALUE of the Content-Type HTTP header + * @param string $responseBody The body of the HTTP response + */ + public function __construct($statusCode, $contentType, $responseBody) + { + // set the status code, make sure its an integer + $this->_statusCode = (int) $statusCode; + + // lookup up status message based on code + $this->_statusMessage = self::getDefaultStatusMessage($this->_statusCode); + + // set the response body, it should always be a string + $this->_responseBody = (string) $responseBody; + + // parse the content type header value for mimetype and encoding + // first set default values that will remain if we can't find + // what we're looking for in the content type + $this->_mimeType = "text/plain"; + $this->_encoding = "UTF-8"; + + if ($contentType) + { + // now break apart the header to see if there's character encoding + $contentTypeParts = explode(';', $contentType, 2); + + if (isset($contentTypeParts[0])) + { + $this->_mimeType = trim($contentTypeParts[0]); + } + + if (isset($contentTypeParts[1])) + { + // we have a second part, split it further + $contentTypeParts = explode('=', $contentTypeParts[1]); + + if (isset($contentTypeParts[1])) + { + $this->_encoding = trim($contentTypeParts[1]); + } + } + } + } + + /** + * Get the status code of the response + * + * @return integer + */ + public function getStatusCode() + { + return $this->_statusCode; + } + + /** + * Get the status message of the response + * + * @return string + */ + public function getStatusMessage() + { + return $this->_statusMessage; + } + + /** + * Get the mimetype of the response body + * + * @return string + */ + public function getMimeType() + { + return $this->_mimeType; + } + + /** + * Get the charset encoding of the response body. + * + * @return string + */ + public function getEncoding() + { + return $this->_encoding; + } + + /** + * Get the raw response body + * + * @return string + */ + public function getBody() + { + return $this->_responseBody; + } +} diff --git a/Apache/Solr/HttpTransportException.php b/Apache/Solr/HttpTransportException.php new file mode 100755 index 0000000..83c0001 --- /dev/null +++ b/Apache/Solr/HttpTransportException.php @@ -0,0 +1,79 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: HttpTransportException.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +class Apache_Solr_HttpTransportException extends Apache_Solr_Exception +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: HttpTransportException.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; + + /** + * Response for which exception was generated + * + * @var Apache_Solr_Response + */ + private $_response; + + /** + * HttpTransportException Constructor + * + * @param Apache_Solr_Response $response + */ + public function __construct(Apache_Solr_Response $response) + { + parent::__construct("'{$response->getHttpStatus()}' Status: {$response->getHttpStatusMessage()}", $response->getHttpStatus()); + + $this->_response = $response; + } + + /** + * Get the response for which this exception was generated + * + * @return Apache_Solr_Response + */ + public function getResponse() + { + return $this->_response; + } +}
\ No newline at end of file diff --git a/Apache/Solr/InvalidArgumentException.php b/Apache/Solr/InvalidArgumentException.php new file mode 100755 index 0000000..273f3d0 --- /dev/null +++ b/Apache/Solr/InvalidArgumentException.php @@ -0,0 +1,50 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: InvalidArgumentException.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +class Apache_Solr_InvalidArgumentException extends Apache_Solr_Exception +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: InvalidArgumentException.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; +}
\ No newline at end of file diff --git a/Apache/Solr/NoServiceAvailableException.php b/Apache/Solr/NoServiceAvailableException.php new file mode 100755 index 0000000..eead732 --- /dev/null +++ b/Apache/Solr/NoServiceAvailableException.php @@ -0,0 +1,50 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: NoServiceAvailableException.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +class Apache_Solr_NoServiceAvailableException extends Apache_Solr_Exception +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: NoServiceAvailableException.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; +}
\ No newline at end of file diff --git a/Apache/Solr/ParserException.php b/Apache/Solr/ParserException.php new file mode 100755 index 0000000..29283db --- /dev/null +++ b/Apache/Solr/ParserException.php @@ -0,0 +1,50 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: ParserException.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +class Apache_Solr_ParserException extends Apache_Solr_Exception +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: ParserException.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; +}
\ No newline at end of file diff --git a/Apache/Solr/Response.php b/Apache/Solr/Response.php new file mode 100755 index 0000000..1a35e56 --- /dev/null +++ b/Apache/Solr/Response.php @@ -0,0 +1,247 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: Response.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +require_once(dirname(__FILE__) . '/ParserException.php'); + +/** + * Represents a Solr response. Parses the raw response into a set of stdClass objects + * and associative arrays for easy access. + * + * Currently requires json_decode which is bundled with PHP >= 5.2.0, Alternatively can be + * installed with PECL. Zend Framework also includes a purely PHP solution. + */ +class Apache_Solr_Response +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: Response.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; + + /** + * Holds the raw response used in construction + * + * @var Apache_Solr_HttpTransport_Response HTTP response + */ + protected $_response; + + /** + * Whether the raw response has been parsed + * + * @var boolean + */ + protected $_isParsed = false; + + /** + * Parsed representation of the data + * + * @var mixed + */ + protected $_parsedData; + + /** + * Data parsing flags. Determines what extra processing should be done + * after the data is initially converted to a data structure. + * + * @var boolean + */ + protected $_createDocuments = true, + $_collapseSingleValueArrays = true; + + /** + * Constructor. Takes the raw HTTP response body and the exploded HTTP headers + * + * @return Apache_Solr_HttpTransport_Response HTTP response + * @param boolean $createDocuments Whether to convert the documents json_decoded as stdClass instances to Apache_Solr_Document instances + * @param boolean $collapseSingleValueArrays Whether to make multivalued fields appear as single values + */ + public function __construct(Apache_Solr_HttpTransport_Response $response, $createDocuments = true, $collapseSingleValueArrays = true) + { + $this->_response = $response; + $this->_createDocuments = (bool) $createDocuments; + $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; + } + + /** + * Get the HTTP status code + * + * @return integer + */ + public function getHttpStatus() + { + return $this->_response->getStatusCode(); + } + + /** + * Get the HTTP status message of the response + * + * @return string + */ + public function getHttpStatusMessage() + { + return $this->_response->getStatusMessage(); + } + + /** + * Get content type of this Solr response + * + * @return string + */ + public function getType() + { + return $this->_response->getMimeType(); + } + + /** + * Get character encoding of this response. Should usually be utf-8, but just in case + * + * @return string + */ + public function getEncoding() + { + return $this->_response->getEncoding(); + } + + /** + * Get the raw response as it was given to this object + * + * @return string + */ + public function getRawResponse() + { + return $this->_response->getBody(); + } + + /** + * Magic get to expose the parsed data and to lazily load it + * + * @param string $key + * @return mixed + */ + public function __get($key) + { + if (!$this->_isParsed) + { + $this->_parseData(); + $this->_isParsed = true; + } + + if (isset($this->_parsedData->$key)) + { + return $this->_parsedData->$key; + } + + return null; + } + + /** + * Magic function for isset function on parsed data + * + * @param string $key + * @return boolean + */ + public function __isset($key) + { + if (!$this->_isParsed) + { + $this->_parseData(); + $this->_isParsed = true; + } + + return isset($this->_parsedData->$key); + } + + /** + * Parse the raw response into the parsed_data array for access + * + * @throws Apache_Solr_ParserException If the data could not be parsed + */ + protected function _parseData() + { + //An alternative would be to use Zend_Json::decode(...) + $data = json_decode($this->_response->getBody()); + + // check that we receive a valid JSON response - we should never receive a null + if ($data === null) + { + throw new Apache_Solr_ParserException('Solr response does not appear to be valid JSON, please examine the raw response with getRawResponse() method'); + } + + //if we're configured to collapse single valued arrays or to convert them to Apache_Solr_Document objects + //and we have response documents, then try to collapse the values and / or convert them now + if (($this->_createDocuments || $this->_collapseSingleValueArrays) && isset($data->response) && is_array($data->response->docs)) + { + $documents = array(); + + foreach ($data->response->docs as $originalDocument) + { + if ($this->_createDocuments) + { + $document = new Apache_Solr_Document(); + } + else + { + $document = $originalDocument; + } + + foreach ($originalDocument as $key => $value) + { + //If a result is an array with only a single + //value then its nice to be able to access + //it as if it were always a single value + if ($this->_collapseSingleValueArrays && is_array($value) && count($value) <= 1) + { + $value = array_shift($value); + } + + $document->$key = $value; + } + + $documents[] = $document; + } + + $data->response->docs = $documents; + } + + $this->_parsedData = $data; + } +}
\ No newline at end of file diff --git a/Apache/Solr/Service.php b/Apache/Solr/Service.php new file mode 100755 index 0000000..8352dd8 --- /dev/null +++ b/Apache/Solr/Service.php @@ -0,0 +1,1197 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com> + */ + +// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1) +// Doesn't follow typical include path conventions, but is more convenient for users +require_once(dirname(__FILE__) . '/Exception.php'); +require_once(dirname(__FILE__) . '/HttpTransportException.php'); +require_once(dirname(__FILE__) . '/InvalidArgumentException.php'); + +require_once(dirname(__FILE__) . '/Document.php'); +require_once(dirname(__FILE__) . '/Response.php'); + +require_once(dirname(__FILE__) . '/HttpTransport/Interface.php'); + +/** + * Starting point for the Solr API. Represents a Solr server resource and has + * methods for pinging, adding, deleting, committing, optimizing and searching. + * + * Example Usage: + * <code> + * ... + * $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr') + * + * if ($solr->ping()) + * { + * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :) + * + * $document = new Apache_Solr_Document(); + * $document->id = uniqid(); //or something else suitably unique + * + * $document->title = 'Some Title'; + * $document->content = 'Some content for this wonderful document. Blah blah blah.'; + * + * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments + * //with an array of documents is faster + * + * $solr->commit(); //commit to see the deletes and the document + * $solr->optimize(); //merges multiple segments into one + * + * //and the one we all care about, search! + * //any other common or custom parameters to the request handler can go in the + * //optional 4th array argument. + * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc')); + * } + * ... + * </code> + * + * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance + * improvements when dealing with multiple requests by using HTTP's keep alive functionality + */ +class Apache_Solr_Service +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 59 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $'; + + /** + * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning + */ + const SOLR_WRITER = 'json'; + + /** + * NamedList Treatment constants + */ + const NAMED_LIST_FLAT = 'flat'; + const NAMED_LIST_MAP = 'map'; + + /** + * Search HTTP Methods + */ + const METHOD_GET = 'GET'; + const METHOD_POST = 'POST'; + + /** + * Servlet mappings + */ + const PING_SERVLET = 'admin/ping'; + const UPDATE_SERVLET = 'update'; + const SEARCH_SERVLET = 'select'; + const THREADS_SERVLET = 'admin/threads'; + const EXTRACT_SERVLET = 'update/extract'; + + /** + * Server identification strings + * + * @var string + */ + protected $_host, $_port, $_path; + + /** + * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in + * the returned parsed data + * + * @var boolean + */ + protected $_createDocuments = true; + + /** + * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value + * collapsed to appear as a single value would. + * + * @var boolean + */ + protected $_collapseSingleValueArrays = true; + + /** + * How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values + * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}. + * + * @var string + */ + protected $_namedListTreatment = self::NAMED_LIST_MAP; + + /** + * Query delimiters. Someone might want to be able to change + * these (to use & instead of & for example), so I've provided them. + * + * @var string + */ + protected $_queryDelimiter = '?', $_queryStringDelimiter = '&', $_queryBracketsEscaped = true; + + /** + * Constructed servlet full path URLs + * + * @var string + */ + protected $_pingUrl, $_updateUrl, $_searchUrl, $_threadsUrl; + + /** + * Keep track of whether our URLs have been constructed + * + * @var boolean + */ + protected $_urlsInited = false; + + /** + * HTTP Transport implementation (pluggable) + * + * @var Apache_Solr_HttpTransport_Interface + */ + protected $_httpTransport = false; + + /** + * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. + * + * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead + * + * @param string $value + * @return string + */ + static public function escape($value) + { + //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters + $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/'; + $replace = '\\\$1'; + + return preg_replace($pattern, $replace, $value); + } + + /** + * Escape a value meant to be contained in a phrase for special query characters + * + * @param string $value + * @return string + */ + static public function escapePhrase($value) + { + $pattern = '/("|\\\)/'; + $replace = '\\\$1'; + + return preg_replace($pattern, $replace, $value); + } + + /** + * Convenience function for creating phrase syntax from a value + * + * @param string $value + * @return string + */ + static public function phrase($value) + { + return '"' . self::escapePhrase($value) . '"'; + } + + /** + * Constructor. All parameters are optional and will take on default values + * if not specified. + * + * @param string $host + * @param string $port + * @param string $path + * @param Apache_Solr_HttpTransport_Interface $httpTransport + */ + public function __construct($host = 'localhost', $port = 8180, $path = '/solr/', $httpTransport = false) + { + $this->setHost($host); + $this->setPort($port); + $this->setPath($path); + + $this->_initUrls(); + + if ($httpTransport) + { + $this->setHttpTransport($httpTransport); + } + + // check that our php version is >= 5.1.3 so we can correct for http_build_query behavior later + $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>='); + } + + /** + * Return a valid http URL given this server's host, port and path and a provided servlet name + * + * @param string $servlet + * @return string + */ + protected function _constructUrl($servlet, $params = array()) + { + if (count($params)) + { + //escape all parameters appropriately for inclusion in the query string + $escapedParams = array(); + + foreach ($params as $key => $value) + { + $escapedParams[] = urlencode($key) . '=' . urlencode($value); + } + + $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams); + } + else + { + $queryString = ''; + } + + return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString; + } + + /** + * Construct the Full URLs for the three servlets we reference + */ + protected function _initUrls() + { + //Initialize our full servlet URLs now that we have server information + $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET); + $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET); + $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET); + $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER )); + $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER )); + + $this->_urlsInited = true; + } + + protected function _generateQueryString($params) + { + // use http_build_query to encode our arguments because its faster + // than urlencoding all the parts ourselves in a loop + // + // because http_build_query treats arrays differently than we want to, correct the query + // string by changing foo[#]=bar (# being an actual number) parameter strings to just + // multiple foo=bar strings. This regex should always work since '=' will be urlencoded + // anywhere else the regex isn't expecting it + // + // NOTE: before php 5.1.3 brackets were not url encoded by http_build query - we've checked + // the php version in the constructor and put the results in the instance variable. Also, before + // 5.1.2 the arg_separator parameter was not available, so don't use it + if ($this->_queryBracketsEscaped) + { + $queryString = http_build_query($params, null, $this->_queryStringDelimiter); + return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString); + } + else + { + $queryString = http_build_query($params); + return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString); + } + } + + /** + * Central method for making a get operation against this Solr Server + * + * @param string $url + * @param float $timeout Read timeout in seconds + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned + */ + protected function _sendRawGet($url, $timeout = FALSE) + { + $httpTransport = $this->getHttpTransport(); + + $httpResponse = $httpTransport->performGetRequest($url, $timeout); + $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); + + if ($solrResponse->getHttpStatus() != 200) + { + throw new Apache_Solr_HttpTransportException($solrResponse); + } + + return $solrResponse; + } + + /** + * Central method for making a post operation against this Solr Server + * + * @param string $url + * @param string $rawPost + * @param float $timeout Read timeout in seconds + * @param string $contentType + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned + */ + protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') + { + $httpTransport = $this->getHttpTransport(); + + $httpResponse = $httpTransport->performPostRequest($url, $rawPost, $contentType, $timeout); + $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); + + if ($solrResponse->getHttpStatus() != 200) + { + throw new Apache_Solr_HttpTransportException($solrResponse); + } + + return $solrResponse; + } + + /** + * Returns the set host + * + * @return string + */ + public function getHost() + { + return $this->_host; + } + + /** + * Set the host used. If empty will fallback to constants + * + * @param string $host + * + * @throws Apache_Solr_InvalidArgumentException If the host parameter is empty + */ + public function setHost($host) + { + //Use the provided host or use the default + if (empty($host)) + { + throw new Apache_Solr_InvalidArgumentException('Host parameter is empty'); + } + else + { + $this->_host = $host; + } + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Get the set port + * + * @return integer + */ + public function getPort() + { + return $this->_port; + } + + /** + * Set the port used. If empty will fallback to constants + * + * @param integer $port + * + * @throws Apache_Solr_InvalidArgumentException If the port parameter is empty + */ + public function setPort($port) + { + //Use the provided port or use the default + $port = (int) $port; + + if ($port <= 0) + { + throw new Apache_Solr_InvalidArgumentException('Port is not a valid port number'); + } + else + { + $this->_port = $port; + } + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Get the set path. + * + * @return string + */ + public function getPath() + { + return $this->_path; + } + + /** + * Set the path used. If empty will fallback to constants + * + * @param string $path + */ + public function setPath($path) + { + $path = trim($path, '/'); + + $this->_path = '/' . $path . '/'; + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Get the current configured HTTP Transport + * + * @return HttpTransportInterface + */ + public function getHttpTransport() + { + // lazy load a default if one has not be set + if ($this->_httpTransport === false) + { + require_once(dirname(__FILE__) . '/HttpTransport/FileGetContents.php'); + + $this->_httpTransport = new Apache_Solr_HttpTransport_FileGetContents(); + } + + return $this->_httpTransport; + } + + /** + * Set the HTTP Transport implemenation that will be used for all HTTP requests + * + * @param Apache_Solr_HttpTransport_Interface + */ + public function setHttpTransport(Apache_Solr_HttpTransport_Interface $httpTransport) + { + $this->_httpTransport = $httpTransport; + } + + /** + * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will + * parse the response and create {@link Apache_Solr_Document} instances in place. + * + * @param boolean $createDocuments + */ + public function setCreateDocuments($createDocuments) + { + $this->_createDocuments = (bool) $createDocuments; + } + + /** + * Get the current state of teh create documents flag. + * + * @return boolean + */ + public function getCreateDocuments() + { + return $this->_createDocuments; + } + + /** + * Set the collapse single value arrays flag. + * + * @param boolean $collapseSingleValueArrays + */ + public function setCollapseSingleValueArrays($collapseSingleValueArrays) + { + $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; + } + + /** + * Get the current state of the collapse single value arrays flag. + * + * @return boolean + */ + public function getCollapseSingleValueArrays() + { + return $this->_collapseSingleValueArrays; + } + + /** + * Get the current default timeout setting (initially the default_socket_timeout ini setting) + * in seconds + * + * @return float + * + * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation + */ + public function getDefaultTimeout() + { + return $this->getHttpTransport()->getDefaultTimeout(); + } + + /** + * Set the default timeout for all calls that aren't passed a specific timeout + * + * @param float $timeout Timeout value in seconds + * + * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation + */ + public function setDefaultTimeout($timeout) + { + $this->getHttpTransport()->setDefaultTimeout($timeout); + } + + /** + * Set how NamedLists should be formatted in the response data. This mainly effects + * the facet counts format. + * + * @param string $namedListTreatment + * @throws Apache_Solr_InvalidArgumentException If invalid option is set + */ + public function setNamedListTreatment($namedListTreatment) + { + switch ((string) $namedListTreatment) + { + case Apache_Solr_Service::NAMED_LIST_FLAT: + $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT; + break; + + case Apache_Solr_Service::NAMED_LIST_MAP: + $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP; + break; + + default: + throw new Apache_Solr_InvalidArgumentException('Not a valid named list treatement option'); + } + } + + /** + * Get the current setting for named list treatment. + * + * @return string + */ + public function getNamedListTreatment() + { + return $this->_namedListTreatment; + } + + /** + * Set the string used to separate the path form the query string. + * Defaulted to '?' + * + * @param string $queryDelimiter + */ + public function setQueryDelimiter($queryDelimiter) + { + $this->_queryDelimiter = $queryDelimiter; + } + + /** + * Set the string used to separate the parameters in thequery string + * Defaulted to '&' + * + * @param string $queryStringDelimiter + */ + public function setQueryStringDelimiter($queryStringDelimiter) + { + $this->_queryStringDelimiter = $queryStringDelimiter; + } + + /** + * Call the /admin/ping servlet, can be used to quickly tell if a connection to the + * server is able to be made. + * + * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2) + * @return float Actual time taken to ping the server, FALSE if timeout or HTTP error status occurs + */ + public function ping($timeout = 2) + { + $start = microtime(true); + + $httpTransport = $this->getHttpTransport(); + + $httpResponse = $httpTransport->performHeadRequest($this->_pingUrl, $timeout); + $solrResponse = new Apache_Solr_Response($httpResponse, $this->_createDocuments, $this->_collapseSingleValueArrays); + + if ($solrResponse->getHttpStatus() == 200) + { + return microtime(true) - $start; + } + else + { + return false; + } + } + + /** + * Call the /admin/threads servlet and retrieve information about all threads in the + * Solr servlet's thread group. Useful for diagnostics. + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function threads() + { + return $this->_sendRawGet($this->_threadsUrl); + } + + /** + * Raw Add Method. Takes a raw post body and sends it to the update service. Post body + * should be a complete and well formed "add" xml document. + * + * @param string $rawPost + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function add($rawPost) + { + return $this->_sendRawPost($this->_updateUrl, $rawPost); + } + + /** + * Add a Solr Document to the index + * + * @param Apache_Solr_Document $document + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request. + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) + { + $dupValue = $allowDups ? 'true' : 'false'; + $pendingValue = $overwritePending ? 'true' : 'false'; + $committedValue = $overwriteCommitted ? 'true' : 'false'; + + $commitWithin = (int) $commitWithin; + $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : ''; + + $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>"; + $rawPost .= $this->_documentToXmlFragment($document); + $rawPost .= '</add>'; + + return $this->add($rawPost); + } + + /** + * Add an array of Solr Documents to the index all at once + * + * @param array $documents Should be an array of Apache_Solr_Document instances + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @param integer $commitWithin The number of milliseconds that a document must be committed within, see @{link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. If left empty this property will not be set in the request. + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0) + { + $dupValue = $allowDups ? 'true' : 'false'; + $pendingValue = $overwritePending ? 'true' : 'false'; + $committedValue = $overwriteCommitted ? 'true' : 'false'; + + $commitWithin = (int) $commitWithin; + $commitWithinString = $commitWithin > 0 ? " commitWithin=\"{$commitWithin}\"" : ''; + + $rawPost = "<add allowDups=\"{$dupValue}\" overwritePending=\"{$pendingValue}\" overwriteCommitted=\"{$committedValue}\"{$commitWithinString}>"; + + foreach ($documents as $document) + { + if ($document instanceof Apache_Solr_Document) + { + $rawPost .= $this->_documentToXmlFragment($document); + } + } + + $rawPost .= '</add>'; + + return $this->add($rawPost); + } + + /** + * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call + * + * @return string + */ + protected function _documentToXmlFragment(Apache_Solr_Document $document) + { + $xml = '<doc'; + + if ($document->getBoost() !== false) + { + $xml .= ' boost="' . $document->getBoost() . '"'; + } + + $xml .= '>'; + + foreach ($document as $key => $value) + { + $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8'); + $fieldBoost = $document->getFieldBoost($key); + + if (is_array($value)) + { + foreach ($value as $multivalue) + { + $xml .= '<field name="' . $key . '"'; + + if ($fieldBoost !== false) + { + $xml .= ' boost="' . $fieldBoost . '"'; + + // only set the boost for the first field in the set + $fieldBoost = false; + } + + $multivalue = htmlspecialchars($multivalue, ENT_NOQUOTES, 'UTF-8'); + + $xml .= '>' . $multivalue . '</field>'; + } + } + else + { + $xml .= '<field name="' . $key . '"'; + + if ($fieldBoost !== false) + { + $xml .= ' boost="' . $fieldBoost . '"'; + } + + $value = htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8'); + + $xml .= '>' . $value . '</field>'; + } + } + + $xml .= '</doc>'; + + // replace any control characters to avoid Solr XML parser exception + return $this->_stripCtrlChars($xml); + } + + /** + * Replace control (non-printable) characters from string that are invalid to Solr's XML parser with a space. + * + * @param string $string + * @return string + */ + protected function _stripCtrlChars($string) + { + // See: http://w3.org/International/questions/qa-forms-utf-8.html + // Printable utf-8 does not include any of these chars below x7F + return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string); + } + + /** + * Send a commit command. Will be synchronous unless both wait parameters are set to false. + * + * @param boolean $expungeDeletes Defaults to false, merge segments with deletes away + * @param boolean $waitFlush Defaults to true, block until index changes are flushed to disk + * @param boolean $waitSearcher Defaults to true, block until a new searcher is opened and registered as the main query searcher, making the changes visible + * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + + + /* + + public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $expungeValue = $expungeDeletes ? 'true' : 'false'; + $flushValue = $waitFlush ? 'true' : 'false'; + $searcherValue = $waitSearcher ? 'true' : 'false'; + + $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />'; + + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + */ + + public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $expungeValue = $expungeDeletes ? 'true' : 'false'; + $searcherValue = $waitSearcher ? 'true' : 'false'; + + $rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitSearcher="' . $searcherValue . '" />'; + + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + + + /** + * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be + * a complete and well formed "delete" xml document + * + * @param string $rawPost Expected to be utf-8 encoded xml document + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function delete($rawPost, $timeout = 3600) + { + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + + /** + * Create a delete document based on document ID + * + * @param string $id Expected to be utf-8 encoded + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $pendingValue = $fromPending ? 'true' : 'false'; + $committedValue = $fromCommitted ? 'true' : 'false'; + + //escape special xml characters + $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); + + $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><id>' . $id . '</id></delete>'; + + return $this->delete($rawPost, $timeout); + } + + /** + * Create and post a delete document based on multiple document IDs. + * + * @param array $ids Expected to be utf-8 encoded strings + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $pendingValue = $fromPending ? 'true' : 'false'; + $committedValue = $fromCommitted ? 'true' : 'false'; + + $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '">'; + + foreach ($ids as $id) + { + //escape special xml characters + $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); + + $rawPost .= '<id>' . $id . '</id>'; + } + + $rawPost .= '</delete>'; + + return $this->delete($rawPost, $timeout); + } + + /** + * Create a delete document based on a query and submit it + * + * @param string $rawQuery Expected to be utf-8 encoded + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $pendingValue = $fromPending ? 'true' : 'false'; + $committedValue = $fromCommitted ? 'true' : 'false'; + + // escape special xml characters + $rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8'); + + $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><query>' . $rawQuery . '</query></delete>'; + + return $this->delete($rawPost, $timeout); + } + + /** + * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how + * to use Solr Cell and what parameters are available. + * + * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." + * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value + * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also + * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). + * + * @param string $file Path to file to extract data from + * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) + * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) + * @param string $mimetype optional mimetype specification (for the file being extracted) + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. + */ + public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream') + { + // check if $params is an array (allow null for default empty array) + if (!is_null($params)) + { + if (!is_array($params)) + { + throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); + } + } + else + { + $params = array(); + } + + // if $file is an http request, defer to extractFromUrl instead + if (substr($file, 0, 7) == 'http://' || substr($file, 0, 8) == 'https://') + { + return $this->extractFromUrl($file, $params, $document, $mimetype); + } + + // read the contents of the file + $contents = @file_get_contents($file); + + if ($contents !== false) + { + // add the resource.name parameter if not specified + if (!isset($params['resource.name'])) + { + $params['resource.name'] = basename($file); + } + + // delegate the rest to extractFromString + return $this->extractFromString($contents, $params, $document, $mimetype); + } + else + { + throw new Apache_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read"); + } + } + + /** + * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how + * to use Solr Cell and what parameters are available. + * + * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." + * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value + * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also + * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). + * + * @param string $data Data that will be passed to Solr Cell + * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) + * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) + * @param string $mimetype optional mimetype specification (for the file being extracted) + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. + * + * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs revisisted. + */ + public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream') + { + // check if $params is an array (allow null for default empty array) + if (!is_null($params)) + { + if (!is_array($params)) + { + throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); + } + } + else + { + $params = array(); + } + + // make sure we receive our response in JSON and have proper name list treatment + $params['wt'] = self::SOLR_WRITER; + $params['json.nl'] = $this->_namedListTreatment; + + // check if $document is an Apache_Solr_Document instance + if (!is_null($document) && $document instanceof Apache_Solr_Document) + { + // iterate document, adding literal.* and boost.* fields to $params as appropriate + foreach ($document as $field => $fieldValue) + { + // check if we need to add a boost.* parameters + $fieldBoost = $document->getFieldBoost($field); + + if ($fieldBoost !== false) + { + $params["boost.{$field}"] = $fieldBoost; + } + + // add the literal.* parameter + $params["literal.{$field}"] = $fieldValue; + } + } + + // params will be sent to SOLR in the QUERY STRING + $queryString = $this->_generateQueryString($params); + + // the file contents will be sent to SOLR as the POST BODY - we use application/octect-stream as default mimetype + return $this->_sendRawPost($this->_extractUrl . $this->_queryDelimiter . $queryString, $data, false, $mimetype); + } + + /** + * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how + * to use Solr Cell and what parameters are available. + * + * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." + * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value + * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also + * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). + * + * @param string $url URL + * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) + * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) + * @param string $mimetype optional mimetype specification (for the file being extracted) + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_InvalidArgumentException if $url, $params, or $document are invalid. + */ + public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream') + { + // check if $params is an array (allow null for default empty array) + if (!is_null($params)) + { + if (!is_array($params)) + { + throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); + } + } + else + { + $params = array(); + } + + $httpTransport = $this->getHttpTransport(); + + // read the contents of the URL using our configured Http Transport and default timeout + $httpResponse = $httpTransport->performGetRequest($url); + + // check that its a 200 response + if ($httpResponse->getStatusCode() == 200) + { + // add the resource.name parameter if not specified + if (!isset($params['resource.name'])) + { + $params['resource.name'] = $url; + } + + // delegate the rest to extractFromString + return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype); + } + else + { + throw new Apache_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code"); + } + } + + /** + * Send an optimize command. Will be synchronous unless both wait parameters are set + * to false. + * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $flushValue = $waitFlush ? 'true' : 'false'; + $searcherValue = $waitSearcher ? 'true' : 'false'; + + $rawPost = '<optimize waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />'; + + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + + /** + * Simple Search interface + * + * @param string $query The raw query string + * @param int $offset The starting offset for result documents + * @param int $limit The maximum number of result documents to return + * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) + * @param string $method The HTTP method (Apache_Solr_Service::METHOD_GET or Apache_Solr_Service::METHOD::POST) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + * @throws Apache_Solr_InvalidArgumentException If an invalid HTTP method is used + */ + public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET) + { + // ensure params is an array + if (!is_null($params)) + { + if (!is_array($params)) + { + // params was specified but was not an array - invalid + throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null"); + } + } + else + { + $params = array(); + } + + // construct our full parameters + + // common parameters in this interface + $params['wt'] = self::SOLR_WRITER; + $params['json.nl'] = $this->_namedListTreatment; + + $params['q'] = $query; + $params['start'] = $offset; + $params['rows'] = $limit; + + $queryString = $this->_generateQueryString($params); + + if ($method == self::METHOD_GET) + { + return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString); + } + else if ($method == self::METHOD_POST) + { + return $this->_sendRawPost($this->_searchUrl, $queryString, FALSE, 'application/x-www-form-urlencoded; charset=UTF-8'); + } + else + { + throw new Apache_Solr_InvalidArgumentException("Unsupported method '$method', please use the Apache_Solr_Service::METHOD_* constants"); + } + } +} diff --git a/Apache/Solr/Service/Balancer.php b/Apache/Solr/Service/Balancer.php new file mode 100755 index 0000000..cad57e5 --- /dev/null +++ b/Apache/Solr/Service/Balancer.php @@ -0,0 +1,914 @@ +<?php +/** + * Copyright (c) 2007-2011, Servigistics, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * - Neither the name of Servigistics, Inc. nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com) + * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD + * @version $Id: Balancer.php 54 2011-02-04 16:29:18Z donovan.jimenez $ + * + * @package Apache + * @subpackage Solr + * @author Donovan Jimenez <djimenez@conduit-it.com>, Dan Wolfe + */ + +// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1) +// Doesn't follow typical include path conventions, but is more convenient for users +require_once(dirname(dirname(__FILE__)) . '/Service.php'); + +require_once(dirname(dirname(__FILE__)) . '/NoServiceAvailableException.php'); + +/** + * Reference Implementation for using multiple Solr services in a distribution. Functionality + * includes: + * routing of read / write operations + * failover (on selection) for multiple read servers + */ +class Apache_Solr_Service_Balancer +{ + /** + * SVN Revision meta data for this class + */ + const SVN_REVISION = '$Revision: 54 $'; + + /** + * SVN ID meta data for this class + */ + const SVN_ID = '$Id: Balancer.php 54 2011-02-04 16:29:18Z donovan.jimenez $'; + + protected $_createDocuments = true; + + protected $_readableServices = array(); + protected $_writeableServices = array(); + + protected $_currentReadService = null; + protected $_currentWriteService = null; + + protected $_readPingTimeout = 2; + protected $_writePingTimeout = 4; + + // Configuration for server selection backoff intervals + protected $_useBackoff = false; // Set to true to use more resillient write server selection + protected $_backoffLimit = 600; // 10 minute default maximum + protected $_backoffEscalation = 2.0; // Rate at which to increase backoff period + protected $_defaultBackoff = 2.0; // Default backoff interval + + /** + * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. + * + * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead + * + * @param string $value + * @return string + */ + static public function escape($value) + { + return Apache_Solr_Service::escape($value); + } + + /** + * Escape a value meant to be contained in a phrase for special query characters + * + * @param string $value + * @return string + */ + static public function escapePhrase($value) + { + return Apache_Solr_Service::escapePhrase($value); + } + + /** + * Convenience function for creating phrase syntax from a value + * + * @param string $value + * @return string + */ + static public function phrase($value) + { + return Apache_Solr_Service::phrase($value); + } + + /** + * Constructor. Takes arrays of read and write service instances or descriptions + * + * @param array $readableServices + * @param array $writeableServices + */ + public function __construct($readableServices = array(), $writeableServices = array()) + { + //setup readable services + foreach ($readableServices as $service) + { + $this->addReadService($service); + } + + //setup writeable services + foreach ($writeableServices as $service) + { + $this->addWriteService($service); + } + } + + public function setReadPingTimeout($timeout) + { + $this->_readPingTimeout = $timeout; + } + + public function setWritePingTimeout($timeout) + { + $this->_writePingTimeout = $timeout; + } + + public function setUseBackoff($enable) + { + $this->_useBackoff = $enable; + } + + /** + * Generates a service ID + * + * @param string $host + * @param integer $port + * @param string $path + * @return string + */ + protected function _getServiceId($host, $port, $path) + { + return $host . ':' . $port . $path; + } + + /** + * Adds a service instance or service descriptor (if it is already + * not added) + * + * @param mixed $service + * + * @throws Apache_Solr_InvalidArgumentException If service descriptor is not valid + */ + public function addReadService($service) + { + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + + $this->_readableServices[$id] = $service; + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + + $this->_readableServices[$id] = $service; + } + else + { + throw new Apache_Solr_InvalidArgumentException('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + } + + /** + * Removes a service instance or descriptor from the available services + * + * @param mixed $service + * + * @throws Apache_Solr_InvalidArgumentException If service descriptor is not valid + */ + public function removeReadService($service) + { + $id = ''; + + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + } + else + { + throw new Apache_Solr_InvalidArgumentException('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + else if (is_string($service)) + { + $id = $service; + } + + if ($id && isset($this->_readableServices[$id])) + { + unset($this->_readableServices[$id]); + } + } + + /** + * Adds a service instance or service descriptor (if it is already + * not added) + * + * @param mixed $service + * + * @throws Apache_Solr_InvalidArgumentException If service descriptor is not valid + */ + public function addWriteService($service) + { + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + + $this->_writeableServices[$id] = $service; + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + + $this->_writeableServices[$id] = $service; + } + else + { + throw new Apache_Solr_InvalidArgumentException('A Writeable Service description array does not have all required elements of host, port, and path'); + } + } + } + + /** + * Removes a service instance or descriptor from the available services + * + * @param mixed $service + * + * @throws Apache_Solr_InvalidArgumentException If service descriptor is not valid + */ + public function removeWriteService($service) + { + $id = ''; + + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + } + else + { + throw new Apache_Solr_InvalidArgumentException('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + else if (is_string($service)) + { + $id = $service; + } + + if ($id && isset($this->_writeableServices[$id])) + { + unset($this->_writeableServices[$id]); + } + } + + /** + * Iterate through available read services and select the first with a ping + * that satisfies configured timeout restrictions (or the default) + * + * @return Apache_Solr_Service + * + * @throws Apache_Solr_NoServiceAvailableException If there are no read services that meet requirements + */ + protected function _selectReadService($forceSelect = false) + { + if (!$this->_currentReadService || !isset($this->_readableServices[$this->_currentReadService]) || $forceSelect) + { + if ($this->_currentReadService && isset($this->_readableServices[$this->_currentReadService]) && $forceSelect) + { + // we probably had a communication error, ping the current read service, remove it if it times out + if ($this->_readableServices[$this->_currentReadService]->ping($this->_readPingTimeout) === false) + { + $this->removeReadService($this->_currentReadService); + } + } + + if (count($this->_readableServices)) + { + // select one of the read services at random + $ids = array_keys($this->_readableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_readableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_readableServices[$id] = $service; + } + + $service->setCreateDocuments($this->_createDocuments); + $this->_currentReadService = $id; + } + else + { + throw new Apache_Solr_NoServiceAvailableException('No read services were available'); + } + } + + return $this->_readableServices[$this->_currentReadService]; + } + + /** + * Iterate through available write services and select the first with a ping + * that satisfies configured timeout restrictions (or the default) + * + * @return Apache_Solr_Service + * + * @throws Apache_Solr_NoServiceAvailableException If there are no write services that meet requirements + */ + protected function _selectWriteService($forceSelect = false) + { + if($this->_useBackoff) + { + return $this->_selectWriteServiceSafe($forceSelect); + } + + if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) + { + if ($this->_currentWriteService && isset($this->_writeableServices[$this->_currentWriteService]) && $forceSelect) + { + // we probably had a communication error, ping the current read service, remove it if it times out + if ($this->_writeableServices[$this->_currentWriteService]->ping($this->_writePingTimeout) === false) + { + $this->removeWriteService($this->_currentWriteService); + } + } + + if (count($this->_writeableServices)) + { + // select one of the read services at random + $ids = array_keys($this->_writeableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_writeableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_writeableServices[$id] = $service; + } + + $this->_currentWriteService = $id; + } + else + { + throw new Apache_Solr_NoServiceAvailableException('No write services were available'); + } + } + + return $this->_writeableServices[$this->_currentWriteService]; + } + + /** + * Iterate through available write services and select the first with a ping + * that satisfies configured timeout restrictions (or the default). The + * timeout period will increase until a connection is made or the limit is + * reached. This will allow for increased reliability with heavily loaded + * server(s). + * + * @return Apache_Solr_Service + * + * @throws Apache_Solr_NoServiceAvailableException If there are no write services that meet requirements + */ + + protected function _selectWriteServiceSafe($forceSelect = false) + { + if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) + { + if (count($this->_writeableServices)) + { + $backoff = $this->_defaultBackoff; + + do { + // select one of the read services at random + $ids = array_keys($this->_writeableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_writeableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_writeableServices[$id] = $service; + } + + $this->_currentWriteService = $id; + + $backoff *= $this->_backoffEscalation; + + if($backoff > $this->_backoffLimit) + { + throw new Apache_Solr_NoServiceAvailableException('No write services were available. All timeouts exceeded.'); + } + + } while($this->_writeableServices[$this->_currentWriteService]->ping($backoff) === false); + } + else + { + throw new Apache_Solr_NoServiceAvailableException('No write services were available'); + } + } + + return $this->_writeableServices[$this->_currentWriteService]; + } + + /** + * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will + * parse the response and create {@link Apache_Solr_Document} instances in place. + * + * @param boolean $createDocuments + */ + public function setCreateDocuments($createDocuments) + { + $this->_createDocuments = (bool) $createDocuments; + + // set on current read service + if ($this->_currentReadService) + { + $service = $this->_selectReadService(); + $service->setCreateDocuments($createDocuments); + } + } + + /** + * Get the current state of teh create documents flag. + * + * @return boolean + */ + public function getCreateDocuments() + { + return $this->_createDocuments; + } + + /** + * Raw Add Method. Takes a raw post body and sends it to the update service. Post body + * should be a complete and well formed "add" xml document. + * + * @param string $rawPost + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function add($rawPost) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->add($rawPost); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Add a Solr Document to the index + * + * @param Apache_Solr_Document $document + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->addDocument($document, $allowDups, $overwritePending, $overwriteCommitted); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Add an array of Solr Documents to the index all at once + * + * @param array $documents Should be an array of Apache_Solr_Document instances + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->addDocuments($documents, $allowDups, $overwritePending, $overwriteCommitted); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Send a commit command. Will be synchronous unless both wait parameters are set + * to false. + * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->commit($optimize, $waitFlush, $waitSearcher, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be + * a complete and well formed "delete" xml document + * + * @param string $rawPost + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function delete($rawPost, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->delete($rawPost, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Create a delete document based on document ID + * + * @param string $id + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->deleteById($id, $fromPending, $fromCommitted, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Create and post a delete document based on multiple document IDs. + * + * @param array $ids Expected to be utf-8 encoded strings + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->deleteByMultipleId($ids, $fromPending, $fromCommitted, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Create a delete document based on a query and submit it + * + * @param string $rawQuery + * @param boolean $fromPending + * @param boolean $fromCommitted + * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->deleteByQuery($rawQuery, $fromPending, $fromCommitted, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how + * to use Solr Cell and what parameters are available. + * + * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." + * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value + * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also + * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). + * + * @param string $file Path to file to extract data from + * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) + * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) + * @param string $mimetype optional mimetype specification (for the file being extracted) + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. + */ + public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream') + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->extract($file, $params, $document, $mimetype); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how + * to use Solr Cell and what parameters are available. + * + * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost." + * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value + * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also + * pass in a document isntance with an "id" field" - the document's value(s) will take precedence). + * + * @param string $data Data that will be passed to Solr Cell + * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation) + * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params) + * @param string $mimetype optional mimetype specification (for the file being extracted) + * + * @return Apache_Solr_Response + * + * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid. + * + * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs revisisted. + */ + public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream') + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->extractFromString($data, $params, $document, $mimetype); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Send an optimize command. Will be synchronous unless both wait parameters are set + * to false. + * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @param float $timeout Maximum expected duration of the optimize operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->optimize($waitFlush, $waitSearcher, $timeout); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Simple Search interface + * + * @param string $query The raw query string + * @param int $offset The starting offset for result documents + * @param int $limit The maximum number of result documents to return + * @param array $params key / value pairs for query parameters, use arrays for multivalued parameters + * @param string $method The HTTP method (Apache_Solr_Service::METHOD_GET or Apache_Solr_Service::METHOD::POST) + * @return Apache_Solr_Response + * + * @throws Apache_Solr_HttpTransportException If an error occurs during the service call + */ + public function search($query, $offset = 0, $limit = 10, $params = array(), $method = Apache_Solr_Service::METHOD_GET) + { + $service = $this->_selectReadService(); + + do + { + try + { + return $service->search($query, $offset, $limit, $params, $method); + } + catch (Apache_Solr_HttpTransportException $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectReadService(true); + } while ($service); + + return false; + } +} @@ -1 +1,11 @@ -# r_solr_search
\ No newline at end of file +# r_solr_search +This is used in R code search feature on R TBC website. + +##### Requirements: +> Apache Solr 8.2.0 + +##### Author: +> Prashant Sinalkar +> FOSSEE, IIT Bombay + + diff --git a/bootstrap.inc b/bootstrap.inc new file mode 100755 index 0000000..d2a2b3a --- /dev/null +++ b/bootstrap.inc @@ -0,0 +1,21 @@ + +<?php + + /* Domain name of the Solr server */ + define('SOLR_SERVER_HOSTNAME', 'localhost'); + + /* HTTP Port to connection */ + define('SOLR_SERVER_PORT', '8983'); + + /* HTTP Basic Authentication Username */ + define('SOLR_SERVER_USERNAME', ''); + + /* HTTP Basic Authentication password */ + define('SOLR_SERVER_PASSWORD', ''); + + /* HTTP connection timeout */ + /* This is maximum time in seconds allowed for the http data transfer operation. Default value is 30 seconds */ + define('SOLR_SERVER_TIMEOUT', 500000); + // collection path + define('SOLR_SERVER_COLLECTION_PATH', 'solr/r_code_search_collection'); +?> diff --git a/btn_goto_top.png b/btn_goto_top.png Binary files differnew file mode 100755 index 0000000..773b273 --- /dev/null +++ b/btn_goto_top.png diff --git a/scrolltopcontrol.js b/scrolltopcontrol.js new file mode 100755 index 0000000..806aaea --- /dev/null +++ b/scrolltopcontrol.js @@ -0,0 +1,76 @@ +//** jQuery Scroll to Top Control script- (c) Dynamic Drive DHTML code library: http://www.dynamicdrive.com. +//** Available/ usage terms at http://www.dynamicdrive.com (March 30th, 09') +//** v1.1 (April 7th, 09'): +//** 1) Adds ability to scroll to an absolute position (from top of page) or specific element on the page instead. +//** 2) Fixes scroll animation not working in Opera. + + +var scrolltotop={ + //startline: Integer. Number of pixels from top of doc scrollbar is scrolled before showing control + //scrollto: Keyword (Integer, or "Scroll_to_Element_ID"). How far to scroll document up when control is clicked on (0=top). + setting: {startline:700, scrollto:100, scrollduration:100, fadeduration:[500, 100]}, + controlHTML: '<img src="up_arrow1.png" style="width:34px; height:34px; " />', //HTML for control, which is auto wrapped in DIV w/ ID="topcontrol" + controlattrs: {offsetx:30, offsety:30 }, //offset of control relative to right/ bottom of window corner + anchorkeyword: '#top', //Enter href value of HTML anchors on the page that should also act as "Scroll Up" links + + state: {isvisible:false, shouldvisible:false}, + + scrollup:function(){ + if (!this.cssfixedsupport) //if control is positioned using JavaScript + this.$control.css({opacity:0}) //hide control immediately after clicking it + var dest=isNaN(this.setting.scrollto)? this.setting.scrollto : parseInt(this.setting.scrollto) + if (typeof dest=="string" && jQuery('#'+dest).length==1) //check element set by string exists + dest=jQuery('#'+dest).offset().top + else + dest=0 + this.$body.animate({scrollTop: dest}, this.setting.scrollduration); + }, + + keepfixed:function(){ + var $window=jQuery(window) + var controlx=$window.scrollLeft() + $window.width() - this.$control.width() - this.controlattrs.offsetx + var controly=$window.scrollTop() + $window.height() - this.$control.height() - this.controlattrs.offsety + this.$control.css({left:controlx+'px', top:controly+'px'}) + }, + + togglecontrol:function(){ + var scrolltop=jQuery(window).scrollTop() + if (!this.cssfixedsupport) + this.keepfixed() + this.state.shouldvisible=(scrolltop>=this.setting.startline)? true : false + if (this.state.shouldvisible && !this.state.isvisible){ + this.$control.stop().animate({opacity:1}, this.setting.fadeduration[0]) + this.state.isvisible=true + } + else if (this.state.shouldvisible==false && this.state.isvisible){ + this.$control.stop().animate({opacity:0}, this.setting.fadeduration[1]) + this.state.isvisible=false + } + }, + + init:function(){ + jQuery(document).ready(function($){ + var mainobj=scrolltotop + var iebrws=document.all + mainobj.cssfixedsupport=!iebrws || iebrws && document.compatMode=="CSS1Compat" && window.XMLHttpRequest //not IE or IE7+ browsers in standards mode + mainobj.$body=(window.opera)? (document.compatMode=="CSS1Compat"? $('html') : $('body')) : $('html,body') + mainobj.$control=$('<div id="topcontrol">'+mainobj.controlHTML+'</div>') + .css({position:mainobj.cssfixedsupport? 'fixed' : 'absolute', bottom:mainobj.controlattrs.offsety, right:mainobj.controlattrs.offsetx, opacity:0, cursor:'pointer'}) + .attr({title:'Scroll Back to Top'}) + .click(function(){mainobj.scrollup(); return false}) + .appendTo('body') + if (document.all && !window.XMLHttpRequest && mainobj.$control.text()!='') //loose check for IE6 and below, plus whether control contains any text + mainobj.$control.css({width:mainobj.$control.width()}) //IE6- seems to require an explicit width on a DIV containing text + mainobj.togglecontrol() + $('a[href="' + mainobj.anchorkeyword +'"]').click(function(){ + mainobj.scrollup() + return false + }) + $(window).bind('scroll resize', function(e){ + mainobj.togglecontrol() + }) + }) + } +} + +scrolltotop.init()
\ No newline at end of file diff --git a/settings.inc b/settings.inc new file mode 100755 index 0000000..cd02e2c --- /dev/null +++ b/settings.inc @@ -0,0 +1,34 @@ +<?php + function tbc_solr_search_settings_form($form_state) { + + $form['tbc_example_file_path'] = array( + '#type' => 'textfield', + '#title' => t('TBC Example File Path'), + '#size' => 50, + '#required' => TRUE, + '#default_value' => variable_get('tbc_example_file_path', ''), + ); + + $form['tbc_solr_max_results'] = array( + '#type' => 'textfield', + '#title' => t('Solr max results'), + '#size' => 50, + '#required' => TRUE, + '#default_value' => variable_get('tbc_solr_max_results', ''), + ); + + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Submit') + ); + + return $form; + } + + function tbc_solr_search_settings_form_submit($form, &$form_state) { + + variable_set('tbc_example_file_path', $form_state['values']['tbc_example_file_path']); + variable_set('tbc_solr_max_results', $form_state['values']['tbc_solr_max_results']); + + drupal_set_message(t('Settings Updated Successfully!'), 'status'); + } diff --git a/solr_search.css b/solr_search.css new file mode 100755 index 0000000..76b6894 --- /dev/null +++ b/solr_search.css @@ -0,0 +1,32 @@ +.search-result ol li p { + margin: 3px 0; +} + +.search-result ol li { + margin-bottom: 15px; + border-bottom: 1px dotted lightgrey; +} + +p.next-res a{ + text-align: right; +} + +.search-result{ + position:relative; + +} +.search-result div#topcontrol{ + display:none; + background:url('btn_goto_top.png'); + position: fixed; + bottom: 65px; + right: 30px; + opacity: 1; + cursor: pointer; +} +.search-result div#topcontrol a{ + width: 36px; + height: 36px; + display:block; + outline: 0; +} diff --git a/solr_search.info b/solr_search.info new file mode 100755 index 0000000..07fd3eb --- /dev/null +++ b/solr_search.info @@ -0,0 +1,5 @@ +name = R TBC Search +description = search inside textbook companion examples +package = IITB +version = 1.x +core = 7.x diff --git a/solr_search.js b/solr_search.js new file mode 100755 index 0000000..8774e13 --- /dev/null +++ b/solr_search.js @@ -0,0 +1,11 @@ +$(document).ready(function(){ + $("#topcontrol").stop().fadeOut(); + $(window).bind("scroll", function() { + if ($(this).scrollTop() > 950) { + $("#topcontrol").show(); + + } else { + $("#topcontrol").hide(); + } +}); +}); diff --git a/solr_search.module b/solr_search.module new file mode 100755 index 0000000..7574924 --- /dev/null +++ b/solr_search.module @@ -0,0 +1,227 @@ +<?php + +include "bootstrap.inc"; + +function solr_search_permission() +{ + + return array( + "administer solr-search" => array( + "title" => t("administer solr-search"), + "description" => t("administer solr-search."), + )); +} + +function solr_search_menu() +{ + $items = array(); + + // solr search admin settings + $items['admin/settings/solr_search'] = array( + 'title' => 'TBC Solr-Search admin settings', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('tbc_solr_search_settings_form'), + 'access arguments' => array('administer solr-search'), + 'type' => MENU_NORMAL_ITEM, + 'file' => 'settings.inc', + ); + + $items['tbc_solr_search'] = array( + 'title' => 'Search within Textbook Companion Examples', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('tbc_solr_search_form'), + 'access callback' => true, + 'type' => MENU_NORMAL_ITEM, + ); + + $items['add_all_books_to_solr'] = array( + 'title' => 'Add books to Solr', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('add_all_books_to_solr_form'), + 'access arguments' => array('administer solr-search'), + 'type' => MENU_NORMAL_ITEM, + ); + + return $items; +} + +function tbc_solr_search_form($form, &$form_state) +{ + + $form['tbc_search_key'] = array( + '#type' => 'textfield', + '#title' => t('Enter your keyword (Note: special characters ^ ( ) { } \" \' [ ] are not allowed)'), + '#required' => true, + ); + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Search'), + '#ajax' => array( + 'callback' => 'ajax_solr_search_result_callback', + ), + ); + $form['tbc_search_result'] = array( + '#type' => 'item', + '#markup' => '<div id="ajax-search-result-replace"></div>', + + ); + + return $form; +} + +function tbc_solr_search_form_validate($form, &$form_state) +{ + + if (preg_match('/[^a-zA-Z0-9]/', $form_state['values']['tbc_search_key'])) { + form_set_error('tbc_search_key', t('Special characters not allowed')); + } + return; +} + +function tbc_solr_search_form_submit($form, &$form_state) +{ + + //drupal_goto('tbc_solr_search', array('key' => $_POST['tbc_search_key'])); + // ajax_solr_search_result_callback($form,$form_state); +} +function add_all_books_to_solr_form($form, &$form_state) +{ + + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Add to Solr'), + // '#suffix' => $output, + ); + + return $form; +} + +function add_all_books_to_solr_form_submit($form, &$form_state) +{ + + global $user; + ini_set('max_execution_time', 500000); + $root_path = variable_get('tbc_example_file_path', ''); + //var_dump($root_path);die; + $options = array( + 'hostname' => SOLR_SERVER_HOSTNAME, + 'port' => SOLR_SERVER_PORT, + 'login' => SOLR_SERVER_USERNAME, + 'password' => SOLR_SERVER_PASSWORD, + 'path' => SOLR_SERVER_COLLECTION_PATH, + + ); + + $client = new SolrClient($options); + + $book_q = db_query("SELECT * FROM {textbook_companion_preference} WHERE approval_status=1 AND proposal_id IN (SELECT id from textbook_companion_proposal where proposal_status=3)"); + + while ($book_data = $book_q->fetchObject()) { + + $chapter_q = db_query("SELECT * FROM {textbook_companion_chapter} WHERE preference_id = :pref_id", array(":pref_id" => $book_data->id)); + + while ($chapter_row = $chapter_q->fetchObject()) { + //$example_q = db_query("SELECT * FROM {textbook_companion_example} WHERE chapter_id = %d AND approval_status = 1", $chapter_row->id); + $query = db_select('textbook_companion_example'); + $query->fields('textbook_companion_example'); + $query->condition('chapter_id', $chapter_row->id); + $query->condition('approval_status', 1); + $example_q = $query->execute(); + while ($example_row = $example_q->fetchObject()) { + //$example_files_q = db_query("SELECT * FROM {textbook_companion_example_files} WHERE example_id = %d", $example_row->id); + $example_files_q = db_query("select * from textbook_companion_preference tcp join textbook_companion_chapter tcc on tcp.id=tcc.preference_id join textbook_companion_example tce ON tcc.id=tce.chapter_id join textbook_companion_example_files tcef on tce.id=tcef.example_id where tcef.example_id= :example_id", array( + ':example_id' => $example_row->id, + )); + $extensions = array('R', 'r'); + $example_data = ''; + while ($example_files_row = $example_files_q->fetchObject()) { + if (in_array(end(explode('.', $root_path . $example_files_row->directory_name . '/' . $example_files_row->filepath)), $extensions)) { + // echo $example_files_row->filepath."<br />"; + $my_file = $root_path . $example_files_row->directory_name . '/' . $example_files_row->filepath; + $handle = fopen($my_file, 'r'); + $example_data = fread($handle, filesize($my_file)); + fclose($handle); + } + } + $doc = new SolrInputDocument(); + $doc->addField('id', $example_row->id); + $doc->addField('chapter_id', $chapter_row->id); + $doc->addField('book_id', $book_data->id); + $doc->addField('title', mb_convert_encoding($book_data->book, "UTF-8")); + $doc->addField('author', mb_convert_encoding($book_data->author, "UTF-8")); + $doc->addField('chapter', mb_convert_encoding(($chapter_row->number . '. ' . $chapter_row->name), "UTF-8")); + $doc->addField('example', mb_convert_encoding(($example_row->number . '. ' . $example_row->caption), "UTF-8")); + $doc->addField('content', mb_convert_encoding($example_data, "UTF-8")); + $doc->addField('added_by', $user->name); + $doc->addField('timestamp', date('Y-m-d H:i:s')); + $updateResponse = $client->addDocument($doc, false); + $client->commit(); + } + } + } + + drupal_set_message('Books under are updated to solr'); +} + +function ajax_solr_search_result_callback($form, $form_state) +{ + $output = ''; + //$key = ''; + //var_dump($form_state['values']['tbc_search_key']);die; + $path = drupal_get_path('module', 'solr_search'); + drupal_add_css($path . '/solr_search.css', 'module'); + drupal_add_js($path . '/solr_search.js', 'module'); + if (isset($form_state['values']['tbc_search_key'])) { + $key = $form_state['values']['tbc_search_key']; + $output = ''; + $numfound = ''; + if ($key != '') { + $options = array( + 'hostname' => SOLR_SERVER_HOSTNAME, + 'login' => SOLR_SERVER_USERNAME, + 'password' => SOLR_SERVER_PASSWORD, + 'port' => SOLR_SERVER_PORT, + 'path' => SOLR_SERVER_COLLECTION_PATH, + ); + $client = new SolrClient($options); + + $query = new SolrQuery(); + + $query->setQuery('content:' . $key); + + $query->setStart(0); + + $query->setRows(variable_get('tbc_solr_max_results', '')); + + $query->addField('id')->addField('book_id')->addField('title')->addField('author')->addField('chapter')->addField('example')->addField('content')->addSortField('title', 0); + + $query_response = $client->query($query); + + $response = $query_response->getResponse(); + + $sno = 0; + $output = '<p><b>' . $response['response']->numFound . ' result(s) found</b></p>'; + if ($response['response']->numFound > 0) { + $output .= '<div class="search-result"><div id="topcontrol" title="Scroll Back to Top"><a href="#top"></a></div><ol>'; + $download_link = ''; + foreach ($response['response']->docs as $doc) { + $sno++; + $output .= '<li>'; + $output .= '<p><b>Book: </b>' . $doc->title . ' (' . $doc->author . ')</p>'; + $output .= '<p><b>Chapter: </b>' . $doc->chapter[0] . '</p>'; + $output .= '<p><b>Example: </b>' . $doc->example[0] . '</p>'; + $output .= '<p><b>Links: </b> <a href="' . base_path() . 'textbook-companion/download/book/' . $doc->book_id[0]. '"><span style="font-weight:bold;font-size:16px;">⤵</span> Download entire book</a> <a href="127.0.0.1/index?eid=' . $doc->id . '" target="_blank"><span style="font-weight:bold;font-size:20px;">»</span> View this example</a></p>'; + $output .= '</li>'; + } + $output .= '<ol></div>'; + } + } else { + $output .= '<p><b>0 result(s) found</b></p>'; + } + } + + $commands[] = ajax_command_html("#ajax-search-result-replace", $output); + + return array('#type' => 'ajax', '#commands' => $commands); + +} |