summaryrefslogtreecommitdiff
path: root/thirdparty1/linux/include/opencv2/flann/autotuned_index.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty1/linux/include/opencv2/flann/autotuned_index.h')
-rw-r--r--thirdparty1/linux/include/opencv2/flann/autotuned_index.h588
1 files changed, 588 insertions, 0 deletions
diff --git a/thirdparty1/linux/include/opencv2/flann/autotuned_index.h b/thirdparty1/linux/include/opencv2/flann/autotuned_index.h
new file mode 100644
index 0000000..6ffb929
--- /dev/null
+++ b/thirdparty1/linux/include/opencv2/flann/autotuned_index.h
@@ -0,0 +1,588 @@
+/***********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
+ * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
+ *
+ * THE BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************/
+#ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
+#define OPENCV_FLANN_AUTOTUNED_INDEX_H_
+
+#include "general.h"
+#include "nn_index.h"
+#include "ground_truth.h"
+#include "index_testing.h"
+#include "sampling.h"
+#include "kdtree_index.h"
+#include "kdtree_single_index.h"
+#include "kmeans_index.h"
+#include "composite_index.h"
+#include "linear_index.h"
+#include "logger.h"
+
+namespace cvflann
+{
+
+template<typename Distance>
+NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
+
+
+struct AutotunedIndexParams : public IndexParams
+{
+ AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
+ {
+ (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
+ // precision desired (used for autotuning, -1 otherwise)
+ (*this)["target_precision"] = target_precision;
+ // build tree time weighting factor
+ (*this)["build_weight"] = build_weight;
+ // index memory weighting factor
+ (*this)["memory_weight"] = memory_weight;
+ // what fraction of the dataset to use for autotuning
+ (*this)["sample_fraction"] = sample_fraction;
+ }
+};
+
+
+template <typename Distance>
+class AutotunedIndex : public NNIndex<Distance>
+{
+public:
+ typedef typename Distance::ElementType ElementType;
+ typedef typename Distance::ResultType DistanceType;
+
+ AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
+ dataset_(inputData), distance_(d)
+ {
+ target_precision_ = get_param(params, "target_precision",0.8f);
+ build_weight_ = get_param(params,"build_weight", 0.01f);
+ memory_weight_ = get_param(params, "memory_weight", 0.0f);
+ sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
+ bestIndex_ = NULL;
+ }
+
+ AutotunedIndex(const AutotunedIndex&);
+ AutotunedIndex& operator=(const AutotunedIndex&);
+
+ virtual ~AutotunedIndex()
+ {
+ if (bestIndex_ != NULL) {
+ delete bestIndex_;
+ bestIndex_ = NULL;
+ }
+ }
+
+ /**
+ * Method responsible with building the index.
+ */
+ virtual void buildIndex()
+ {
+ std::ostringstream stream;
+ bestParams_ = estimateBuildParams();
+ print_params(bestParams_, stream);
+ Logger::info("----------------------------------------------------\n");
+ Logger::info("Autotuned parameters:\n");
+ Logger::info("%s", stream.str().c_str());
+ Logger::info("----------------------------------------------------\n");
+
+ bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
+ bestIndex_->buildIndex();
+ speedup_ = estimateSearchParams(bestSearchParams_);
+ stream.str(std::string());
+ print_params(bestSearchParams_, stream);
+ Logger::info("----------------------------------------------------\n");
+ Logger::info("Search parameters:\n");
+ Logger::info("%s", stream.str().c_str());
+ Logger::info("----------------------------------------------------\n");
+ }
+
+ /**
+ * Saves the index to a stream
+ */
+ virtual void saveIndex(FILE* stream)
+ {
+ save_value(stream, (int)bestIndex_->getType());
+ bestIndex_->saveIndex(stream);
+ save_value(stream, get_param<int>(bestSearchParams_, "checks"));
+ }
+
+ /**
+ * Loads the index from a stream
+ */
+ virtual void loadIndex(FILE* stream)
+ {
+ int index_type;
+
+ load_value(stream, index_type);
+ IndexParams params;
+ params["algorithm"] = (flann_algorithm_t)index_type;
+ bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
+ bestIndex_->loadIndex(stream);
+ int checks;
+ load_value(stream, checks);
+ bestSearchParams_["checks"] = checks;
+ }
+
+ /**
+ * Method that searches for nearest-neighbors
+ */
+ virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+ {
+ int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
+ if (checks == FLANN_CHECKS_AUTOTUNED) {
+ bestIndex_->findNeighbors(result, vec, bestSearchParams_);
+ }
+ else {
+ bestIndex_->findNeighbors(result, vec, searchParams);
+ }
+ }
+
+
+ IndexParams getParameters() const
+ {
+ return bestIndex_->getParameters();
+ }
+
+ SearchParams getSearchParameters() const
+ {
+ return bestSearchParams_;
+ }
+
+ float getSpeedup() const
+ {
+ return speedup_;
+ }
+
+
+ /**
+ * Number of features in this index.
+ */
+ virtual size_t size() const
+ {
+ return bestIndex_->size();
+ }
+
+ /**
+ * The length of each vector in this index.
+ */
+ virtual size_t veclen() const
+ {
+ return bestIndex_->veclen();
+ }
+
+ /**
+ * The amount of memory (in bytes) this index uses.
+ */
+ virtual int usedMemory() const
+ {
+ return bestIndex_->usedMemory();
+ }
+
+ /**
+ * Algorithm name
+ */
+ virtual flann_algorithm_t getType() const
+ {
+ return FLANN_INDEX_AUTOTUNED;
+ }
+
+private:
+
+ struct CostData
+ {
+ float searchTimeCost;
+ float buildTimeCost;
+ float memoryCost;
+ float totalCost;
+ IndexParams params;
+ };
+
+ void evaluate_kmeans(CostData& cost)
+ {
+ StartStopTimer t;
+ int checks;
+ const int nn = 1;
+
+ Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
+ get_param<int>(cost.params,"iterations"),
+ get_param<int>(cost.params,"branching"));
+ KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
+ // measure index build time
+ t.start();
+ kmeans.buildIndex();
+ t.stop();
+ float buildTime = (float)t.value;
+
+ // measure search time
+ float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
+
+ float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
+ cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
+ cost.searchTimeCost = searchTime;
+ cost.buildTimeCost = buildTime;
+ Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
+ }
+
+
+ void evaluate_kdtree(CostData& cost)
+ {
+ StartStopTimer t;
+ int checks;
+ const int nn = 1;
+
+ Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
+ KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
+
+ t.start();
+ kdtree.buildIndex();
+ t.stop();
+ float buildTime = (float)t.value;
+
+ //measure search time
+ float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
+
+ float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
+ cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
+ cost.searchTimeCost = searchTime;
+ cost.buildTimeCost = buildTime;
+ Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
+ }
+
+
+ // struct KMeansSimpleDownhillFunctor {
+ //
+ // Autotune& autotuner;
+ // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
+ //
+ // float operator()(int* params) {
+ //
+ // float maxFloat = numeric_limits<float>::max();
+ //
+ // if (params[0]<2) return maxFloat;
+ // if (params[1]<0) return maxFloat;
+ //
+ // CostData c;
+ // c.params["algorithm"] = KMEANS;
+ // c.params["centers-init"] = CENTERS_RANDOM;
+ // c.params["branching"] = params[0];
+ // c.params["max-iterations"] = params[1];
+ //
+ // autotuner.evaluate_kmeans(c);
+ //
+ // return c.timeCost;
+ //
+ // }
+ // };
+ //
+ // struct KDTreeSimpleDownhillFunctor {
+ //
+ // Autotune& autotuner;
+ // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
+ //
+ // float operator()(int* params) {
+ // float maxFloat = numeric_limits<float>::max();
+ //
+ // if (params[0]<1) return maxFloat;
+ //
+ // CostData c;
+ // c.params["algorithm"] = KDTREE;
+ // c.params["trees"] = params[0];
+ //
+ // autotuner.evaluate_kdtree(c);
+ //
+ // return c.timeCost;
+ //
+ // }
+ // };
+
+
+
+ void optimizeKMeans(std::vector<CostData>& costs)
+ {
+ Logger::info("KMEANS, Step 1: Exploring parameter space\n");
+
+ // explore kmeans parameters space using combinations of the parameters below
+ int maxIterations[] = { 1, 5, 10, 15 };
+ int branchingFactors[] = { 16, 32, 64, 128, 256 };
+
+ int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
+ costs.reserve(costs.size() + kmeansParamSpaceSize);
+
+ // evaluate kmeans for all parameter combinations
+ for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
+ for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
+ CostData cost;
+ cost.params["algorithm"] = FLANN_INDEX_KMEANS;
+ cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
+ cost.params["iterations"] = maxIterations[i];
+ cost.params["branching"] = branchingFactors[j];
+
+ evaluate_kmeans(cost);
+ costs.push_back(cost);
+ }
+ }
+
+ // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
+ //
+ // const int n = 2;
+ // // choose initial simplex points as the best parameters so far
+ // int kmeansNMPoints[n*(n+1)];
+ // float kmeansVals[n+1];
+ // for (int i=0;i<n+1;++i) {
+ // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
+ // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
+ // kmeansVals[i] = kmeansCosts[i].timeCost;
+ // }
+ // KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
+ // // run optimization
+ // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
+ // // store results
+ // for (int i=0;i<n+1;++i) {
+ // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
+ // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
+ // kmeansCosts[i].timeCost = kmeansVals[i];
+ // }
+ }
+
+
+ void optimizeKDTree(std::vector<CostData>& costs)
+ {
+ Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
+
+ // explore kd-tree parameters space using the parameters below
+ int testTrees[] = { 1, 4, 8, 16, 32 };
+
+ // evaluate kdtree for all parameter combinations
+ for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
+ CostData cost;
+ cost.params["algorithm"] = FLANN_INDEX_KDTREE;
+ cost.params["trees"] = testTrees[i];
+
+ evaluate_kdtree(cost);
+ costs.push_back(cost);
+ }
+
+ // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
+ //
+ // const int n = 1;
+ // // choose initial simplex points as the best parameters so far
+ // int kdtreeNMPoints[n*(n+1)];
+ // float kdtreeVals[n+1];
+ // for (int i=0;i<n+1;++i) {
+ // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
+ // kdtreeVals[i] = kdtreeCosts[i].timeCost;
+ // }
+ // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
+ // // run optimization
+ // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
+ // // store results
+ // for (int i=0;i<n+1;++i) {
+ // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
+ // kdtreeCosts[i].timeCost = kdtreeVals[i];
+ // }
+ }
+
+ /**
+ * Chooses the best nearest-neighbor algorithm and estimates the optimal
+ * parameters to use when building the index (for a given precision).
+ * Returns a dictionary with the optimal parameters.
+ */
+ IndexParams estimateBuildParams()
+ {
+ std::vector<CostData> costs;
+
+ int sampleSize = int(sample_fraction_ * dataset_.rows);
+ int testSampleSize = std::min(sampleSize / 10, 1000);
+
+ Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
+
+ // For a very small dataset, it makes no sense to build any fancy index, just
+ // use linear search
+ if (testSampleSize < 10) {
+ Logger::info("Choosing linear, dataset too small\n");
+ return LinearIndexParams();
+ }
+
+ // We use a fraction of the original dataset to speedup the autotune algorithm
+ sampledDataset_ = random_sample(dataset_, sampleSize);
+ // We use a cross-validation approach, first we sample a testset from the dataset
+ testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
+
+ // We compute the ground truth using linear search
+ Logger::info("Computing ground truth... \n");
+ gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
+ StartStopTimer t;
+ t.start();
+ compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
+ t.stop();
+
+ CostData linear_cost;
+ linear_cost.searchTimeCost = (float)t.value;
+ linear_cost.buildTimeCost = 0;
+ linear_cost.memoryCost = 0;
+ linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
+
+ costs.push_back(linear_cost);
+
+ // Start parameter autotune process
+ Logger::info("Autotuning parameters...\n");
+
+ optimizeKMeans(costs);
+ optimizeKDTree(costs);
+
+ float bestTimeCost = costs[0].searchTimeCost;
+ for (size_t i = 0; i < costs.size(); ++i) {
+ float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
+ if (timeCost < bestTimeCost) {
+ bestTimeCost = timeCost;
+ }
+ }
+
+ float bestCost = costs[0].searchTimeCost / bestTimeCost;
+ IndexParams bestParams = costs[0].params;
+ if (bestTimeCost > 0) {
+ for (size_t i = 0; i < costs.size(); ++i) {
+ float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
+ memory_weight_ * costs[i].memoryCost;
+ if (crtCost < bestCost) {
+ bestCost = crtCost;
+ bestParams = costs[i].params;
+ }
+ }
+ }
+
+ delete[] gt_matches_.data;
+ delete[] testDataset_.data;
+ delete[] sampledDataset_.data;
+
+ return bestParams;
+ }
+
+
+
+ /**
+ * Estimates the search time parameters needed to get the desired precision.
+ * Precondition: the index is built
+ * Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search.
+ */
+ float estimateSearchParams(SearchParams& searchParams)
+ {
+ const int nn = 1;
+ const size_t SAMPLE_COUNT = 1000;
+
+ assert(bestIndex_ != NULL); // must have a valid index
+
+ float speedup = 0;
+
+ int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
+ if (samples > 0) {
+ Matrix<ElementType> testDataset = random_sample(dataset_, samples);
+
+ Logger::info("Computing ground truth\n");
+
+ // we need to compute the ground truth first
+ Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
+ StartStopTimer t;
+ t.start();
+ compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
+ t.stop();
+ float linear = (float)t.value;
+
+ int checks;
+ Logger::info("Estimating number of checks\n");
+
+ float searchTime;
+ float cb_index;
+ if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
+ Logger::info("KMeans algorithm, estimating cluster border factor\n");
+ KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
+ float bestSearchTime = -1;
+ float best_cb_index = -1;
+ int best_checks = -1;
+ for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
+ kmeans->set_cb_index(cb_index);
+ searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
+ if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
+ bestSearchTime = searchTime;
+ best_cb_index = cb_index;
+ best_checks = checks;
+ }
+ }
+ searchTime = bestSearchTime;
+ cb_index = best_cb_index;
+ checks = best_checks;
+
+ kmeans->set_cb_index(best_cb_index);
+ Logger::info("Optimum cb_index: %g\n", cb_index);
+ bestParams_["cb_index"] = cb_index;
+ }
+ else {
+ searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
+ }
+
+ Logger::info("Required number of checks: %d \n", checks);
+ searchParams["checks"] = checks;
+
+ speedup = linear / searchTime;
+
+ delete[] gt_matches.data;
+ delete[] testDataset.data;
+ }
+
+ return speedup;
+ }
+
+private:
+ NNIndex<Distance>* bestIndex_;
+
+ IndexParams bestParams_;
+ SearchParams bestSearchParams_;
+
+ Matrix<ElementType> sampledDataset_;
+ Matrix<ElementType> testDataset_;
+ Matrix<int> gt_matches_;
+
+ float speedup_;
+
+ /**
+ * The dataset used by this index
+ */
+ const Matrix<ElementType> dataset_;
+
+ /**
+ * Index parameters
+ */
+ float target_precision_;
+ float build_weight_;
+ float memory_weight_;
+ float sample_fraction_;
+
+ Distance distance_;
+
+
+};
+}
+
+#endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */