/********************************************************************* * * Software License Agreement (BSD License) * * Copyright (c) 2008, Willow Garage, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * Neither the name of the Willow Garage nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * Author: Alex Teichman *********************************************************************/ #include #include using namespace std; using namespace Eigen; using boost::shared_ptr; #define EIGEN_NO_DEBUG bool g_int = false; MultiBooster::MultiBooster(MultiBoosterDataset* mbd, int num_threads) : version_string_(CLASSIFIER_VERSION), mbd_(NULL), verbose_(false), total_objs_seen_(vector(mbd->class_map_.size())), num_bg_seen_(0), prior_(VectorXf::Zero(mbd->class_map_.size())), wc_limiter_(0), num_threads_(num_threads) { useDataset(mbd, true); battery_ = vector< vector >(feature_map_.size()); } MultiBooster::MultiBooster(istream& in, int num_threads) : version_string_(CLASSIFIER_VERSION), mbd_(0), verbose_(false), wc_limiter_(0), num_threads_(num_threads) { deserialize(in); } MultiBooster::MultiBooster(string filename, int num_threads) : version_string_(CLASSIFIER_VERSION), mbd_(0), verbose_(false), wc_limiter_(0), num_threads_(num_threads) { ifstream f; f.open(filename.c_str()); if(f.fail()) { cerr << "Failed to open file " << filename << endl; throw 1; } deserialize(f); f.close(); } MultiBooster::MultiBooster(const MultiBooster& mb) : version_string_(mb.version_string_), class_map_(mb.class_map_), feature_map_(mb.feature_map_), mbd_(mb.mbd_), verbose_(mb.verbose_), total_objs_seen_(mb.total_objs_seen_), num_bg_seen_(mb.num_bg_seen_), prior_(mb.prior_), wc_limiter_(0), log_weights_(mb.log_weights_), num_threads_(mb.num_threads_) { // -- Copy weak classifiers. pwcs_ = vector(mb.pwcs_.size(), 0); for(size_t t=0; t >(feature_map_.size()); for(size_t t=0; tfsid_].push_back(pwcs_[t]); } MultiBooster::~MultiBooster() { for(size_t i=0; itranslate(class_translator, feature_translator); } // -- Translate battery and centers, and expand if we are getting new feature spaces. 
vector< vector > new_battery(feature_translator.size()); vector new_centers(feature_translator.size()); for(size_t d=0; d trees(feature_translator.size(), NULL); for(size_t d=0; d total_objs_seen(class_translator.size()); for(int c=0; c= vals_.rows()); for(size_t c=0; c<(size_t)vals_.rows(); ++c) { size_t idx = class_translator.toMap2(c); new_vals(idx) = vals_(c); } vals_ = new_vals; } void updateAccumulators(MatrixXd* H, VectorXd* g, const vector& idx) { assert(H->rows() == H->cols()); assert(g->rows() == H->rows()); for(int i=0; i<(int)idx.size(); ++i) { (*g)(idx[i])++; for(int j=0; j<(int)idx.size(); ++j) { (*H)(idx[i], idx[j])++; } } } void MultiBooster::updateGradientAndHessian(MultiBoosterDataset* mbd) { if(pwcs_.empty()) return; augmentAndTranslate(mbd); // -- Initialize Hessian and gradient accumulators if we haven't yet. if(H_neg_.empty()) { cout << "Initializing H and g." << endl; assert(H_pos_.empty() && g_pos_.empty() && g_neg_.empty()); H_neg_.reserve(class_map_.size()); H_pos_.reserve(class_map_.size()); g_neg_.reserve(class_map_.size()); g_pos_.reserve(class_map_.size()); for(size_t c=0; cobjs_.size(); ++m) { //cout << m << "/" << mbd->objs_.size() << endl; if((int)m%(int)(mbd->objs_.size() / 10) == 0) cout << "."; cout.flush(); // -- Build the activation vector from all feature spaces. vector idx; idx.reserve(pwcs_.size()); for(size_t d=0; d act; findActivatedWCs(*mbd->objs_[m], d, &act); if(act.empty()) continue; for(size_t t=0; tid_); } //cout << "idx: " << (double)idx.size() / (double)pwcs_.size() << endl; // -- For each class, update the gradient and Hessian accumulators. for(size_t c=0; cymc_(c,m) == 1) { updateAccumulators(&H_pos_[c], &g_pos_[c], idx); } else if(mbd->ymc_(c,m) == -1) { updateAccumulators(&H_neg_[c], &g_neg_[c], idx); } else assert(0); } } cout << " done. Took " << (double)(clock() - start) / (double) CLOCKS_PER_SEC << " seconds." 
<< endl; } void MultiBooster::augmentAndTranslate(MultiBoosterDataset* mbd) { // -- Augment our name maps with the dataset's and apply them to all data structures in the classifier. NameMapping new_class_map = class_map_; new_class_map.augment(mbd->class_map_); NameMapping new_feature_map = feature_map_; new_feature_map.augment(mbd->feature_map_); applyNewMappings(new_class_map, new_feature_map, false); // -- Put the dataset into our naming convention. mbd->applyNewMappings(class_map_, feature_map_); } //! Loads a dataset for training. void MultiBooster::useDataset(MultiBoosterDataset* mbd, bool reset_statistics) { mbd_ = mbd; augmentAndTranslate(mbd); // -- Make sure there are no unlabeled objects. (-2s) for(size_t m=0; mobjs_.size(); ++m) { if(mbd_->objs_[m]->label_ <= -2) { cerr << "Supervised learning on a dataset with unlabeled data makes no sense." << endl; throw 1; } } // -- Update statistics. if(reset_statistics) { num_bg_seen_ = mbd->num_bg_; for(size_t c=0; cnum_objs_of_class_.size(); ++c) total_objs_seen_[c] = mbd->num_objs_of_class_[c]; } else { num_bg_seen_ += mbd->num_bg_; for(size_t c=0; cnum_objs_of_class_.size(); ++c) total_objs_seen_[c] += mbd->num_objs_of_class_[c]; } // -- Update gradient and Hessian. //updateGradientAndHessian();// Making the user do this manually for now. // -- Learn prior and initialize the log_weights_ with it. learnPrior(); //hrt.start(); recomputeLogWeights(); //hrt.stop(); //cout << "recomputeLogWeights took " << hrt.getSec() << " seconds." << endl; } void MultiBooster::balanceResponsesMEL(MultiBoosterDataset* dataset, Eigen::SparseMatrix* indicator) { balanceResponses(false, dataset, indicator); } void MultiBooster::balanceResponsesMLS(MultiBoosterDataset* dataset, Eigen::SparseMatrix* indicator) { balanceResponses(true, dataset, indicator); } void MultiBooster::balanceResponses(bool mls, MultiBoosterDataset* dataset, Eigen::SparseMatrix* indicator) { // -- Don't allow there to be any differences in the mapping. 
assert(dataset->class_map_.compare(class_map_)); assert(dataset->feature_map_.compare(feature_map_)); mbd_ = dataset; // -- Make sure there are no unlabeled objects. (-2s) for(size_t m=0; mobjs_.size(); ++m) { if(mbd_->objs_[m]->label_ <= -2) { cerr << "Supervised learning on a dataset with unlabeled data makes no sense." << endl; throw 1; } } // -- Reset the statistics (assumes we're getting an augmented version of an old dataset, not totally new data). num_bg_seen_ = mbd_->num_bg_; for(size_t c=0; cnum_objs_of_class_.size(); ++c) total_objs_seen_[c] = mbd_->num_objs_of_class_[c]; // -- Learn the prior based on new statistics. cout << "Old prior: " << prior_.transpose() << endl; learnPrior(); cout << "New prior: " << prior_.transpose() << endl; // -- Compute data that will be shared between the solutions for each class. bool cleanup = false; if(!indicator) { indicator = new Eigen::SparseMatrix(); cleanup = true; computeIndicatorMatrix(*dataset, indicator); } // -- Run response balancing for each class. double mean_obj = 0; //for(size_t i = 0; i < class_map_.size(); ++i) { for(int i = class_map_.size() - 1; i >= 0; --i) { if(mls) { cout << "Running MLS response balancing for class " << i << " == " << class_map_.toName(i) << endl; HighResTimer hrt; hrt.start(); mean_obj += balanceResponsesMLS(*dataset, indicator, i); hrt.stop(); cout << "Optimization took " << hrt.getSeconds() << " seconds." << endl; } else { cout << "Running MEL response balancing for class " << i << " == " << class_map_.toName(i) << endl; HighResTimer hrt; hrt.start(); mean_obj += balanceResponsesMEL(*dataset, indicator, i); hrt.stop(); cout << "Optimization took " << hrt.getSeconds() << " seconds." << endl; } } mean_obj /= (double)class_map_.size(); cout << "Mean objective: " << mean_obj << endl; // -- Update log weights based on the new prior and response values. // Use the cached indicator matrix. 
vector& objs = mbd_->objs_; assert(class_map_.size() > 0); assert(objs.size() > 0); // First set the log weights to be just the prior. log_weights_ = MatrixXd::Zero(class_map_.size(), objs.size()); for(int m = 0; m < log_weights_.cols(); ++m) log_weights_.col(m) = (prior_.array() * (-mbd_->ymc_.col(m)).array()).matrix().cast(); // Now add all the WCs. assert((size_t)indicator->cols() == objs.size()); for(int m = 0; m < indicator->cols(); ++m) { for(Eigen::SparseMatrix::InnerIterator it(*indicator, m); it; ++it) { assert(it.value() == 1); WeakClassifier* wc = pwcs_[it.row()]; log_weights_.col(m) += (mbd_->ymc_.col(m).array() * (-wc->vals_.array())).matrix().cast(); } } // -- Clean up indicator if it wasn't passed in. if(cleanup) { delete indicator; } } void MultiBooster::setUpRBData(const MultiBoosterDataset& dataset, int label, Eigen::SparseMatrix* indicator, Eigen::VectorXd* b) { *b = VectorXd::Zero(dataset.objs_.size()); for(int i = 0; i < indicator->cols(); ++i) { b->coeffRef(i) = -dataset.ymc_(label, i) * prior_(label); assert(fabs(dataset.ymc_(label, i)) == 1); for(Eigen::SparseMatrix::InnerIterator it(*indicator, i); it; ++it) { indicator->coeffRef(it.row(), it.col()) *= -dataset.ymc_(label, i); // Revert these changes with cleanUpRBData(); } } } void MultiBooster::cleanUpRBData(const MultiBoosterDataset& dataset, int label, Eigen::SparseMatrix* indicator) { // -- Revert the changes we made to the indicator matrix. for(int i = 0; i < indicator->cols(); ++i) { assert(fabs(dataset.ymc_(label, i)) == 1); for(Eigen::SparseMatrix::InnerIterator it(*indicator, i); it; ++it) { indicator->coeffRef(it.row(), it.col()) *= -dataset.ymc_(label, i); } } } // indicator is numWCs x numTrEx. 
double MultiBooster::balanceResponsesMEL(const MultiBoosterDataset& dataset, Eigen::SparseMatrix* indicator, int label) { assert((size_t)indicator->cols() == dataset.objs_.size()); assert(dataset.class_map_.compare(class_map_)); assert(dataset.feature_map_.compare(feature_map_)); // -- Set up the optimization data. VectorXd b; setUpRBData(dataset, label, indicator, &b); // -- Set up MEL. ObjectiveMELSparse obj_mel(indicator, &b); GradientMELSparse grad_mel(indicator, &b); VectorXd init = VectorXd::Zero(pwcs_.size()); for(int i = 0; i < init.rows(); ++i) init(i) = pwcs_[i]->vals_(label); if(verbose_) cout << "Response balancing (MEL) initial objective: " << obj_mel.eval(init) << endl; // -- Run the solver. double tol = 1e-5; // was -6 double alpha = 0.3; double beta = 0.5; //double stepsize = 1.0; //GradientSolver solver_mel(&obj_mel, &grad_mel, tol, alpha, beta, 0, 1, true); NesterovGradientSolver solver_mel(&obj_mel, &grad_mel, tol, alpha, beta, 0, 1, verbose_); HighResTimer hrt; hrt.start(); VectorXd solution = solver_mel.solve(init); hrt.stop(); // -- Install the new responses. for(size_t i = 0; i < pwcs_.size(); ++i) pwcs_[i]->vals_(label) = solution(i); double pstar = obj_mel.eval(solution); if(verbose_) { cout << "Optimization took " << hrt.getSeconds() << " seconds." << endl; cout << "Solution: " << endl << solution.transpose() << endl; cout << "Optimal value: " << pstar << endl; } cleanUpRBData(dataset, label, indicator); return pstar; } // indicator is numWCs x numTrEx. double MultiBooster::balanceResponsesMLS(const MultiBoosterDataset& dataset, Eigen::SparseMatrix* indicator, int label) { assert((size_t)indicator->cols() == dataset.objs_.size()); assert(dataset.class_map_.compare(class_map_)); assert(dataset.feature_map_.compare(feature_map_)); // -- Set up the optimization data. VectorXd b; setUpRBData(dataset, label, indicator, &b); b *= 2.0; // b is a vector with abs vals of the prior, which was trained with MEL and therefore is 1/2 log odds. 
We want full log odds. // -- Set up MLS. ObjectiveMLSSparse obj_mls(indicator, &b); GradientMLSSparse grad_mls(indicator, &b); VectorXd init = VectorXd::Zero(pwcs_.size()); for(int i = 0; i < init.rows(); ++i) init(i) = pwcs_[i]->vals_(label) * 2.0; if(verbose_) cout << "Response balancing (MLS) initial objective: " << obj_mls.eval(init) << endl; // -- Run the solver. double tol = 1e-5; // was -6 double alpha = 0.3; double beta = 0.5; // double alpha = 0.01; // Super crude settings. // double beta = 0.1; //double stepsize = 1.0; //GradientSolver solver_mls(&obj_mls, &grad_mls, tol, alpha, beta, 0, 1, true); HessianMLSSparse hess_mls(indicator, &b); NesterovGradientSolver solver_mls(&obj_mls, &grad_mls, tol, alpha, beta, 0, 1, verbose_); //solver_mls.hessian_ = &hess_mls; // Evaluate the condition number at every step. HighResTimer hrt; hrt.start(); VectorXd solution = solver_mls.solve(init); hrt.stop(); // -- Install the new responses. for(size_t i = 0; i < pwcs_.size(); ++i) pwcs_[i]->vals_(label) = solution(i) * 0.5; // Back to 1/2 log odds. double pstar = obj_mls.eval(solution); if(verbose_) { cout << "Optimization took " << hrt.getSeconds() << " seconds." 
<< endl; cout << "Solution: " << endl << solution.transpose() << endl; cout << "Optimal value: " << pstar << endl; } cleanUpRBData(dataset, label, indicator); return pstar; } void MultiBooster::computeIndicatorMatrix(const MultiBoosterDataset& dataset, Eigen::SparseMatrix* indicator) const { assert(indicator); for(size_t i = 0; i < pwcs_.size(); ++i) assert(pwcs_[i]->id_ == i); *indicator = Eigen::SparseMatrix(pwcs_.size(), dataset.objs_.size()); int expected_num_nonzero = dataset.objs_.size() * pwcs_.size() * 0.1; indicator->startFill(expected_num_nonzero); int num_nonzero = 0; for(size_t i = 0; i < dataset.objs_.size(); ++i) { vector activations; treeClassify(*dataset.objs_[i], NULL, NULL, &activations); for(size_t j = 0; j < activations.size(); ++j) { ++num_nonzero; indicator->fillrand(activations[j]->id_, i) = 1.0; //indicator->coeffRef(activations[j]->id_, i) = 1.0; } } indicator->endFill(); cout << "Expected " << expected_num_nonzero << " nonzeros, got " << num_nonzero << endl; } void MultiBooster::learnPrior() { assert(total_objs_seen_.size() == class_map_.size()); assert(total_objs_seen_.size() != 0); prior_ = VectorXf::Zero(class_map_.size()); double total_objs = 0; total_objs += num_bg_seen_; for(size_t c=0; c 1e-6) { // cout << "obj: " << obj << ", jiggled_obj " << jiggled_obj << endl; // } // // cout << "obj: " << obj << ", jiggled_obj " << jiggled_obj << endl; // assert(obj - jiggled_obj <= 1e-6); // } // } } bool MultiBooster::save(string filename) { ofstream f; f.open(filename.c_str()); if(f.fail()) { cerr << "Failed to open file " << filename << endl; return false; } serialize(f); f.close(); return true; } WeakClassifier::WeakClassifier(istream& is) : serialization_version_(WC_VERSION) { string line; is >> line; assert(line.compare("weak_classifier") == 0); is >> line; assert(line.compare("serialization_version") == 0); size_t sv; is >> sv; if(sv != serialization_version_) { cerr << "Wrong version of WeakClassifier. 
Expected " << serialization_version_ << ", got " << sv << "." << endl; throw 1; } is >> line; assert(line.compare("id") == 0); is >> id_; is >> line; is >> fsid_; getline(is, line); getline(is, line); assert(line.compare("theta") == 0); is.read((char*)&theta_, sizeof(float)); getline(is, line); getline(is, line); assert(line.compare("length_squared") == 0); is.read((char*)&length_squared_, sizeof(float)); is >> line; assert(line.compare("center") == 0); size_t num_rows; is >> num_rows; getline(is, line); // Get off the line before. >> does not appear to eat the newline at the end, but read() does. float* buf = (float*)malloc(sizeof(float)*num_rows); is.read((char*)buf, sizeof(float)*num_rows); VectorXf tmp = Eigen::Map(buf, num_rows); //TODO: This copy is probably not necessary. center_ = tmp; free(buf); is >> line; assert(line.compare("vals") == 0); is >> num_rows; getline(is, line); float* buf2 = (float*)malloc(sizeof(float)*num_rows); is.read((char*)buf2, sizeof(float)*num_rows); tmp = Eigen::Map(buf2, num_rows); //TODO: This copy is probably not necessary. vals_ = tmp; free(buf2); getline(is, line); // Consume the final end-line. } string WeakClassifier::serialize() { float fbuf = 0; ostringstream oss; oss << "weak_classifier" << endl; oss << "serialization_version" << endl; oss << serialization_version_ << endl; oss << "id" << endl; oss << id_ << endl; oss << "fsid" << endl; oss << fsid_ << endl; oss << "theta" << endl; oss.write((char*)&theta_, sizeof(float)); oss << endl; // oss << setprecision(20) << theta_ << endl; oss << "length_squared" << endl; //oss << setprecision(20) << length_squared_ << endl; oss.write((char*)&length_squared_, sizeof(float)); oss << endl; oss << "center" << endl; oss << center_.rows() << endl; for(int i=0; iid_ == t); out << pwcs_[t]->serialize() << endl; } out << endl; } void MultiBooster::deserialize(istream& is) { // -- Reset everything. 
mbd_ = NULL; battery_.clear(); pwcs_.clear(); //TODO: this could be a mem leak if you are doing something weird. Only call this function from the file constructor. //log_weights_ = MatrixXd(0,0); // TODO: Again, this could cause bad behavior if log_weights_ is set, then a new classifier is deserialized here - things wouldn't match up. string line; getline(is, line); if(line.compare(version_string_) != 0) { cerr << "Log is of the wrong type!" << endl; cerr << "First line is: " << line << " instead of " << version_string_ << endl; exit(1); } // -- Extract the class and feature maps. class_map_ = NameMapping(is); feature_map_ = NameMapping(is); getline(is, line); // Eat the newline that << doesn't get. // -- Extract statistics. total_objs_seen_ = vector(class_map_.size()); is >> line; assert(line.compare("total_objs_seen") == 0); for(size_t c=0; c> total_objs_seen_[c]; // cout << "class " << c << ": " << total_objs_seen_[c] << endl; } is >> line; assert(line.compare("num_bg_seen") == 0); is >> num_bg_seen_; // -- Extract prior. is >> line; assert(line.compare("prior") == 0); deserializeVector(is, &prior_); // cout << "Extracted prior: " << prior_.transpose() << endl; assert((size_t)prior_.rows() == class_map_.size()); assert(prior_.cols() == 1); // -- Get the weak classifiers. size_t num_wcs; is >> line; assert(line.compare("num_wcs") == 0); is >> num_wcs; // cout << num_wcs << endl; pwcs_.resize(num_wcs); for(size_t t=0; t >(feature_map_.size()); // centers_ = vector(feature_map_.size()); for(size_t t=0; tfsid_].push_back(pwcs_[t]); } // for(size_t i=0; icenter_.rows(), battery_[i].size()); // for(size_t j=0; jcenter_; // } // } // -- Initialize Hessian and gradient accumulators. TODO: Make this save and load. 
// assert(H_pos_.empty() && g_pos_.empty() && g_neg_.empty() && H_neg_.empty()); // H_neg_.reserve(class_map_.size()); // H_pos_.reserve(class_map_.size()); // g_neg_.reserve(class_map_.size()); // g_pos_.reserve(class_map_.size()); // for(size_t c=0; c* activated) { assert(activated->empty()); vector &wcs = battery_[fsid]; activated->reserve(wcs.size()); if(!obj.descriptors_[fsid].vector || wcs.empty()) return; const descriptor& desc = obj.descriptors_[fsid]; assert(desc.vector->rows() == wcs[0]->center_.rows()); for(size_t t=0; tcenter_, desc.length_squared, wcs[t]->length_squared_); if(desc.length_squared == 0) assert(desc.vector->sum() == 0); if(wcs[t]->length_squared_ == 0) { assert(wcs[t]->center_.sum() == 0); } if(sd <= wcs[t]->theta_) { activated->push_back(wcs[t]); } } // -- Test 1d random projection heuristic. // TODO: Remove this. if(false) { vector consider(wcs.size(), true); size_t num_proj = 10; for(size_t iproj = 0; iproj < num_proj; ++iproj) { // Get a random 1d subspace. VectorXf proj = VectorXf::Zero(wcs[0]->center_.size()); for(int i=0; i wc_proj(wcs.size()); for(size_t i=0; icenter_.dot(proj); double dist = abs(wc_proj[i] - obj_proj); if(dist > sqrt(wcs[i]->theta_)) { consider[i] = false; for(size_t j=0; jsize(); ++j) { WeakClassifier* wc = (*activated)[j]; assert(wc == pwcs_[wc->id_]); if(wc->id_ == wcs[i]->id_) { cout << "Bad elimination. " << obj_proj << " " << wc_proj[i] << ", dist: " << abs(obj_proj - wc_proj[i]) << ", sqrt(theta): " << sqrt(wcs[i]->theta_); cout << ", euc: " << sqrt(fastEucSquared(*desc.vector, wc->center_, desc.length_squared, wc->length_squared_)) << endl; } } } } } // Make sure we didn't eliminate any that actually activate. for(size_t i=0; isize(); ++j) { WeakClassifier* wc = (*activated)[j]; if(wc->id_ == wcs[i]->id_) { cout << "Found bad elim after the fact." 
<< endl; cout << "dist & theta: " << fastEucSquared(*desc.vector, wc->center_, desc.length_squared, wc->length_squared_) << " " << wc->theta_ << endl; cout << wc->status(class_map_, feature_map_) << endl; assert(0); } } } // Count up how many we have to consider. size_t num_consider = 0; for(size_t i=0; iclass_map_.compare(class_map_)); assert(dataset->feature_map_.compare(feature_map_)); mbd_ = dataset; // -- Make sure there are no unlabeled objects. (-2s) for(size_t m=0; mobjs_.size(); ++m) { if(mbd_->objs_[m]->label_ <= -2) { cerr << "Supervised learning on a dataset with unlabeled data makes no sense." << endl; throw 1; } } // -- Reset the statistics (assumes we're getting an augmented version of an old dataset, not totally new data). num_bg_seen_ = mbd_->num_bg_; for(size_t c=0; cnum_objs_of_class_.size(); ++c) total_objs_seen_[c] = mbd_->num_objs_of_class_[c]; // -- Learn the prior based on new statistics. cout << "Old prior: " << prior_.transpose() << endl; learnPrior(); cout << "New prior: " << prior_.transpose() << endl; if(verbose_) cout << "Objective before response relearning: " << classify(mbd_) << endl; // -- Reset log_weights_ to be just the prior. vector& objs = mbd_->objs_; assert(class_map_.size() > 0); assert(objs.size() > 0); log_weights_ = MatrixXd::Zero(class_map_.size(), objs.size()); for(size_t m=0; mymc_.col(m).array()).matrix().cast(); // -- Learn new WC responses and update log_weights_. vector utils(pwcs_.size()); for(size_t t=0; t inside; inside.reserve(objs.size()); VectorXd sum_weights_pos = VectorXd::Zero(class_map_.size()); VectorXd sum_weights_neg = VectorXd::Zero(class_map_.size()); for(size_t m=0; mdescriptors_[wc.fsid_]; // -- If no feature of this type, ignore. if(!desc.vector) continue; // -- If not in the hypersphere, ignore. 
if(fastEucSquared(wc.center_, *desc.vector, wc.length_squared_, desc.length_squared) > wc.theta_) continue; inside.push_back(m); for(size_t c=0; cymc_(c,m); denominators(c) += weight; } } // -- Set the response values for this weak classifier. for(size_t c=0; cymc_(c,idx) * wc.vals_(c); } } // -- Compute the utility. utils[t] = 0; for(size_t c=0; cymc_(c,idx) == 1) sum_weights_pos(c) += exp(log_weights_(c,idx)); else if(mbd_->ymc_(c,idx) == -1) sum_weights_neg(c) += exp(log_weights_(c,idx)); else { cout << "ymc must be in -1, +1" << endl; assert(0); } } utils[t] += (exp(wc.vals_(c)) - 1) * sum_weights_pos(c); utils[t] += (exp(-wc.vals_(c)) - 1) * sum_weights_neg(c); } } if(verbose_) { double objective = classify(mbd_); cout << "Objective after response relearning: " << objective << endl; // cout << "Objective after response updating: " << computeObjective() << endl; } // -- Pruning. if(min_util > 0) { cout << "Pruning not implemented." << endl; assert(0); assert(pwcs_.size() == utils.size()); vector new_pwcs; new_pwcs.reserve(pwcs_.size()); for(size_t i = 0; i < utils.size(); ++i) { if(utils[i] > min_util) new_pwcs.push_back(pwcs_[i]); } // installNewWeakClassifiers(new_pwcs); { // -- Update the indexing. for(size_t i = 0; i < pwcs_.size(); ++i) pwcs_[i]->id_ = i; // -- Rebuild batteries. // -- Rebuild trees. // -- Anything else to update? } } if(max_wcs > 0) { vector< pair > util_idx(pwcs_.size()); for(size_t t=0; t > emacs = greater< pair >(); sort(util_idx.begin(), util_idx.end(), emacs); //Descending. if(verbose_) cout << "Pruning " << pwcs_.size() - max_wcs << " weak classifiers." << endl; // -- Make new pwcs_ and battery_. 
vector pwcs_new; vector< vector > battery_new(feature_map_.size()); pwcs_new.reserve(max_wcs); for(size_t t=0; tid_ = t; pwcs_new.push_back(pwc); battery_new[pwc->fsid_].push_back(pwc); } else { //cout << "deleting wc " << util_idx[t].second << " with util " << util_idx[t].first << endl; delete pwcs_[util_idx[t].second]; } } pwcs_ = pwcs_new; battery_ = battery_new; // cout << "same: " << objective_after_pruning - objective << " " << min_util_in_classifier << endl; if(verbose_) { double objective_after_pruning = classify(mbd_); cout << "Objective after pruning: " << objective_after_pruning << endl; cout << status() << endl; } //cout << "Relearning responses." << endl; // relearnResponses(mbd); // TODO: Should instead be able to decrement log_weights_ for those pruned, then recompute responses without redoing euc distances. } } void MultiBooster::relearnResponses2(MultiBoosterDataset* dataset, Eigen::SparseMatrix* indicator, double min_util, double prune) { assert(min_util >= 0); assert(prune >= 0 && prune <= 1); if(min_util != 0) assert(prune == 0); if(prune != 0) assert(min_util == 0); // -- Don't allow there to be any differences in the mapping. assert(dataset->class_map_.compare(class_map_)); assert(dataset->feature_map_.compare(feature_map_)); mbd_ = dataset; // -- Make sure there are no unlabeled objects. (-2s) for(size_t m=0; mobjs_.size(); ++m) { if(mbd_->objs_[m]->label_ <= -2) { cerr << "Supervised learning on a dataset with unlabeled data makes no sense." << endl; throw 1; } } // -- Reset the statistics (assumes we're getting an augmented version of an old dataset, not totally new data). num_bg_seen_ = mbd_->num_bg_; for(size_t c=0; cnum_objs_of_class_.size(); ++c) total_objs_seen_[c] = mbd_->num_objs_of_class_[c]; // -- Learn the prior based on new statistics. cout << "Old prior: " << prior_.transpose() << endl; learnPrior(); cout << "New prior: " << prior_.transpose() << endl; // -- Compute which training examples are in which weak classifiers. 
assert(indicator); HighResTimer hrt; computeIndicatorMatrix(*dataset, indicator); hrt.stop(); cout << "Relearning responses: computing indicator matrix took " << hrt.getMinutes() << " minutes." << endl; assert(mbd_->objs_.size() == (size_t)(indicator->cols())); assert(pwcs_.size() == (size_t)(indicator->rows())); // -- Reset log_weights_ to be just the prior. vector& objs = mbd_->objs_; assert(class_map_.size() > 0); assert(objs.size() > 0); log_weights_ = MatrixXd::Zero(class_map_.size(), objs.size()); for(size_t m=0; mymc_.col(m).array()).matrix().cast(); // -- Learn new WC responses and update log_weights_. vector to_delete(pwcs_.size(), false); VectorXd utils = VectorXd::Zero(pwcs_.size()); for(size_t t=0; t inside; inside.reserve(objs.size()); for(size_t m=0; mdescriptors_[wc.fsid_]; // -- If no feature of this type, ignore. if(!desc.vector) continue; // -- If not in the hypersphere, ignore. if(indicator->coeff(t, m) != 1) { assert(indicator->coeff(t, m) == 0); //assert(fastEucSquared(wc.center_, *desc.vector, wc.length_squared_, desc.length_squared) > wc.theta_); continue; } inside.push_back(m); for(size_t c=0; cymc_(c,m); denominators(c) += weight; } } // -- Set the response values for this weak classifier. for(size_t c=0; cymc_(c, idx) == 1) sum_weights_pos += exp(log_weights_(c, idx)); else if(mbd_->ymc_(c, idx) == -1) sum_weights_neg += exp(log_weights_(c, idx)); else { cout << "ymc must be in -1, +1" << endl; assert(0); } } utils(t) += (1 - exp(-wc.vals_(c))) * sum_weights_pos; utils(t) += (1 - exp(wc.vals_(c))) * sum_weights_neg; } utils(t) /= (double)(class_map_.size() * mbd_->objs_.size()); // -- If below threshold, set responses to zero and mark for deletion. assert(to_delete[t] == false); if(min_util != 0 && utils(t) < min_util) { wc.vals_.setZero(); to_delete[t] = true; } else assert(to_delete[t] == false); // -- Update the weights for each training example inside the hypersphere. 
if(!to_delete[t]) { for(size_t c = 0; c < class_map_.size(); ++c) { for(size_t m = 0; m < inside.size(); ++m) { size_t idx = inside[m]; log_weights_(c,idx) += -mbd_->ymc_(c,idx) * wc.vals_(c); } } } } if(prune != 0) { // -- Sort the WCs by utility. vector< pair > index; index.reserve(pwcs_.size()); for(int i = 0; i < utils.rows(); ++i) index.push_back(pair(utils(i), i)); sort(index.begin(), index.end()); assert(index.back().first > index.front().first); // -- Mark the worst for deletion. for(size_t i = 0; i < (size_t)((double)index.size() * prune); ++i) { to_delete[index[i].second] = true; if(i > 0) assert(index[i].first > index[i-1].first); } } // -- Delete those weak classifiers that were below threshold. vector new_pwcs; new_pwcs.reserve(pwcs_.size()); assert(pwcs_.size() == to_delete.size()); int num_deleted = 0; for(size_t i = 0; i < to_delete.size(); ++i) { if(to_delete[i]) { delete pwcs_[i]; pwcs_[i] = NULL; ++num_deleted; } else new_pwcs.push_back(pwcs_[i]); } installNewWeakClassifiers(new_pwcs); if(verbose_) { cout << "Response relearning complete." << endl; if(min_util != 0) cout << "Pruning with min_util = " << min_util << " removed " << num_deleted << " / " << to_delete.size() << " wcs." << endl; if(prune != 0) { cout << "Pruned away fixed " << prune << " of WCs = " << num_deleted << " / " << to_delete.size() << endl; cout << "MB now has " << pwcs_.size() << " WCs." << endl; } } } void MultiBooster::installNewWeakClassifiers(vector pwcs) { // -- Update the indexing. for(size_t i = 0; i < pwcs.size(); ++i) pwcs[i]->id_ = i; // -- Rebuild wc battery. // TODO: What if this deletes all WCs in a descriptor space? vector< vector > battery(feature_map_.size()); for(size_t i = 0; i < battery.size(); ++i) battery.reserve(pwcs.size()); for(size_t i = 0; i < pwcs.size(); ++i) battery[pwcs[i]->fsid_].push_back(pwcs[i]); pwcs_ = pwcs; battery_ = battery; // -- Rebuild trees. 
computeTrees(); } void MultiBooster::recomputeLogWeights() { assert(mbd_); vector& objs = mbd_->objs_; assert(class_map_.size() > 0); assert(objs.size() > 0); log_weights_ = MatrixXd::Zero(class_map_.size(), objs.size()); for(size_t m=0; mymc_(c,m); } } } double MultiBooster::computeTrainingSetAccuracy() const { assert(mbd_); double acc = 0; for(int m = 0; m < log_weights_.cols(); ++m) { for(int c = 0; c < log_weights_.rows(); ++c) { if(log_weights_(c, m) < 0.0) ++acc; } } acc /= (double)(log_weights_.cols() * log_weights_.rows()); return acc; } void MultiBooster::train(int num_candidates, int max_secs, int max_wcs, double min_util, double min_objective, int min_wcs, void (*debugHook)(WeakClassifier)) { assert(min_wcs >= 0); signal(SIGINT,sigint); time_t start, end; time(&start); float obj, obj2; HighResTimer hrt; hrt.start(); obj = computeObjective(); hrt.stop(); cout << "Computing objective took " << hrt.getSeconds() << " seconds." << endl; //obj2 = classify(mbd_); if(verbose_) { cout << setprecision(20) << "Objective (from log_weights_): " << obj << endl; //cout << setprecision(20) << "Objective (from classify()) : " << obj2 << endl; } //assert(abs(obj - obj2) / min(obj, obj2) < 1e-3); double checkpoint_interval = 1800; // Seconds. 
// train() main loop: each iteration learns one weak classifier (learnWC),
// recomputes the objective, and asserts it strictly decreased. Once min_wcs
// weak classifiers exist, training stops on any of: wall-clock limit
// (max_secs), WC-count limit (max_wcs), user Ctrl-C (g_int, set by the SIGINT
// handler installed above), candidate utility below min_util, or objective
// below min_objective. The optional debugHook is invoked with each newly
// learned WC.
int checkpoint_num = 1; while(true) { float util = 0; obj2 = obj; learnWC(num_candidates, &util); time(&end); if(debugHook != NULL) debugHook(*pwcs_.back()); obj = computeObjective(); assert(obj < obj2); if(verbose_) { cout << "Total weak classifiers: " << pwcs_.size() << endl; cout << "Objective (from log_weights_): " << obj << endl; double pred_acc = 1.0 - (obj * obj / (1.0 + obj * obj)); cout << "Objective of " << obj << " predicts accuracy of " << pred_acc << endl; double training_set_accuracy = computeTrainingSetAccuracy(); cout << "Actual accuracy on training set: " << training_set_accuracy << endl; } if(pwcs_.size() >= (size_t)min_wcs) { if(max_secs != 0 && difftime(end,start) > max_secs) { if(verbose_) cout << "Ending training because max time has been reached." << endl; break; } if(max_wcs != 0 && pwcs_.size() >= (size_t)max_wcs) { if(verbose_) cout << "Ending training because max number of weak classifiers has been reached." << endl; break; } if(g_int) { if(verbose_) cout << "Ending training because of user control-c." << endl; break; } if(util < min_util) { if(verbose_) cout << "Ending training because of min utility criterion: " << util << " < min util of " << min_util << endl; break; } if(min_objective != 0 && obj < min_objective) { if(verbose_) cout << "Ending training because of min objective criterion: " << obj << " < min objective of " << min_objective << endl; break; } } else if(verbose_) cout << "Minimum number of wcs (" << min_wcs << ") not yet reached." << endl; // -- Autosave every half hour. 
// Autosave: every checkpoint_interval (1800 s) the classifier is written to
// "multibooster-autosave.d". After the loop the kd-trees are rebuilt.
// Below: the start of WeakClassifier::compare(), a tolerance-based (1e-4)
// field-by-field equality check used by the serialization round-trip tests.
if(floor((double)difftime(end,start) / checkpoint_interval) == checkpoint_num) { checkpoint_num++; string savename = "multibooster-autosave.d"; if(verbose_) cout << "Autosaving classifier to " << savename << endl; save(savename); } } computeTrees(); if(verbose_) { MatrixXd log_weights; //cout << "Objective (from classify()): " << classify(mbd_, 0.0, NULL, &log_weights) << endl; // cout << (log_weights_ - log_weights).transpose() << endl; cout << "Done training." << endl; } // -- Test response balancing. // updateGradientAndHessian(mbd_); // balanceResponses(); // cout << "Objective (from classify(), after response balancing): " << classify(mbd_) << endl; // balanceResponsesLineSearch(mbd_); // cout << "Objective (from classify(), after line search): " << classify(mbd_) << endl; // save("tmp.d"); // MultiBooster mb; // mb.load("tmp.d"); // cout << "Objective (from classify() of saved and loaded): " << mb.classify(*mbd_) << endl; // cout << "Compare? " << compare(mb, true) << endl; // cout << status() << endl; // cout << mb.status() << endl; } bool WeakClassifier::compare(const WeakClassifier& wc, bool verbose) { double tol = 1e-4; if(serialization_version_ != wc.serialization_version_) return false; if(fsid_ != wc.fsid_) return false; if(abs(length_squared_ - wc.length_squared_) > tol) { cout << "length_squared_ " << length_squared_ << " " << wc.length_squared_ << endl; return false; } if(abs((center_ - wc.center_).dot(center_ - wc.center_)) > tol) return false; if(abs(theta_ - wc.theta_) > tol) { if(verbose) { cout << "theta for wc " << id_ << ": " << theta_ << endl; cout << "theta for wc " << wc.id_ << ": " << wc.theta_ << endl; cout << "Difference: " << theta_ - wc.theta_ << endl; } return false; } if(vals_.rows() != wc.vals_.rows()) { if(verbose) cout << "size of vals" << endl; return false; } for(size_t c=0; c<(size_t)vals_.rows(); ++c) { if(abs(vals_(c) - wc.vals_(c)) > tol) { if(verbose) { cout << "Vals are different." 
// (continuation of WeakClassifier::compare: prints both vals_ vectors when
// they differ, then compares id_. MultiBooster::compare then checks WC count,
// name mappings, num_bg_seen_, priors, and every weak classifier pairwise.)
<< endl; cout << "Orig: " << vals_.transpose() << endl; cout << "Other: " << wc.vals_.transpose() << endl; } return false; } } if(id_ != wc.id_) { if(verbose) cout << "ids: " << id_ << " " << wc.id_ << endl; return false; } return true; } //! Comprehensive with high probability, but not guaranteed. bool MultiBooster::compare(const MultiBooster& mb, bool verbose) { if(pwcs_.size() != mb.pwcs_.size()) { if(verbose) cout << "Different number of weak classifiers!" << endl; return false; } if(!class_map_.compare(mb.class_map_)) return false; if(!feature_map_.compare(mb.feature_map_)) return false; if(num_bg_seen_ != mb.num_bg_seen_) { if(verbose) cout << "Different num_bg_seen_" << endl; return false; } for(size_t c=0; ccompare(*mb.pwcs_[i], verbose)) { if(verbose) { cout << "Different wcs." << endl; cout << pwcs_[i]->status(class_map_, feature_map_) << endl; cout << mb.pwcs_[i]->status(mb.class_map_, mb.feature_map_) << endl; cout << pwcs_[i]->serialize() << endl; cout << mb.pwcs_[i]->serialize() << endl; } return false; } } return true; } vector< shared_ptr > MultiBooster::createRandomWeakClassifiers(int num_candidates) { assert(mbd_); // -- Get the weights matrix. MatrixXd weights = log_weights_.array().exp().matrix(); // -- Float accuracy makes these numbers match only to 3 digits. Double -> 12 digits. // double total_weight = 0; // for(int i=0; i > cand; for(int iCand=0; iCand= 0 && obj_id < weights.cols()); // -- If the object doesn't have any descriptors, move on. 
// createRandomWeakClassifiers() (continued): skips objects with no valid
// descriptors, picks one of the object's descriptors as the candidate's
// center (theta_/vals_ are filled in later by WCEvaluator; id_ stays -1
// until installed). status() pretty-prints class priors and per-feature-
// space WC counts. learnWC() then begins: it evaluates num_candidates random
// candidate WCs in parallel via a pipeline of WCEvaluator nodes and keeps the
// one with maximum utility.
// NOTE(review): sampling/loop code in this span is visibly truncated by the
// extraction (e.g. "for(int iCand=0; iCand= 0 && ..."); restore from VCS
// before modifying.
Object& obj = *mbd_->objs_[obj_id]; bool valid = false; for(size_t i=0; i wc(new WeakClassifier()); wc->fsid_ = fsid; wc->center_ = *v; wc->theta_ = 0; wc->length_squared_ = 0; wc->vals_ = VectorXf::Zero(mbd_->class_map_.size()); wc->id_ = -1; cand.push_back(wc); } return cand; } std::string MultiBooster::status(bool verbose) { char tmp[1000]; string st("Classifier Status: \n"); if(wc_limiter_ != 0) { sprintf(tmp, " ** Using the first %zu weak classifiers\n", wc_limiter_); st.append(tmp); } st.append(" nClasses: "); sprintf(tmp, "%zu \n", class_map_.size()); st.append(tmp); st.append(" Class priors:\n"); for(size_t i=0; i > nwcs_idx(feature_map_.size()); for(size_t i=0; i > emacs = greater< pair >(); sort(nwcs_idx.begin(), nwcs_idx.end(), emacs); // Descending. st.append(" nWeakClassifiers \t Name \n"); // -- Print descriptor spaces in order of most number of weak classifiers. for(size_t i=0; istatus(class_map_, feature_map_) << endl; } } return st; } void MultiBooster::learnWC(int num_candidates, float* util) { assert(mbd_); // time_t start, end; // time(&start); HighResTimer hrt; hrt.start(); vector< shared_ptr > cand = createRandomWeakClassifiers(num_candidates); if(verbose_) cout << "Added " << num_candidates << " candidate wcs" << endl; // -- Create pipeline nodes for each candidate. vector< shared_ptr > plnodes(cand.size()); vector< shared_ptr > nodes(cand.size()); for(size_t i = 0; i < cand.size(); ++i) { nodes[i] = shared_ptr(new WCEvaluator(this, mbd_, cand[i])); plnodes[i] = nodes[i]; } // -- Evaluate all candidates in parallel. pipeline::Pipeline pl(num_threads_, plnodes); pl.compute(); if(verbose_) cout << endl << endl; // -- Choose the best one. size_t best_idx = 0; float max_utility = -FLT_MAX; for(size_t i = 0; i < nodes.size(); ++i) { if(nodes[i]->utility_ > max_utility) { max_utility = nodes[i]->utility_; best_idx = i; } } // -- Add the new weak classifier. 
// learnWC() tail: copies the winning candidate, caches center_'s squared
// length, appends it to pwcs_ and the per-feature-space battery_, and updates
// log_weights_ for every training example inside the new WC's hypersphere
// (indices taken from the evaluator's distance-sorted list), then prints
// timing stats. Next come the WeakClassifier constructors and status(), and
// computeObjective(): the mean over all (example, class) pairs of
// exp(log_weights_), asserted finite. Farther down: classify(Object&) adds
// the vals_ of every activated WC to the prior (skipping WCs past
// wc_limiter_), treeClassify() does the same via kd-tree queries (optionally
// reporting euclidean/dot-product counts and the activation set), and
// computeStatisticsForDataset() begins by building per-example log weights
// from the prior.
if(verbose_) cout << "Found weak classifier with utility " << max_utility << endl; //assert(max_utility > 0); Fails in semisupervised learning tests. Not a big deal there.. WeakClassifier* best = new WeakClassifier(*nodes[best_idx]->wc_); best->length_squared_ = best->center_.dot(best->center_); best->id_ = pwcs_.size(); battery_[best->fsid_].push_back(best); pwcs_.push_back(best); // -- Compute the new log weights. for(int m=0; mnum_contained_tr_ex_; ++m) { int idx = (*nodes[best_idx]->distance_idx_)[m].second; for(size_t c=0; cclass_map_.size(); ++c) { log_weights_(c, idx) += -mbd_->ymc_(c, idx) * best->vals_(c); } } // -- Display stats about the weak classifier. hrt.stop(); if(verbose_) { cout << "Took " << setprecision(8) << hrt.getSeconds() << " seconds to try " << num_candidates << " wcs." << endl; cout << best->status(class_map_, feature_map_) << endl; cout << "WC encompasses at least one point from " << nodes[best_idx]->num_contained_tr_ex_ << " out of " << mbd_->objs_.size() << " objects." << endl; } *util = max_utility; } WeakClassifier::WeakClassifier() : serialization_version_(WC_VERSION) { } WeakClassifier::WeakClassifier(const WeakClassifier& wc) : serialization_version_(wc.serialization_version_), fsid_(wc.fsid_), center_(wc.center_), length_squared_(wc.length_squared_), theta_(wc.theta_), vals_(wc.vals_), id_(wc.id_) { } string WeakClassifier::status(const NameMapping& class_map, const NameMapping& feature_map) const { ostringstream oss(ostringstream::out); oss << feature_map.toName(fsid_) << " feature." << endl; oss << "Id: " << id_ << endl; oss << "Theta: " << theta_ << endl; oss << "Vals: " << endl; for(size_t i=0; iobjs_.size()); assert((size_t)log_weights_.rows() == class_map_.size()); for(size_t m=0; mobjs_.size(); ++m) { for(size_t c=0; cobjs_.size() * class_map_.size()); } } // obj /= (double)(mbd_->objs_.size() * class_map_.size()); // cout << log_weights_.rows() << " " << log_weights_.cols() << ". 
obj " << obj << endl; // cout << log_weights_ << endl; assert(!isinf(obj)); return obj; } VectorXf MultiBooster::classify(Object &obj) { VectorXf response = prior_; for(size_t d=0; d act; findActivatedWCs(obj, d, &act); for(size_t a = 0; aid_ > wc_limiter_) continue; response += act[a]->vals_; } //cout << "regular (" << d << "): " << act.size() << endl; } return response; } VectorXf MultiBooster::treeClassify(const Object &obj, int* num_euc, int* num_dot, vector* activations) const { if(trees_.empty()) { assert(pwcs_.size() == 0); } VectorXf response = prior_; if(num_euc && num_dot) { *num_euc = 0; *num_dot = 0; } if(activations) { assert(activations->empty()); activations->reserve(pwcs_.size()); } assert(trees_.size() == obj.descriptors_.size()); assert(trees_.size() == battery_.size()); for(size_t d=0; d act; trees_[d]->query(*desc.vector, desc.length_squared, &act, &this_num_euc, &this_num_dot); if(num_euc && num_dot) { *num_euc += this_num_euc; *num_dot += this_num_dot; } for(size_t a = 0; aid_ > wc_limiter_) continue; response += act[a]->vals_; } // -- Return the activated weak classifiers. if(activations) { activations->insert(activations->end(), act.begin(), act.end()); } } return response; } void MultiBooster::computeStatisticsForDataset(MultiBoosterDataset* mbd) { augmentAndTranslate(mbd); // -- Set up log_weights with prior. Note this is NOT using log_weights_ in the class. MatrixXd log_weights = MatrixXd::Zero(class_map_.size(), mbd->objs_.size()); for(size_t c=0; cobjs_.size(); ++m) { log_weights(c,m) = -mbd->ymc_(c,m) * prior_[c]; } } // -- Get number of training examples inside each weak classifier and build log_weights. 
// computeStatisticsForDataset() body: for every WC, count the training
// examples whose descriptor falls inside its hypersphere (fastEucSquared
// against theta_), accumulate the induced log-weight updates, then compute a
// per-WC utility and sort WCs by it (descending). Loop headers in this span
// are visibly truncated by the extraction (see note at top of file region).
vector inside(pwcs_.size()); for(size_t t=0; tobjs_.size(); ++m) { descriptor& desc = mbd->objs_[m]->descriptors_[pwcs_[t]->fsid_]; if(desc.vector) { float sd = fastEucSquared(*desc.vector, pwcs_[t]->center_, desc.length_squared, pwcs_[t]->length_squared_); if(sd <= pwcs_[t]->theta_) { inside[t]++; for(size_t c=0; cymc_(c,m) * pwcs_[t]->vals_(c); } } } } // cerr << inside[t] << " " << endl; // TODO: make this not a hack. } // -- Set up storage for indices. vector< pair > wc_idx(pwcs_.size()); for(size_t t=0; tobjs_.size(); ++m) { descriptor& desc = mbd->objs_[m]->descriptors_[pwcs_[t]->fsid_]; if(desc.vector) { float sd = fastEucSquared(*desc.vector, pwcs_[t]->center_, desc.length_squared, pwcs_[t]->length_squared_); if(sd <= pwcs_[t]->theta_) { for(size_t c=0; cymc_(c,m) * pwcs_[t]->vals_(c)) - 1) / (class_map_.size() * mbd->objs_.size()); wc_idx[t].first += util; } } } } } // -- Sort by utility. greater< pair > emacs = greater< pair >(); sort(wc_idx.begin(), wc_idx.end(), emacs); //Descending. // -- Print out the top 10 weak classifiers. 
// pctWCsBelowThreshold(): counts WCs whose vals_ entries are all <= thresh in
// magnitude. NOTE(review): it computes a fraction (below_thresh / size) but
// the declared return type is int, so the value truncates to 0 for any
// fraction < 1 despite the "pct" name — presumably the return type should be
// double (or the value multiplied by 100); confirm intent before relying on
// it. computeDensity(): fraction of WC pairs whose hyperspheres overlap
// (symmetric 0/1 matrix summed and normalized). computeTrees(): rebuilds one
// kd-tree per feature space from battery_. Then the head of
// classify(MultiBoosterDataset*), which checks name mappings and rejects
// datasets containing unlabeled (label_ <= -2) objects.
// size_t top_k = 10; // for(size_t t=0; tstatus(class_map_, feature_map_) << endl; // } } int MultiBooster::pctWCsBelowThreshold(double thresh) { double below_thresh = 0; for(size_t i = 0; i < pwcs_.size(); ++i) { WeakClassifier& wc = *pwcs_[i]; bool valid = false; for(int j = 0; j < wc.vals_.rows(); ++j) { if(fabs(wc.vals_(j)) > thresh) valid = true; } if(!valid) ++below_thresh; } return below_thresh / (double)pwcs_.size(); } double computeDensity(const vector& pwcs) { MatrixXf overlap = MatrixXf::Zero(pwcs.size(), pwcs.size()); for(size_t i=0; icenter_, pwcs[i]->center_, pwcs[j]->length_squared_, pwcs[i]->length_squared_); dist = sqrt(dist); if(dist <= sqrt(pwcs[i]->theta_) + sqrt(pwcs[j]->theta_)) { overlap(i,j) = 1; overlap(j,i) = 1; } } } double density = overlap.sum() / (double)(pwcs.size() * pwcs.size()); return density; } void MultiBooster::computeTrees() { clock_t start = clock(); trees_.clear(); trees_.resize(battery_.size(), NULL); for(size_t i=0; icenter_.rows() << " is " << computeDensity(battery_[i]) << endl; // } } double MultiBooster::classify(MultiBoosterDataset* mbd, float threshold, PerfStats* results, MatrixXd* log_weights, bool tree) { // -- Make sure that all the names agree, etc. assert(mbd->class_map_.compare(class_map_)); assert(mbd->feature_map_.compare(feature_map_)); //augmentAndTranslate(mbd); if(log_weights) *log_weights = MatrixXd::Zero(class_map_.size(), mbd->objs_.size()); // -- Compute the responses for all objects. double objective=0.0; VectorXf response = VectorXf::Zero(class_map_.size()); for(size_t m=0; mobjs_.size(); ++m) { if(mbd->objs_[m]->label_ <= -2) { cerr << "Getting an objective function score on a dataset with unlabeled data makes no sense." 
// (continuation of classify(MultiBoosterDataset*): per-object responses via
// classify() or treeClassify() depending on `tree`, optional PerfStats and
// per-object log-weights output, and accumulation of the normalized
// exponential-loss objective.)
<< endl; throw 1; } if(tree) response = treeClassify(*mbd->objs_[m]); else response = classify(*mbd->objs_[m]); if(results) results->incrementStats(mbd->objs_[m]->label_, (response.array() - threshold).matrix()); if(log_weights) log_weights->col(m) += (response.cast().array() * -mbd->ymc_.col(m).cast().array()).matrix(); assert((int)class_map_.size() == response.rows()); for(size_t c=0; cymc_(c, m) * response(c)) / (double)(mbd->objs_.size() * class_map_.size()); } } //objective /= mbd->objs_.size() * class_map_.size(); return objective; } void MultiBooster::classifySpeedTest(MultiBoosterDataset* mbd, PerfStats* results, PerfStats* treeResults, PerfStats* localResults) { // -- Make sure that all the names agree, etc. augmentAndTranslate(mbd); // -- Compute the responses for all objects. cout << "Starting regular." << endl; clock_t start = clock(); VectorXf response = VectorXf::Zero(class_map_.size()); for(size_t m=0; mobjs_.size(); ++m) { if(mbd->objs_[m]->label_ <= -2) { cerr << "Getting an objective function score on a dataset with unlabeled data makes no sense." << endl; throw 1; } response = classify(*mbd->objs_[m]); if(results) results->incrementStats(mbd->objs_[m]->label_, response); } cout << "Regular: " << (double)(clock() - start) / (double)CLOCKS_PER_SEC << " sec." << endl; start = clock(); double av_euc = 0; double av_dot = 0; for(size_t m=0; mobjs_.size(); ++m) { if(mbd->objs_[m]->label_ <= -2) { cerr << "Getting an objective function score on a dataset with unlabeled data makes no sense." << endl; throw 1; } int num_euc = 0, num_dot = 0; response = treeClassify(*mbd->objs_[m], &num_euc, &num_dot); av_euc += num_euc; av_dot += num_dot; if(treeResults) treeResults->incrementStats(mbd->objs_[m]->label_, response); } cout << "Tree: " << (double)(clock() - start) / (double)CLOCKS_PER_SEC << " sec." 
// classifySpeedTest() tail: reports average euclidean / dot-product counts
// per object for the kd-tree path versus the brute-force path. Then binary
// (de)serialization helpers for Eigen matrices/vectors: a text header
// (rows[/cols]) followed by a raw float payload.
// NOTE(review): these helpers do no stream-state or allocation checks, use
// raw malloc/free, and assume matching endianness/float layout between
// writer and reader — presumably kept for on-disk format compatibility;
// confirm before "modernizing". Finally: WCEvaluator construction and
// _flush(), and the head of _compute(), which sorts all training examples by
// distance to the candidate WC's center.
<< endl; av_euc /= (double) mbd->objs_.size(); av_dot /= (double) mbd->objs_.size(); cout << "Average number of euc distance computations: " << av_euc << " (vs " << pwcs_.size() << ")." << endl; cout << "Average number of dot prods: " << av_dot << endl; } void deserializeMatrix(istream& is, MatrixXf* target) { int rows; int cols; string str; is >> rows; is >> cols; getline(is, str); float* buf = (float*)malloc(sizeof(float)*rows*cols); is.read((char*)buf, sizeof(float)*rows*cols); MatrixXf tmp = Eigen::Map(buf, rows, cols); //TODO: This copy is probably not necessary. *target = tmp; free(buf); getline(is, str); } string serializeMatrix(const MatrixXf& mat) { ostringstream oss; oss << mat.rows() << endl; oss << mat.cols() << endl; float fbuf = 0; for(int i=0; i> rows; getline(is, str); float* buf = (float*)malloc(sizeof(float)*rows); is.read((char*)buf, sizeof(float)*rows); VectorXf tmp = Eigen::Map(buf, rows); //TODO: This copy is probably not necessary. *target = tmp; free(buf); getline(is, str); } string serializeVector(const VectorXf& vec) { ostringstream oss; oss << vec.rows() << endl; float fbuf = 0; for(int j=0; j wc) : mbd_(mbd), mb_(mb), wc_(wc), utility_(-FLT_MAX), num_contained_tr_ex_(-1) { } string WCEvaluator::_getName() const { return "WCEvaluator"; } void WCEvaluator::_flush() { mbd_ = NULL; mb_ = NULL; wc_.reset(); utility_ = -FLT_MAX; distance_idx_.reset(); num_contained_tr_ex_ = -1; } void WCEvaluator::_compute() { WeakClassifier wc = *wc_; //wc is the working copy. size_t num_classes = mb_->class_map_.size(); MatrixXd& log_weights = mb_->log_weights_; // -- Get all distances and sort. distance_idx_ = mbd_->computeSortedDistances(wc.fsid_, wc.center_); // (*distance_idx)[j] is distance, idx (i.e. objs_[idx]) // -- Setup vars for evaluating weak classifiers efficiently. 
// WCEvaluator::_compute() body: sweeps the training examples in order of
// increasing distance from the candidate's center, incrementally maintaining
// per-class sums of positive-example and negative-example weights inside the
// growing hypersphere, the per-class response vals_(c) =
// numerators(c)/denominators(c) (0 when the denominator is 0), and the
// resulting utility. When consecutive examples share the exact same
// distance, utility evaluation is deferred so equal-distance examples are
// always included together. The best radius seen so far is copied back into
// wc_.
vector weight_sum_pos(num_classes); // sum of weight inside hypersphere for each class vector weight_sum_neg(num_classes); // sum of weight inside hypersphere for all y_m^c == -1 VectorXd numerators = VectorXd::Zero(num_classes); VectorXd denominators = VectorXd::Zero(num_classes); // -- For all training examples in order of distance. utility_ = -FLT_MAX; for(size_t m=0; msize(); ++m) { if(m>0) assert((*distance_idx_)[m-1].first <= (*distance_idx_)[m].first); int idx = (*distance_idx_)[m].second; // Object idx of the mth closest training example double util_this_theta = 0; for(size_t c=0; cymc_(c,idx); if(denominators(c) == 0) wc.vals_(c) = 0; else wc.vals_(c) = numerators(c) / denominators(c); // -- Update the sum of weights in the hypersphere. assert(mbd_->ymc_(c,idx) == 1 || mbd_->ymc_(c,idx) == -1); if(mbd_->ymc_(c,idx) == 1) weight_sum_pos[c] += exp(log_weights(c,idx)); else weight_sum_neg[c] += exp(log_weights(c,idx)); util_this_theta += ((1-exp(-wc.vals_(c))) * weight_sum_pos[c] + (1-exp(wc.vals_(c))) * weight_sum_neg[c]) / (float)(mbd_->objs_.size() * num_classes); } // Doing the division here makes 32bit vs 64bit give different answers. // util_this_theta /= objs.size() * class_map_.size(); // -- If the next object has exactly the same distance, include it too, since it will be included when we call compute. if(m < distance_idx_->size() - 1) { if(distance_idx_->at(m).first == distance_idx_->at(m+1).first) continue; } // -- If this is the best so far, copy in to wc_. if(util_this_theta > utility_) { utility_ = util_this_theta; // wc.theta_ = (*distance_idx_)[m].first; // Set theta to be exactly the distance^2 to this training example. 
// Theta selection: for a non-final example, theta is set to the current
// squared distance (+ FLT_MIN) and asserted to lie in [d_m, d_{m+1}).
// NOTE(review): adding FLT_MIN (~1.2e-38) to any nonzero float is a no-op
// after rounding — if a strictly larger value was intended, nextafterf()
// would be needed; the asserts still pass because theta >= d_m holds exactly.
// For the final example, theta extrapolates by half the gap back to the
// previous example's distance.
if(m < distance_idx_->size() - 1) { //double dist = (sqrt((*distance_idx_)[m].first) + sqrt((*distance_idx_)[m+1].first)) / 2.0; // wc.theta_ = dist * dist; //wc.theta_ = distance_idx_->at(m).first / 2.0 + distance_idx_->at(m+1).first / 2.0; wc.theta_ = distance_idx_->at(m).first + FLT_MIN; *wc_ = wc; // cout << "mean dist: " << dist << ", dist1^2 " << distance_idx_->at(m).first << ", dist2^2 " << distance_idx_->at(m+1).first << endl;m // cout << wc.theta_ << " " << distance_idx_->at(m).first << endl; assert(wc_->theta_ >= distance_idx_->at(m).first); assert(wc_->theta_ < distance_idx_->at(m+1).first); } else { double diff = (sqrt((*distance_idx_)[m].first) - sqrt((*distance_idx_)[m-1].first)) / 2.0; double dist = sqrt((*distance_idx_)[m].first) + diff; wc.theta_ = dist * dist; *wc_ = wc; } num_contained_tr_ex_ = m+1; } } if(mb_->verbose_) { cout << "."; cout.flush(); } }