/********************************************************************* * * Software License Agreement (BSD License) * * Copyright (c) 2008, Willow Garage, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * Neither the name of the Willow Garage nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * Author: Alex Teichman *********************************************************************/ /* NOTE(review): this text appears to have been damaged in extraction. Line breaks are collapsed, and every angle-bracket span that began with a letter (include targets, template arguments, and any code lying between a less-than sign and the next greater-than sign) is missing. Affected spots are flagged below; recover the lost text from version control rather than by guesswork. */ #include #include using namespace std; using boost::shared_ptr; using namespace Eigen; /* NOTE(review): this define follows the include directives. Eigen documents that its preprocessor tokens should be defined before any Eigen header is included, so confirm whether it has any effect here. */ #define EIGEN_NO_DEBUG /* Constructor: sets the version string to DATASET_VERSION and builds both name mappings from the supplied lists (list element type lost in extraction). The body is empty. */ MultiBoosterDataset::MultiBoosterDataset(vector classes, vector feature_spaces) : version_string_(DATASET_VERSION), class_map_(classes), feature_map_(feature_spaces) { } /* Constructor: same as above, but copies already-built class and descriptor name mappings. */ MultiBoosterDataset::MultiBoosterDataset(const NameMapping& class_map, const NameMapping& descriptor_map) : version_string_(DATASET_VERSION), class_map_(class_map), feature_map_(descriptor_map) { } /* Constructor: starts from empty mappings and calls load(filename). On failure it reports to cerr and throws the int value 1. */ MultiBoosterDataset::MultiBoosterDataset(string filename) : version_string_(DATASET_VERSION), class_map_(vector()), feature_map_(vector()) { if(!load(filename)) { cerr << "Unable to load MultiBoosterDataset " << filename << endl; throw 1; } } /* Copy constructor: copies the version string and both name mappings. NOTE(review): everything after the loop header below is missing, up to the middle of a later function that translates objects through class_translator and fs_translator and then installs new_class_map and new_feature_map (presumably applyNewMappings, which join calls; confirm). */ MultiBoosterDataset::MultiBoosterDataset(const MultiBoosterDataset& mbd) : version_string_(mbd.version_string_), class_map_(mbd.class_map_), feature_map_(mbd.feature_map_) { // -- Copy in the objects. for(unsigned int i=0; itranslate(class_translator, fs_translator); // Background objects keep their label of -1. The class map only has positive classes. } class_map_ = new_class_map; feature_map_ = new_feature_map; // -- Update ymc_, num_objs_of_class_ as well. 
setObjs(objs_); } /* decimate: asserts that decimation lies strictly between 0 and 1, shuffles objs_ and then delegates to crop. NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17. */ void MultiBoosterDataset::decimate(float decimation) { assert(decimation > 0.0 && decimation < 1.0); random_shuffle(objs_.begin(), objs_.end()); crop(decimation); } /* crop: asserts that cropval lies strictly between 0 and 1 and computes max_obj_idx as cropval * objs_.size() converted to size_t. NOTE(review): the loop body, the end of crop and the head of the next function are missing. The code that follows looks up a feature named drop and erases its descriptor slot from every object (presumably dropFeature, which dropFeatures calls; confirm). */ void MultiBoosterDataset::crop(float cropval) { assert(cropval > 0.0 && cropval < 1.0); size_t max_obj_idx = cropval * objs_.size(); for(size_t m=max_obj_idx+1; m features = feature_map_.getIdToNameMapping(); vector::iterator fit = find(features.begin(), features.end(), drop); assert(fit != features.end()); size_t idx = feature_map_.toId(drop); for(size_t m=0; mdescriptors_.erase(objs_[m]->descriptors_.begin() + idx); } /* NOTE(review): the visible erase call removes the descriptor entry without deleting what its vector pointer refers to. Descriptor vectors are allocated with new elsewhere in this file, so check for a leak. The feature map is then rebuilt without the dropped name. */ features.erase(fit); feature_map_ = NameMapping(features); } /* dropFeatures: for each name in droplist, prints a message to cerr when the dataset has no such feature and otherwise calls dropFeature. NOTE(review): the loop header is truncated. */ void MultiBoosterDataset::dropFeatures(vector droplist) { for(size_t i=0; i features = feature_map_.getIdToNameMapping(); if(find(features.begin(), features.end(), droplist[i]) == features.end()) { cerr << "There is no feature " << droplist[i] << " in this dataset." << endl; } else { dropFeature(droplist[i]); } } } /* join: augments copies of our class and feature mappings with those of mbd2, applies them through applyNewMappings, then translates objects from the mbd2 naming to ours and calls setObjs. The visible code always returns true. NOTE(review): the body of the copy loop is truncated. */ bool MultiBoosterDataset::join(const MultiBoosterDataset& mbd2) { // -- Augment NameMapping tmp_class_map_ = class_map_; tmp_class_map_.augment(mbd2.class_map_); NameMapping tmp_feature_map_ = feature_map_; tmp_feature_map_.augment(mbd2.feature_map_); applyNewMappings(tmp_class_map_, tmp_feature_map_); // -- Copy in mbd2's objects, translating to our names. NameTranslator class_translator(mbd2.class_map_, class_map_); NameTranslator fs_translator(mbd2.feature_map_, feature_map_); const vector& objs2 = mbd2.objs_; for(unsigned int i=0; itranslate(class_translator, fs_translator); } // -- Recompute ymc_ and num_objs_of_class_. 
setObjs(objs_); return true; } /* compare: returns false at the first mismatch among the version string, class map, feature map, ymc_, num_bg_, object count and num_objs_of_class_ size, then compares objects with equals. Returns true when nothing differs. NOTE(review): the element-wise loops are partly missing. */ bool MultiBoosterDataset::compare(const MultiBoosterDataset& mbd) { if(mbd.version_string_.compare(version_string_) != 0) return false; if(mbd.class_map_.compare(class_map_) == false) return false; if(mbd.feature_map_.compare(feature_map_) == false) return false; if(mbd.ymc_ != ymc_) return false; if(mbd.num_bg_ != num_bg_) return false; if(objs_.size() != mbd.objs_.size()) return false; if(mbd.num_objs_of_class_.size() != num_objs_of_class_.size()) return false; for(size_t i=0; iequals(*mbd.objs_[i])) { return false; } return true; } /* setObjs: stores objs and rebuilds the derived state. A label of -1 increments num_bg_, a label of -2 is skipped, and any other label is asserted non-negative and used as an index into num_objs_of_class_. ymc_ has class_map_.size() rows and objs.size() columns; an entry is 0 when the label is -2, 1 when the label equals the row index and -1 otherwise. NOTE(review): both loop headers are truncated. */ void MultiBoosterDataset::setObjs(const vector &objs) { objs_ = objs; num_objs_of_class_.clear(); num_objs_of_class_.resize(class_map_.size(), 0); num_bg_ = 0; // -- Get num_objs_of_class_. for(unsigned int m=0; mlabel_ == -1) num_bg_++; else if(objs[m]->label_ == -2) continue; else { assert(objs[m]->label_ >= 0); num_objs_of_class_[(size_t)objs[m]->label_]++; } } // -- Construct ymc_ ymc_ = MatrixXf::Zero(class_map_.size(), objs.size()); for(unsigned int m=0; mlabel_ == -2) { ymc_(c,m) = 0; } else if(objs_[m]->label_ == (int)c) ymc_(c,m) = 1; else ymc_(c,m) = -1; } } // -- Make sure every object has just one class. 
/* NOTE(review): the text after the loop header below is missing through the end of setObjs and the head of an unidentified function. What remains of that function appends the result of a status(class_map_, feature_map_) call to oss and returns oss.str(). */ for(int m=0; mstatus(class_map_, feature_map_); } return oss.str(); } /************************************************************ * Object ************************************************************/ /* Copy constructor: copies label_ and sizes descriptors_ to match o. NOTE(review): the per-descriptor copy loop and the head of the following stream-reading code (it reads from a stream named in) are missing. */ Object::Object(const Object& o) { label_ = o.label_; descriptors_.resize(o.descriptors_.size()); for(size_t i=0; i(num_descriptors); /* For each descriptor: read a validity flag as raw bytes. When it is set, allocate a VectorXf, deserialize into it and cache its squared norm in length_squared; otherwise store NULL. */ for(int i = 0; i < num_descriptors; ++i) { bool valid; in.read((char*)&valid, sizeof(bool)); if(!valid) descriptors_[i].vector = NULL; else { descriptors_[i].vector = new VectorXf(); eigen_extensions::deserialize(in, descriptors_[i].vector); descriptors_[i].length_squared = descriptors_[i].vector->squaredNorm(); } } } /* serialize: writes label_ as sizeof(int) raw bytes, then the descriptor count as sizeof(size_t) raw bytes, so the byte layout follows the type sizes of the writing platform. NOTE(review): the rest of serialize and the head of the next function are missing. That function reports descriptor dimensions and, when showDescriptors is set, the descriptor values. */ void Object::serialize(ostream& out) const { out.write((char*)&label_, sizeof(int)); size_t num_descriptors = descriptors_.size(); out.write((char*)&num_descriptors, sizeof(size_t)); for(size_t d=0; drows() << " dimensions) " << endl; if(showDescriptors) { oss << v->transpose() << endl; } } else oss << " -- no descriptor for this object." << endl; } return oss.str(); } /* translate: non-negative labels are mapped through class_translator.toMap2. desc2 is created with fs_translator.size() filler entries, each with a NULL vector and zero length_squared. NOTE(review): the rest of translate and the head of the following comparison function (presumably equals, which compare calls; confirm) are missing. */ void Object::translate(const NameTranslator& class_translator, const NameTranslator& fs_translator) { if(label_ > -1) // -1 (bg) and -2 (unlabeled) should keep their labels. label_ = class_translator.toMap2(label_); descriptor filler; filler.vector = NULL; filler.length_squared = 0; vector desc2(fs_translator.size(), filler); for(size_t i=0; irows() != descriptors_[j].vector->rows()) return false; if(*other.descriptors_[j].vector != *descriptors_[j].vector) return false; } return true; } /* numBytes: returns sizeof(int) plus rows() * sizeof(float) for every descriptor whose vector pointer is non-null. */ uint64_t Object::numBytes() const { uint64_t num_bytes = sizeof(int); for(size_t i = 0; i < descriptors_.size(); ++i) { if(descriptors_[i].vector) num_bytes += descriptors_[i].vector->rows() * sizeof(float); } return num_bytes; } /* status: builds a text summary of the dataset. When objs_ is empty it returns early after the No objects! line. */ std::string MultiBoosterDataset::status() { ostringstream oss (ostringstream::out); oss << "MultiBoosterDataset status: \n"; if(objs_.size() < 1) { oss << "No objects!" 
<< "\n"; return oss.str(); } /* Count the objects whose label is -2 (loop header truncated). */ size_t num_unlabeled = 0; for(size_t i=0; ilabel_ == -2) num_unlabeled++; } oss << " nClasses: " << class_map_.size() << "\n"; oss << " nObjects: " << objs_.size() << "\n"; oss << " nDescriptors: " << feature_map_.size() << "\n"; oss << " nBackground: " << num_bg_ << "\n"; oss << " nUnlabeled: " << num_unlabeled << "\n"; oss << " num_objs_of_class_: " << "\n"; /* NOTE(review): text is missing after the next loop header, including the per-class output and the declaration heads of nPts and dim. Both are sized by feature_map_.size(), initialised to 0 and -1 respectively. */ for(size_t i=0; i nPts(feature_map_.size(), 0); vector dim(feature_map_.size(), -1); for(unsigned int m=0; mrows() != 0) dim[d] = desc.vector->rows(); } } oss << " nPts: " << endl; for(size_t i=0; idescriptors_.begin(); fit != objs_[0]->descriptors_.end(); fit++) { // cout << " " << fit->first << ": " << endl << fit->second->transpose() << endl; // } return oss.str(); } /* save: opens filename, writes the version string and both serialized name mappings as text lines, then the object count as raw size_t bytes followed by the objects (loop truncated). Returns false only when the open fails; later write errors are not checked. NOTE(review): the stream is opened without a binary flag although raw bytes are written; confirm this is safe on platforms that translate newlines. */ bool MultiBoosterDataset::save(string filename) { ofstream f; f.open(filename.c_str()); if(f.fail()) { cerr << "Failed to open file " << filename << endl; return false; } f << version_string_ << endl; f << class_map_.serialize() << endl; f << feature_map_.serialize() << endl; size_t num_objs = objs_.size(); f.write((char*)&num_objs, sizeof(size_t)); for(unsigned int i=0; iserialize(f); } f.close(); return true; } /* load: opens filename, requires the first line to equal version_string_, reads both name mappings from the stream, then reads the object count and the objects and installs them with setObjs. Returns false when the open fails or the version line differs. NOTE(review): the quiet parameter is not used in the visible code, and reads after the open are not checked for failure. */ bool MultiBoosterDataset::load(string filename, bool quiet) { ifstream f; f.open(filename.c_str()); if(f.fail()) { cerr << "Failed to open file " << filename << endl; return false; } string line; getline(f, line); if(line.compare(version_string_) != 0) { cerr << "Log " << filename << " is of the wrong type!" << endl; return false; } class_map_ = NameMapping(f); feature_map_ = NameMapping(f); getline(f, line); // Eat the space after the name mapping. This really needs to be fixed. 
/* The object count is read as raw size_t bytes and one Object is constructed from the stream per entry (element type of objs lost in extraction). */ size_t num_objs; f.read((char*)&num_objs, sizeof(size_t)); vector objs(num_objs, NULL); for(size_t i = 0; i < objs.size(); ++i) objs[i] = new Object(f); f.close(); for(size_t i = 0; i < objs.size(); ++i) assert(objs[i]); setObjs(objs); return true; } /* Destructor. NOTE(review): its body is truncated after the loop header, together with the start of the return type of computeSortedDistances. */ MultiBoosterDataset::~MultiBoosterDataset() { for(size_t i=0; i > > /* computeSortedDistances: for every object that has a descriptor in feature space fsid, computes fastEucSquared between that descriptor and center, passing the cached length_squared and the squared length of center. It collects (distance, object index) pairs and returns them sorted in ascending order. Objects without the descriptor are skipped. The pair element types were lost in extraction. */ MultiBoosterDataset::computeSortedDistances(size_t fsid, VectorXf center) { float center_length_squared = center.dot(center); shared_ptr< vector< pair > > distance_idx(new vector< pair >(0)); distance_idx->reserve(objs_.size()); for(size_t j=0; jdescriptors_[fsid].vector; if(!f) //Ignore the objects that don't have this descriptor. continue; float dist = fastEucSquared(*f, center, objs_[j]->descriptors_[fsid].length_squared, center_length_squared); distance_idx->push_back(pair(dist, j)); } sort(distance_idx->begin(), distance_idx->end()); //Ascending sort. return distance_idx; }