15 #ifndef __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
16 #define __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
56 template<
typename FitnessFunction = GiniImpurity,
57 template<
typename>
class NumericSplitType =
59 template<
typename>
class CategoricalSplitType =
60 HoeffdingCategoricalSplit
92 template<
typename MatType>
95 const arma::Row<size_t>& labels,
97 const bool batchTraining =
true,
102 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
103 = CategoricalSplitType<FitnessFunction>(0, 0),
104 const NumericSplitType<FitnessFunction>& numericSplitIn =
105 NumericSplitType<FitnessFunction>(0));
127 const size_t numClasses,
132 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
133 = CategoricalSplitType<FitnessFunction>(0, 0),
134 const NumericSplitType<FitnessFunction>& numericSplitIn =
135 NumericSplitType<FitnessFunction>(0),
136 std::unordered_map<
size_t, std::pair<size_t, size_t>>*
160 template<
typename MatType>
161 void Train(
const MatType& data,
162 const arma::Row<size_t>& labels,
163 const bool batchTraining =
true);
171 template<
typename VecType>
172 void Train(
const VecType& point,
const size_t label);
229 template<
typename VecType>
239 template<
typename VecType>
240 size_t Classify(
const VecType& point)
const;
253 template<
typename VecType>
254 void Classify(
const VecType& point,
size_t& prediction,
double& probability)
264 template<
typename MatType>
265 void Classify(
const MatType& data, arma::Row<size_t>& predictions)
const;
278 template<
typename MatType>
280 arma::Row<size_t>& predictions,
281 arma::rowvec& probabilities)
const;
289 template<
typename Archive>
290 void Serialize(Archive& ar,
const unsigned int );
342 #include "hoeffding_tree_impl.hpp"
NumericSplitType< FitnessFunction > NumericSplit
Allow access to the numeric split type.
size_t Classify(const VecType &point) const
Classify the given point, using this node and the entire (sub)tree beneath it.
HoeffdingTree(const MatType &data, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining=true, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0))
Construct the Hoeffding tree with the given parameters and given training data.
bool ownsInfo
Whether or not we own the dataset information.
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based decision tree.
Linear algebra utility functions, generally performed on matrices or vectors.
size_t checkInterval
The number of samples that should be seen before checking for a split.
~HoeffdingTree()
Clean up memory.
size_t CalculateDirection(const VecType &point) const
Given a point and that this node is not a leaf, calculate the index of the child node this point would go towards.
double & MajorityProbability()
Modify the probability of the majority class.
size_t minSamples
The minimum number of samples for splitting.
size_t numSamples
The number of samples seen so far by this node.
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
size_t SplitDimension() const
Get the splitting dimension (size_t(-1) if no split).
double SuccessProbability() const
Get the confidence required for a split.
double MajorityProbability() const
Get the probability of the majority class (based on training samples).
bool ownsMappings
Indicates whether or not we own the mappings.
CategoricalSplitType< FitnessFunction >::SplitInfo categoricalSplit
If the split is categorical, this holds the splitting information.
std::vector< NumericSplitType< FitnessFunction > > numericSplits
Information for splitting of numeric features (used before split).
void Train(const MatType &data, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels.
void Serialize(Archive &ar, const unsigned int)
Serialize the split.
size_t MinSamples() const
Get the minimum number of samples for a split.
size_t MaxSamples() const
Get the maximum number of samples before a split is forced.
NumericSplitType< FitnessFunction >::SplitInfo numericSplit
If the split is numeric, this holds the splitting information.
const HoeffdingTree & Child(const size_t i) const
Get a child.
size_t maxSamples
The maximum number of samples we can see before splitting.
size_t CheckInterval() const
Get the number of samples before a split check is performed.
size_t MajorityClass() const
Get the majority class.
size_t majorityClass
The majority class of this node.
Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dimension.
std::unordered_map< size_t, std::pair< size_t, size_t > > * dimensionMappings
This structure is owned by this node only if it is the root of the tree.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen documentation.
double successProbability
The required probability of success for a split to be performed.
double majorityProbability
The empirical probability of a point this node saw having the majority class.
size_t numClasses
The number of classes this node is trained on.
std::vector< CategoricalSplitType< FitnessFunction > > categoricalSplits
Information for splitting of categorical features (used before split).
size_t SplitCheck()
Check if a split would satisfy the conditions of the Hoeffding bound with the node's specified success probability.
std::vector< HoeffdingTree * > children
If the split has occurred, these are the children.
CategoricalSplitType< FitnessFunction > CategoricalSplit
Allow access to the categorical split type.
size_t NumChildren() const
Get the number of children.
size_t & MajorityClass()
Modify the majority class.
size_t splitDimension
The dimension that this node has split on.
const data::DatasetInfo * datasetInfo
The dataset information.
HoeffdingTree & Child(const size_t i)
Modify a child.
void CreateChildren()
Given that this node should split, create the children.