1 #ifndef GENESIS_UTILS_MATH_REGRESSION_FACTOR_H_
2 #define GENESIS_UTILS_MATH_REGRESSION_FACTOR_H_
87 template <
class ForwardIterator>
89 ForwardIterator first,
91 std::vector<typename ForwardIterator::value_type>
const& levels,
92 std::vector<typename ForwardIterator::value_type>
const& exclude
95 using T =
typename ForwardIterator::value_type;
99 if( levels.empty() ) {
101 while( it != last ) {
102 result.
levels.push_back( *it );
109 std::sort( result.
levels.begin(), result.
levels.end() );
110 auto uit = std::unique( result.
levels.begin(), result.
levels.end() );
116 std::sort( result.
levels.begin(), result.
levels.end() );
117 auto uit = std::unique( result.
levels.begin(), result.
levels.end() );
118 if( uit != result.
levels.end() ) {
119 throw std::invalid_argument(
"Provided levels are not unique." );
127 for(
auto const& e : exclude ) {
128 auto fit = std::find( result.
levels.begin(), result.
levels.end(), e );
129 if( fit != result.
levels.end() ) {
130 result.
levels.erase( fit );
137 while( it != last ) {
138 auto fit = std::find( result.
levels.begin(), result.
levels.end(), *it );
139 if( fit != result.
levels.end() ) {
140 result.
values.push_back( std::distance( result.
levels.begin(), fit ));
143 result.
values.push_back( std::numeric_limits<std::size_t>::max() );
151 template <
class ForwardIterator>
153 ForwardIterator first,
154 ForwardIterator last,
155 std::vector<typename ForwardIterator::value_type>
const& levels
161 std::vector<typename ForwardIterator::value_type>()
165 template <
class ForwardIterator>
167 ForwardIterator first,
173 std::vector<typename ForwardIterator::value_type>(),
174 std::vector<typename ForwardIterator::value_type>()
195 auto result = std::vector<size_t>( factor.
levels.size(), 0 );
196 for(
auto val : factor.
values ) {
197 if( val < factor.
levels.size() ) {
216 T
const& reference_level,
217 std::vector<std::string>
const& row_names = std::vector<std::string>{}
220 if( factor.
levels.empty() ) {
221 throw std::runtime_error(
"Cannot create indicator variable from empty factor." );
223 if( ! row_names.empty() && row_names.size() != factor.
values.size() ) {
224 throw std::runtime_error(
225 "Row names for indicator variable do not have the same size as the values of the factor."
235 auto const rit = std::find( factor.
levels.begin(), factor.
levels.end(), reference_level );
236 if( rit == factor.
levels.end() ) {
237 throw std::runtime_error(
238 "Cannot create indicator variable. "
239 "Provided reference level is not part of the factor levels."
242 auto const ref_idx = std::distance( factor.
levels.begin(), rit );
243 assert( ref_idx >= 0 );
247 if( row_names.empty() ) {
248 for(
size_t i = 0; i < factor.
values.size(); ++i ) {
249 result.add_unnamed_row();
252 assert( row_names.size() == factor.
values.size() );
253 for(
size_t i = 0; i < factor.
values.size(); ++i ) {
254 result.add_row( row_names[i] );
257 assert( result.rows() == factor.
values.size() );
260 for(
size_t lvl_idx = 0; lvl_idx < factor.
levels.size(); ++lvl_idx ) {
261 if( lvl_idx ==
static_cast<size_t>( ref_idx )) {
266 auto make_name = []( T
const& level ){
267 std::stringstream ss;
275 auto col_name = make_name( factor.
levels[ ref_idx ] ) +
"." + make_name( factor.
levels[ lvl_idx ] );
276 auto& col = result.add_col<
double>( col_name );
279 for(
size_t val_idx = 0; val_idx < factor.
values.size(); ++val_idx ) {
281 col[val_idx] = std::numeric_limits<double>::quiet_NaN();
282 }
else if( factor.
values[val_idx] == lvl_idx ) {
289 assert( factor.
levels.size() > 0 );
290 assert( result.cols() == factor.
levels.size() - 1 );
304 std::vector<std::string>
const& row_names = std::vector<std::string>{}
306 if( factor.
levels.empty() ) {
307 throw std::runtime_error(
"Cannot create indicator variable from empty factor." );
312 auto const maxit = std::max_element( smry.begin(), smry.end() );
313 size_t const max_level = std::distance( smry.begin(), maxit );
314 assert( max_level < factor.
levels.size() );
322 #endif // include guard