44 namespace population {
// NOTE(review): the function header and the return statement are not visible in this excerpt.
51 std::shared_ptr< utils::BaseInputSource > source
// Accumulates every feature that read_() hands to the callback below.
53 std::vector<MapBimReader::Feature> result;
// The callback receives each feature as an rvalue reference and moves it into the vector,
// so no copy of the feature is made here.
54 read_( source, [&](
Feature&& feat ){
55 result.push_back( std::move( feat ));
// NOTE(review): the function header and the declaration of `result` are not visible in this excerpt.
61 std::shared_ptr< utils::BaseInputSource > source
// For every feature delivered by read_(), register its chromosome and coordinate in `result`.
// The coordinate is passed twice - presumably as both ends of a one-position interval;
// confirm against the declaration of add().
64 read_( source, [&](
Feature&& feat ){
65 result.
add( feat.chromosome, feat.coordinate, feat.coordinate );
71 std::shared_ptr< utils::BaseInputSource > source,
// NOTE(review): the function header and the declarations of `target` and `merge` are not
// visible in this excerpt.
80 std::shared_ptr< utils::BaseInputSource > source,
// For every feature delivered by read_(), add its chromosome and coordinate to `target`,
// forwarding `merge` unchanged to add(). The coordinate is passed twice - presumably as both
// ends of a one-position interval; confirm against the declaration of add().
84 read_( source, [&](
Feature&& feat ){
85 target.
add( feat.chromosome, feat.coordinate, feat.coordinate,
merge );
// Core reading loop: repeatedly calls parse_line_() and forwards each accepted feature to
// `callback`.
//
// Throws std::runtime_error if a line has a different number of columns than the first parsed
// line, or if a feature has a coordinate of 0.
//
// NOTE(review): the declarations of `it` and `feat` are not visible in this excerpt.
93 void MapBimReader::read_(
94 std::shared_ptr< utils::BaseInputSource > source,
95 std::function<
void(Feature&&)> callback
// Scratch storage handed by reference to parse_line_() on every iteration, so the same
// vector is reused across lines.
106 std::vector<std::string> buffer;
// Column count of the first parsed line; 0 means that no line has been parsed yet.
107 size_t expected_columns = 0;
108 size_t found_columns;
// parse_line_() returns the number of columns it found; a return value of 0 ends the loop.
109 while(( found_columns = parse_line_( it, feat, buffer ) )) {
// The first parsed line fixes the column count that all later lines have to match.
110 if( expected_columns == 0 ) {
111 expected_columns = found_columns;
112 }
else if( expected_columns != found_columns ) {
114 assert( it.line() > 0 );
115 throw std::runtime_error(
116 "Inconsistent number of columns in map/bim input. Expected " +
117 std::to_string( expected_columns ) +
" based on first row, but found " +
// Either branch above leaves the two counts equal (or has thrown).
121 assert( found_columns == expected_columns );
// A coordinate of exactly 0 is rejected as invalid input.
124 if( feat.coordinate == 0 ) {
126 assert( it.line() > 0 );
127 throw std::runtime_error(
128 "Invalid base-pair coordinate 0 in map/bim input in line " +
// Forward the feature unless its coordinate is negative and skip_negative_coordinates_ is set;
// negative coordinates are still forwarded when that flag is not set.
134 if( !( feat.coordinate < 0 && skip_negative_coordinates_ )) {
135 callback( std::move( feat ));
// Parse one line of map/bim input from `input_stream` into `feature`, using `buffer` as
// storage for the individual column strings, and return the number of columns found.
//
// Throws std::runtime_error on an empty entry, on a line that ends right after a tab, on a
// column count outside of 3-6, on non-numeric position or coordinate values, and on alleles
// that are not exactly one character long.
//
// NOTE(review): several lines of this function (closing braces, the `case` labels of the
// switch, parts of some conditions) are not visible in this excerpt.
141 size_t MapBimReader::parse_line_(
142 utils::InputStream& input_stream,
143 MapBimReader::Feature& feature,
144 std::vector<std::string>& buffer
147 size_t found_columns = 0;
// Short alias for the stream, used throughout the function.
148 auto& it = input_stream;
// Early exit; the guarding condition is not visible here. The counter has no visible
// modification since its initialization, so this presumably returns 0 - confirm.
150 return found_columns;
// Consume entries until the end of the input or the end of the current line.
156 while( it && *it !=
'\n' ) {
// Grow the buffer on demand so that index `found_columns` is valid. The buffer is only ever
// enlarged here, so it may hold more entries than the current line has columns.
161 if( buffer.size() < found_columns + 1 ) {
162 buffer.resize( found_columns + 1 );
// Character predicate: accepts everything except tab and new line, i.e., the content of one
// entry. The call that uses it is not visible in this excerpt.
167 return c !=
'\t' && c !=
'\n';
// Empty entries are not allowed.
169 if( buffer[found_columns].empty() ) {
170 throw std::runtime_error(
"Invalid empty entry of map/bim input at " + it.at() );
// A tab has to be followed by another entry: reaching the end of the input or of the line
// at this point is an error.
175 if( it && *it ==
'\t' ) {
177 if( ! it || *it ==
'\n' ) {
178 throw std::runtime_error(
"Unexpected end of map/bim input at " + it.at() );
182 assert( found_columns <= buffer.size() );
186 assert( *it ==
'\n' );
// Fewer columns than the buffer holds: return the count right away, before the range check
// and the field assignments below. read_() compares the returned count against the count of
// the first line and throws on a mismatch.
193 if( found_columns < buffer.size() ) {
194 return found_columns;
196 assert( found_columns == buffer.size() );
// Only 3 to 6 columns are accepted.
199 if( found_columns < 3 || found_columns > 6 ) {
200 throw std::runtime_error(
202 " found, but 3-6 expected) of map/bim input at " + it.at()
208 assert( found_columns > 2 );
209 assert( buffer.size() > 2 );
210 assert( found_columns == buffer.size() );
// The first two columns are always the chromosome and the variant id.
211 feature.chromosome = buffer[0];
212 feature.variant_id = buffer[1];
// Helper to convert a (centi)morgan position entry; throws if the value is not numeric.
215 auto get_position_ = [&]( std::string
const& value ){
218 throw std::runtime_error(
219 "Invalid map/bim input with (centi)morgan position that is not a numeric value (\"" +
220 value +
"\") at " + it.at()
// Helper to convert a base pair coordinate entry; throws if the value is not numeric.
225 auto get_coordinate_ = [&]( std::string
const& value ){
228 throw std::runtime_error(
229 "Invalid map/bim input with base pair coordinate that is not a numeric value (\"" +
230 value +
"\") at " + it.at()
// Helper to convert an allele entry; throws unless the value is exactly one character long.
235 auto get_allele_ = [&]( std::string
const& value ){
236 if( value.size() != 1 ) {
237 throw std::runtime_error(
238 "Invalid map/bim input with allele that is not a single char (\"" +
239 value +
"\") at " + it.at()
242 assert( value.size() == 1 );
248 assert( found_columns == buffer.size() );
249 assert( found_columns >= 3 && found_columns <= 6 );
// Assign the remaining columns depending on how many were found. The `case` labels are not
// visible in this excerpt; judging by the highest buffer index used, the four groups below
// presumably correspond to 3, 4, 5, and 6 columns, in that order.
250 switch( found_columns ) {
// Coordinate only.
253 feature.coordinate = get_coordinate_( buffer[2] );
// Position, then coordinate.
258 feature.position = get_position_( buffer[2] );
259 feature.coordinate = get_coordinate_( buffer[3] );
// Coordinate, then the two alleles (no position).
264 feature.coordinate = get_coordinate_( buffer[2] );
265 feature.allele_1 = get_allele_( buffer[3] );
266 feature.allele_2 = get_allele_( buffer[4] );
// Position, coordinate, and the two alleles.
271 feature.position = get_position_( buffer[2] );
272 feature.coordinate = get_coordinate_( buffer[3] );
273 feature.allele_1 = get_allele_( buffer[4] );
274 feature.allele_2 = get_allele_( buffer[5] );
283 return found_columns;