43 namespace population {
// Reader variant that gathers all parsed features into a vector.
// NOTE(review): the function header and the return statement are not visible in this excerpt.
50 std::shared_ptr< utils::BaseInputSource > source
52 std::vector<BedReader::Feature> result;
// read_() invokes the lambda once per parsed feature.
53 read_( source, [&](
Feature&& feat ){
// Move the feature into the vector rather than copying it.
54 result.push_back( std::move( feat ));
// Reader variant that only forwards the chrom/start/end coordinates of each feature to `result`.
// NOTE(review): the function header and the declaration of `result` are not visible in this excerpt.
60 std::shared_ptr< utils::BaseInputSource > source
// read_() invokes the lambda once per parsed feature.
63 read_( source, [&](
Feature&& feat ){
// Only the three mandatory columns are used; all other feature fields are dropped here.
64 result.
add( feat.chrom, feat.chrom_start, feat.chrom_end );
70 std::shared_ptr< utils::BaseInputSource > source,
// Reader variant that adds the chrom/start/end coordinates of each feature to `target`.
// NOTE(review): the function header and the declarations of `target` and `merge` are not
// visible in this excerpt — presumably both are parameters; confirm.
79 std::shared_ptr< utils::BaseInputSource > source,
// read_() invokes the lambda once per parsed feature.
83 read_( source, [&](
Feature&& feat ){
// `merge` is passed through unchanged to add().
84 target.
add( feat.chrom, feat.chrom_start, feat.chrom_end,
merge );
// Parsing loop shared by the reader variants: calls parse_line_() repeatedly and passes each
// parsed feature to `callback`, while checking that every row has the same number of columns
// as the first one. Throws std::runtime_error if a row has a different column count.
// NOTE(review): the declarations of `it` and `feat` are not visible in this excerpt.
92 void BedReader::read_(
93 std::shared_ptr< utils::BaseInputSource > source,
94 std::function<
void(Feature&&)> callback
// Column count of the first parsed row; 0 means that no row has been seen yet.
103 size_t expected_columns = 0;
104 size_t found_columns;
// parse_line_() returns the number of columns it found; a result of 0 ends the loop.
105 while(( found_columns = parse_line_( it, feat ) )) {
106 if( expected_columns == 0 ) {
// First row: remember its column count as the reference for all later rows.
107 expected_columns = found_columns;
108 }
else if( expected_columns != found_columns ) {
110 assert( it.line() > 0 );
111 throw std::runtime_error(
112 "Inconsistent number of columns in BED input. Expected " +
113 std::to_string( expected_columns ) +
" based on first row, but found " +
// Reaching this point means the column count matched (or was just initialized).
117 assert( found_columns == expected_columns );
// Hand the feature over by move; the callback receives an rvalue reference.
118 callback( std::move( feat ));
// Parses one feature line of BED input into `feature` and returns the number of columns found.
//
// The three columns chrom, start, end are mandatory; std::runtime_error is thrown if the line
// ends before them. Every later column is optional: as soon as next_field_() reports that there
// is no further field, the column count reached so far is returned.
// Also throws std::runtime_error for a score above 1000, and for blockSizes / blockStarts
// lists whose length differs from blockCount.
// NOTE(review): several lines of this function are not visible in this excerpt; comments below
// describe only what the visible lines show.
123 size_t BedReader::parse_line_(
124 utils::InputStream& input_stream,
125 BedReader::Feature& feature
128 size_t found_columns = 0;
// Short alias for the input stream.
129 auto& it = input_stream;
// Early exit; the guarding condition is not visible here (presumably end of input — confirm).
131 return found_columns;
// Skip over leading non-feature lines until an actual feature line is reached.
139 bool is_comment =
true;
140 std::string first_word;
141 while( is_comment ) {
142 first_word = parse_string_( it );
// Lines starting with "browser" or "track" are among those treated as non-feature lines
// (the remaining conditions are not visible here).
144 first_word ==
"browser" ||
145 first_word ==
"track" ||
156 return found_columns;
// The first word of a feature line is the chromosome name.
161 feature.chrom = first_word;
162 if( ! next_field_( it, found_columns )) {
163 throw std::runtime_error(
164 "BED input expected to have three mandatory columns chrom,start,end in the beginning "
165 "of the line, but only chrom was found at " + it.at()
// The stored start is the parsed value plus one
// (presumably converting a zero-based BED start to a one-based position — confirm).
168 feature.chrom_start = utils::parse_unsigned_integer<size_t>( it ) + 1;
169 if( ! next_field_( it, found_columns )) {
170 throw std::runtime_error(
171 "BED input expected to have three mandatory columns chrom,start,end in the beginning "
172 "of the line, but only chrom and start were found at " + it.at()
// The end is stored as parsed, without an offset.
175 feature.chrom_end = utils::parse_unsigned_integer<size_t>( it );
// Optional column: name.
180 if( ! next_field_( it, found_columns )) {
181 return found_columns;
183 feature.name = parse_string_( it );
// Optional column: score, which must not exceed 1000.
186 if( ! next_field_( it, found_columns )) {
187 return found_columns;
189 feature.score = utils::parse_unsigned_integer<size_t>( it );
190 if( feature.score > 1000 ) {
191 throw std::runtime_error(
"Invalid score > 1000 in BED input at " + it.at() );
// Optional column: presumably the strand, judging by the accepted characters '+', '-', '.'
// (the surrounding statement is not visible here).
195 if( ! next_field_( it, found_columns )) {
196 return found_columns;
199 return c ==
'+' || c ==
'-' || c ==
'.';
// Optional column: thickStart, stored with the same plus-one offset as chrom_start.
203 if( ! next_field_( it, found_columns )) {
204 return found_columns;
206 feature.thick_start = utils::parse_unsigned_integer<size_t>( it ) + 1;
// Optional column: thickEnd, stored as parsed.
209 if( ! next_field_( it, found_columns )) {
210 return found_columns;
212 feature.thick_end = utils::parse_unsigned_integer<size_t>( it );
// Optional column: itemRgb, kept as an unparsed string.
215 if( ! next_field_( it, found_columns )) {
216 return found_columns;
218 feature.item_rgb = parse_string_( it );
// Optional column: blockCount.
221 if( ! next_field_( it, found_columns )) {
222 return found_columns;
224 feature.block_count = utils::parse_unsigned_integer<size_t>( it );
// Optional column: blockSizes, a comma-separated list with exactly blockCount entries.
227 if( ! next_field_( it, found_columns )) {
228 return found_columns;
230 auto block_sizes =
utils::split( parse_string_( it ),
"," );
231 for(
auto const& bs : block_sizes ) {
233 feature.block_sizes.push_back( stoull( bs ));
236 if( feature.block_sizes.size() != feature.block_count ) {
237 throw std::runtime_error(
238 "Invalid blockSizes length in BED input. Expected " +
239 std::to_string( feature.block_count ) +
" based on blockCount, but found " +
240 std::to_string( feature.block_sizes.size() ) +
" values instead, at " + it.at()
// Optional column: blockStarts, a comma-separated list with exactly blockCount entries.
245 if( ! next_field_( it, found_columns )) {
246 return found_columns;
248 auto block_starts =
utils::split( parse_string_( it ),
"," );
// Iterate over the values of the blockStarts column that was just parsed. Iterating over
// block_sizes here would store the block sizes a second time as block starts.
249 for(
auto const& bs : block_starts ) {
251 feature.block_starts.push_back( stoull( bs ));
254 if( feature.block_starts.size() != feature.block_count ) {
255 throw std::runtime_error(
256 "Invalid blockStarts length in BED input. Expected " +
257 std::to_string( feature.block_count ) +
" based on blockCount, but found " +
258 std::to_string( feature.block_starts.size() ) +
" values instead, at " + it.at()
// Keep advancing while next_field_() reports further fields (loop body not visible here).
263 while( next_field_( it, found_columns ) ) {
269 return found_columns;
// Handles the transition from one field of a line to the next, and reports via its bool
// result whether there is another field. Throws std::runtime_error if the input or the line
// ends directly after the field separators.
// NOTE(review): the return statements and any update of `found_columns` are not visible in
// this excerpt.
272 bool BedReader::next_field_( utils::InputStream& input_stream,
size_t& found_columns )
const
// End of input or end of line: no further field on this line.
279 if( ! input_stream || *input_stream ==
'\n' ) {
280 assert( !input_stream || *input_stream ==
'\n' );
// From here on, the stream still has data on the current line.
284 assert( input_stream && *input_stream !=
'\n' );
// Predicate for the field separators: a tab or a space.
286 return c ==
'\t' || c ==
' ';
// Same separator predicate, used a second time (enclosing statement not visible here).
289 return c ==
'\t' || c ==
' ';
// Separators followed by end of input or end of line mean that the expected field is missing.
291 if( ! input_stream || *input_stream ==
'\n' ) {
292 throw std::runtime_error(
"Unexpected end of BED input at " + input_stream.at() );
// Reads one field from the input stream and returns it as a string.
// NOTE(review): only the signature and the character predicate are visible in this excerpt;
// presumably characters are consumed for as long as the predicate holds — confirm.
298 std::string BedReader::parse_string_( utils::InputStream& input_stream )
const
// Predicate for characters that belong to a field: anything except tab, space, and newline.
303 return c !=
'\t' && c !=
' ' && c !=
'\n';