43 namespace population {
50 std::shared_ptr< utils::BaseInputSource > source
52 std::vector<BedReader::Feature> result;
53 read_( source, [&](
Feature&& feat ){
54 result.push_back( std::move( feat ));
60 std::shared_ptr< utils::BaseInputSource > source,
69 std::shared_ptr< utils::BaseInputSource > source,
73 read_( source, [&](
Feature&& feat ){
74 target.
add( feat.chrom, feat.chrom_start, feat.chrom_end,
merge );
// Core reading loop: parses BED rows one at a time via parse_line_() and hands each parsed
// Feature to `callback`.
//
// NOTE: this listing is an excerpt; original lines are missing between some of the numbered
// lines below (e.g. the declarations of `it`, `feat` and `found_columns`), so only the visible
// statements are described.
//
// @param source    Input to read from. NOTE(review): presumably wrapped into the stream `it`
//                  used below - that line is not shown here.
// @param callback  Called once per parsed row; the Feature is moved into it.
// @throws std::runtime_error if a row has a different number of columns than the first row.
82 void BedReader::read_(
83 std::shared_ptr< utils::BaseInputSource > source,
84 std::function<
void(Feature&&)> callback
// Column count of the first parsed row; 0 means that no row has been seen yet.
93 size_t expected_columns = 0;
// A return value of 0 from parse_line_() terminates the loop.
95 while(( found_columns = parse_line_( it, feat ) )) {
96 if( expected_columns == 0 ) {
// First row: its column count becomes the reference for all following rows.
97 expected_columns = found_columns;
98 }
else if( expected_columns != found_columns ) {
// Every later row has to match the column count of the first row.
100 assert( it.line() > 0 );
101 throw std::runtime_error(
102 "Inconsistent number of columns in BED input. Expected " +
103 std::to_string( expected_columns ) +
" based on first row, but found " +
// Reaching this point means the column count is consistent; hand the row over by move.
107 assert( found_columns == expected_columns );
108 callback( std::move( feat ));
// Parse one BED record from the input stream into `feature`.
//
// NOTE: this listing is an excerpt; original lines are missing between some of the numbered
// lines below (closing braces, parts of conditions), so only the visible statements are
// described here.
//
// Columns are read in the order chrom, start, end, name, score, strand, thickStart, thickEnd,
// itemRgb, blockCount, blockSizes, blockStarts. Before each column after chrom, next_field_()
// is called with the running `found_columns` counter. For start and end, a false result is an
// error; for every later column it ends the record and `found_columns` is returned.
//
// @param input_stream  Stream to read from (aliased as `it` below).
// @param feature       Receives the parsed column values.
// @return The `found_columns` counter. The caller read_() stops its loop when this is 0.
// @throws std::runtime_error if start or end is missing, if score > 1000, or if the number of
//         blockSizes / blockStarts entries differs from blockCount.
113 size_t BedReader::parse_line_(
114 utils::InputStream& input_stream,
115 BedReader::Feature& feature
118 size_t found_columns = 0;
119 auto& it = input_stream;
// Early exit with the still-zero counter. NOTE(review): the guarding condition is not shown.
121 return found_columns;
// Read the first word of a line, repeating while the line is classified as a comment.
129 bool is_comment =
true;
130 std::string first_word;
131 while( is_comment ) {
132 first_word = parse_string_( it );
// NOTE(review): "browser" and "track" are part of the comment test; the rest of the
// condition and the skipping code are not shown in this excerpt.
134 first_word ==
"browser" ||
135 first_word ==
"track" ||
146 return found_columns;
// Mandatory columns: chrom, start, end.
151 feature.chrom = first_word;
152 if( ! next_field_( it, found_columns )) {
153 throw std::runtime_error(
154 "BED input expected to have three mandatory columns chrom,start,end in the beginning "
155 "of the line, but only chrom was found at " + it.at()
// The start is stored incremented by one, while the end below is stored as parsed.
// NOTE(review): presumably converts BED's 0-based half-open interval into a 1-based closed
// interval - confirm against the Feature documentation.
158 feature.chrom_start = utils::parse_unsigned_integer<size_t>( it ) + 1;
159 if( ! next_field_( it, found_columns )) {
160 throw std::runtime_error(
161 "BED input expected to have three mandatory columns chrom,start,end in the beginning "
162 "of the line, but only chrom and start were found at " + it.at()
165 feature.chrom_end = utils::parse_unsigned_integer<size_t>( it );
// Optional column: name.
170 if( ! next_field_( it, found_columns )) {
171 return found_columns;
173 feature.name = parse_string_( it );
// Optional column: score, rejected if larger than 1000.
176 if( ! next_field_( it, found_columns )) {
177 return found_columns;
179 feature.score = utils::parse_unsigned_integer<size_t>( it );
180 if( feature.score > 1000 ) {
181 throw std::runtime_error(
"Invalid score > 1000 in BED input at " + it.at() );
// Optional column: strand. The visible predicate accepts '+', '-' and '.'.
// NOTE(review): the code that applies this predicate is not shown.
185 if( ! next_field_( it, found_columns )) {
186 return found_columns;
189 return c ==
'+' || c ==
'-' || c ==
'.';
// Optional column: thickStart, stored incremented by one like chrom_start.
193 if( ! next_field_( it, found_columns )) {
194 return found_columns;
196 feature.thick_start = utils::parse_unsigned_integer<size_t>( it ) + 1;
// Optional column: thickEnd, stored as parsed.
199 if( ! next_field_( it, found_columns )) {
200 return found_columns;
202 feature.thick_end = utils::parse_unsigned_integer<size_t>( it );
// Optional column: itemRgb, kept as the raw string.
205 if( ! next_field_( it, found_columns )) {
206 return found_columns;
208 feature.item_rgb = parse_string_( it );
// Optional column: blockCount.
211 if( ! next_field_( it, found_columns )) {
212 return found_columns;
214 feature.block_count = utils::parse_unsigned_integer<size_t>( it );
// Optional column: blockSizes, a comma-separated list that needs blockCount entries.
217 if( ! next_field_( it, found_columns )) {
218 return found_columns;
220 auto block_sizes =
utils::split( parse_string_( it ),
"," );
221 for(
auto const& bs : block_sizes ) {
223 feature.block_sizes.push_back( stoull( bs ));
226 if( feature.block_sizes.size() != feature.block_count ) {
227 throw std::runtime_error(
228 "Invalid blockSizes length in BED input. Expected " +
229 std::to_string( feature.block_count ) +
" based on blockCount, but found " +
230 std::to_string( feature.block_sizes.size() ) +
" values instead, at " + it.at()
// Optional column: blockStarts, a comma-separated list that needs blockCount entries.
235 if( ! next_field_( it, found_columns )) {
236 return found_columns;
238 auto block_starts =
utils::split( parse_string_( it ),
"," );
// Iterate the blockStarts entries parsed just above. Looping over block_sizes here would copy
// the block sizes into feature.block_starts and leave the parsed starts unused.
239 for(
auto const& bs : block_starts ) {
241 feature.block_starts.push_back( stoull( bs ));
244 if( feature.block_starts.size() != feature.block_count ) {
245 throw std::runtime_error(
246 "Invalid blockStarts length in BED input. Expected " +
247 std::to_string( feature.block_count ) +
" based on blockCount, but found " +
248 std::to_string( feature.block_starts.size() ) +
" values instead, at " + it.at()
// Keep advancing over any further fields, which also keeps counting them in found_columns.
// NOTE(review): the loop body is not shown in this excerpt.
253 while( next_field_( it, found_columns ) ) {
259 return found_columns;
// Move the stream towards the next field of the current line.
//
// NOTE: this listing is an excerpt; original lines are missing between some of the numbered
// lines below (return statements, the calls that use the predicates, any update of
// `found_columns`), so only the visible statements are described.
//
// @param input_stream   Stream positioned after a parsed field.
// @param found_columns  Column counter, taken by reference.
//                       NOTE(review): presumably incremented here - the line is not shown.
// @return bool; callers in parse_line_() treat `false` as "no further field on this line".
// @throws std::runtime_error if the input or the line ends at the final check below.
262 bool BedReader::next_field_( utils::InputStream& input_stream,
size_t& found_columns )
const
// Case 1: the stream is exhausted or sits at the end of the line.
269 if( ! input_stream || *input_stream ==
'\n' ) {
270 assert( !input_stream || *input_stream ==
'\n' );
// Case 2: there is more data on the current line.
274 assert( input_stream && *input_stream !=
'\n' );
// Two predicates that match a field delimiter: tab or space.
// NOTE(review): the calls that use them are not shown in this excerpt.
276 return c ==
'\t' || c ==
' ';
279 return c ==
'\t' || c ==
' ';
// Reaching the end of the input or the line at this point is reported as an error.
281 if( ! input_stream || *input_stream ==
'\n' ) {
282 throw std::runtime_error(
"Unexpected end of BED input at " + input_stream.at() );
// Read a string value from the stream and return it.
//
// NOTE: this listing is an excerpt; only the signature and one predicate line are visible.
//
// @param input_stream  Stream to read from.
// @return The parsed string.
288 std::string BedReader::parse_string_( utils::InputStream& input_stream )
const
// Predicate that is true for every character except tab, space and newline.
// NOTE(review): presumably selects the characters that belong to the value, so that it ends at
// the next delimiter or at the end of the line - the call using it is not shown.
293 return c !=
'\t' && c !=
' ' && c !=
'\n';