11/*  vcfconvert.c -- convert between VCF/BCF and related formats. 
22
3-     Copyright (C) 2013-2021  Genome Research Ltd. 
3+     Copyright (C) 2013-2023  Genome Research Ltd. 
44
55    Author: Petr Danecek <[email protected] > 66
@@ -59,7 +59,7 @@ struct _args_t
5959    bcf_hdr_t  * header ;
6060    void  (* convert_func )(struct  _args_t  * );
6161    struct  {
62-         int  total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;  
62+         int  total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;
6363    } n ;
6464    kstring_t  str ;
6565    int32_t  * gts ;
@@ -160,7 +160,7 @@ static int _set_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
160160    // REF,ALT 
161161    args -> str .l  =  0 ;
162162    se  =  ++ ss ;
163-     while  ( se  <  tsv -> se  &&  * se != '_'  ) se ++ ;  
163+     while  ( se  <  tsv -> se  &&  * se != '_'  ) se ++ ;
164164    if  ( * se != '_'  ) return  -1 ;
165165    kputsn (ss ,se - ss ,& args -> str );
166166    ss  =  ++ se ;
@@ -202,14 +202,14 @@ static int tsv_setter_chrom_pos_ref_alt_or_id(tsv_t *tsv, bcf1_t *rec, void *usr
202202{
203203    args_t  * args  =  (args_t * )usr ;
204204    if  ( _set_chrom_pos_ref_alt (tsv ,rec ,usr )== 0  )  return  0 ;
205-     rec -> pos  =  -1 ;  // mark the record as unset 
205+     rec -> pos  =  CSI_COOR_EMPTY ;  // mark the record as unset 
206206    if  ( !args -> output_vcf_ids ) return  0 ;
207207    return  tsv_setter_id (tsv ,rec ,usr );
208208}
209209static  int  tsv_setter_chrom_pos_ref_alt_id_or_die (tsv_t  * tsv , bcf1_t  * rec , void  * usr )
210210{
211211    args_t  * args  =  (args_t * )usr ;
212-     if  ( rec -> pos != -1  )
212+     if  ( rec -> pos != CSI_COOR_EMPTY  )
213213    {
214214        if  ( !args -> output_vcf_ids  ) return  0 ;
215215        return  tsv_setter_id (tsv ,rec ,usr );
@@ -269,12 +269,12 @@ static int tsv_setter_gt_gp(tsv_t *tsv, bcf1_t *rec, void *usr)
269269        if  ( aa  >= ab  )
270270        {
271271            if  ( aa  >= bb  ) args -> gts [2 * i + 0 ] =  args -> gts [2 * i + 1 ] =  bcf_gt_unphased (0 );
272-             else  args -> gts [2 * i + 0 ] =  args -> gts [2 * i + 1 ] =  bcf_gt_unphased (1 );  
272+             else  args -> gts [2 * i + 0 ] =  args -> gts [2 * i + 1 ] =  bcf_gt_unphased (1 );
273273        }
274-         else  if  ( ab  >= bb  )  
274+         else  if  ( ab  >= bb  )
275275        {
276276            args -> gts [2 * i + 0 ] =  bcf_gt_unphased (0 );
277-             args -> gts [2 * i + 1 ] =  bcf_gt_unphased (1 );  
277+             args -> gts [2 * i + 1 ] =  bcf_gt_unphased (1 );
278278        }
279279        else  args -> gts [2 * i + 0 ] =  args -> gts [2 * i + 1 ] =  bcf_gt_unphased (1 );
280280    }
@@ -293,7 +293,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
293293    else  { a0  =  bcf_gt_phased (0 ); a1  =  bcf_gt_phased (1 ); }
294294
295295    // up is short for "unphased" 
296-     int  nup  =  0 ;  
296+     int  nup  =  0 ;
297297    for  (i = 0 ; i < nsamples ; i ++ )
298298    {
299299        char  * ss  =  tsv -> ss  +  4 * i  +  nup ;
@@ -324,11 +324,11 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
324324                break ;
325325            default  :
326326                fprintf (stderr ,"Could not parse: [%c][%s]\n" , ss [all * 2 + up ],tsv -> ss );
327-                 return  -1 ;  
327+                 return  -1 ;
328328            }
329329            if ( ss [all * 2 + up + 1 ]== '*'  ) up  =  up  +  1 ;
330330        }
331-          
331+ 
332332        if (up  &&  up  !=  2 )
333333        {
334334            fprintf (stderr ,"Missing unphased marker '*': [%c][%s]" , ss [2 + up ], tsv -> ss );
@@ -356,13 +356,13 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
356356static  void  gensample_to_vcf (args_t  * args )
357357{
358358    /* 
359-      *  Input: IMPUTE2 output (indentation changed here for clarity):   
359+      *  Input: IMPUTE2 output (indentation changed here for clarity): 
360360     * 
361361     *      20:62116619_C_T 20:62116619     62116619 C T 0.969 0.031 0 ... 
362362     *      ---             20:62116698_C_A 62116698 C A 1     0     0 ... 
363363     * 
364364     *  Second column is expected in the form of CHROM:POS_REF_ALT. We use second 
365-      *  column because the first can be empty ("--") when filling sites from reference   
365+      *  column because the first can be empty ("--") when filling sites from reference 
366366     *  panel. When the option --vcf-ids is given, the first column is used to set the 
367367     *  VCF ID. 
368368     * 
@@ -784,7 +784,7 @@ char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname)
784784    }
785785    for  (i = 0 ; i < nlines ; i ++ ) free (lines [i ]);
786786    free (lines );
787-     for  (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )  
787+     for  (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )
788788        if  ( !sample2sex [i ] ) error ("Missing sex for sample %s in %s\n" , bcf_hdr_int2id (hdr , BCF_DT_SAMPLE , i ),sex_fname );
789789    return  sample2sex ;
790790}
@@ -847,7 +847,7 @@ static void vcf_to_gensample(args_t *args)
847847    if  (sample_fname ) fprintf (stderr , "Sample file: %s\n" , sample_fname );
848848
849849    // write samples file 
850-     if  (sample_fname )  
850+     if  (sample_fname )
851851    {
852852        char  * sample2sex  =  NULL ;
853853        if  ( args -> sex_fname  ) sample2sex  =  init_sample2sex (args -> header ,args -> sex_fname );
@@ -877,7 +877,7 @@ static void vcf_to_gensample(args_t *args)
877877        return ;
878878    }
879879
880-     int  prev_rid  =  -1 , prev_pos  =  -1 ;
880+     int  prev_rid  =  -1 , prev_pos  =  CSI_COOR_EMPTY ;
881881    int  no_alt  =  0 , non_biallelic  =  0 , filtered  =  0 , ndup  =  0 , nok  =  0 ;
882882    BGZF  * gout  =  bgzf_open (gen_fname , gen_compressed  ? "wg"  : "wu" );
883883    while  ( bcf_sr_next_line (args -> files ) )
@@ -915,7 +915,7 @@ static void vcf_to_gensample(args_t *args)
915915            nok ++ ;
916916        }
917917    }
918-     fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,  
918+     fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,
919919        nok , no_alt + non_biallelic + filtered + ndup , no_alt , non_biallelic , filtered , ndup );
920920
921921    if  ( str .m  ) free (str .s );
@@ -976,7 +976,7 @@ static void vcf_to_haplegendsample(args_t *args)
976976    {
977977        char  * sample2sex  =  NULL ;
978978        if  ( args -> sex_fname  ) sample2sex  =  init_sample2sex (args -> header ,args -> sex_fname );
979-          
979+ 
980980        int  i ;
981981        BGZF  * sout  =  bgzf_open (sample_fname , sample_compressed  ? "wg"  : "wu" );
982982        str .l  =  0 ;
@@ -1078,7 +1078,7 @@ static void vcf_to_hapsample(args_t *args)
10781078        kputs ("%CHROM:%POS\\_%REF\\_%FIRST_ALT %ID %POS %REF %FIRST_ALT " , & str );
10791079    else 
10801080        kputs ("%CHROM %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT " , & str );
1081-      
1081+ 
10821082    if  ( args -> hap2dip  )
10831083        kputs ("%_GT_TO_HAP2\n" , & str );
10841084    else 
@@ -1229,7 +1229,7 @@ static inline int tsv_setter_aa1(args_t *args, char *ss, char *se, int alleles[]
12291229    if  ( alleles [a0 ]< 0  ) alleles [a0 ] =  (* nals )++ ;
12301230    if  ( alleles [a1 ]< 0  ) alleles [a1 ] =  (* nals )++ ;
12311231
1232-     gts [0 ] =  bcf_gt_unphased (alleles [a0 ]);  
1232+     gts [0 ] =  bcf_gt_unphased (alleles [a0 ]);
12331233    gts [1 ] =  ss [1 ] ? bcf_gt_unphased (alleles [a1 ]) : bcf_int32_vector_end ;
12341234
12351235    if  ( ref == a0  &&  ref == a1   ) args -> n .hom_rr ++ ;    // hom ref: RR 
@@ -1265,7 +1265,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
12651265        }
12661266        ret  =  tsv_setter_aa1 (args , tsv -> ss , tsv -> se , alleles , & nals , iref , args -> gts + i * 2 );
12671267        if  ( ret == -1  ) error ("Error parsing the site %s:%" PRId64 ", expected two characters\n" , bcf_hdr_id2name (args -> header ,rec -> rid ),(int64_t ) rec -> pos + 1 );
1268-         if  ( ret == -2  )  
1268+         if  ( ret == -2  )
12691269        {
12701270            // something else than a SNP 
12711271            free (ref );
@@ -1275,7 +1275,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
12751275
12761276    args -> str .l  =  0 ;
12771277    kputc (ref [0 ], & args -> str );
1278-     for  (i = 0 ; i < 5 ; i ++ )  
1278+     for  (i = 0 ; i < 5 ; i ++ )
12791279    {
12801280        if  ( alleles [i ]> 0  )
12811281        {
@@ -1419,7 +1419,7 @@ static void gvcf_to_vcf(args_t *args)
14191419        {
14201420            int  pass  =  filter_test (args -> filter , line , NULL );
14211421            if  ( args -> filter_logic  &  FLT_EXCLUDE  ) pass  =  pass  ? 0  : 1 ;
1422-             if  ( !pass  )  
1422+             if  ( !pass  )
14231423            {
14241424                if  ( bcf_write (out_fh ,hdr ,line )!= 0   ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> outfname );
14251425                continue ;
@@ -1667,7 +1667,7 @@ int main_vcfconvert(int argc, char *argv[])
16671667        else  args -> infname  =  argv [optind ];
16681668    }
16691669    if  ( !args -> infname  ) usage ();
1670-      
1670+ 
16711671    if  ( args -> convert_func  ) args -> convert_func (args );
16721672    else  vcf_to_vcf (args );
16731673
0 commit comments