diff --git a/bin/reformat_lofreq.py b/bin/reformat_lofreq.py index 79844731..12bab6f5 100755 --- a/bin/reformat_lofreq.py +++ b/bin/reformat_lofreq.py @@ -24,9 +24,11 @@ def read_vcf(filename): try: df = pd.read_csv(vcf, header=None, sep='\t') df.columns = line[:-1].split('\t') + not_empty = True except pd.errors.EmptyDataError as e: df = pd.DataFrame(columns=line[:-1].split('\t')) - return df, header + not_empty = False + return df, header, not_empty def write_vcf(filename, df, header): with open(filename, 'w') as vcf: @@ -42,18 +44,19 @@ def write_vcf(filename, df, header): args = vars(parser.parse_args()) - vcf, header = read_vcf(args['lofreq_vcf_file']) - vcf['FORMAT'] = 'GT:AD:DP:GQ:PL' - - for idx, row in vcf.iterrows(): - info = [ast.literal_eval(i.split('=')[1]) for i in row['INFO'].split(';')[:4]] - ref_dp = sum(info[3][:2]) - alt_dp = sum(info[3][2:]) - GT = 1 - AD = '{},{}'.format(ref_dp, alt_dp) - DP = sum(info[3]) - GQ = 99 - PL = '1800,0' - vcf.loc[idx, args['lofreq_sample_name']] = '{}:{}:{}:{}:{}'.format(GT,AD,DP,GQ,PL) + vcf, header, not_empty = read_vcf(args['lofreq_vcf_file']) + if not_empty: + vcf['FORMAT'] = 'GT:AD:DP:GQ:PL' + + for idx, row in vcf.iterrows(): + info = [ast.literal_eval(i.split('=')[1]) for i in row['INFO'].split(';')[:4]] + ref_dp = sum(info[3][:2]) + alt_dp = sum(info[3][2:]) + GT = 1 + AD = '{},{}'.format(ref_dp, alt_dp) + DP = sum(info[3]) + GQ = 99 + PL = '1800,0' + vcf.loc[idx, args['lofreq_sample_name']] = '{}:{}:{}:{}:{}'.format(GT,AD,DP,GQ,PL) write_vcf(args['outfile'], vcf, header)