Skip to content

Commit

Permalink
Update reformat_lofreq.py
Browse files Browse the repository at this point in the history
Fixed an issue where empty VCF files would still have the FORMAT column added
  • Loading branch information
LennertVerboven committed May 15, 2023
1 parent 793b9b4 commit aa53379
Showing 1 changed file with 17 additions and 14 deletions.
31 changes: 17 additions & 14 deletions bin/reformat_lofreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ def read_vcf(filename):
try:
df = pd.read_csv(vcf, header=None, sep='\t')
df.columns = line[:-1].split('\t')
not_empty = True
except pd.errors.EmptyDataError as e:
df = pd.DataFrame(columns=line[:-1].split('\t'))
return df, header
not_empty = False
return df, header, not_empty

def write_vcf(filename, df, header):
with open(filename, 'w') as vcf:
Expand All @@ -42,18 +44,19 @@ def write_vcf(filename, df, header):

args = vars(parser.parse_args())

vcf, header = read_vcf(args['lofreq_vcf_file'])
vcf['FORMAT'] = 'GT:AD:DP:GQ:PL'

for idx, row in vcf.iterrows():
info = [ast.literal_eval(i.split('=')[1]) for i in row['INFO'].split(';')[:4]]
ref_dp = sum(info[3][:2])
alt_dp = sum(info[3][2:])
GT = 1
AD = '{},{}'.format(ref_dp, alt_dp)
DP = sum(info[3])
GQ = 99
PL = '1800,0'
vcf.loc[idx, args['lofreq_sample_name']] = '{}:{}:{}:{}:{}'.format(GT,AD,DP,GQ,PL)
vcf, header, not_empty = read_vcf(args['lofreq_vcf_file'])
if not_empty:
vcf['FORMAT'] = 'GT:AD:DP:GQ:PL'

for idx, row in vcf.iterrows():
info = [ast.literal_eval(i.split('=')[1]) for i in row['INFO'].split(';')[:4]]
ref_dp = sum(info[3][:2])
alt_dp = sum(info[3][2:])
GT = 1
AD = '{},{}'.format(ref_dp, alt_dp)
DP = sum(info[3])
GQ = 99
PL = '1800,0'
vcf.loc[idx, args['lofreq_sample_name']] = '{}:{}:{}:{}:{}'.format(GT,AD,DP,GQ,PL)

write_vcf(args['outfile'], vcf, header)

0 comments on commit aa53379

Please sign in to comment.