
Commit 6dc65c6

Broken tests reviewed and updated as needed.
1 parent 7cfa00b commit 6dc65c6

4 files changed: +36, -47 lines

metapool/prep.py

Lines changed: 24 additions & 31 deletions
@@ -1,19 +1,16 @@
-import re
-import os
-import gzip
-import warnings
-import pandas as pd
-
-from glob import glob
+from collections import Counter, defaultdict
 from datetime import datetime
-from string import ascii_letters, digits
+from glob import glob
 from metapool.mp_strings import get_short_name_and_id
 from metapool.plate import PlateReplication
-from collections import Counter
+from os import sep, listdir
+from os.path import (basename, isdir, join, split, abspath, exists,
+                     normpath)
 from string import ascii_letters, digits
-from os import sep
-from os.path import join, split, abspath, exists
-from collections import defaultdict
+from gzip import open as gz_open
+import pandas as pd
+import re
+import warnings
 
 
 REQUIRED_MF_COLUMNS = {'sample_name', 'barcode', 'primer', 'primer_plate',
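
Note: the rewritten call sites throughout this diff rely on the direct imports above binding the very same objects as their os.path-qualified forms. A quick sketch (toy arguments, illustration only):

    import os.path
    from os.path import join

    # 'from os.path import join' binds the identical function object, so
    # each shortened call site below is behavior-for-behavior the same.
    assert join is os.path.join
    assert join('run', 'project') == os.path.join('run', 'project')
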
@@ -139,7 +136,7 @@ def parse_illumina_run_id(run_id):
 
 def is_nonempty_gz_file(name):
     """Taken from https://stackoverflow.com/a/37878550/379593"""
-    with gzip.open(name, 'rb') as f:
+    with gz_open(name, 'rb') as f:
         try:
             file_content = f.read(1)
             return len(file_content) > 0
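
The single-byte read above is the crux of is_nonempty_gz_file(): a gzip file holding zero decompressed bytes still occupies roughly 20 bytes on disk (header plus trailer), so a raw size check would misclassify it. A self-contained sketch against a throwaway temp file (no real run data assumed):

    from gzip import open as gz_open
    from tempfile import NamedTemporaryFile

    with NamedTemporaryFile(suffix='.fastq.gz', delete=False) as tmp:
        name = tmp.name
    with gz_open(name, 'wb'):
        pass  # writes a valid gzip container with zero decompressed bytes

    with gz_open(name, 'rb') as f:
        assert f.read(1) == b''  # is_nonempty_gz_file() would return False
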
@@ -171,11 +168,11 @@ def get_run_prefix(run_path, project, sample_id, lane):
     The run prefix of the sequence file in the lane, only if the sequence
     file is not empty.
     """
-    base = os.path.join(run_path, project)
+    base = join(run_path, project)
     path = base
 
-    qc = os.path.join(base, 'trimmed_sequences')
-    hf = os.path.join(base, 'filtered_sequences')
+    qc = join(base, 'trimmed_sequences')
+    hf = join(base, 'filtered_sequences')
 
     if _exists_and_has_files(qc) and _exists_and_has_files(hf):
         path = hf
@@ -188,10 +185,10 @@ def get_run_prefix(run_path, project, sample_id, lane):
 
     search_me = '%s_S*_L*%s_R*.fastq.gz' % (sample_id, lane)
 
-    results = glob(os.path.join(path, search_me))
+    results = glob(join(path, search_me))
 
     with open('found_files.log', 'a') as f:
-        f.write("SEARCHING: %s\n" % os.path.join(path, "FFFF", search_me))
+        f.write("SEARCHING: %s\n" % join(path, "FFFF", search_me))
         for item in results:
             f.write("%s\n" % item)
         f.write("\n")
@@ -200,7 +197,7 @@ def get_run_prefix(run_path, project, sample_id, lane):
     if len(results) == 2:
         forward, reverse = sorted(results)
         if is_nonempty_gz_file(forward) and is_nonempty_gz_file(reverse):
-            f, r = os.path.basename(forward), os.path.basename(reverse)
+            f, r = basename(forward), basename(reverse)
             if len(f) != len(r):
                 raise ValueError("Forward and reverse sequences filenames "
                                  "don't match f:%s r:%s" % (f, r))
@@ -228,15 +225,15 @@ def get_run_prefix(run_path, project, sample_id, lane):
 
 
 def get_run_prefix_mf(run_path, project):
-    search_path = os.path.join(run_path, project, 'amplicon',
-                               '*_SMPL1_S*R?_*.fastq.gz')
+    search_path = join(run_path, project, 'amplicon',
+                       '*_SMPL1_S*R?_*.fastq.gz')
     results = glob(search_path)
 
     # at this stage there should only be two files forward and reverse
     if len(results) == 2:
         forward, reverse = sorted(results)
         if is_nonempty_gz_file(forward) and is_nonempty_gz_file(reverse):
-            f, r = os.path.basename(forward), os.path.basename(reverse)
+            f, r = basename(forward), basename(reverse)
             if len(f) != len(r):
                 raise ValueError("Forward and reverse sequences filenames "
                                  "don't match f:%s r:%s" % (f, r))
@@ -263,12 +260,12 @@ def get_run_prefix_mf(run_path, project):
 
 
 def _file_list(path):
-    return [f for f in os.listdir(path)
-            if not os.path.isdir(os.path.join(path, f))]
+    return [f for f in listdir(path)
+            if not isdir(join(path, f))]
 
 
 def _exists_and_has_files(path):
-    return os.path.exists(path) and len(_file_list(path))
+    return exists(path) and len(_file_list(path))
 
 
 def get_machine_code(instrument_model):
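
_exists_and_has_files() returns False or an int rather than a strict bool, relying on Python truthiness (a non-zero length is truthy), which is all the `if _exists_and_has_files(qc) and ...` callers above need. If an explicit bool were ever preferred, an equivalent sketch (a hypothetical variant, not part of this commit):

    from os import listdir
    from os.path import exists, isdir, join

    def exists_and_has_files(path):
        # True only if path exists and contains at least one non-directory
        return exists(path) and any(not isdir(join(path, f))
                                    for f in listdir(path))
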
@@ -459,7 +456,7 @@ def preparations_for_run(run_path, sheet, generated_prep_columns,
     Dictionary keyed by run identifier, project name and lane. Values are
     preparations represented as DataFrames.
     """
-    _, run_id = os.path.split(os.path.normpath(run_path))
+    _, run_id = split(normpath(run_path))
     run_date, instrument_code = parse_illumina_run_id(run_id)
     instrument_model, run_center = get_model_and_center(instrument_code)
 
@@ -509,8 +506,6 @@ def log_me(msg):
 
     all_columns = sorted(carried_prep_columns + generated_prep_columns)
 
-    from json import dumps
-
     for project, project_sheet in sheet.groupby('sample_project'):
         project_name, qiita_id = get_short_name_and_id(project)
 
@@ -1076,7 +1071,7 @@ def _find_filtered_files(fp):
         tmp = fastq_fp.replace(fp, '')
         # remove any leading and/or trailing '/' characters from the
         # remaining path.
-        # use os.sep instead of '/' to be more platform independent.
+        # use sep instead of '/' to be more platform independent.
         tmp = tmp.strip(sep)
         tmp = tmp.split(sep)
 
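For reference, the strip/split pair above decomposes the remainder of a path into its components; the path below is invented for illustration and is built with sep so the sketch works on any platform:

    from os import sep

    remainder = sep.join(['', 'Project_1111', 'filtered_sequences',
                          'sample1_R1.fastq.gz'])
    assert remainder.strip(sep).split(sep) == [
        'Project_1111', 'filtered_sequences', 'sample1_R1.fastq.gz']
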
@@ -1124,5 +1119,3 @@ def _foo_get_run_prefix(file_name):
 
     # if no orientations were found, then return None.
     return None if pos == -1 else file_name[0:pos]
-
-

metapool/scripts/tests/test_seqpro.py

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,6 @@
 from os.path import join, exists
 from subprocess import Popen, PIPE
 import pandas as pd
-import warnings
 from glob import glob
 from os.path import basename
 
@@ -37,7 +36,7 @@ def setUp(self):
         )
 
     def tearDown(self):
-        #rmtree(self.vf_test_dir, ignore_errors=True)
+        rmtree(self.vf_test_dir, ignore_errors=True)
         pass
 
     def test_fastp_run(self):
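
Re-enabling rmtree() lets the test clean up its output directory again, and ignore_errors=True keeps tearDown from turning into a second failure when setUp never created the directory. A minimal sketch:

    from shutil import rmtree
    from tempfile import mkdtemp

    d = mkdtemp()
    rmtree(d, ignore_errors=True)
    rmtree(d, ignore_errors=True)  # already gone: a no-op, not an error
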
@@ -173,7 +172,8 @@ def test_fastp_run(self):
                 "raw_reads_r1r2": 2300000,
                 "total_biological_reads_r1r2": 61404.0,
                 "quality_filtered_reads_r1r2": 16.0,
-                "fraction_passing_quality_filter": 6.956521739130435e-06
+                "fraction_passing_quality_filter":
+                    6.956521739130435e-06
             }
         },
         "200318_A00953_0082_AH5TWYDSXY.Trojecp_666.1.tsv": {
@@ -391,7 +391,7 @@ def test_fastp_run(self):
         self.assertEqual(sorted([basename(x) for x
                                  in glob("./*.tsv")]), exp_preps)
 
-        for prep, exp_lines in zip(exp_preps, [5,4]):
+        for prep, exp_lines in zip(exp_preps, [5, 4]):
             with open(prep) as f:
                 lines = f.readlines()
                 lines = [x.strip() for x in lines]

metapool/tests/data/runs/200318_A00953_0082_AH5TWYDSXY/mgv90_test_sheet.csv

Lines changed: 5 additions & 5 deletions
@@ -22,11 +22,11 @@ ReverseComplement,0,,,,,,,,,
 Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Well_description
 1,sample1,sample1,FooBar_666_p1,A1,iTru7_107_07,CCGACTAT,iTru5_01_A,ACCGACAA,Project_1111,s1
 1,sample2,sample2,FooBar_666_p1,A2,iTru7_107_08,CCGACTAT,iTru5_01_A,CTTCGCAA,Project_1111,s2
-3,sample1,sample1,FooBar_666_p1,A3,iTru7_107_09,GCCTTGTT,iTru5_01_A,AACACCAC,Project_1111,s1
-3,sample2,sample2,FooBar_666_p1,A4,iTru7_107_10,AACTTGCC,iTru5_01_A,CGTATCTC,Project_1111,s2
-3,sample3,sample3,FooBar_666_p1,A5,iTru7_107_11,CAATGTGG,iTru5_01_A,GGTACGAA,Trojecp_666,s5
-3,sample4,sample4,FooBar_666_p1,B6,iTru7_107_12,AAGGCTGA,iTru5_01_A,CGATCGAT,Trojecp_666,s6
-3,sample5,sample5,FooBar_666_p1,B8,iTru7_107_13,TTACCGAG,iTru5_01_A,AAGACACC,Trojecp_666,s7
+1,sample3,sample3,FooBar_666_p1,A3,iTru7_107_09,GCCTTGTT,iTru5_01_A,AACACCAC,Project_1111,s3
+1,sample4,sample4,FooBar_666_p1,A4,iTru7_107_10,AACTTGCC,iTru5_01_A,CGTATCTC,Project_1111,s4
+1,sample5,sample5,FooBar_666_p1,A5,iTru7_107_11,CAATGTGG,iTru5_01_A,GGTACGAA,Trojecp_666,s5
+1,sample6,sample6,FooBar_666_p1,B6,iTru7_107_12,AAGGCTGA,iTru5_01_A,CGATCGAT,Trojecp_666,s6
+1,sample7,sample7,FooBar_666_p1,B8,iTru7_107_13,TTACCGAG,iTru5_01_A,AAGACACC,Trojecp_666,s7
 ,,,,,,,,,,
 [Bioinformatics],,,,,,,,,,
 Sample_Project,QiitaID,BarcodesAreRC,ForwardAdapter,ReverseAdapter,HumanFiltering,library_construction_protocol,experiment_design_description,,,

metapool/tests/test_count.py

Lines changed: 3 additions & 7 deletions
@@ -54,15 +54,15 @@ def test_parsefier_multiple_matches_raises(self):
         run = os.path.join(tmp, 'funky-rerun-with-repeated-samples')
         shutil.copytree(self.run_dir, run)
 
-        # sample 3 exists, but not with cell number S458, so this should
+        # sample 5 exists, but not with cell number S458, so this should
         # raise an error because if this happense something else went wrong
         fake = os.path.join(run, 'Trojecp_666', 'json',
-                            'sample3_S458_L003_R1_001.json')
+                            'sample5_S458_L001_R1_001.json')
         with open(fake, 'w') as f:
             f.write(json.dumps({}))
 
         msg = ('Multiple matches found for the same samples in the same '
-               'lane, only one match is expected: sample3 in lane 3')
+               'lane, only one match is expected: sample5 in lane 1')
         with self.assertRaisesRegex(ValueError, msg):
             _parsefier(run, self.ss, 'json', '.json', 'halloween',
                        lambda x: 1)
@@ -290,12 +290,8 @@ def test_raw_read_counts_malformed_lane(self):
 
     def test_raw_read_counts(self):
         obs = raw_read_counts(self.run_dir, self.ss)
-
         exp = self.stats[['raw_reads_r1r2']] * 2
 
-        exp.to_csv('exp.csv', sep=',', index=True)
-        obs.to_csv('obs.csv', sep=',', index=True)
-
         pd.testing.assert_frame_equal(obs.sort_index(), exp)
 
     def tearDown(self):
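
With the ad-hoc CSV dumps gone, pd.testing.assert_frame_equal() is the single point of comparison, and it already reports cell-level differences on failure. The sort_index() call makes the check independent of row order, as this sketch with invented data shows:

    import pandas as pd

    exp = pd.DataFrame({'raw_reads_r1r2': [100, 200]}, index=['s1', 's2'])
    obs = exp.iloc[::-1]  # same rows, reversed order
    pd.testing.assert_frame_equal(obs.sort_index(), exp)  # passes
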
