Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add from_date and to_date options to limit the processed files #202

Merged
merged 3 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,28 @@ def parse_args(args=sys.argv[1:]):
files by an additional level after sorting by date.
"""
)

parser.add_argument(
'--from_date',
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please replace _ with -

type=str,
default=None,
help="""\
Limit the operations to the files that are newer than from_date (inclusive).
The date must be specified in format YYYY-MM-DD
Files with unknown date won't be skipped.
"""
)

parser.add_argument(
'--to_date',
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please replace _ with -

type=str,
default=None,
help="""\
Limit the operations to the files that are older than to_date (inclusive).
The date must be specified in format YYYY-MM-DD
Files with unknown date won't be skipped.
"""
)

return parser.parse_args(args)

Expand Down Expand Up @@ -344,7 +366,9 @@ def main(options):
no_date_dir=options.no_date_dir,
skip_unknown=options.skip_unknown,
output_prefix=options.output_prefix,
output_suffix=options.output_suffix
output_suffix=options.output_suffix,
from_date=options.from_date,
to_date=options.to_date
)


Expand Down
30 changes: 28 additions & 2 deletions src/phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ def __init__(self, input_dir, output_dir, **args):
self.max_depth = args.get('max_depth', -1)
# default to concurrency of one to retain existing behavior
self.max_concurrency = args.get("max_concurrency", 1)

self.from_date = args.get("from_date", None)
self.to_date = args.get("to_date", None)
if self.from_date is not None:
self.from_date = Date.strptime(f"{self.from_date} 00:00:00", "%Y-%m-%d %H:%M:%S")
if self.to_date is not None:
self.to_date = Date.strptime(f"{self.to_date} 23:59:59", "%Y-%m-%d %H:%M:%S")

if self.max_concurrency > 1:
logger.info(f"Using {self.max_concurrency} workers to process files.")

Expand Down Expand Up @@ -249,7 +257,7 @@ def process_file(self, filename):

progress = f'{filename}'

output, target_file_name, target_file_path, target_file_type = self.get_file_name_and_path(filename)
output, target_file_name, target_file_path, target_file_type, file_date = self.get_file_name_and_path(filename)
suffix = 1
target_file = target_file_path

Expand All @@ -261,6 +269,7 @@ def process_file(self, filename):
logger.info(progress)
break

date_unknown = file_date == None or output.endswith(self.no_date_dir)
if self.skip_unknown and output.endswith(self.no_date_dir):
# Skip files that didn't generate a path from EXIF data
progress = f"{progress} => skipped, unknown date EXIF information for '{target_file_name}'"
Expand All @@ -270,6 +279,22 @@ def process_file(self, filename):
logger.info(progress)
break

if not date_unknown:
skip = False
if type(file_date) is dict:
file_date = file_date["date"]
if self.from_date is not None and file_date < self.from_date:
progress = f"{progress} => {filename} skipped: date {file_date} is older than from_date {self.from_date}"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please replace _ with - for option name and maybe add -- to the option name (--from-date)

skip = True
if self.to_date is not None and file_date > self.to_date:
progress = f"{progress} => {filename} skipped: date {file_date} is newer than to_date {self.to_date}"
skip = True
if skip:
if self.progress:
self.pbar.write(progress)
logger.info(progress)
break

if os.path.isfile(target_file):
if filename != target_file and filecmp.cmp(filename, target_file, shallow=False):
progress = f'{progress} => skipped, duplicated file {target_file}'
Expand Down Expand Up @@ -330,6 +355,7 @@ def get_file_name_and_path(self, filename):
if exif_data and 'MIMEType' in exif_data:
target_file_type = self.get_file_type(exif_data['MIMEType'])

date = None
if target_file_type in ['image', 'video']:
date = Date(filename).from_exif(exif_data, self.timestamp, self.date_regex,
self.date_field)
Expand All @@ -342,7 +368,7 @@ def get_file_name_and_path(self, filename):
target_file_name = os.path.basename(filename)

target_file_path = os.path.sep.join([output, target_file_name])
return output, target_file_name, target_file_path, target_file_type
return output, target_file_name, target_file_path, target_file_type, date

def process_xmp(self, original_filename, file_name, suffix, output):
"""
Expand Down
63 changes: 63 additions & 0 deletions tests/test_phockup.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,3 +473,66 @@ def test_skip_unknown():
assert len([name for name in os.listdir(dir4) if
os.path.isfile(os.path.join(dir4, name))]) == 1
shutil.rmtree('output', ignore_errors=True)

def test_from_date():
    """Check that ``from_date`` leaves files older than the limit out of the output.

    With ``from_date="2017-10-06"`` the 2017/01/01 directory must end up
    empty, while the 2017/10/06, 2018/01/01 and ``unknown`` directories each
    receive one file.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', from_date="2017-10-06")

    # Output directory -> number of regular files expected directly inside it.
    expected_file_counts = {
        'output/2017/01/01': 0,
        'output/2017/10/06': 1,
        'output/unknown': 1,
        'output/2018/01/01/': 1,
    }

    # Every directory must exist before any of the counts are checked.
    for directory in expected_file_counts:
        assert os.path.isdir(directory)

    for directory, expected in expected_file_counts.items():
        entries = [os.path.join(directory, name) for name in os.listdir(directory)]
        regular_files = [entry for entry in entries if os.path.isfile(entry)]
        assert len(regular_files) == expected

    shutil.rmtree('output', ignore_errors=True)

def test_to_date():
    """Check that ``to_date`` leaves files newer than the limit out of the output.

    With ``to_date="2017-10-06"`` the 2018/01/01 directory must end up empty,
    while 2017/01/01 receives three files and the 2017/10/06 and ``unknown``
    directories each receive one.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', to_date="2017-10-06", progress=True)

    # Output directory -> number of regular files expected directly inside it.
    expected_file_counts = {
        'output/2017/01/01': 3,
        'output/2017/10/06': 1,
        'output/unknown': 1,
        'output/2018/01/01/': 0,
    }

    # Every directory must exist before any of the counts are checked.
    for directory in expected_file_counts:
        assert os.path.isdir(directory)

    for directory, expected in expected_file_counts.items():
        entries = [os.path.join(directory, name) for name in os.listdir(directory)]
        regular_files = [entry for entry in entries if os.path.isfile(entry)]
        assert len(regular_files) == expected

    shutil.rmtree('output', ignore_errors=True)

def test_from_date_to_date():
    """Check that ``from_date`` and ``to_date`` together bound the processed files.

    With the window 2017-01-02 .. 2017-10-06 the 2017/01/01 and 2018/01/01
    directories must end up empty, while the 2017/10/06 and ``unknown``
    directories each receive one file.
    """
    shutil.rmtree('output', ignore_errors=True)
    Phockup('input', 'output', to_date="2017-10-06", from_date="2017-01-02", progress=True)

    # Output directory -> number of regular files expected directly inside it.
    expected_file_counts = {
        'output/2017/01/01': 0,
        'output/2017/10/06': 1,
        'output/unknown': 1,
        'output/2018/01/01/': 0,
    }

    # Every directory must exist before any of the counts are checked.
    for directory in expected_file_counts:
        assert os.path.isdir(directory)

    for directory, expected in expected_file_counts.items():
        entries = [os.path.join(directory, name) for name in os.listdir(directory)]
        regular_files = [entry for entry in entries if os.path.isfile(entry)]
        assert len(regular_files) == expected

    shutil.rmtree('output', ignore_errors=True)