Skip to content

Commit

Permalink
tidying
Browse files Browse the repository at this point in the history
  • Loading branch information
omad committed Nov 6, 2023
1 parent c297a17 commit 7cef9ca
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 40 deletions.
3 changes: 3 additions & 0 deletions odc/stats/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Entry point for running the package as a script:
#   python -m odc.stats
#
# The -m switch executes this module with __name__ == "__main__", so the
# guard below still runs the CLI in that case; it only prevents the CLI
# from firing as a side effect of an accidental direct import of
# odc.stats.__main__.
from .cli import main

if __name__ == "__main__":
    main()
5 changes: 4 additions & 1 deletion odc/stats/_cli_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ def click_range2d(ctx, param, value):
raise click.ClickException(str(e)) from None


CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) # , max_content_width=120)


@click.version_option(package_name="odc_stats")
@click.group(help="Stats command line interface")
@click.group(help="Stats command line interface", context_settings=CONTEXT_SETTINGS)
def main():
pass
7 changes: 4 additions & 3 deletions odc/stats/_cli_locate_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,11 @@ def locate_grids(grid_shape, extent_shape, attr_filter):
@click.argument("grid-shape", type=str)
@click.argument("extent-shape", type=str)
@click.argument("csv-path", type=str, required=False)
@click.option("--verbose", "-v", is_flag=True, help="Be verbose")
def cli(attr_filter, grid_shape, extent_shape, csv_path, verbose):
def cli(attr_filter, grid_shape, extent_shape, csv_path):
"""
Generate a list of grids overlapping with the input shape extent
GRID_SHAPE is the geojson or ESRI shape file of the grids.
GRID_SHAPE is the GeoJSON or ESRI shape file of the grids.
EXTENT_SHAPE is the ESRI shape file where the extent covers the grids.
Expand All @@ -68,8 +67,10 @@ def cli(attr_filter, grid_shape, extent_shape, csv_path, verbose):
if csv_path is None:
tmp_path = tempfile.gettempdir()
csv_path = path.join(tmp_path, "extent_grids.csv")

with open(csv_path, "w", encoding="utf8") as f:
f.write(csv_buffer.read())

print("Results saved to", csv_path)


Expand Down
2 changes: 1 addition & 1 deletion odc/stats/_cli_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
),
)
@click.option(
"--location", type=str, help="Output location prefix as a uri: s3://bucket/path/"
"--location", type=str, help="Output location prefix as a URI: s3://bucket/path/"
)
@click.option("--max-processing-time", type=int, help="Max seconds per task")
@click.option("--from-sqs", type=str, help="Read tasks from SQS", default="")
Expand Down
83 changes: 50 additions & 33 deletions odc/stats/_cli_save_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,54 +19,66 @@
"dataset_filter",
]

VALID_FREQUENCIES = (
"annual",
"annual-fy",
"semiannual",
"seasonal",
"quartely",
"3month-seasons",
"rolling-3months",
"nov-mar",
"apr-oct",
"all",
)


@main.command("save-tasks")
@click.option(
"--grid",
type=str,
help=(
"Grid name or spec: au-{10|20|30|60},africa-{10|20|30|60},"
"albers-au-25 (legacy one) 'crs;pixel_resolution;shape_in_pixels'"
"\b\nGrid Name: au-{10|20|30|60},africa-{10|20|30|60},albers-au-25 (legacy) or\n"
"Grid Spec: eg. 'crs;pixel_resolution;shape_in_pixels'"
),
default=None,
)
@click.option(
"--year",
type=int,
help=(
"Only extract datasets for a given year."
"\b\n"
"Only extract datasets for a given year. \n"
"This is a shortcut for --temporal-range=<int>--P1Y"
),
)
@click.option(
"--temporal-range",
type=str,
help=(
"Only extract datasets for a given time range,"
"\b\n"
"Only extract datasets for a given time range, \n"
"Example '2020-05--P1M' month of May 2020"
),
)
@click.option(
"--frequency",
type=str,
help=(
"Specify temporal binning: "
"annual|annual-fy|semiannual|seasonal|quartely|3month-seasons|rolling-3months|nov-mar|apr-oct|all"
),
help=f'\b\nSpecify temporal binning: \n{"|".join(VALID_FREQUENCIES)}',
)
@click.option("--env", "-E", type=str, help="Datacube environment name")
@click.option(
"-z",
"complevel",
type=int,
default=6,
help="Compression setting for zstandard 1-fast, 9+ good but slow",
help="Compression setting for zstandard. 1-fast, 9+ good but slow",
)
@click.option(
"--overwrite", is_flag=True, default=None, help="Overwrite output if it exists"
"--overwrite", is_flag=True, default=None, help="Overwrite OUTPUT if it exists"
)
@click.option(
"--tiles", help='Limit query to tiles example: "0:3,2:4"', callback=click_range2d
"--tiles", help="Limit query to tiles. eg. '0:3,2:4'", callback=click_range2d
)
@click.option(
"--debug",
Expand All @@ -78,24 +90,27 @@
@click.option(
"--gqa",
type=float,
help="Only save datasets that pass `gqa_iterative_mean_xy <= gqa` test",
help=(
"\b\nOnly save datasets that pass the filter: \n"
"`gqa_iterative_mean_xy <= GQA`"
),
)
@click.option(
"--usgs-collection-category",
type=str,
help=(
"Only save datasets that pass "
"`collection_category == usgs_collection_category` test"
"\b\nOnly save datasets that pass the filter: \n"
"`collection_category == USGS_COLLECTION_CATEGORY`"
),
)
@click.option(
"--dataset-filter",
type=str,
default=None,
help='Filter to apply on datasets - {"collection_category": "T1"}',
help='\b\nFilter to apply on datasets. \nA JSON Object. eg. {"collection_category": "T1"}',
)
@click_yaml_cfg("--config", help="Save tasks Config")
@click.option("--input-products", type=str, default="")
@click_yaml_cfg("--config", help="Stats Configuration File")
@click.option("--input-products", type=str, default="One or more Products to process")
@click.argument("output", type=str, nargs=1, default="")
# pylint: disable=too-many-arguments, too-many-locals
# pylint: disable=too-many-branches, too-many-statements
Expand All @@ -117,8 +132,22 @@ def save_tasks(
usgs_collection_category=None,
):
"""
Prepare tasks for processing (query db).
Preparation and setup for running ODC Stats Processing.
\b
1. Queries an ODC Database for Datasets
2. Groups them into Chunks based on Time and a Spatial Grid
3. Saves the results into a task cache file OUTPUT, plus CSV and GeoJSON
files for manual checks.
The configuration comes from CLI flags, a configuration file or
by combining both. CLI Flags override settings in a configuration file.
OUTPUTS
=======
odc-stats save-tasks
<todo more help goes here>
\b
Expand Down Expand Up @@ -167,28 +196,16 @@ def save_tasks(
if _cfg.get("grid") is None:
print(
"grid must be one of au-{10|20|30|60}, africa-{10|20|30|60}, \
albers_au_25 (legacy one) or custom like 'epsg:3857;30;5000' \
albers_au_25 (legacy) or custom like 'epsg:3857;30;5000' \
(30m pixels 5,000 per side in epsg:3857) ",
file=sys.stderr,
)
sys.exit(1)

if _cfg.get("frequency") is not None:
if _cfg.get("frequency") not in (
"annual",
"annual-fy",
"semiannual",
"seasonal",
"quartely",
"3month-seasons",
"rolling-3months",
"nov-mar",
"apr-oct",
"all",
):
if _cfg.get("frequency") not in VALID_FREQUENCIES:
print(
f"""Frequency must be one of annual|annual-fy|semiannual|seasonal|
quartely|3month-seasons|rolling-3months|nov-mar|apr-oct|all
f"""Frequency must be one of {"|".join(VALID_FREQUENCIES)}
and not '{frequency}'""",
file=sys.stderr,
)
Expand Down
2 changes: 1 addition & 1 deletion odc/stats/_gjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def compute_grid_info(
cells: Dict[TileIdx_xy, Any], resolution: float = math.inf, title_width: int = 0
) -> Dict[TileIdx_xy, Any]:
"""
Compute geojson feature for every cell in ``cells``.
Compute GeoJSON feature for every cell in ``cells``.
Where ``cells`` is produced by ``bin_dataset_stream``
"""
if title_width == 0:
Expand Down
2 changes: 2 additions & 0 deletions odc/stats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ def __init__(
public: bool = False,
):
"""
Write a set of dataset files to S3 or Filesystem based on an input dask delayed graph.
:param creds: S3 write credentials
:param cog_opts: Configure compression settings, globally and per-band
:param acl: Canned ACL string:
Expand Down
6 changes: 5 additions & 1 deletion odc/stats/proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ def _init_dask(self) -> Client:
configure_s3_access(
aws_unsigned=aws_unsigned, cloud_defaults=True, client=c
)
self._log.info("Started local Dask %s", client)
self._log.info(
"Started local Dask %s. Dashboard available at: %s",
client,
client.dashboard_link,
)

return client

Expand Down

0 comments on commit 7cef9ca

Please sign in to comment.