Skip to content

Commit

Permalink
extension for landcover pipeline and plugins (#138)
Browse files Browse the repository at this point in the history
* reproject dem data and allow its time to be ignored

* generalize product fusing

* fix test utils

* allow optional product and datasets

* refine landcover plugin for l1 output

* change output band name of landcover

* add test for plugin a1

* add test for plugins

* pin new head of odc-algo

---------

Co-authored-by: Emma Ai <[email protected]>
  • Loading branch information
emmaai and Emma Ai committed Jul 1, 2024
1 parent 656a91d commit 9477637
Show file tree
Hide file tree
Showing 11 changed files with 712 additions and 115 deletions.
48 changes: 37 additions & 11 deletions odc/stats/_cli_save_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"gqa",
"input_products",
"dataset_filter",
"ignore_time",
"optional_products",
]


Expand Down Expand Up @@ -59,7 +61,7 @@
"-z",
"complevel",
type=int,
default=6,
default=None,
help="Compression setting for zstandard 1-fast, 9+ good but slow",
)
@click.option(
Expand Down Expand Up @@ -94,6 +96,19 @@
default=None,
help='Filter to apply on datasets - {"collection_category": "T1"}',
)
@click.option(
"--ignore-time",
multiple=True,
default=None,
help="Ignore time for particular products in input, e.g., --ignore-time ga_srtm_dem1sv1_0",
)
@click.option(
"--optional-products",
multiple=True,
default=None,
help="Allow the products to be optional and not present for every tile, "
"e.g., --optional-products ga_ls_mangrove_cover_cyear_3",
)
@click_yaml_cfg("--config", help="Save tasks Config")
@click.option("--input-products", type=str, default="")
@click.argument("output", type=str, nargs=1, default="")
Expand All @@ -115,6 +130,8 @@ def save_tasks(
debug=False,
gqa=None,
usgs_collection_category=None,
ignore_time=None,
optional_products=None,
):
"""
Prepare tasks for processing (query db).
Expand All @@ -138,27 +155,34 @@ def save_tasks(

_cfg = {k: config.get(k) for k in CONFIG_ITEMS if config.get(k) is not None}

print(f"config from yaml {_cfg} {complevel}")

cfg_from_cli = {
k: v
for k, v in dict(
grid=grid,
frequency=frequency,
gqa=gqa,
input_products=input_products,
complevel=complevel,
dataset_filter=dataset_filter,
overwrite=overwrite,
).items()
if v is not None and v != ""
for k, v in {
"grid": grid,
"frequency": frequency,
"gqa": gqa,
"input_products": input_products,
"complevel": complevel,
"dataset_filter": dataset_filter,
"overwrite": overwrite,
"ignore_time": ignore_time,
"optional_products": optional_products,
}.items()
if v
}

_log.info("Config overrides: %s", cfg_from_cli)
_cfg.update(cfg_from_cli)
_cfg.setdefault("complevel", 6)
_log.info("Using config: %s", _cfg)

gqa = _cfg.pop("gqa", None)
input_products = _cfg.pop("input_products", None)
dataset_filter = _cfg.pop("dataset_filter", None)
ignore_time = _cfg.pop("ignore_time", None)
optional_products = _cfg.pop("optional_products", None)

if input_products is None:
print("Input products has to be specified", file=sys.stderr)
Expand Down Expand Up @@ -252,6 +276,8 @@ def collection_category_predicate(ds):
tiles=tiles,
predicate=predicate,
debug=debug,
ignore_time=ignore_time,
optional_products=optional_products,
msg=on_message,
)
except ValueError as e:
Expand Down
38 changes: 23 additions & 15 deletions odc/stats/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ def render_assembler_metadata(
Put together metadata document for the output of this task. It needs the source_dataset to inherit
several properties and lineages. It also needs the output_dataset to get the measurement information.
"""
# pylint:disable=too-many-branches
dataset_assembler = DatasetAssembler(
naming_conventions=self.product.naming_conventions_values,
dataset_location=Path(self.product.explorer_path),
Expand All @@ -410,17 +411,24 @@ def render_assembler_metadata(
for dataset in self.datasets:
if "fused" in dataset.type.name:
sources = [e["id"] for e in dataset.metadata.sources.values()]
platforms.append(dataset.metadata_doc["properties"]["eo:platform"])
if isinstance(
dataset.metadata_doc["properties"]["eo:instrument"], list
):
instruments += dataset.metadata_doc["properties"]["eo:instrument"]
else:
instruments += [dataset.metadata_doc["properties"]["eo:instrument"]]
if dataset.metadata_doc["properties"].get("eo:platform") is not None:
platforms.append(dataset.metadata_doc["properties"]["eo:platform"])
if dataset.metadata_doc["properties"].get("eo:instrument") is not None:
if isinstance(
dataset.metadata_doc["properties"]["eo:instrument"], list
):
instruments += dataset.metadata_doc["properties"][
"eo:instrument"
]
else:
instruments += [
dataset.metadata_doc["properties"]["eo:instrument"]
]
dataset_assembler.note_source_datasets(
self.product.classifier, *sources
)
else:
dataset.metadata_doc.setdefault("$schema", "")
source_datasetdoc = serialise.from_doc(
dataset.metadata_doc, skip_validation=True
)
Expand Down Expand Up @@ -534,7 +542,7 @@ def render_metadata(
proj_ext.apply(geobox.crs.epsg, transform=geobox.transform, shape=geobox.shape)

# Lineage last
item.properties["odc:lineage"] = dict(inputs=inputs)
item.properties["odc:lineage"] = {"inputs": inputs}

# Add all the assets
for band, path in self.paths(ext=ext).items():
Expand Down Expand Up @@ -672,13 +680,13 @@ def __bool__(self):
class TaskRunnerConfig: # pylint:disable=too-many-instance-attributes
@staticmethod
def default_cog_settings():
return dict(
compress="deflate",
zlevel=9,
blocksize=800,
ovr_blocksize=256, # ovr_blocksize must be powers of 2 for some reason in GDAL
overview_resampling="average",
)
return {
"compress": "deflate",
"zlevel": 9,
"blocksize": 800,
"ovr_blocksize": 256, # ovr_blocksize must be powers of 2 for some reason in GDAL
"overview_resampling": "average",
}

# Input
filedb: str = ""
Expand Down
4 changes: 4 additions & 0 deletions odc/stats/plugins/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ class StatsPluginInterface(ABC):
VERSION = "0.0.0"
PRODUCT_FAMILY = "statistics"

# pylint:disable=too-many-arguments
def __init__(
self,
resampling: str = "bilinear",
input_bands: Optional[Sequence[str]] = None,
optional_bands: Optional[Sequence[str]] = None,
chunks: Optional[Mapping[str, int]] = None,
basis: Optional[str] = None,
group_by: str = "solar_day",
Expand All @@ -29,6 +31,7 @@ def __init__(
):
self.resampling = resampling
self.input_bands = input_bands if input_bands is not None else []
self.optional_bands = optional_bands if optional_bands is not None else []
self.chunks = chunks if chunks is not None else {"y": -1, "x": -1}
self.basis = basis
self.group_by = group_by
Expand Down Expand Up @@ -61,6 +64,7 @@ def input_data(
fuser=self.fuser,
resampling=self.resampling,
chunks=self.chunks,
optional_bands=self.optional_bands,
**kwargs,
)
return xx
Expand Down
1 change: 1 addition & 0 deletions odc/stats/plugins/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def import_all():

# TODO: make that more automatic
modules = [
"odc.stats.plugins.lc_veg_class_a1",
"odc.stats.plugins.lc_fc_wo_a0",
"odc.stats.plugins.mangroves",
"odc.stats.plugins.fc_percentiles",
Expand Down
Loading

0 comments on commit 9477637

Please sign in to comment.