Skip to content

Commit

Permalink
Merge pull request #796 from GoogleCloudPlatform/release-branch
Browse files Browse the repository at this point in the history
Version 1.10.1
  • Loading branch information
nick-stroud committed Dec 22, 2022
2 parents 5693e89 + 7210cd3 commit 3c03c9a
Show file tree
Hide file tree
Showing 50 changed files with 432 additions and 407 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/dependency-review.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
name: 'Dependency Review'
# Triggered by pull_request events.
on: [pull_request]

# Limit the workflow token to read-only access to repository contents.
permissions:
  contents: read

jobs:
  dependency-review:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository, then run the dependency review action.
      - name: 'Checkout Repository'
        uses: actions/checkout@v3
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@v2
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ HPC deployments on the Google Cloud Platform.`,
log.Fatalf("cmd.Help function failed: %s", err)
}
},
Version: "v1.10.0",
Version: "v1.10.1",
Annotations: annotation,
}
)
Expand Down
2 changes: 1 addition & 1 deletion community/examples/AMD/hpc-cluster-amd-slurmv5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ deployment_groups:
# these images must match the images used by Slurm modules below because
# we are building OpenMPI with PMI support in libraries contained in
# Slurm installation
family: schedmd-v5-slurm-22-05-4-hpc-centos-7
family: schedmd-v5-slurm-22-05-6-hpc-centos-7
project: schedmd-slurm-public

- id: low_cost_node_group
Expand Down
58 changes: 0 additions & 58 deletions community/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,61 +4,3 @@ This directory contains a set of community example blueprint files that can be
fed into gHPC to create a deployment. For more information on how to read, write
and configure a custom blueprint, see
[the core examples folder](../../examples/README.md).

## Community Examples

### AMD

Examples using AMD HPC technologies can be found in the
[AMD folder](AMD). More information can be found in the
[readme](AMD/README.md).

### Intel

Examples using Intel HPC technologies can be found in the
[Intel folder](intel). More information can be found in the
[readme](intel/README.md).

### cloud-batch.yaml

[See description in core](../../examples/README.md/#cloud-batchyaml--)

### spack-gromacs.yaml

[See description in core](../../examples/README.md#spack-gromacsyaml--)

### omnia-cluster.yaml

[See description in core](../../examples/README.md#omnia-clusteryaml--)

### hpc-cluster-small-sharedvpc.yaml

[See description in core](../../examples/README.md#hpc-cluster-small-sharedvpcyaml--)

### hpc-cluster-localssd.yaml

[See description in core](../../examples/README.md#hpc-cluster-localssdyaml-community-badge-experimental-badge)

### slurm-gcp-v5-hpc-centos7.yaml

[See description in core](../../examples/README.md#slurm-gcp-v5-hpc-centos7yaml-)

### slurm-gcp-v5-ubuntu2004.yaml

[See description in core](../../examples/README.md#slurm-gcp-v5-ubuntu2004yaml-)

### slurm-gcp-v5-high-io.yaml

[See description in core](../../examples/README.md#slurm-gcp-v5-high-ioyaml-)

### htcondor-pool.yaml

[See description in core](../../examples/README.md#htcondor-poolyaml--)

### quantum-circuit-simulator.yaml

[See description in core](../../examples/README.md#quantum-circuit-simulatoryaml-)

### starccm-tutorial.yaml

[See description in core](../../examples/README.md#starccm-tutorialyaml--)
2 changes: 1 addition & 1 deletion community/examples/quantum-circuit-simulator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ deployment_groups:
conda config --system --add channels nvidia/label/cuquantum-22.07.1
conda update -n base conda --yes
conda create -n qsim python=3.9 --yes
conda install -n qsim cuda cuquantum make cmake cxx-compiler --yes
conda install -n qsim cuda cuquantum make cmake cxx-compiler=1.5.1 --yes
echo "cuda ==11.5.*" > /opt/conda/envs/qsim/conda-meta/pinned
conda clean -p -t --yes
conda activate qsim
Expand Down
2 changes: 1 addition & 1 deletion community/examples/slurm-gcp-v5-ubuntu2004.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ vars:
region: us-central1
zone: us-central1-c
instance_image:
family: schedmd-v5-slurm-22-05-4-ubuntu-2004-lts
family: schedmd-v5-slurm-22-05-6-ubuntu-2004-lts
project: projects/schedmd-slurm-public/global/images/family


Expand Down
2 changes: 1 addition & 1 deletion community/examples/starccm-tutorial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ deployment_groups:
machine_type: c2-standard-60
instance_count: 4
placement_policy:
vm_count: 4 # Note: should match instance count
vm_count: null
collocation: "COLLOCATED"
availability_domain_count: null

Expand Down
10 changes: 5 additions & 5 deletions community/front-end/ofe/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ argcomplete==2.0.0
asgiref==3.5.0
astroid==2.9.3
cachetools==5.0.0
certifi==2021.10.8
certifi==2022.12.7
cffi==1.15.0
charset-normalizer==2.0.12
click==7.1.2
cryptography==36.0.1
defusedxml==0.7.1
dill==0.3.4
Django==3.2.12
Django==3.2.16
django-allauth==0.48.0
django-extensions==3.1.5
# Need version 0.11.0 to be released with fixes for Django 3.2
Expand Down Expand Up @@ -39,15 +39,15 @@ lazy-object-proxy==1.7.1
libcst==0.4.1
mccabe==0.6.1
mypy-extensions==0.4.3
oauthlib==3.2.0
oauthlib==3.2.1
platformdirs==2.5.0
pre-commit==2.17.0
proto-plus==1.20.1
protobuf==3.19.4
protobuf==3.19.5
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.21
PyJWT==2.3.0
PyJWT==2.4.0
pylint==2.12.2
pylint-django==2.5.0
pylint-plugin-utils==0.7
Expand Down
104 changes: 5 additions & 99 deletions community/modules/README.md
Original file line number Diff line number Diff line change
@@ -1,101 +1,7 @@
# Community Modules

To learn more about using and writing HPC toolkit modules, see the [core
module documentation](../../modules/README.md).

## Compute

* [**SchedMD-slurm-on-gcp-partition**](compute/SchedMD-slurm-on-gcp-partition/README.md):
Creates a Slurm partition that can be used by the
SchedMD-slurm_on_gcp_controller.
* [**schedmd-slurm-gcp-v5-partition**](compute/schedmd-slurm-gcp-v5-partition/README.md):
Creates a Slurm partition that can be used by the
[schedmd-slurm-gcp-v5-controller] module.
* [**schedmd-slurm-gcp-v5-node-group**](compute/schedmd-slurm-gcp-v5-node-group/README.md):
Defines a node group that can be used as input to the
[schedmd-slurm-gcp-v5-partition] module.
* [**pbspro-execution**](compute/pbspro-execution/README.md):
Creates execution hosts for use in a PBS Professional cluster.

[schedmd-slurm-gcp-v5-controller]: scheduler/schedmd-slurm-gcp-v5-controller/README.md
[schedmd-slurm-gcp-v5-partition]: compute/schedmd-slurm-gcp-v5-partition/README.md

## Database

*
[**slurm-cloudsql-federation**](database/slurm-cloudsql-federation/README.md):
Creates a [Google SQL Instance](https://cloud.google.com/sql/) meant to be
integrated with a
[slurm controller](./third-pary/scheduler/SchedMD-slurm-on-gcp-controller/README.md).

## File System

* [**nfs-server**](file-system/nfs-server/README.md): Creates a VM instance and
configures an NFS server that can be mounted by other VM instances.

* [**DDN-EXAScaler**](third-party/file-system/DDN-EXAScaler/README.md): Creates
a [DDN EXAscaler lustre](<https://www.ddn.com/partners/google-cloud-platform/>)
file system. This module has
[license costs](https://console.developers.google.com/marketplace/product/ddnstorage/exascaler-cloud).

## Project

* [**new-project**](project/new-project/README.md): Creates a Google Cloud Projects

* [**service-account**](project/service-account/README.md): Creates [service
accounts](https://cloud.google.com/iam/docs/service-accounts) for a GCP project.

* [**service-enablement**](project/service-enablement/README.md): Allows
enabling various APIs for a Google Cloud Project

## Scripts

* [**omnia-install**](scripts/omnia-install/README.md): Installs SLURM via omnia
onto a cluster of compute VMs

* [**pbspro-preinstall**](scripts/pbspro-preinstall/README.md): Creates a
Cloud Storage bucket in which to save PBS Professional RPM packages for use
by PBS clusters.

* [**pbspro-install**](scripts/pbspro-install/README.md): Creates a
Toolkit runner to install [PBS Professional][pbspro] from RPM packages.

* [**pbspro-qmgr**](scripts/pbspro-qmgr/README.md): Creates a
Toolkit runner to run common `qmgr` commands when configuring a PBS
Professional cluster.

* [**spack-install**](scripts/spack-install/README.md): Creates a startup script
to install spack on an instance or the slurm controller

* [**wait-for-startup**](scripts/wait-for-startup/README.md): Waits for
successful completion of a startup script on a compute VM

## Scheduler

* [**SchedMD-slurm-on-gcp-controller**](scheduler/SchedMD-slurm-on-gcp-controller/README.md):
Creates a Slurm controller node using
[slurm-gcp](https://github.com/SchedMD/slurm-gcp/tree/master/tf/modules/controller)

* [**SchedMD-slurm-on-gcp-login-node**](scheduler/SchedMD-slurm-on-gcp-login-node/README.md):
Creates a Slurm login node using
[slurm-gcp](https://github.com/SchedMD/slurm-gcp/tree/master/tf/modules/login)

* [**schedmd-slurm-gcp-v5-login**](scheduler/schedmd-slurm-gcp-v5-login/README.md):
Creates a Slurm login node using [slurm-gcp] version 5.

* [**schedmd-slurm-gcp-v5-controller**](scheduler/schedmd-slurm-gcp-v5-controller/README.md):
Creates a Slurm controller using [slurm-gcp] version 5.

* [**schedmd-slurm-gcp-v5-hybrid**](scheduler/schedmd-slurm-gcp-v5-hybrid/README.md):
Creates configurations for hybrid partitions that can be used with an
on-premise Slurm cluster. This module uses the
[slurm-controller-hybrid](https://github.com/SchedMD/slurm-gcp/tree/v5.1.0/terraform/slurm_cluster/modules/slurm_controller_hybrid)
from the slurm-gcp project.

* [**pbspro-client**](scheduler/pbspro-client/README.md):
Creates a client host for submitting jobs to a PBS Professional cluster.

* [**pbspro-server**](scheduler/pbspro-server/README.md):
Creates a server host for operating a PBS Professional cluster.

[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0
This directory contains modules that rely on partner resources, have been
contributed by outside developers, or are in early development by the HPC
Toolkit team. The modules in this directory are listed alongside core modules in
the [core modules README](../../modules/README.md). There you can also learn
more about general use and how to write custom HPC Toolkit modules.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.10.0"
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.10.1"
}

required_version = ">= 0.14.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ No modules.
| <a name="input_enable_smt"></a> [enable\_smt](#input\_enable\_smt) | Enables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no |
| <a name="input_enable_spot_vm"></a> [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no |
| <a name="input_gpu"></a> [gpu](#input\_gpu) | Definition of requested GPU resources. | <pre>object({<br> count = number,<br> type = string<br> })</pre> | `null` | no |
| <a name="input_instance_image"></a> [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. If not<br>provided, the Slurm on GCP default published images will be used.<br><br>Expected Fields:<br>name: The name of the image. Mutually exclusive with family.<br>family: The image family to use. Mutually exclusive with name.<br>project: The project where the image is hosted.<br><br>Custom images must comply with Slurm on GCP requirements; it is highly<br>advised to use the packer templates provided by Slurm on GCP when<br>constructing custom slurm images.<br><br>More information can be found in the slurm-gcp docs:<br>https://github.com/SchedMD/slurm-gcp/blob/v5.1.0/docs/images.md#public-image. | `map(string)` | `{}` | no |
| <a name="input_instance_image"></a> [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. This<br>value is overridden if any of `source_image`, `source_image_family` or<br>`source_image_project` are set.<br><br>Expected Fields:<br>name: The name of the image. Mutually exclusive with family.<br>family: The image family to use. Mutually exclusive with name.<br>project: The project where the image is hosted.<br><br>Custom images must comply with Slurm on GCP requirements; it is highly<br>advised to use the packer templates provided by Slurm on GCP when<br>constructing custom slurm images.<br><br>More information can be found in the slurm-gcp docs:<br>https://github.com/SchedMD/slurm-gcp/blob/5.3.0/docs/images.md#public-image. | `map(string)` | <pre>{<br> "family": "schedmd-v5-slurm-22-05-6-hpc-centos-7",<br> "project": "projects/schedmd-slurm-public/global/images/family"<br>}</pre> | no |
| <a name="input_instance_template"></a> [instance\_template](#input\_instance\_template) | Self link to a custom instance template, used in place of other VM instance definition variables. | `string` | `null` | no |
| <a name="input_labels"></a> [labels](#input\_labels) | Labels to add to partition compute instances. List of key key, value pairs. | `any` | `{}` | no |
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | Compute Platform machine type to use for this partition compute nodes. | `string` | `"c2-standard-60"` | no |
Expand All @@ -141,6 +141,9 @@ No modules.
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created. | `string` | n/a | yes |
| <a name="input_service_account"></a> [service\_account](#input\_service\_account) | Service account to attach to the compute instances. If not set, the<br>default compute service account for the given project will be used with the<br>"https://www.googleapis.com/auth/cloud-platform" scope. | <pre>object({<br> email = string<br> scopes = set(string)<br> })</pre> | `null` | no |
| <a name="input_shielded_instance_config"></a> [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless<br>enable\_shielded\_vm is 'true'.<br>- enable\_integrity\_monitoring : Compare the most recent boot measurements to the<br> integrity policy baseline and return a pair of pass/fail results depending on<br> whether they match or not.<br>- enable\_secure\_boot : Verify the digital signature of all boot components, and<br> halt the boot process if signature verification fails.<br>- enable\_vtpm : Use a virtualized trusted platform module, which is a<br> specialized computer chip you can use to encrypt objects like keys and<br> certificates. | <pre>object({<br> enable_integrity_monitoring = bool<br> enable_secure_boot = bool<br> enable_vtpm = bool<br> })</pre> | <pre>{<br> "enable_integrity_monitoring": true,<br> "enable_secure_boot": true,<br> "enable_vtpm": true<br>}</pre> | no |
| <a name="input_source_image"></a> [source\_image](#input\_source\_image) | The custom VM image. It is recommended to use `instance_image` instead. | `string` | `""` | no |
| <a name="input_source_image_family"></a> [source\_image\_family](#input\_source\_image\_family) | The custom VM image family. It is recommended to use `instance_image` instead. | `string` | `""` | no |
| <a name="input_source_image_project"></a> [source\_image\_project](#input\_source\_image\_project) | The project hosting the custom VM image. It is recommended to use `instance_image` instead. | `string` | `""` | no |
| <a name="input_spot_instance_config"></a> [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. | <pre>object({<br> termination_action = string<br> })</pre> | `null` | no |
| <a name="input_tags"></a> [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no |
| <a name="input_zone_policy_allow"></a> [zone\_policy\_allow](#input\_zone\_policy\_allow) | Partition nodes will prefer to be created in the listed zones. If a zone appears<br>in both zone\_policy\_allow and zone\_policy\_deny, then zone\_policy\_deny will take<br>priority for that zone. | `set(string)` | `[]` | no |
Expand Down
13 changes: 10 additions & 3 deletions community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@

locals {

# Handle VM image format from 2 sources, prioritize source_image* variables
# over instance_image
source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != ""
source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "")
source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "")
source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "")

node_group = {
# Group Definition
group_name = var.name
Expand Down Expand Up @@ -43,9 +50,9 @@ locals {
on_host_maintenance = var.on_host_maintenance
preemptible = var.preemptible
shielded_instance_config = var.shielded_instance_config
source_image_family = lookup(var.instance_image, "family", "")
source_image_project = lookup(var.instance_image, "project", "")
source_image = lookup(var.instance_image, "name", "")
source_image_family = local.source_image_family
source_image_project = local.source_image_project
source_image = local.source_image
tags = var.tags
access_config = var.access_config
service_account = var.service_account != null ? var.service_account : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ variable "metadata" {

variable "instance_image" {
description = <<-EOD
Defines the image that will be used in the node group VM instances. If not
provided, the Slurm on GCP default published images will be used.
Defines the image that will be used in the node group VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.
Expected Fields:
name: The name of the image. Mutually exclusive with family.
Expand All @@ -86,12 +87,15 @@ variable "instance_image" {
Custom images must comply with Slurm on GCP requirements; it is highly
advised to use the packer templates provided by Slurm on GCP when
constructing custom slurm images.
More information can be found in the slurm-gcp docs:
https://github.com/SchedMD/slurm-gcp/blob/v5.1.0/docs/images.md#public-image.
https://github.com/SchedMD/slurm-gcp/blob/5.3.0/docs/images.md#public-image.
EOD
type = map(string)
default = {}
default = {
family = "schedmd-v5-slurm-22-05-6-hpc-centos-7"
project = "projects/schedmd-slurm-public/global/images/family"
}

validation {
condition = length(var.instance_image) == 0 || (
Expand All @@ -104,6 +108,24 @@ variable "instance_image" {
}
}

# Project that hosts the custom VM image. When any of the source_image*
# variables is non-empty, the module's locals take the image settings from
# them instead of from `instance_image`.
variable "source_image_project" {
  type        = string
  description = "The project hosting the custom VM image. It is recommended to use `instance_image` instead."
  default     = ""
}

# Image family of the custom VM image. An empty string (the default) leaves
# this input unset; when any source_image* input is non-empty, the module's
# locals prefer those inputs over `instance_image`.
variable "source_image_family" {
  description = "The custom VM image family. It is recommended to use `instance_image` instead."
  default     = ""
  type        = string
}

# Name of the custom VM image. An empty string (the default) leaves this
# input unset; when any source_image* input is non-empty, the module's locals
# prefer those inputs over `instance_image`.
variable "source_image" {
  description = "The custom VM image. It is recommended to use `instance_image` instead."
  default     = ""
  type        = string
}

variable "tags" {
type = list(string)
description = "Network tag list."
Expand Down
Loading

0 comments on commit 3c03c9a

Please sign in to comment.