Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions batch/batch/cloud/gcp/driver/create_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ....instance_config import InstanceConfig
from ...resource_utils import unreserved_worker_data_disk_size_gib
from ...utils import ACCEPTABLE_QUERY_JAR_URL_PREFIX
from ..resource_utils import GPUConfig, gcp_machine_type_to_parts, machine_type_to_gpu
from ..resource_utils import GPUConfig, gcp_local_ssd_count, gcp_machine_type_to_parts, machine_type_to_gpu

log = logging.getLogger('create_instance')

Expand Down Expand Up @@ -62,21 +62,28 @@ def create_vm_config(
region = instance_config.region_for(zone)
docker_run_gpu_args = '--runtime=nvidia --gpus all' if machine_type_to_gpu(machine_type_full) else ''
if local_ssd_data_disk:
worker_data_disk = {
'type': 'SCRATCH',
'autoDelete': True,
'interface': 'NVME',
'initializeParams': {'diskType': f'zones/{zone}/diskTypes/local-ssd'},
}
worker_data_disk_name = 'nvme0n1'
num_local_ssds = gcp_local_ssd_count(parts.machine_family, cores)
worker_data_disks = [
{
'type': 'SCRATCH',
'autoDelete': True,
'interface': 'NVME',
'initializeParams': {'diskType': f'zones/{zone}/diskTypes/local-ssd'},
}
for _ in range(num_local_ssds)
]
worker_data_disk_name = 'md0' if num_local_ssds > 1 else 'nvme0n1'
else:
worker_data_disk = {
'autoDelete': True,
'initializeParams': {
'diskType': f'projects/{project}/zones/{zone}/diskTypes/pd-ssd',
'diskSizeGb': str(data_disk_size_gb),
},
}
num_local_ssds = 0
worker_data_disks = [
{
'autoDelete': True,
'initializeParams': {
'diskType': f'projects/{project}/zones/{zone}/diskTypes/pd-ssd',
'diskSizeGb': str(data_disk_size_gb),
},
}
]
worker_data_disk_name = 'nvme0n2' if 'g2' in machine_type else 'sdb'

if job_private:
Expand Down Expand Up @@ -123,7 +130,7 @@ def scheduling() -> dict:
'diskSizeGb': str(boot_disk_size_gb),
},
},
worker_data_disk,
*worker_data_disks,
],
'networkInterfaces': [
{
Expand Down Expand Up @@ -175,6 +182,7 @@ def scheduling() -> dict:
set -x

WORKER_DATA_DISK_NAME="{worker_data_disk_name}"
NUM_LOCAL_SSDS="{num_local_ssds}"
UNRESERVED_WORKER_DATA_DISK_SIZE_GB="{unreserved_disk_storage_gb}"
ACCEPTABLE_QUERY_JAR_URL_PREFIX="{ACCEPTABLE_QUERY_JAR_URL_PREFIX}"

Expand Down Expand Up @@ -252,6 +260,15 @@ def scheduling() -> dict:

sudo systemctl restart google-cloud-ops-agent

# combine multiple local SSDs into a single RAID0 array
if [ "$NUM_LOCAL_SSDS" -gt 1 ]; then
DEVICES=""
for i in $(seq 1 $NUM_LOCAL_SSDS); do
DEVICES="$DEVICES /dev/nvme0n$i"
done
mdadm --create /dev/md0 --level=0 --raid-devices=$NUM_LOCAL_SSDS $DEVICES --force --run
fi
Comment on lines +264 to +270
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The RAID script constructs device paths incorrectly for N2 machines. The loop seq 1 $NUM_LOCAL_SSDS generates indices 1, 2, 3... which creates device names /dev/nvme0n1, /dev/nvme0n2, /dev/nvme0n3. However, when multiple local SSDs are attached to a GCP instance, they appear as /dev/nvme0n1, /dev/nvme0n2, etc. BUT the script is missing sudo for the mdadm command, and more critically, NVMe device indices in GCP start from a specific offset depending on the boot disk. The script should verify device existence before use.

if [ "$NUM_LOCAL_SSDS" -gt 1 ]; then
    DEVICES=""
    for i in $(seq 0 $(($NUM_LOCAL_SSDS - 1))); do
        DEVICE="/dev/nvme0n$((i+1))"
        # Wait for device to be ready
        while [ ! -e "$DEVICE" ]; do sleep 1; done
        DEVICES="$DEVICES $DEVICE"
    done
    sudo mdadm --create /dev/md0 --level=0 --raid-devices=$NUM_LOCAL_SSDS $DEVICES --force --run
fi

The missing sudo will cause the mdadm command to fail with permission denied.

Spotted by Graphite

Fix in Graphite


Is this helpful? React 👍 or 👎 to let us know.


# format worker data disk
sudo mkfs.xfs -m reflink=1 -n ftype=1 /dev/$WORKER_DATA_DISK_NAME
sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME
Expand Down
6 changes: 4 additions & 2 deletions batch/batch/cloud/gcp/driver/pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def instance_family_from_sku(sku: dict) -> Optional[str]:
category = sku['category']
if category['resourceGroup'] == 'N1Standard':
return 'n1'
if sku['description'].startswith("N2 Instance") or sku['description'].startswith("Spot Preemptible N2 Instance"):
return 'n2'
if sku['description'].startswith("G2 Instance") or sku['description'].startswith("Spot Preemptible G2 Instance"):
return 'g2'
if sku['description'].startswith("A2 Instance") or sku['description'].startswith("Spot Preemptible A2 Instance"):
Expand Down Expand Up @@ -283,7 +285,7 @@ def process_accelerator_sku(sku: dict, regions: List[str]) -> List[GCPAccelerato
def process_memory_sku(sku: dict, regions: List[str]) -> List[GCPMemoryPrice]:
category = sku['category']
assert category['resourceFamily'] == 'Compute', sku
assert 'Ram' in sku['description']
assert 'Ram' in sku['description'] or 'Memory' in sku['description']

instance_family = instance_family_from_sku(sku)
preemptible = preemptible_from_sku(sku)
Expand Down Expand Up @@ -392,7 +394,7 @@ async def fetch_prices(
if 'Core' in sku['description']:
for compute_price in process_compute_sku(sku, regions):
yield compute_price
elif 'Ram' in sku['description']:
elif 'Ram' in sku['description'] or 'Memory' in sku['description']:
for memory_price in process_memory_sku(sku, regions):
yield memory_price
elif category['resourceFamily'] == 'Storage':
Expand Down
6 changes: 5 additions & 1 deletion batch/batch/cloud/gcp/driver/resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ....instance_config import InstanceConfig, QuantifiedResource
from ..instance_config import GCPSlimInstanceConfig
from ..resource_utils import (
GCP_LOCAL_SSD_PARTITION_SIZE_GIB,
GCP_MACHINE_FAMILY,
family_worker_type_cores_to_gcp_machine_type,
gcp_machine_type_to_cores_and_memory_bytes,
Expand Down Expand Up @@ -111,7 +112,10 @@ async def create_vm(
instance_config: InstanceConfig,
) -> List[QuantifiedResource]:
if local_ssd_data_disk:
assert data_disk_size_gb == 375
assert (
data_disk_size_gb % GCP_LOCAL_SSD_PARTITION_SIZE_GIB == 0
and data_disk_size_gb >= GCP_LOCAL_SSD_PARTITION_SIZE_GIB
)

resource_rates = self.billing_manager.resource_rates

Expand Down
94 changes: 88 additions & 6 deletions batch/batch/cloud/gcp/resource_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@

GCP_MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024

GCP_MACHINE_FAMILY = 'n1'
GCP_MACHINE_FAMILY = 'n2'

MEMORY_PER_CORE_MIB = {
('n1', 'standard'): 3840,
('n1', 'highmem'): 6656,
('n1', 'highcpu'): 924,
('n2', 'standard'): 4096,
('n2', 'highmem'): 8192,
('n2', 'highcpu'): 1024,
}


Expand Down Expand Up @@ -116,13 +119,62 @@ def __init__(self, machine_family: str, worker_type: str, cores: int, memory: in
for cores in [2, 4, 8, 16, 32, 64, 96]
}

# N2 Standard cores: 2 4 8 16 32 48 64 80 96 128
# N2 Standard mem: 4 * cores GiB
n2_standard_machines = {
f'n2-standard-{cores}': MachineTypeParts(
cores=cores,
memory=gib_to_bytes(4 * cores),
gpu_config=None,
machine_family='n2',
worker_type='standard',
)
for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96, 128]
}

# N2 Highmem cores: 2 4 8 16 32 48 64 80 96
# N2 Highmem mem: 8 * cores GiB
n2_highmem_machines = {
f'n2-highmem-{cores}': MachineTypeParts(
cores=cores,
memory=gib_to_bytes(8 * cores),
gpu_config=None,
machine_family='n2',
worker_type='highmem',
)
for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96]
}

# N2 Highcpu cores: 2 4 8 16 32 48 64 80 96
# N2 Highcpu mem: 1024 * cores MiB
n2_highcpu_machines = {
f'n2-highcpu-{cores}': MachineTypeParts(
cores=cores,
memory=mib_to_bytes(1024 * cores),
gpu_config=None,
machine_family='n2',
worker_type='highcpu',
)
for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96]
}

MACHINE_TYPE_TO_PARTS = {
**n1_standard_t4_machines,
**n1_highmem_t4_machines,
**n1_highcpu_t4_machines,
**n1_standard_machines,
**n1_highmem_machines,
**n1_highcpu_machines,
**n2_standard_machines,
**n2_highmem_machines,
**n2_highcpu_machines,
'n2-highmem-128': MachineTypeParts(
cores=128,
memory=gib_to_bytes(864),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 1024?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. Not sure why/how it ends up as 864 or why it breaks the 8x{cores} pattern, but that's what it is https://docs.cloud.google.com/compute/docs/general-purpose-machines#n2-high-mem

gpu_config=None,
machine_family='n2',
worker_type='highmem',
),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No n2-highcpu-128?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'g2-standard-4': MachineTypeParts(
cores=4,
memory=gib_to_bytes(16),
Expand Down Expand Up @@ -245,9 +297,9 @@ def __init__(self, machine_family: str, worker_type: str, cores: int, memory: in
}

gcp_valid_cores_for_pool_worker_type = {
'highcpu': [2, 4, 8, 16, 32, 64, 96],
'standard': [1, 2, 4, 8, 16, 32, 64, 96],
'highmem': [2, 4, 8, 16, 32, 64, 96],
'standard': [2, 4, 8, 16, 32, 48, 64, 80, 96, 128],
'highmem': [2, 4, 8, 16, 32, 48, 64, 80, 96, 128],
'highcpu': [2, 4, 8, 16, 32, 48, 64, 80, 96],
}

gcp_valid_machine_types = list(MACHINE_TYPE_TO_PARTS.keys())
Expand Down Expand Up @@ -291,8 +343,38 @@ def gcp_is_valid_storage_request(storage_in_gib: int) -> bool:
return 10 <= storage_in_gib <= GCP_MAX_PERSISTENT_SSD_SIZE_GIB


def gcp_local_ssd_size() -> int:
return 375
GCP_LOCAL_SSD_PARTITION_SIZE_GIB = 375

# N2 machines require local SSDs in specific quantities that vary by vCPU count.
# Source: https://docs.cloud.google.com/compute/docs/general-purpose-machines#n2-standard
N2_MIN_LOCAL_SSD_COUNT_BY_CORES = {
2: 1,
4: 1,
8: 1,
16: 2,
32: 4,
48: 8,
64: 8,
80: 8,
96: 16,
128: 16,
}


def gcp_local_ssd_count(machine_family: str, cores: int) -> int:
if machine_family != 'n2':
return 1
count = N2_MIN_LOCAL_SSD_COUNT_BY_CORES.get(cores)
if count is not None:
return count
for threshold_cores in sorted(N2_MIN_LOCAL_SSD_COUNT_BY_CORES.keys(), reverse=True):
if cores >= threshold_cores:
return N2_MIN_LOCAL_SSD_COUNT_BY_CORES[threshold_cores]
return 1


def gcp_local_ssd_size(machine_family: str, cores: int) -> int:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was updated to work for n2. Is it still valid when people ask for custom machine types that are not n2 (g4, n1, e5, etc?)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup -- We previously hardcoded in 375 GB for one SSD, but now we're gonna explicitly handle >1 SSD count in N2s via the new gcp_local_ssd_count().

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And "375 GB" is now a constant (GCP_LOCAL_SSD_PARTITION_SIZE) rather than a hardcoded value https://github.com/grohli/hail/blob/d646fb3f663bfcfcdd0b2bb0292ff4fb20a32b4a/batch/batch/cloud/gcp/resource_utils.py#L346

return GCP_LOCAL_SSD_PARTITION_SIZE_GIB * gcp_local_ssd_count(machine_family, cores)


def machine_type_to_gpu(machine_type: str) -> Optional[str]:
Expand Down
3 changes: 2 additions & 1 deletion batch/batch/cloud/resource_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
azure_valid_machine_types,
)
from .gcp.resource_utils import (
GCP_MACHINE_FAMILY,
gcp_is_valid_storage_request,
gcp_local_ssd_size,
gcp_machine_type_to_cores_and_memory_bytes,
Expand Down Expand Up @@ -115,4 +116,4 @@ def local_ssd_size(cloud: str, worker_type: str, cores: int) -> int:
if cloud == 'azure':
return azure_local_ssd_size(worker_type, cores)
assert cloud == 'gcp', cloud
return gcp_local_ssd_size()
return gcp_local_ssd_size(GCP_MACHINE_FAMILY, cores)
60 changes: 56 additions & 4 deletions batch/test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import pytest

from batch.cloud.azure.resource_utils import MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_AZURE
from batch.cloud.gcp.resource_utils import MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_GCP
from batch.cloud.gcp.resource_utils import gcp_worker_memory_per_core_mib, machine_type_to_gpu_num
from batch.cloud.gcp.resource_utils import (
MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_GCP,
)
from batch.cloud.gcp.resource_utils import (
gcp_local_ssd_count,
gcp_local_ssd_size,
gcp_worker_memory_per_core_mib,
machine_type_to_gpu_num,
)
from batch.cloud.gcp.resources import GCPAcceleratorResource, gcp_resource_from_dict
from batch.cloud.resource_utils import adjust_cores_for_packability
from batch.utils import rewrite_dockerhub_image
Expand Down Expand Up @@ -33,11 +40,12 @@ def test_memory_str_to_bytes():


def test_gcp_worker_memory_per_core_mib():
with pytest.raises(AssertionError):
assert gcp_worker_memory_per_core_mib('n2', 'standard')
assert gcp_worker_memory_per_core_mib('n1', 'standard') == 3840
assert gcp_worker_memory_per_core_mib('n1', 'highmem') == 6656
assert gcp_worker_memory_per_core_mib('n1', 'highcpu') == 924
assert gcp_worker_memory_per_core_mib('n2', 'standard') == 4096
assert gcp_worker_memory_per_core_mib('n2', 'highmem') == 8192
assert gcp_worker_memory_per_core_mib('n2', 'highcpu') == 1024


def test_gcp_machine_memory_per_core_mib():
Expand All @@ -48,6 +56,15 @@ def test_gcp_machine_memory_per_core_mib():
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 6656
elif machine_parts.machine_family == 'n1' and machine_parts.worker_type == 'highcpu':
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 924
elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'standard':
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 4096
elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'highmem':
if machine_parts.cores == 128:
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 6912
else:
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 8192
elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'highcpu':
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 1024
elif machine_parts.machine_family == 'g2' and machine_parts.worker_type == 'standard':
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 4096
elif machine_parts.machine_family == 'a2' and machine_parts.worker_type == 'highgpu':
Expand All @@ -71,6 +88,41 @@ def test_azure_machine_memory_per_core_mib():
assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 8192


@pytest.mark.parametrize(
"family,cores,expected",
[
('n1', 16, 1),
('n1', 96, 1),
('n2', 2, 1),
('n2', 4, 1),
('n2', 8, 1),
('n2', 16, 2),
('n2', 32, 4),
('n2', 48, 8),
('n2', 64, 8),
('n2', 80, 8),
('n2', 96, 16),
('n2', 128, 16),
],
)
def test_gcp_local_ssd_count(family, cores, expected):
assert gcp_local_ssd_count(family, cores) == expected


@pytest.mark.parametrize(
"family,cores,expected",
[
('n1', 16, 375),
('n2', 2, 375),
('n2', 16, 750),
('n2', 48, 3000),
('n2', 128, 6000),
],
)
def test_gcp_local_ssd_size(family, cores, expected):
assert gcp_local_ssd_size(family, cores) == expected


def test_gcp_resource_from_dict():
name = 'accelerator/l4-nonpreemptible/us-central1/1712657549063'
gpu_data_dic_single = {'name': name, 'number': 1, 'type': 'gcp_accelerator', 'format_version': 2}
Expand Down
Loading