hail-is · grohli · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 27, 2026
@@ -13,7 +13,7 @@
 from ....instance_config import InstanceConfig
 from ...resource_utils import unreserved_worker_data_disk_size_gib
 from ...utils import ACCEPTABLE_QUERY_JAR_URL_PREFIX
-from ..resource_utils import GPUConfig, gcp_machine_type_to_parts, machine_type_to_gpu
+from ..resource_utils import GPUConfig, gcp_local_ssd_count, gcp_machine_type_to_parts, machine_type_to_gpu
 
 log = logging.getLogger('create_instance')
 
@@ -62,21 +62,28 @@ def create_vm_config(
     region = instance_config.region_for(zone)
     docker_run_gpu_args = '--runtime=nvidia --gpus all' if machine_type_to_gpu(machine_type_full) else ''
     if local_ssd_data_disk:
-        worker_data_disk = {
-            'type': 'SCRATCH',
-            'autoDelete': True,
-            'interface': 'NVME',
-            'initializeParams': {'diskType': f'zones/{zone}/diskTypes/local-ssd'},
-        }
-        worker_data_disk_name = 'nvme0n1'
+        num_local_ssds = gcp_local_ssd_count(parts.machine_family, cores)
+        worker_data_disks = [
+            {
+                'type': 'SCRATCH',
+                'autoDelete': True,
+                'interface': 'NVME',
+                'initializeParams': {'diskType': f'zones/{zone}/diskTypes/local-ssd'},
+            }
+            for _ in range(num_local_ssds)
+        ]
+        worker_data_disk_name = 'md0' if num_local_ssds > 1 else 'nvme0n1'
     else:
-        worker_data_disk = {
-            'autoDelete': True,
-            'initializeParams': {
-                'diskType': f'projects/{project}/zones/{zone}/diskTypes/pd-ssd',
-                'diskSizeGb': str(data_disk_size_gb),
-            },
-        }
+        num_local_ssds = 0
+        worker_data_disks = [
+            {
+                'autoDelete': True,
+                'initializeParams': {
+                    'diskType': f'projects/{project}/zones/{zone}/diskTypes/pd-ssd',
+                    'diskSizeGb': str(data_disk_size_gb),
+                },
+            }
+        ]
         worker_data_disk_name = 'nvme0n2' if 'g2' in machine_type else 'sdb'
 
     if job_private:
@@ -123,7 +130,7 @@ def scheduling() -> dict:
                     'diskSizeGb': str(boot_disk_size_gb),
                 },
             },
-            worker_data_disk,
+            *worker_data_disks,
         ],
         'networkInterfaces': [
             {
@@ -175,6 +182,7 @@ def scheduling() -> dict:
 set -x
 
 WORKER_DATA_DISK_NAME="{worker_data_disk_name}"
+NUM_LOCAL_SSDS="{num_local_ssds}"
 UNRESERVED_WORKER_DATA_DISK_SIZE_GB="{unreserved_disk_storage_gb}"
 ACCEPTABLE_QUERY_JAR_URL_PREFIX="{ACCEPTABLE_QUERY_JAR_URL_PREFIX}"
 
@@ -252,6 +260,15 @@ def scheduling() -> dict:
 
 sudo systemctl restart google-cloud-ops-agent
 
+# combine multiple local SSDs into a single RAID0 array
+if [ "$NUM_LOCAL_SSDS" -gt 1 ]; then
+    DEVICES=""
+    for i in $(seq 1 $NUM_LOCAL_SSDS); do
+        DEVICES="$DEVICES /dev/nvme0n$i"
+    done
+    mdadm --create /dev/md0 --level=0 --raid-devices=$NUM_LOCAL_SSDS $DEVICES --force --run
+fi
+
 # format worker data disk
 sudo mkfs.xfs -m reflink=1 -n ftype=1 /dev/$WORKER_DATA_DISK_NAME
 sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME

@@ -179,6 +179,8 @@ def instance_family_from_sku(sku: dict) -> Optional[str]:
     category = sku['category']
     if category['resourceGroup'] == 'N1Standard':
         return 'n1'
+    if sku['description'].startswith("N2 Instance") or sku['description'].startswith("Spot Preemptible N2 Instance"):
+        return 'n2'
     if sku['description'].startswith("G2 Instance") or sku['description'].startswith("Spot Preemptible G2 Instance"):
         return 'g2'
     if sku['description'].startswith("A2 Instance") or sku['description'].startswith("Spot Preemptible A2 Instance"):
@@ -283,7 +285,7 @@ def process_accelerator_sku(sku: dict, regions: List[str]) -> List[GCPAccelerato
 def process_memory_sku(sku: dict, regions: List[str]) -> List[GCPMemoryPrice]:
     category = sku['category']
     assert category['resourceFamily'] == 'Compute', sku
-    assert 'Ram' in sku['description']
+    assert 'Ram' in sku['description'] or 'Memory' in sku['description']
 
     instance_family = instance_family_from_sku(sku)
     preemptible = preemptible_from_sku(sku)
@@ -392,7 +394,7 @@ async def fetch_prices(
             if 'Core' in sku['description']:
                 for compute_price in process_compute_sku(sku, regions):
                     yield compute_price
-            elif 'Ram' in sku['description']:
+            elif 'Ram' in sku['description'] or 'Memory' in sku['description']:
                 for memory_price in process_memory_sku(sku, regions):
                     yield memory_price
         elif category['resourceFamily'] == 'Storage':

@@ -21,6 +21,7 @@
 from ....instance_config import InstanceConfig, QuantifiedResource
 from ..instance_config import GCPSlimInstanceConfig
 from ..resource_utils import (
+    GCP_LOCAL_SSD_PARTITION_SIZE_GIB,
     GCP_MACHINE_FAMILY,
     family_worker_type_cores_to_gcp_machine_type,
     gcp_machine_type_to_cores_and_memory_bytes,
@@ -111,7 +112,10 @@ async def create_vm(
         instance_config: InstanceConfig,
     ) -> List[QuantifiedResource]:
         if local_ssd_data_disk:
-            assert data_disk_size_gb == 375
+            assert (
+                data_disk_size_gb % GCP_LOCAL_SSD_PARTITION_SIZE_GIB == 0
+                and data_disk_size_gb >= GCP_LOCAL_SSD_PARTITION_SIZE_GIB
+            )
 
         resource_rates = self.billing_manager.resource_rates
 

@@ -6,12 +6,15 @@
 
 GCP_MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024
 
-GCP_MACHINE_FAMILY = 'n1'
+GCP_MACHINE_FAMILY = 'n2'
 
 MEMORY_PER_CORE_MIB = {
     ('n1', 'standard'): 3840,
     ('n1', 'highmem'): 6656,
     ('n1', 'highcpu'): 924,
+    ('n2', 'standard'): 4096,
+    ('n2', 'highmem'): 8192,
+    ('n2', 'highcpu'): 1024,
 }
 
 
@@ -116,13 +119,62 @@ def __init__(self, machine_family: str, worker_type: str, cores: int, memory: in
     for cores in [2, 4, 8, 16, 32, 64, 96]
 }
 
+# N2 Standard cores: 2 4 8 16 32 48 64 80 96 128
+# N2 Standard mem: 4 * cores GiB
+n2_standard_machines = {
+    f'n2-standard-{cores}': MachineTypeParts(
+        cores=cores,
+        memory=gib_to_bytes(4 * cores),
+        gpu_config=None,
+        machine_family='n2',
+        worker_type='standard',
+    )
+    for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96, 128]
+}
+
+# N2 Highmem cores: 2 4 8 16 32 48 64 80 96
+# N2 Highmem mem: 8 * cores GiB
+n2_highmem_machines = {
+    f'n2-highmem-{cores}': MachineTypeParts(
+        cores=cores,
+        memory=gib_to_bytes(8 * cores),
+        gpu_config=None,
+        machine_family='n2',
+        worker_type='highmem',
+    )
+    for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96]
+}
+
+# N2 Highcpu cores: 2 4 8 16 32 48 64 80 96
+# N2 Highcpu mem: 1024 * cores MiB
+n2_highcpu_machines = {
+    f'n2-highcpu-{cores}': MachineTypeParts(
+        cores=cores,
+        memory=mib_to_bytes(1024 * cores),
+        gpu_config=None,
+        machine_family='n2',
+        worker_type='highcpu',
+    )
+    for cores in [2, 4, 8, 16, 32, 48, 64, 80, 96]
+}
+
 MACHINE_TYPE_TO_PARTS = {
     **n1_standard_t4_machines,
     **n1_highmem_t4_machines,
     **n1_highcpu_t4_machines,
     **n1_standard_machines,
     **n1_highmem_machines,
     **n1_highcpu_machines,
+    **n2_standard_machines,
+    **n2_highmem_machines,
+    **n2_highcpu_machines,
+    'n2-highmem-128': MachineTypeParts(
+        cores=128,
+        memory=gib_to_bytes(864),
+        gpu_config=None,
+        machine_family='n2',
+        worker_type='highmem',
+    ),
     'g2-standard-4': MachineTypeParts(
         cores=4,
         memory=gib_to_bytes(16),
@@ -245,9 +297,9 @@ def __init__(self, machine_family: str, worker_type: str, cores: int, memory: in
 }
 
 gcp_valid_cores_for_pool_worker_type = {
-    'highcpu': [2, 4, 8, 16, 32, 64, 96],
-    'standard': [1, 2, 4, 8, 16, 32, 64, 96],
-    'highmem': [2, 4, 8, 16, 32, 64, 96],
+    'standard': [2, 4, 8, 16, 32, 48, 64, 80, 96, 128],
+    'highmem': [2, 4, 8, 16, 32, 48, 64, 80, 96, 128],
+    'highcpu': [2, 4, 8, 16, 32, 48, 64, 80, 96],
 }
 
 gcp_valid_machine_types = list(MACHINE_TYPE_TO_PARTS.keys())
@@ -291,8 +343,38 @@ def gcp_is_valid_storage_request(storage_in_gib: int) -> bool:
     return 10 <= storage_in_gib <= GCP_MAX_PERSISTENT_SSD_SIZE_GIB
 
 
-def gcp_local_ssd_size() -> int:
-    return 375
+GCP_LOCAL_SSD_PARTITION_SIZE_GIB = 375
+
+# N2 machines require local SSDs in specific quantities that vary by vCPU count.
+# Source: https://docs.cloud.google.com/compute/docs/general-purpose-machines#n2-standard
+N2_MIN_LOCAL_SSD_COUNT_BY_CORES = {
+    2: 1,
+    4: 1,
+    8: 1,
+    16: 2,
+    32: 4,
+    48: 8,
+    64: 8,
+    80: 8,
+    96: 16,
+    128: 16,
+}
+
+
+def gcp_local_ssd_count(machine_family: str, cores: int) -> int:
+    if machine_family != 'n2':
+        return 1
+    count = N2_MIN_LOCAL_SSD_COUNT_BY_CORES.get(cores)
+    if count is not None:
+        return count
+    for threshold_cores in sorted(N2_MIN_LOCAL_SSD_COUNT_BY_CORES.keys(), reverse=True):
+        if cores >= threshold_cores:
+            return N2_MIN_LOCAL_SSD_COUNT_BY_CORES[threshold_cores]
+    return 1
+
+
+def gcp_local_ssd_size(machine_family: str, cores: int) -> int:
+    return GCP_LOCAL_SSD_PARTITION_SIZE_GIB * gcp_local_ssd_count(machine_family, cores)
 
 
 def machine_type_to_gpu(machine_type: str) -> Optional[str]:

@@ -13,6 +13,7 @@
     azure_valid_machine_types,
 )
 from .gcp.resource_utils import (
+    GCP_MACHINE_FAMILY,
     gcp_is_valid_storage_request,
     gcp_local_ssd_size,
     gcp_machine_type_to_cores_and_memory_bytes,
@@ -115,4 +116,4 @@ def local_ssd_size(cloud: str, worker_type: str, cores: int) -> int:
     if cloud == 'azure':
         return azure_local_ssd_size(worker_type, cores)
     assert cloud == 'gcp', cloud
-    return gcp_local_ssd_size()
+    return gcp_local_ssd_size(GCP_MACHINE_FAMILY, cores)
diff --git a/batch/test/test_utils.py b/batch/test/test_utils.py
@@ -1,8 +1,15 @@
 import pytest
 
 from batch.cloud.azure.resource_utils import MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_AZURE
-from batch.cloud.gcp.resource_utils import MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_GCP
-from batch.cloud.gcp.resource_utils import gcp_worker_memory_per_core_mib, machine_type_to_gpu_num
+from batch.cloud.gcp.resource_utils import (
+    MACHINE_TYPE_TO_PARTS as MACHINE_TYPE_TO_PARTS_GCP,
+)
+from batch.cloud.gcp.resource_utils import (
+    gcp_local_ssd_count,
+    gcp_local_ssd_size,
+    gcp_worker_memory_per_core_mib,
+    machine_type_to_gpu_num,
+)
 from batch.cloud.gcp.resources import GCPAcceleratorResource, gcp_resource_from_dict
 from batch.cloud.resource_utils import adjust_cores_for_packability
 from batch.utils import rewrite_dockerhub_image
@@ -33,11 +40,12 @@ def test_memory_str_to_bytes():
 
 
 def test_gcp_worker_memory_per_core_mib():
-    with pytest.raises(AssertionError):
-        assert gcp_worker_memory_per_core_mib('n2', 'standard')
     assert gcp_worker_memory_per_core_mib('n1', 'standard') == 3840
     assert gcp_worker_memory_per_core_mib('n1', 'highmem') == 6656
     assert gcp_worker_memory_per_core_mib('n1', 'highcpu') == 924
+    assert gcp_worker_memory_per_core_mib('n2', 'standard') == 4096
+    assert gcp_worker_memory_per_core_mib('n2', 'highmem') == 8192
+    assert gcp_worker_memory_per_core_mib('n2', 'highcpu') == 1024
 
 
 def test_gcp_machine_memory_per_core_mib():
@@ -48,6 +56,15 @@ def test_gcp_machine_memory_per_core_mib():
             assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 6656
         elif machine_parts.machine_family == 'n1' and machine_parts.worker_type == 'highcpu':
             assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 924
+        elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'standard':
+            assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 4096
+        elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'highmem':
+            if machine_parts.cores == 128:
+                assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 6912
+            else:
+                assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 8192
+        elif machine_parts.machine_family == 'n2' and machine_parts.worker_type == 'highcpu':
+            assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 1024
         elif machine_parts.machine_family == 'g2' and machine_parts.worker_type == 'standard':
             assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 4096
         elif machine_parts.machine_family == 'a2' and machine_parts.worker_type == 'highgpu':
@@ -71,6 +88,41 @@ def test_azure_machine_memory_per_core_mib():
             assert int(machine_parts.memory / machine_parts.cores / 1024**2) == 8192
 
 
+@pytest.mark.parametrize(
+    "family,cores,expected",
+    [
+        ('n1', 16, 1),
+        ('n1', 96, 1),
+        ('n2', 2, 1),
+        ('n2', 4, 1),
+        ('n2', 8, 1),
+        ('n2', 16, 2),
+        ('n2', 32, 4),
+        ('n2', 48, 8),
+        ('n2', 64, 8),
+        ('n2', 80, 8),
+        ('n2', 96, 16),
+        ('n2', 128, 16),
+    ],
+)
+def test_gcp_local_ssd_count(family, cores, expected):
+    assert gcp_local_ssd_count(family, cores) == expected
+
+
+@pytest.mark.parametrize(
+    "family,cores,expected",
+    [
+        ('n1', 16, 375),
+        ('n2', 2, 375),
+        ('n2', 16, 750),
+        ('n2', 48, 3000),
+        ('n2', 128, 6000),
+    ],
+)
+def test_gcp_local_ssd_size(family, cores, expected):
+    assert gcp_local_ssd_size(family, cores) == expected
+
+
 def test_gcp_resource_from_dict():
     name = 'accelerator/l4-nonpreemptible/us-central1/1712657549063'
     gpu_data_dic_single = {'name': name, 'number': 1, 'type': 'gcp_accelerator', 'format_version': 2}