airflow compute 源码

  • 2022-10-20
  • 浏览 (352)

airflow compute 代码

文件路径:/airflow/providers/google/cloud/hooks/compute.py

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module contains a Google Compute Engine Hook."""
from __future__ import annotations

import time
from typing import Any, Sequence

from googleapiclient.discovery import build

from airflow.exceptions import AirflowException
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook

# Pause, in seconds, between successive status polls while waiting for a
# Compute Engine operation to reach the DONE state.
TIME_TO_SLEEP_IN_SECONDS = 1


class GceOperationStatus:
    """
    String constants for the ``status`` field of a Compute Engine operation.

    The values are plain strings so they can be compared directly with the
    ``status`` entry of an operation response dictionary.
    """

    # Listed in the order the names suggest an operation moves through them.
    PENDING = "PENDING"
    RUNNING = "RUNNING"
    # Terminal value: the status a caller polls for to know the operation ended.
    DONE = "DONE"


class ComputeEngineHook(GoogleBaseHook):
    """
    Hook for Google Compute Engine APIs.

    All the methods in the hook where project_id is used must be called with
    keyword arguments rather than positional.

    :param api_version: Version of the Compute Engine API handed to the
        discovery client when the service object is built.
    :param gcp_conn_id: Forwarded unchanged to ``GoogleBaseHook``.
    :param delegate_to: Forwarded unchanged to ``GoogleBaseHook``.
    :param impersonation_chain: Forwarded unchanged to ``GoogleBaseHook``.
    """

    def __init__(
        self,
        api_version: str = 'v1',
        gcp_conn_id: str = 'google_cloud_default',
        delegate_to: str | None = None,
        impersonation_chain: str | Sequence[str] | None = None,
    ) -> None:
        super().__init__(
            gcp_conn_id=gcp_conn_id,
            delegate_to=delegate_to,
            impersonation_chain=impersonation_chain,
        )
        self.api_version = api_version

    # Lazily built service object; populated on the first get_conn() call.
    _conn: Any | None = None

    def get_conn(self):
        """
        Retrieves connection to Google Compute Engine.

        The service object is built once with ``googleapiclient.discovery.build``
        and then reused on subsequent calls.

        :return: Google Compute Engine service object as returned by ``build``
        """
        if not self._conn:
            http_authorized = self._authorize()
            self._conn = build('compute', self.api_version, http=http_authorized, cache_discovery=False)
        return self._conn

    @staticmethod
    def _operation_name_from_response(response: dict) -> str:
        """
        Extracts the operation name from the response of a mutating API call.

        :param response: Response dictionary returned by ``execute()``
        :return: Value of the ``name`` field of the response
        :raises AirflowException: if the response has no ``name`` field
        """
        try:
            return response["name"]
        except KeyError:
            raise AirflowException(f"Wrong response '{response}' returned - it should contain 'name' field")

    @GoogleBaseHook.fallback_to_default_project_id
    def start_instance(self, zone: str, resource_id: str, project_id: str = PROVIDE_PROJECT_ID) -> None:
        """
        Starts an existing instance defined by project_id, zone and resource_id.
        Must be called with keyword arguments rather than positional.

        Returns only after the resulting zone operation reports DONE.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = (
            self.get_conn()
            .instances()
            .start(project=project_id, zone=zone, instance=resource_id)
            .execute(num_retries=self.num_retries)
        )
        operation_name = self._operation_name_from_response(response)
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name, zone=zone)

    @GoogleBaseHook.fallback_to_default_project_id
    def stop_instance(self, zone: str, resource_id: str, project_id: str = PROVIDE_PROJECT_ID) -> None:
        """
        Stops an instance defined by project_id, zone and resource_id.
        Must be called with keyword arguments rather than positional.

        Returns only after the resulting zone operation reports DONE.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = (
            self.get_conn()
            .instances()
            .stop(project=project_id, zone=zone, instance=resource_id)
            .execute(num_retries=self.num_retries)
        )
        operation_name = self._operation_name_from_response(response)
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name, zone=zone)

    @GoogleBaseHook.fallback_to_default_project_id
    def set_machine_type(
        self, zone: str, resource_id: str, body: dict, project_id: str = PROVIDE_PROJECT_ID
    ) -> None:
        """
        Sets machine type of an instance defined by project_id, zone and resource_id.
        Must be called with keyword arguments rather than positional.

        Returns only after the resulting zone operation reports DONE.

        :param zone: Google Cloud zone where the instance exists.
        :param resource_id: Name of the Compute Engine instance resource
        :param body: Body required by the Compute Engine setMachineType API,
            as described in
            https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = self._execute_set_machine_type(zone, resource_id, body, project_id)
        operation_name = self._operation_name_from_response(response)
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name, zone=zone)

    def _execute_set_machine_type(self, zone: str, resource_id: str, body: dict, project_id: str) -> dict:
        """
        Issues the ``instances.setMachineType`` request and returns its raw response.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param body: Request body passed to ``setMachineType``
        :param project_id: Google Cloud project ID used for the request
        :return: Response dictionary returned by ``execute()``
        """
        return (
            self.get_conn()
            .instances()
            .setMachineType(project=project_id, zone=zone, instance=resource_id, body=body)
            .execute(num_retries=self.num_retries)
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def get_instance_template(self, resource_id: str, project_id: str = PROVIDE_PROJECT_ID) -> dict:
        """
        Retrieves instance template by project_id and resource_id.
        Must be called with keyword arguments rather than positional.

        :param resource_id: Name of the instance template
        :param project_id: Optional, Google Cloud project ID where the
            instance template exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: Instance template representation as object according to
            https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates
        :rtype: dict
        """
        return (
            self.get_conn()
            .instanceTemplates()
            .get(project=project_id, instanceTemplate=resource_id)
            .execute(num_retries=self.num_retries)
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def insert_instance_template(
        self,
        body: dict,
        project_id: str = PROVIDE_PROJECT_ID,
        request_id: str | None = None,
    ) -> None:
        """
        Inserts instance template using body specified.
        Must be called with keyword arguments rather than positional.

        Returns only after the resulting global operation reports DONE.

        :param body: Instance template representation as object according to
            https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates
        :param request_id: Optional, unique request_id that you might add to achieve
            full idempotence (for example when client call times out repeating the request
            with the same request id will not create a new instance template again)
            It should be in UUID format as defined in RFC 4122
        :param project_id: Optional, Google Cloud project ID where the
            instance template is created. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = (
            self.get_conn()
            .instanceTemplates()
            .insert(project=project_id, body=body, requestId=request_id)
            .execute(num_retries=self.num_retries)
        )
        operation_name = self._operation_name_from_response(response)
        # No zone is passed, so the wait polls the global operations endpoint.
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name)

    @GoogleBaseHook.fallback_to_default_project_id
    def get_instance_group_manager(
        self,
        zone: str,
        resource_id: str,
        project_id: str = PROVIDE_PROJECT_ID,
    ) -> dict:
        """
        Retrieves Instance Group Manager by project_id, zone and resource_id.
        Must be called with keyword arguments rather than positional.

        :param zone: Google Cloud zone where the Instance Group Manager exists
        :param resource_id: Name of the Instance Group Manager
        :param project_id: Optional, Google Cloud project ID where the
            Instance Group Manager exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: Instance group manager representation as object according to
            https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers
        :rtype: dict
        """
        return (
            self.get_conn()
            .instanceGroupManagers()
            .get(project=project_id, zone=zone, instanceGroupManager=resource_id)
            .execute(num_retries=self.num_retries)
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def patch_instance_group_manager(
        self,
        zone: str,
        resource_id: str,
        body: dict,
        project_id: str = PROVIDE_PROJECT_ID,
        request_id: str | None = None,
    ) -> None:
        """
        Patches Instance Group Manager with the specified body.
        Must be called with keyword arguments rather than positional.

        Returns only after the resulting zone operation reports DONE.

        :param zone: Google Cloud zone where the Instance Group Manager exists
        :param resource_id: Name of the Instance Group Manager
        :param body: Instance Group Manager representation as json-merge-patch object
            according to
            https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers/patch
        :param request_id: Optional, unique request_id that you might add to achieve
            full idempotence (for example when client call times out repeating the request
            with the same request id will not apply the patch again).
            It should be in UUID format as defined in RFC 4122
        :param project_id: Optional, Google Cloud project ID where the
            Instance Group Manager exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = (
            self.get_conn()
            .instanceGroupManagers()
            .patch(
                project=project_id,
                zone=zone,
                instanceGroupManager=resource_id,
                body=body,
                requestId=request_id,
            )
            .execute(num_retries=self.num_retries)
        )
        operation_name = self._operation_name_from_response(response)
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name, zone=zone)

    def _wait_for_operation_to_complete(
        self, project_id: str, operation_name: str, zone: str | None = None
    ) -> None:
        """
        Waits for the named operation to complete - checks status of the async call.

        The operation is polled every ``TIME_TO_SLEEP_IN_SECONDS`` seconds until
        its status is DONE; there is no upper bound on the number of polls.

        :param project_id: Google Cloud project ID the operation belongs to
        :param operation_name: name of the operation
        :param zone: optional zone of the request (None selects the global
            operations endpoint instead of the zone one)
        :return: None
        :raises AirflowException: if the finished operation carries an ``error`` entry
        """
        service = self.get_conn()
        while True:
            if zone is None:
                operation_response = self._check_global_operation_status(
                    service=service,
                    operation_name=operation_name,
                    project_id=project_id,
                    num_retries=self.num_retries,
                )
            else:
                operation_response = self._check_zone_operation_status(
                    service, operation_name, project_id, zone, self.num_retries
                )
            if operation_response.get("status") == GceOperationStatus.DONE:
                error = operation_response.get("error")
                if error:
                    code = operation_response.get("httpErrorStatusCode")
                    msg = operation_response.get("httpErrorMessage")
                    errors = error.get("errors")
                    # Render the errors list without its surrounding square
                    # brackets. When the list is missing or empty, fall back to
                    # the whole error payload rather than slicing str(None).
                    error_msg = str(errors)[1:-1] if errors else str(error)
                    raise AirflowException(f"{code} {msg}: " + error_msg)
                return
            time.sleep(TIME_TO_SLEEP_IN_SECONDS)

    @staticmethod
    def _check_zone_operation_status(
        service: Any, operation_name: str, project_id: str, zone: str, num_retries: int
    ) -> dict:
        """
        Fetches the current state of a zone operation.

        :param service: Compute Engine service object
        :param operation_name: name of the operation
        :param project_id: Google Cloud project ID the operation belongs to
        :param zone: zone the operation runs in
        :param num_retries: retry count passed to ``execute()``
        :return: Operation response dictionary
        """
        return (
            service.zoneOperations()
            .get(project=project_id, zone=zone, operation=operation_name)
            .execute(num_retries=num_retries)
        )

    @staticmethod
    def _check_global_operation_status(
        service: Any, operation_name: str, project_id: str, num_retries: int
    ) -> dict:
        """
        Fetches the current state of a global operation.

        :param service: Compute Engine service object
        :param operation_name: name of the operation
        :param project_id: Google Cloud project ID the operation belongs to
        :param num_retries: retry count passed to ``execute()``
        :return: Operation response dictionary
        """
        return (
            service.globalOperations()
            .get(project=project_id, operation=operation_name)
            .execute(num_retries=num_retries)
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def get_instance_info(
        self, zone: str, resource_id: str, project_id: str = PROVIDE_PROJECT_ID
    ) -> dict[str, Any]:
        """
        Gets instance information.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: Response dictionary of the ``instances.get`` call
        """
        return (
            self.get_conn()
            .instances()
            .get(project=project_id, instance=resource_id, zone=zone)
            .execute(num_retries=self.num_retries)
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def get_instance_address(
        self, zone: str, resource_id: str, project_id: str = PROVIDE_PROJECT_ID, use_internal_ip: bool = False
    ) -> str:
        """
        Return network address associated to instance.

        Only the first network interface of the instance, and the first access
        config of that interface, are consulted.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :param use_internal_ip: If true, return private IP address.
        :return: The ``networkIP`` of the interface when ``use_internal_ip`` is
            true, otherwise the ``natIP`` of its first access config
        :raises AirflowException: if an external address is requested but the
            interface has no access config or the access config has no ``natIP``
        """
        instance_info = self.get_instance_info(project_id=project_id, resource_id=resource_id, zone=zone)
        network_interface = instance_info["networkInterfaces"][0]
        if use_internal_ip:
            return network_interface.get("networkIP")

        access_configs = network_interface.get("accessConfigs")
        # An access config can exist without a natIP entry; treat that the same
        # as having no access config instead of returning None to the caller.
        nat_ip = access_configs[0].get("natIP") if access_configs else None
        if not nat_ip:
            raise AirflowException("The target instance does not have external IP")
        return nat_ip

    @GoogleBaseHook.fallback_to_default_project_id
    def set_instance_metadata(
        self, zone: str, resource_id: str, metadata: dict[str, str], project_id: str = PROVIDE_PROJECT_ID
    ) -> None:
        """
        Set instance metadata.

        Returns only after the resulting zone operation reports DONE.

        :param zone: Google Cloud zone where the instance exists
        :param resource_id: Name of the Compute Engine instance resource
        :param metadata: The new instance metadata; sent as the request body
            of ``instances.setMetadata``.
        :param project_id: Optional, Google Cloud project ID where the
            Compute Engine Instance exists. If set to None or missing,
            the default project_id from the Google Cloud connection is used.
        :return: None
        :raises AirflowException: if the response has no ``name`` field or the
            finished operation reports an error
        """
        response = (
            self.get_conn()
            .instances()
            .setMetadata(project=project_id, zone=zone, instance=resource_id, body=metadata)
            .execute(num_retries=self.num_retries)
        )
        operation_name = self._operation_name_from_response(response)
        self._wait_for_operation_to_complete(project_id=project_id, operation_name=operation_name, zone=zone)

相关信息

airflow 源码目录

相关文章

airflow init 源码

airflow automl 源码

airflow bigquery 源码

airflow bigquery_dts 源码

airflow bigtable 源码

airflow cloud_build 源码

airflow cloud_composer 源码

airflow cloud_memorystore 源码

airflow cloud_sql 源码

airflow cloud_storage_transfer_service 源码

0  赞