kubernetes - Unable to create a namespace for an AKS cluster with Terraform, reports no such host

Tags: kubernetes azure-aks

I have a module defined as follows:

===

providers.tf

provider "kubernetes" {
  #load_config_file = "false"
  host                   = azurerm_kubernetes_cluster.aks.kube_config.0.host
  username               = azurerm_kubernetes_cluster.aks.kube_config.0.username
  password               = azurerm_kubernetes_cluster.aks.kube_config.0.password
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}

outputs.tf

output "node_resource_group" {
  value       = azurerm_kubernetes_cluster.aks.node_resource_group
  description = "The name of resource group where the AKS Nodes are created"
}
output "kubeConfig" {
  value = azurerm_kubernetes_cluster.aks.kube_config_raw
  description = "Kubeconfig of AKS Cluster"
}

output "host" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.host
}

output "client_key" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.client_key
}

output "client_certificate" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate
}

output "kube_config" {
  value = azurerm_kubernetes_cluster.aks.kube_config_raw
}

output "cluster_ca_certificate" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate
}

main.tf

resource "azurerm_log_analytics_workspace" "law" {
  name                = "${var.tla}-la-${local.lookup_result}-${var.identifier}"
  location            = data.azurerm_resource_group.rg.location
  resource_group_name = data.azurerm_resource_group.rg.name
  sku                 = var.la_sku
  retention_in_days   = 30
}

resource "azurerm_kubernetes_cluster" "aks" {
  name                    = "${var.tla}-aks-${local.lookup_result}-${var.identifier}"
  location                = data.azurerm_resource_group.rg.location
  resource_group_name     = data.azurerm_resource_group.rg.name
  dns_prefix              = var.dns_prefix
  kubernetes_version      = var.kubernetes_version
  sku_tier                = var.sku_tier
  private_cluster_enabled = var.enable_private_cluster
  #api_server_authorized_ip_ranges = ""
  default_node_pool {
    name                  = "syspool001"
    orchestrator_version  = var.orchestrator_version
    availability_zones    = var.agents_availability_zones
    enable_auto_scaling   = true
    node_count            = var.default_pool_node_count
    max_count             = var.default_pool_max_node_count
    min_count             = var.default_pool_min_node_count
    max_pods              = var.default_pool_max_pod_count
    vm_size               = var.agents_size
    enable_node_public_ip = false
    os_disk_size_gb       = var.default_pool_os_disk_size_gb
    type                  = "VirtualMachineScaleSets"
    vnet_subnet_id        = var.vnet_subnet_id
    node_labels           = var.agents_labels
    tags                  = merge(local.tags, var.agents_tags)
  }

  network_profile {
    network_plugin     = var.network_plugin
    network_policy     = var.network_policy
    dns_service_ip     = var.net_profile_dns_service_ip
    docker_bridge_cidr = var.net_profile_docker_bridge_cidr
    service_cidr       = var.net_profile_service_cidr
  }

  role_based_access_control {
    enabled = true
    azure_active_directory {
      managed                = true
      admin_group_object_ids = var.rbac_aad_admin_group_object_ids
    }
  }

  identity {
    type = "SystemAssigned"
  }

  addon_profile {
    azure_policy {
      enabled = true
    }

    http_application_routing {
      enabled = false
    }

    oms_agent {
      enabled                    = true
      log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id
    }
  }

  tags = local.tags

  lifecycle {
    ignore_changes = [
      default_node_pool
    ]
  }

}

resource "azurerm_kubernetes_cluster_node_pool" "aksnp" {
  lifecycle {
    ignore_changes = [
      node_count
    ]
  }
  for_each              = var.additional_node_pools
  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
  name                  = each.value.node_os == "Windows" ? substr(each.key, 0, 6) : substr(each.key, 0, 12)
  node_count            = each.value.node_count
  vm_size               = each.value.vm_size
  availability_zones    = each.value.zones
  max_pods              = each.value.max_pods
  os_disk_size_gb       = each.value.os_disk_size_gb
  os_type               = each.value.node_os
  vnet_subnet_id        = var.vnet_subnet_id
  node_taints           = each.value.taints
  enable_auto_scaling   = each.value.cluster_auto_scaling
  min_count             = each.value.cluster_auto_scaling_min_count
  max_count             = each.value.cluster_auto_scaling_max_count
}

resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = var.namespace
  }
}

data.tf

data "azurerm_resource_group" "rg" {
  name = var.resource_group_name
}

lookups.tf

locals {

  environment_lookup = {
    dev  = "d"
    test = "t"
    int  = "i"
    prod = "p"
    prd  = "p"
    uat  = "a"
    poc  = "d"
    dr   = "r"
    lab  = "l"
  }

 
  lookup_result = lookup(local.environment_lookup, var.environment)

  tags = merge(
    data.azurerm_resource_group.rg.tags, {
      Directory      = "tectcompany.com",
      PrivateDNSZone = var.private_dns_zone,
      Immutable      = "False",
      ManagedOS      = "True",
    }
  )
}

data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = "abc-az-lad2"
  resource_group_name = "abc-dev-aae"
}

variables.tf

variable "secondary_region" {
  description = "Is this resource being deployed into the secondary (pair) region?"

  default = false
  type    = bool
}

variable "override_log_analytics_workspace" {
  description = "Override the vm log analytics workspace"
  type        = string
  default     = null
}

variable "override_log_analytics_resource_group_name" {
  description = "Overrides the log analytics resource group name"
  type        = string
  default     = null
}

variable "environment" {
  description = "The name of environment for the AKS Cluster"
  type        = string
  default     = "dev"
}

variable "identifier" {
  description = "The identifier for the AKS Cluster"
  type        = number
  default     = "001"
}

variable "kubernetes_version" {
  description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region"
  type        = string
  default     = "1.19.9"
}

variable "dns_prefix" {
  description = "The dns prefix for the AKS Cluster"
  type        = string
  default     = "odessa-sandpit"
}

variable "orchestrator_version" {
  description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region"
  type        = string
  default     = null
}

variable "agents_availability_zones" {
  description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created."
  type        = list(string)
  default     = null
}

variable "agents_size" {
  default     = "Standard_D4s_v3"
  description = "The default virtual machine size for the Kubernetes agents"
  type        = string
}

variable "vnet_subnet_id" {
  description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "agents_labels" {
  description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created."
  type        = map(string)
  default     = {}
}

variable "agents_tags" {
  description = "(Optional) A mapping of tags to assign to the Node Pool."
  type        = map(string)
  default     = {}
}

variable "net_profile_dns_service_ip" {
  description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "net_profile_docker_bridge_cidr" {
  description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "net_profile_service_cidr" {
  description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "rbac_aad_admin_group_object_ids" {
  description = "Object ID of groups with admin access."
  type        = list(string)
  default     = null
}

variable "network_policy" {
  description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster."
  type        = string
  default     = "azure"
}

variable "network_plugin" {
  description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster."
  type        = string
  default     = "azure"
}

variable "enable_private_cluster" {
  description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
  default     = true
}

variable "default_pool_node_count" {
  description = "(Optional) The initial node count for the default pool of AKS Cluster"
  type        = number
  default     = 3
}

variable "default_pool_max_node_count" {
  description = "(Optional) The max node count for the default pool of AKS Cluster"
  type        = number
  default     = 6
}

variable "default_pool_min_node_count" {
  description = "(Optional) The min node count for the default pool of AKS Cluster"
  type        = number
  default     = 3
}

variable "default_pool_max_pod_count" {
  description = "(Optional) The max pod count for the default pool of AKS Cluster"
  type        = number
  default     = 13
}

variable "default_pool_os_disk_size_gb" {
  description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster"
  type        = string
  default     = "64"
}

variable "additional_node_pools" {
  type = map(object({
    node_count                     = number
    max_pods                       = number
    os_disk_size_gb                = number
    vm_size                        = string
    zones                          = list(string)
    node_os                        = string
    taints                         = list(string)
    cluster_auto_scaling           = bool
    cluster_auto_scaling_min_count = number
    cluster_auto_scaling_max_count = number
  }))
}
variable "sku_tier" {
  description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid"
  type        = string
  default     = "Paid"

  validation {
    condition     = contains(["Free", "Paid"], var.sku_tier)
    error_message = "SKU_TIER can only be either Paid or Free."
  }

}

variable "la_sku" {

  description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible."
  type        = string
  default     = "PerGB2018"

  validation {
    condition     = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku)
    error_message = "SKU_TIER for Log Analytics can be can only be either of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation and PerGB2018(Default Value)."
  }

}

variable "resource_group_name" {
  description = "Resource Group for deploying AKS Cluster"
  type = string
}

variable "private_dns_zone" {
  description = "DNS prefix for AKS Cluster"
  type = string
  default = "testcluster"
}

variable "tla" {
  description = "Three Level acronym - three letter abbreviation for application"
  type = string
  default = ""
  validation {
    condition     = length(var.tla) == 3
    error_message = "The TLA should be precisely three characters."
  }
}

variable "namespace" {
  description = "AKS Namespace"
  type        = string
}

Finally, I call my module as shown below to create the AKS cluster, the Log Analytics workspace, and a namespace for the AKS cluster:

provider "azurerm" {
  features {}
  #version = "~> 2.53.0"
}

module "aks-cluster1" {
  source              = "../../"
  resource_group_name = "pst-aks-sandpit-dev-1"
  tla                 = "pqr"

  additional_node_pools = {
    pool1 = {
      node_count                     = "1"
      max_pods                       = "110"
      os_disk_size_gb                = "30"
      vm_size                        = "Standard_D8s_v3"
      zones                          = ["1", "2", "3"]
      node_os                        = "Linux"
      taints                         = ["kubernetes.io/os=windows:NoSchedule"]
      cluster_auto_scaling           = true
      cluster_auto_scaling_min_count = "2"
      cluster_auto_scaling_max_count = "4"
    }
  }

  namespace = "sample-ns"
}

Problem: when Terraform attempts to create the namespace for the cluster, I get an error reporting that there is no such host.

I think it is unable to connect to the cluster, but I could be wrong. I don't know how this is handled internally.

Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host

Best Answer

I'm one of the maintainers of the Terraform Kubernetes provider, and I see this particular problem a lot. As a former DevOps person, I sympathize with the struggles I keep seeing in this area, and I would genuinely like to fix this in the provider if it were possible.

The problem you are facing is a limitation of Terraform core when passing an unknown value to a provider configuration block. Quoting their documentation:

You can use expressions in the values of these configuration arguments, 
but can only reference values that are known before the configuration is applied.

When you make a change to the underlying infrastructure (such as the AKS cluster in this case), you are passing an unknown value into the Kubernetes provider configuration block, because the full scope of the cluster infrastructure is not known until after the change has been applied to the AKS cluster.
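To make that concrete, here is the pattern from your providers.tf, stripped down and annotated (nothing new is added here; it is the same provider block shown above):

# The Kubernetes provider is configured from attributes of a resource that is
# managed in the same configuration. Whenever azurerm_kubernetes_cluster.aks is
# being created or replaced, these attributes are still unknown ("known after
# apply") at the time the provider is configured.
provider "kubernetes" {
  host                   = azurerm_kubernetes_cluster.aks.kube_config.0.host
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}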

Although I did write the initial guide showing that it can be possible to work around some of these issues, as you have found from experience, there are many edge cases that make getting the Kubernetes provider to work on top of infrastructure managed in the same apply an unreliable and unintuitive process. This is due to a long-standing limitation in Terraform that cannot be fixed in any provider, but we do plan to smooth out the bumps a little by adding better error messages upfront, which in this case would have saved you some trouble.

To work around this particular kind of problem, the cluster infrastructure needs to be kept in a state separate from the Kubernetes and Helm provider resources. I have an example here that builds an AKS cluster in one apply and then manages the Kubernetes/Helm resources in a second apply. You can use this approach to build the most robust configuration for your particular use case:

https://github.com/hashicorp/terraform-provider-kubernetes/tree/e058e225e621f06e393bcb6407e7737fd43817bd/_examples/aks
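As an illustration only (this is not the exact code from that repository), the second, Kubernetes-only configuration could read the already-created cluster through a data source, so the provider is configured entirely from values that are known before apply. The cluster and resource group names below are placeholders:

provider "azurerm" {
  features {}
}

# Look up the cluster that was created by the first, infrastructure-only apply.
data "azurerm_kubernetes_cluster" "aks" {
  name                = "pqr-aks-d-1"            # placeholder: name of the existing cluster
  resource_group_name = "pst-aks-sandpit-dev-1"  # placeholder: its resource group
}

# Every value here is known before apply, because the cluster already exists.
provider "kubernetes" {
  host                   = data.azurerm_kubernetes_cluster.aks.kube_config.0.host
  client_certificate     = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
  client_key             = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}

resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = "sample-ns"
  }
}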

I know this two-apply approach is inconvenient, which is why we keep trying to accommodate users in single-apply scenarios, and in scenarios where the Kubernetes resources and the cluster live in the same Terraform state. However, until upstream Terraform adds support for this, the single-apply workflow will remain error-prone and less reliable than keeping the cluster infrastructure separate from the Kubernetes resources.

Most cases can be worked around either with depends_on (to make sure the cluster is created before the Kubernetes resources), or by moving the cluster infrastructure into a separate module and running terraform state rm module.kubernetes-config or terraform apply -target=module.aks-cluster. But I think encouraging these workarounds causes more trouble in the long run, because it leaves the user responsible for figuring out when to use special one-off apply commands, rather than setting Terraform up to behave reliably and predictably from the start. It can also have unintended side effects, such as orphaning cloud resources.
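For completeness, the depends_on workaround mentioned above would look roughly like this inside your module (a sketch only, and, as explained, not an approach I recommend relying on):

resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = var.namespace
  }

  # Make the namespace wait explicitly for the AKS cluster, rather than relying
  # only on the implicit dependency through the provider configuration.
  depends_on = [azurerm_kubernetes_cluster.aks]
}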

Regarding "kubernetes - Unable to create a namespace for an AKS cluster with Terraform, reports no such host", the corresponding question can be found on Stack Overflow: https://stackoverflow.com/questions/68140080/
