该答案(Answer)没有提供解决此问题的线索,因为回滚会删除堆栈。
下面是 CloudFormation 模板,编写用于在默认公共(public)子网中的 ECS 容器实例(DesiredCount:1)中启动 Jenkins docker 容器。
Jenkins docker 镜像在 dockerhub 中公开可用,该镜像在 ECS 容器实例中启动。
我们使用 ca-central-1 区域的 ECS 优化 AMI 镜像(ami-05958d7635caa4d04),其运行的 docker 版本为 18.06.1。
{
  "AWSTemplateFormatVersion": "2010-09-09",
  "Description": "Jenkins Stack",
  "Parameters": {
    "VpcId": {
      "Type": "AWS::EC2::VPC::Id",
      "Description": "The target VPC Id"
    },
    "SubnetId": {
      "Type": "AWS::EC2::Subnet::Id",
      "Description": "The target subnet Id"
    },
    "KeyName": {
      "Type": "String",
      "Description": "The key pair that is allowed SSH access"
    }
  },
  "Resources": {
    "EC2Instance": {
      "Type": "AWS::EC2::Instance",
      "Properties": {
        "ImageId": "ami-05958d7635caa4d04",
        "InstanceType": "t2.micro",
        "SubnetId": { "Ref": "SubnetId" },
        "KeyName": { "Ref": "KeyName" },
        "SecurityGroupIds": [
          { "Ref": "EC2InstanceSecurityGroup" }
        ],
        "IamInstanceProfile": { "Ref": "EC2InstanceProfile" },
        "UserData": {
          "Fn::Base64": {
            "Fn::Join": [
              "",
              [
                "#!/bin/bash\n",
                "echo ECS_CLUSTER=",
                { "Ref": "EcsCluster" },
                " >> /etc/ecs/ecs.config\n",
                "groupadd -g 1000 jenkins\n",
                "useradd -u 1000 -g jenkins jenkins\n",
                "mkdir -p /ecs/jenkins_home\n",
                "chown -R jenkins:jenkins /ecs/jenkins_home\n"
              ]
            ]
          }
        },
        "Tags": [
          {
            "Key": "Name",
            "Value": {
              "Fn::Join": [
                "",
                [
                  { "Ref": "AWS::StackName" },
                  "-instance"
                ]
              ]
            }
          }
        ]
      }
    },
    "EC2InstanceSecurityGroup": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupDescription": {
          "Fn::Join": [
            "",
            [
              { "Ref": "AWS::StackName" },
              " ingress security group"
            ]
          ]
        },
        "VpcId": { "Ref": "VpcId" },
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": "8080",
            "ToPort": "8080",
            "SourceSecurityGroupId": { "Ref": "ElbSecurityGroup" }
          },
          {
            "IpProtocol": "tcp",
            "FromPort": "22",
            "ToPort": "22",
            "CidrIp": "0.0.0.0/0"
          }
        ]
      }
    },
    "EC2InstanceProfile": {
      "Type": "AWS::IAM::InstanceProfile",
      "Properties": {
        "Path": "/",
        "Roles": [
          { "Ref": "EC2InstanceRole" }
        ]
      }
    },
    "EC2InstanceRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Version": "2012-10-17",
          "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ec2.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Path": "/",
        "ManagedPolicyArns": [
          "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
        ]
      }
    },
    "ElbSecurityGroup": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupDescription": {
          "Fn::Join": [
            "",
            [
              { "Ref": "AWS::StackName" },
              " ELB ingress security group"
            ]
          ]
        },
        "VpcId": { "Ref": "VpcId" },
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": "80",
            "ToPort": "80",
            "CidrIp": "0.0.0.0/0"
          }
        ],
        "Tags": [
          {
            "Key": "Name",
            "Value": {
              "Fn::Join": [
                "",
                [
                  { "Ref": "AWS::StackName" },
                  "-elb-sg"
                ]
              ]
            }
          }
        ]
      }
    },
    "ElasticLoadBalancer": {
      "Type": "AWS::ElasticLoadBalancing::LoadBalancer",
      "Properties": {
        "CrossZone": "false",
        "SecurityGroups": [
          { "Ref": "ElbSecurityGroup" }
        ],
        "Listeners": [
          {
            "LoadBalancerPort": "80",
            "InstancePort": "8080",
            "Protocol": "http"
          }
        ],
        "Instances": [
          { "Ref": "EC2Instance" }
        ],
        "Subnets": [
          { "Ref": "SubnetId" }
        ]
      }
    },
    "EcsCluster": {
      "Type": "AWS::ECS::Cluster"
    },
    "EcsTaskDefinition": {
      "Type": "AWS::ECS::TaskDefinition",
      "Properties": {
        "ContainerDefinitions": [
          {
            "Name": "jenkins",
            "Image": "someaccount/jenkins:ecs",
            "Memory": 995,
            "PortMappings": [
              {
                "ContainerPort": 8080,
                "HostPort": 8080
              }
            ],
            "MountPoints": [
              {
                "SourceVolume": "docker",
                "ContainerPath": "/var/run/docker.sock"
              },
              {
                "SourceVolume": "jenkins_home",
                "ContainerPath": "/var/jenkins_home"
              }
            ]
          }
        ],
        "Volumes": [
          {
            "Name": "jenkins_home",
            "Host": {
              "SourcePath": "/ecs/jenkins_home"
            }
          },
          {
            "Name": "docker",
            "Host": {
              "SourcePath": "/var/run/docker.sock"
            }
          }
        ]
      }
    },
    "EcsService": {
      "Type": "AWS::ECS::Service",
      "Properties": {
        "Cluster": { "Ref": "EcsCluster" },
        "TaskDefinition": { "Ref": "EcsTaskDefinition" },
        "DesiredCount": 1
      }
    }
  },
  "Outputs": {
    "ElbDomainName": {
      "Description": "Public DNS name of Elastic Load Balancer",
      "Value": {
        "Fn::GetAtt": [ "ElasticLoadBalancer", "DNSName" ]
      }
    },
    "EC2InstanceDomainName": {
      "Description": "Public DNS name of EC2 instance",
      "Value": {
        "Fn::GetAtt": [ "EC2Instance", "PublicDnsName" ]
      }
    }
  }
}
以下是失败事件:
此错误发生在几个小时后。
ECS 实例处于活动(Active)状态,因此 AMI 镜像(ami-05958d7635caa4d04)看起来没有问题。但没有任何任务处于 Running 或 Stopped 状态。
为了对 ECS 任务进行故障排除,我尝试了 docker-compose 方法:
# Docker Compose file used to run the same Jenkins image locally, with the
# same two mounts and port mapping that the ECS task definition declares.
version: '2'

volumes:
  # Named volume for the Jenkins home directory. `external: true` tells
  # Compose the volume is managed outside this file and must already exist.
  jenkins_home:
    external: true

services:
  jenkins:
    image: someaccount/jenkins:ecs
    volumes:
      - jenkins_home:/var/jenkins_home
      # Bind-mount the host's Docker socket into the container.
      - /var/run/docker.sock:/var/run/docker.sock
    ports:
      # host port : container port
      - "8080:8080"
我看到 docker-compose up -d 在我的笔记本电脑上启动了容器。
ECS 实例(t2.micro)激活后,我尝试手动运行新任务,出现以下错误:
在 CloudFormation 模板中,EcsTaskDefinition 具有 "Memory": 995。
当计算平台是 Linux 服务器(VM)时,我们可以通过分析日志来排查问题。
那么在公有云的 ECS 环境中,该如何排查这个问题?
最佳答案
您尝试启动的 t2.micro 实例的总内存容量为 1 GiB。
ECS 将尝试将任务启动到您的集群中,但它会意识到它无法满足要求,因为您的容器实例没有足够的可用内存(操作系统进程消耗超过 5 MiB 的内存),而您的任务需要 "Memory": 995,这正是问题所在。
在这种情况下,调试容器本身不会有帮助,因为它(可能)没有任何问题。事实上,ECS 调度程序无法满足启动任务的指定要求,因此除了失败并给出“有用”消息之外,它无法执行任何其他操作。
有多种原因可能导致调度程序无法启动任务,并且它应该始终给出有关该问题的提示。在你的情况下,它“清楚地”表明你没有足够的内存。其他需要注意的是每个容器实例只能启动一次的守护进程任务(这意外地发生在我身上)。
关于amazon-web-services - ECS服务不稳定,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59108077/