sql - 通过 Athena 和 CloudTrail 找到 EC2 实例的所有者

标签 sql json amazon-web-services broadcastreceiver amazon-athena

为了了解每个 EC2 实例的所有者,我使用 Athena 查询存储在 S3 中的 CloudTrail 日志。

我在 Athena 中有一张表,其结构如下:

-- Athena external table over the CloudTrail log files stored in S3.
-- useridentity and resources are typed as nested STRUCT / ARRAY columns;
-- requestparameters and responseelements are plain STRING columns, so any
-- JSON they carry has to be parsed at query time.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion STRING,
    useridentity STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING>,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING>>>,
    eventtime STRING,
    eventsource STRING,
    eventname STRING,
    awsregion STRING,
    sourceipaddress STRING,
    useragent STRING,
    errorcode STRING,
    errormessage STRING,
    requestparameters STRING,
    responseelements STRING,
    additionaleventdata STRING,
    requestid STRING,
    eventid STRING,
    resources ARRAY<STRUCT<
        ARN:STRING,
        accountId:STRING,
        type:STRING>>,
    eventtype STRING,
    apiversion STRING,
    readonly STRING,
    recipientaccountid STRING,
    serviceeventdetails STRING,
    sharedeventid STRING,
    vpcendpointid STRING
)
-- Partition columns; the queries in this file filter on account.
PARTITIONED BY (account STRING, region STRING, year STRING)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';

我想找到启动某个 EC2 实例的用户身份,因此需要解析 responseelements 字段,并且只获取 responseelements 中包含特定实例 ID(instanceId)的行。

字段responseelements是这样的:

{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
    "items":[
    {"instanceId":"i-043543cb4c12",
    "imageId":"ami-078df974",
    "instanceState":{"code":0,"name":"pending"},
    "privateDnsName":"ip-444444.eu-west-1.compute.internal",
    "keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
    "instanceType":"t2.large",
    "launchTime":1488438050000,
    "placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
    "monitoring":{"state":"pending"},
    "subnetId":"subnet-d8fffff",
    "vpcId":"vpc-444435",
    "privateIpAddress":"10.0.42.49",
    "stateReason":{"code":"pending","message":"pending"},
    "architecture":"x86_64",
    "rootDeviceType":"ebs",
    "rootDeviceName":"/dev/xvda",
    "blockDeviceMapping":{},
    "virtualizationType":"hvm",
    "hypervisor":"xen",
    "clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
    "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
    "sourceDestCheck":true,
    "networkInterfaceSet":{
        "items":[
        {"networkInterfaceId":"eni-b16b66f0",
        "subnetId":"subnet-dffffff",
        "vpcId":"vpc-50fffff35",
        "ownerId":"xxxxxxxx",
        "status":"in-use",
        "macAddress":"fdsfdsfsdfqdsf",
        "privateIpAddress":"10.0.42.34234213",
        "privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
        "sourceDestCheck":true,
        "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
        "attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
        "privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
        "ipv6AddressesSet":{},
        "tagSet":{}}]}
    ,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
    "ebsOptimized":false}
    ]
    },
    "requesterId":"226008221399"
}

这是我尝试过的查询:

-- For one account, list the caller of every RunInstances event together with
-- whatever sits at the JSON path '$.instanceId' of responseelements.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxxxxxxxxxx'
  AND eventname = 'RunInstances';

但这样查询得到的 instance_id 列是空的。如何从 responseelements 中正确获取 instance_id?

最佳答案

我找到了用于查找 EC2 实例所有者的正确查询。希望对其他人有帮助!

-- Find who launched a given EC2 instance.
--
-- A single RunInstances call can start several instances, all listed under
-- responseelements -> instancesSet.items. The items array is therefore
-- unnested to one row per launched instance and matched on that item's own
-- instanceId, rather than always reading items[0].
-- json_extract_scalar returns the ID as plain text; json_extract would return
-- a JSON value, i.e. the ID wrapped in double quotes.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(item, '$.instanceId') AS instance_id
FROM cloudtrail_logs
CROSS JOIN UNNEST(
    CAST(json_extract(responseelements, '$.instancesSet.items') AS ARRAY(JSON))
) AS launched (item)
WHERE account = 'xxxxxxx'
  AND eventname = 'RunInstances'
  -- Coarse text filter kept from the original query: drops events whose
  -- response never mentions the instance ID.
  AND responseelements LIKE '%i-3434ecb4c12%'
  -- Exact match on the unnested item, so the returned instance_id is the
  -- instance that was searched for.
  AND json_extract_scalar(item, '$.instanceId') = 'i-3434ecb4c12'
;

关于sql - 通过 Athena 和 CloudTrail 找到 EC2 实例的所有者,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53391991/

相关文章:

javascript - 用ajax和json动态添加元素破坏页面布局

C# Json 格式的响应

python - 是否使用正则表达式,从 HTML 中获取 json 值

python - 按每组的最近日期获取第一个元素

mysql - SQL ORDER BY - 为什么它在这里不起作用?

sql - 获取日期 -1 或 2

amazon-web-services - 在 AWS CloudFormation 中创建负载均衡器失败

php - 一个语句中有多个 BETWEEN

javascript - util.crypto.lib.randomBytes is not a function : aws cognito js throws error on authentication

amazon-web-services - 'aws ecr get-login'如何跨区域?