python - 如何被动嗅探 TCP/HTTP get 请求

我想(使用 Rpi)被动嗅探 HTTP GET 请求,以跟踪网络设备的流量。到目前为止,我有以下代码,我相信它会嗅探所有数据包,并筛选出应该包含 HTTP 请求的 TCP 数据包:

#Packet sniffer in python
#For Linux - Sniffs all incoming and outgoing packets :)
#Silver Moon (

import socket, sys
import sys
from threading import RLock
from struct import *

#Convert the 6 raw bytes of an ethernet address into a colon separated hex string
def eth_addr (a) :
  """Format a raw 6-byte MAC address as ``xx:xx:xx:xx:xx:xx``.

  ``a`` may be a Python 2 ``str``, a ``bytes`` object or a ``bytearray``;
  only its first six elements are used.  Raises ``IndexError`` when fewer
  than six elements are available.
  """
  # Indexing a str yields 1-character strings while indexing bytes/bytearray
  # (Python 3) yields integers, so ord() is applied only when it is needed.
  octets = (a[i] for i in range(6))
  return ":".join("%.2x" % (o if isinstance(o, int) else ord(o)) for o in octets)

#create a AF_PACKET type raw socket (thats basically packet level)
#define ETH_P_ALL    0x0003          /* Every packet (be careful!!!) */
# Opening a raw AF_PACKET socket is Linux-only and normally requires root.
try:
    s = socket.socket( socket.AF_PACKET , socket.SOCK_RAW , socket.ntohs(0x0003))
except socket.error as msg:
    # errno/strerror exist on socket.error in both Python 2.6+ and Python 3,
    # unlike indexing the exception object.
    print('Socket could not be created. Error Code : ' + str(msg.errno) + ' Message ' + str(msg.strerror))
    # Without a socket the receive loop cannot run, so stop here instead of
    # failing later with a NameError on `s`.
    sys.exit(1)

# Receive frames forever. Every frame gets its Ethernet header printed;
# TCP-over-IPv4 frames additionally get their TCP payload printed.
while True:
    # recvfrom() returns a (data, address) tuple; only the raw frame is needed
    packet = s.recvfrom(65565)[0]

    #parse ethernet header: destination MAC, source MAC, EtherType
    eth_length = 14

    # unpack() raises struct.error on short input, so skip truncated frames
    if len(packet) < eth_length:
        continue

    eth_header = packet[:eth_length]
    eth = unpack('!6s6sH' , eth_header)
    # The '!' format already converts the EtherType to host byte order, so it
    # must not be swapped again with ntohs(); doing so only produced the
    # value 8 on little-endian hosts and broke the IPv4 test on big-endian ones.
    eth_protocol = eth[2]
    source_mac = eth_addr(packet[6:12])
    print('Destination MAC : ' + eth_addr(packet[0:6]) + ' Source MAC : ' + source_mac + ' Protocol : ' + str(eth_protocol))

    #Parse IPv4 packets only, EtherType = 0x0800
    if eth_protocol != 0x0800 :
        continue

    #Parse IP header
    #take first 20 bytes for the fixed part of the ip header
    if len(packet) < eth_length + 20:
        continue
    ip_header = packet[eth_length:20+eth_length]

    #now unpack them :)
    iph = unpack('!BBHHHBBH4s4s' , ip_header)

    # first byte: high nibble = IP version, low nibble = header length in 32-bit words
    version_ihl = iph[0]
    version = version_ihl >> 4
    ihl = version_ihl & 0xF

    iph_length = ihl * 4

    ttl = iph[5]
    protocol = iph[6]
    s_addr = socket.inet_ntoa(iph[8])
    d_addr = socket.inet_ntoa(iph[9])

    #print('Version : ' + str(version) + ' IP Header Length : ' + str(ihl) + ' TTL : ' + str(ttl) + ' Protocol : ' + str(protocol) + ' Source Address : ' + str(s_addr) + ' Destination Address : ' + str(d_addr))

    #TCP protocol (IP protocol number 6)
    if protocol != 6 :
        continue

    # the TCP header starts after the (variable length) IP header
    t = iph_length + eth_length
    # an IHL below 5 is malformed, and a frame cut short inside the TCP
    # header cannot be unpacked; skip both
    if ihl < 5 or len(packet) < t + 20:
        continue
    tcp_header = packet[t:t+20]

    #now unpack them :)
    tcph = unpack('!HHLLBBHHH' , tcp_header)

    source_port = tcph[0]
    dest_port = tcph[1]
    sequence = tcph[2]
    acknowledgement = tcph[3]
    doff_reserved = tcph[4]
    # high nibble = TCP header length (data offset) in 32-bit words
    tcph_length = doff_reserved >> 4

    #print('Source Port : ' + str(source_port) + ' Dest Port : ' + str(dest_port) + ' Sequence Number : ' + str(sequence) + ' Acknowledgement : ' + str(acknowledgement) + ' TCP header length : ' + str(tcph_length))

    # everything after the Ethernet, IP and TCP headers is payload
    h_size = eth_length + iph_length + tcph_length * 4

    #get data from the packet
    data = packet[h_size:]
    print('Data: ')
    print(data)

运行后得到以下输出(Rpi 与一台正在浏览维基百科的 PC 位于同一子网):Sniffer output

我需要做什么才能解码出 GET 请求字符串?例如:GET /tutorials/other/top-20-mysql-best-practices/ HTTP/1.1 Host: …



要做到“完美”,您需要在很大程度上实现 TCP/IP 协议栈。您需要跟踪三次握手、处理(并忽略)重传、对数据包重新排序,并将数据有效负载合并成单个流。您还必须具备容错能力:当您的监视器丢失了某个数据包、而目的地已收到该数据包时,要能做出合理的推测。

如果只要求“够用”,您可以做出诸如“GET 请求会包含在单个数据包中”(通常是正确的)之类的假设,以避免真正的连接跟踪,但您仍然必须检查重传的数据包。在这种情况下,“GET”将是数据包有效负载(即 TCP 数据部分)的前三个字符,您可以从那里开始处理,直到遇到表示请求头结束的空行(HTTP 中为 \r\n\r\n)或该数据包的结尾。

