0

I am trying to parse HTTP packets using netfilter hooks in my kernel module. I came across a code snippet in the question "How to print data from TCP packets", which I modified to make it build. However, I am unable to see any HTTP-related logs in dmesg, even though the kernel module loads successfully.

I have tried to debug the issue but haven't been able to figure out what's going wrong: the payload never matches "HTTP". I suspect that the problem might be related to the fact that HTTP packets are carried over TCP and that I am not handling the TCP protocol properly.

Code I am using:

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/tcp.h>

/* TCP port the hook filters on; it is compared against the segment's source port. */
#define PTCP_WATCH_PORT 80 /* HTTP port */

/* Netfilter hook descriptor: filled in by ptcp_init() and handed to the register/unregister calls. */
static struct nf_hook_ops nfho;

static unsigned int ptcp_hook_func(const struct nf_hook_ops *ops,
                                   struct sk_buff *skb,
                                   const struct net_device *in,
                                   const struct net_device *out,
                                   int (*okfn)(struct sk_buff *))
{
    struct iphdr *iph;        /* IPv4 header */
    struct tcphdr *tcph;      /* TCP header */
    u16 sport, dport;         /* Source and destination ports */
    u32 saddr, daddr;         /* Source and destination addresses */
    unsigned char *user_data; /* TCP data begin pointer */
    unsigned char *tail;      /* TCP data end pointer */
    unsigned char *it;        /* TCP data iterator */

    /* Network packet is empty, seems like some problem occurred. Skip it */
    if (!skb)
        return NF_ACCEPT;

    iph = ip_hdr(skb); /* get IP header */

    /* Skip if it's not TCP packet */
    if (iph->protocol != IPPROTO_TCP)
        return NF_ACCEPT;

    tcph = tcp_hdr(skb); /* get TCP header */

    /* Convert network endianness to host endiannes */
    saddr = ntohl(iph->saddr);
    daddr = ntohl(iph->daddr);
    sport = ntohs(tcph->source);
    dport = ntohs(tcph->dest);

    /* Watch only port of interest */
    if (sport != PTCP_WATCH_PORT)
        return NF_ACCEPT;

    /* Calculate pointers for begin and end of TCP packet data */
    user_data = (unsigned char *)((unsigned char *)tcph + (tcph->doff * 4));
    tail = skb_tail_pointer(skb);

    /* ----- Print all needed information from received TCP packet ------ */

    /* Show only HTTP packets */
    if (user_data[0] != 'H' || user_data[1] != 'T' || user_data[2] != 'T' ||
        user_data[3] != 'P')
    {
        // Control is reaching here always.
        return NF_ACCEPT;
    }

    /* Print packet route */
    printk("print_tcp: %pI4h:%d -> %pI4h:%d\n", &saddr, sport,
           &daddr, dport);

    /* Print TCP packet data (payload) */
    printk("print_tcp: data:\n");
    for (it = user_data; it != tail; ++it)
    {
        char c = *(char *)it;

        if (c == '\0')
            break;

        printk("%c", c);
    }
    printk("\n\n");

    return NF_ACCEPT;
}

/*
 * Module entry point: fills in the netfilter hook descriptor and registers it
 * in the initial network namespace (init_net).
 *
 * Return: 0 on success, otherwise the negative value returned by
 * nf_register_net_hook().
 */
static int __init ptcp_init(void)
{
    int res;

    nfho.hook = (nf_hookfn *)ptcp_hook_func; /* hook function */
    nfho.hooknum = NF_INET_PRE_ROUTING;      /* received packets */
    nfho.pf = PF_INET;                       /* IPv4 */
    nfho.priority = NF_IP_PRI_FIRST;         /* max hook priority */

    res = nf_register_net_hook(&init_net, &nfho);
    if (res < 0)
    {
        /* Name the function that actually failed and keep its error code */
        printk("print_tcp: error in nf_register_net_hook(): %d\n", res);
        return res;
    }

    printk("print_tcp: loaded\n");
    return 0;
}

/*
 * Module exit point: removes the hook from the initial network namespace
 * (init_net) and then logs that the module has been unloaded.
 */
static void __exit ptcp_exit(void)
{
    nf_unregister_net_hook(&init_net, &nfho);
    printk("print_tcp: unloaded\n");
}

/* Entry and exit points invoked when the module is loaded and unloaded. */
module_init(ptcp_init);
module_exit(ptcp_exit);

/* Module metadata. */
MODULE_DESCRIPTION("Module for printing HTTP packet data");
MODULE_LICENSE("GPL");
12
  • 1
    Uh, what happens if I send you a TCP packet < 4 bytes? Commented Apr 21, 2023 at 9:17
  • 2
    1. "all http packets starts with http in the tcp payload" that's wrong. The data payload of the first TCP packet of a HTTP interaction starts with "HTTP", all subsequent TCP packets do not (or do, if the data you're sending contains the string "HTTP", which is not that rare). Commented Apr 21, 2023 at 9:32
  • 2
    2. not all TCP packets are HTTP packets, and even in HTTP, packets shorter than 4 bytes are possible. So, your user_data[N] might just read an address that's not filled with packet data (in case of an empty TCP packet, I'm not even sure tcph + (tcph->doff * 4) would point to a valid address – and you might just crash your machine by letting your kernel access an invalid memory address) Commented Apr 21, 2023 at 9:34
  • 2
    3. HTTP/1.1 (so, the oldest version of HTTP you will find in the wild) and every later version of HTTP offer a persistent TCP connection mechanism, where you need to understand the previous HTTP request/reply, including all headers and data that were sent to and from the server, to know where the next HTTP request might start. So, prepare for this to become much more complicated later on! Commented Apr 21, 2023 at 9:38
  • 1
    Re: accessing the data: you're correctly accessing the data (as far as I can tell), it's just that you always have to make sure your data is as long as you read data from it, AND that your assumption about every HTTP packet starting with "HTTP" is wrong. Commented Apr 21, 2023 at 9:39

0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.