[Nagiosplug-help] check_icmp and ICMP sequence number

Chris Adams cmadams at hiwaay.net
Wed Feb 13 21:38:09 CET 2008


I have one device that always shows 60% packet loss with check_icmp (but
a regular ping shows none).  I tracked it down to check_icmp using the
same sequence number for all packets to a host; apparently this device
rate-limits based on the sequence number.

Here's a quick-and-dirty patch that increments the sequence number for
each packet sent (the counter is NOT checked on the reply, though).
Since the sequence number is also used to index the hosts, I shifted the
host number left and use the low bits as a per-host packet counter.  The
counter wraps around once more than 2^SEQ_BITS packets (16 with the
value I chose) are sent to a host, but that should be okay.  The number
of bits can easily be changed at the top; I used 4, which leaves 12 bits
for the host index (I doubt anyone is monitoring 4096 hosts with a
single call of check_icmp).

-- 
Chris Adams <cmadams at hiwaay.net>
Systems and Network Administrator - HiWAAY Internet Services
I don't speak for anybody but myself - that's enough trouble.


diff -urN nagios-plugins-1.4.8-dist/plugins-root/check_icmp.c nagios-plugins-1.4.8/plugins-root/check_icmp.c
--- nagios-plugins-1.4.8-dist/plugins-root/check_icmp.c	2007-03-27 01:53:57.000000000 -0500
+++ nagios-plugins-1.4.8/plugins-root/check_icmp.c	2008-02-13 14:04:13.000000000 -0600
@@ -117,6 +117,7 @@
 	unsigned int icmp_sent, icmp_recv, icmp_lost; /* counters */
 	unsigned char icmp_type, icmp_code; /* type and code from errors */
 	unsigned short flags;        /* control/status flags */
+	unsigned short icmp_cnt;     /* ICMP sequence number */
 	double rta;                  /* measured RTA */
 	unsigned char pl;            /* measured packet loss */
 	struct rta_host *next;       /* linked list */
@@ -173,6 +174,9 @@
 #define TSTATE_ALIVE 0x04       /* target is alive (has answered something) */
 #define TSTATE_UNREACH 0x08
 
+/* How many bits of the sequence to use for a counter */
+#define SEQ_BITS 4
+
 /** prototypes **/
 void print_help (void);
 void print_usage (void);
@@ -326,14 +330,14 @@
 	 * to RFC 792). If it isn't, just ignore it */
 	sent_icmp = (struct icmp *)(ptr + 28);
 	if(sent_icmp->icmp_type != ICMP_ECHO || sent_icmp->icmp_id != pid ||
-	   sent_icmp->icmp_seq >= targets)
+	   (sent_icmp->icmp_seq >> SEQ_BITS) >= targets)
 	{
 		if(debug) printf("Packet is no response to a packet we sent\n");
 		return 0;
 	}
 
 	/* it is indeed a response for us */
-	host = table[sent_icmp->icmp_seq];
+	host = table[(sent_icmp->icmp_seq >> SEQ_BITS)];
 	if(debug) {
 		printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
 			   get_icmp_error_msg(p->icmp_type, p->icmp_code),
@@ -752,7 +756,7 @@
 			continue;
 		}
 
-		if(icp->icmp_type != ICMP_ECHOREPLY || icp->icmp_seq >= targets) {
+		if(icp->icmp_type != ICMP_ECHOREPLY || (icp->icmp_seq >> SEQ_BITS) >= targets) {
 			if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
 			handle_random_icmp(icp, &resp_addr);
 			continue;
@@ -761,7 +765,7 @@
 		/* this is indeed a valid response */
 		data = (struct icmp_ping_data *)(icp->icmp_data);
 
-		host = table[icp->icmp_seq];
+		host = table[(icp->icmp_seq >> SEQ_BITS)];
 		gettimeofday(&now, &tz);
 		tdiff = get_timevaldiff(&data->stime, &now);
 
@@ -825,7 +829,7 @@
 	icp->icmp_code = 0;
 	icp->icmp_cksum = 0;
 	icp->icmp_id = pid;
-	icp->icmp_seq = host->id;
+	icp->icmp_seq = (host->id << SEQ_BITS) + (host->icmp_cnt++ & ((1 << SEQ_BITS) - 1));
 	data = (struct icmp_ping_data *)icp->icmp_data;
 	data->ping_id = 10; /* host->icmp.icmp_sent; */
 	memcpy(&data->stime, &tv, sizeof(struct timeval));




More information about the Help mailing list