summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: M. Sean Finney <seanius@users.sourceforge.net> 2006-04-12 10:00:23 (GMT)
committer: M. Sean Finney <seanius@users.sourceforge.net> 2006-04-12 10:00:23 (GMT)
commit: 1b207c0b2759eaabc1162190d0df47bbe5935de3 (patch)
tree: 9c624c380b9aeb594ecbf20cee9fe9716a3de9a2
parent: 35f52fe9a81689fb7111cb4494eca737f1310085 (diff)
download: monitoring-plugins-1b207c0b2759eaabc1162190d0df47bbe5935de3.tar.gz
The offset_requests are now parallelized. Some work still needs to be
done (conveniently marked with XXX), but on well-behaved networks the plugin should now behave more or less identically to check_ntp.pl.

git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1373 f882894a-f735-0410-b71e-b25c423dba1c
-rw-r--r-- plugins/check_ntp.c 167
1 file changed, 105 insertions, 62 deletions
diff --git a/plugins/check_ntp.c b/plugins/check_ntp.c
index 86709a1..149ca98 100644
--- a/plugins/check_ntp.c
+++ b/plugins/check_ntp.c
@@ -29,6 +29,7 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net";
29#include "common.h" 29#include "common.h"
30#include "netutils.h" 30#include "netutils.h"
31#include "utils.h" 31#include "utils.h"
32#include <sys/poll.h>
32 33
33static char *server_address=NULL; 34static char *server_address=NULL;
34static int verbose=0; 35static int verbose=0;
@@ -64,6 +65,13 @@ typedef struct {
64 uint64_t txts; /* time at which request departed server */ 65 uint64_t txts; /* time at which request departed server */
65} ntp_message; 66} ntp_message;
66 67
68/* this structure holds data about results from querying offset from a peer */
69typedef struct {
70 int waiting; /* we set to 1 to signal waiting for a response */
71 int num_responses; /* number of successfully recieved responses */
72 double offset[AVG_NUM]; /* offsets from each response */
73} ntp_server_results;
74
67/* this structure holds everything in an ntp control message as per rfc1305 */ 75/* this structure holds everything in an ntp control message as per rfc1305 */
68typedef struct { 76typedef struct {
69 uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ 77 uint8_t flags; /* byte with leapindicator,vers,mode. see macros */
@@ -271,38 +279,20 @@ void setup_request(ntp_message *p){
271 TVtoNTP64(t,p->txts); 279 TVtoNTP64(t,p->txts);
272} 280}
273 281
282/* do everything we need to get the total average offset
283 * - we use a certain amount of parallelization with poll() to ensure
284 * we don't waste time sitting around waiting for single packets.
285 * - we also "manually" handle resolving host names and connecting, because
286 * we have to do it in a way that our lazy macros don't handle currently :( */
274double offset_request(const char *host){ 287double offset_request(const char *host){
275 int i=0, conn=-1; 288 int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0;
276 ntp_message req; 289 int servers_completed=0, one_written=0, servers_readable=0, offsets_recvd=0;
277 double next_offset=0., avg_offset=0.; 290 ntp_message *req=NULL;
278 struct timeval recv_time; 291 double avg_offset=0.;
279
280 for(i=0; i<AVG_NUM; i++){
281 if(verbose) printf("offset run: %d/%d\n", i+1, AVG_NUM);
282 setup_request(&req);
283 my_udp_connect(server_address, 123, &conn);
284 write(conn, &req, sizeof(ntp_message));
285 read(conn, &req, sizeof(ntp_message));
286 gettimeofday(&recv_time, NULL);
287 /* if(verbose) print_packet(&req); */
288 close(conn);
289 next_offset=calc_offset(&req, &recv_time);
290 if(verbose) printf("offset: %g\n", next_offset);
291 avg_offset+=next_offset;
292 }
293 avg_offset/=AVG_NUM;
294 if(verbose) printf("average offset: %g\n", avg_offset);
295 return avg_offset;
296}
297
298
299/* this should behave more like ntpdate, but needs optomisations... */
300double offset_request_ntpdate(const char *host){
301 int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL;
302 ntp_message req;
303 double offset=0., avg_offset=0.;
304 struct timeval recv_time; 292 struct timeval recv_time;
305 struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; 293 struct addrinfo *ai=NULL, *ai_tmp=NULL, hints;
294 struct pollfd *ufds=NULL;
295 ntp_server_results *servers=NULL;
306 296
307 /* setup hints to only return results from getaddrinfo that we'd like */ 297 /* setup hints to only return results from getaddrinfo that we'd like */
308 memset(&hints, 0, sizeof(struct addrinfo)); 298 memset(&hints, 0, sizeof(struct addrinfo));
@@ -310,24 +300,26 @@ double offset_request_ntpdate(const char *host){
310 hints.ai_protocol = IPPROTO_UDP; 300 hints.ai_protocol = IPPROTO_UDP;
311 hints.ai_socktype = SOCK_DGRAM; 301 hints.ai_socktype = SOCK_DGRAM;
312 302
313 /* XXX better error handling here... */ 303 /* fill in ai with the list of hosts resolved by the host name */
314 ga_result = getaddrinfo(host, "123", &hints, &ai); 304 ga_result = getaddrinfo(host, "123", &hints, &ai);
315 if(ga_result!=0){ 305 if(ga_result!=0){
316 fprintf(stderr, "error getting address for %s: %s\n", 306 die(STATE_UNKNOWN, "error getting address for %s: %s\n",
317 host, gai_strerror(ga_result)); 307 host, gai_strerror(ga_result));
318 return -1.0;
319 } 308 }
320 309
321 /* count te number of returned hosts, and allocate an array of sockets */ 310 /* count the number of returned hosts, and allocate stuff accordingly */
322 ai_tmp=ai; 311 for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; }
323 while(ai_tmp){ 312 req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts);
324 ai_tmp = ai_tmp->ai_next; 313 if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array");
325 num_hosts++;
326 }
327 socklist=(int*)malloc(sizeof(int)*num_hosts); 314 socklist=(int*)malloc(sizeof(int)*num_hosts);
328 if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); 315 if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
316 ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts);
317 if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
318 servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts);
319 if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array");
320 memset(servers, 0, sizeof(ntp_server_results)*num_hosts);
329 321
330 /* setup each socket for writing */ 322 /* setup each socket for writing, and the corresponding struct pollfd */
331 ai_tmp=ai; 323 ai_tmp=ai;
332 for(i=0;ai_tmp;i++){ 324 for(i=0;ai_tmp;i++){
333 socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); 325 socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP);
@@ -337,37 +329,88 @@ double offset_request_ntpdate(const char *host){
337 } 329 }
338 if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ 330 if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){
339 die(STATE_UNKNOWN, "can't create socket connection"); 331 die(STATE_UNKNOWN, "can't create socket connection");
332 } else {
333 ufds[i].fd=socklist[i];
334 ufds[i].events=POLLIN;
335 ufds[i].revents=0;
340 } 336 }
341 ai_tmp = ai_tmp->ai_next; 337 ai_tmp = ai_tmp->ai_next;
342 } 338 }
343 339
344 /* now do AVG_NUM checks to each host. this needs to be optimized 340 /* now do AVG_NUM checks to each host. */
345 * two ways: 341 while(servers_completed<num_hosts){
346 * - use some parellization w/poll for much faster results. currently 342
347 * we do send/recv, send/recv, etc, whereas we could use poll(), to 343 /* write to any servers that are free and have done < AVG_NUM reqs */
348 * determine when to read and just do a bunch of writing when we 344 /* XXX we need some kind of ability to retransmit lost packets.
349 * have free time. 345 * XXX one way would be replace "waiting" with a timestamp and
350 * - behave like ntpdate and only take the 5 best responses. 346 * XXX if the timestamp is old enough the request is re-transmitted.
351 */ 347 * XXX then a certain number of failures could mark a server as
352 for(i=0; i<AVG_NUM; i++){ 348 * XXX bad, which is what i imagine that ntpdate does though
353 if(verbose) printf("offset calculation run %d/%d\n", i+1, AVG_NUM); 349 * XXX i can't confirm it (i think it still only sends a max
354 for(j=0; j<num_hosts; j++){ 350 * XXX of AVG_NUM requests, but what does it do if one fails
355 if(verbose) printf("peer %d: ", j); 351 * XXX but the others succeed? */
356 setup_request(&req); 352 /* XXX also we need the ability to cut out failed/unresponsive
357 write(socklist[j], &req, sizeof(ntp_message)); 353 * XXX servers. currently after doing all other servers we
358 read(socklist[j], &req, sizeof(ntp_message)); 354 * XXX still wait for them until the bitter end/timeout. */
359 gettimeofday(&recv_time, NULL); 355 one_written=0;
360 offset=calc_offset(&req, &recv_time); 356 for(i=0; i<num_hosts; i++){
361 if(verbose) printf("offset: %g\n", offset); 357 if(!servers[i].waiting && servers[i].num_responses<AVG_NUM){
362 avg_offset+=offset; 358 if(verbose) printf("sending request to peer %d\n", i);
359 setup_request(&req[i]);
360 write(socklist[i], &req[i], sizeof(ntp_message));
361 servers[i].waiting=1;
362 one_written=1;
363 break;
364 }
363 } 365 }
364 avg_offset/=num_hosts; 366
367 /* quickly poll for any sockets with pending data */
368 servers_readable=poll(ufds, num_hosts, 100);
369 if(servers_readable==-1){
370 perror("polling ntp sockets");
371 die(STATE_UNKNOWN, "communication errors");
372 }
373
374 /* read from any sockets with pending data */
375 for(i=0; servers_readable && i<num_hosts; i++){
376 if(ufds[i].revents&POLLIN){
377 if(verbose) {
378 printf("response from peer %d: ", i);
379 }
380 read(ufds[i].fd, &req[i], sizeof(ntp_message));
381 gettimeofday(&recv_time, NULL);
382 respnum=servers[i].num_responses++;
383 servers[i].offset[respnum]=calc_offset(&req[i], &recv_time);
384 if(verbose) {
385 printf("offset %g\n", servers[i].offset[respnum]);
386 }
387 servers[i].waiting=0;
388 servers_readable--;
389 if(servers[i].num_responses==AVG_NUM) servers_completed++;
390 }
391 }
392 /* lather, rinse, repeat. */
365 } 393 }
366 avg_offset/=AVG_NUM;
367 if(verbose) printf("overall average offset: %g\n", avg_offset);
368 394
395 /* finally, calculate the average offset */
396 /* XXX still something about the "top 5" */
397 for(i=0;i<num_hosts;i++){
398 for(j=0;j<servers[i].num_responses;j++){
399 offsets_recvd++;
400 avg_offset+=servers[i].offset[j];
401 }
402 }
403 avg_offset/=offsets_recvd;
404
405 /* cleanup */
369 for(j=0; j<num_hosts; j++){ close(socklist[j]); } 406 for(j=0; j<num_hosts; j++){ close(socklist[j]); }
407 free(socklist);
408 free(ufds);
409 free(servers);
410 free(req);
370 freeaddrinfo(ai); 411 freeaddrinfo(ai);
412
413 if(verbose) printf("overall average offset: %g\n", avg_offset);
371 return avg_offset; 414 return avg_offset;
372} 415}
373 416