diff options
Diffstat (limited to 'plugins')
| -rw-r--r-- | plugins/check_ntp.c | 167 |
1 files changed, 105 insertions, 62 deletions
diff --git a/plugins/check_ntp.c b/plugins/check_ntp.c index 86709a1f..149ca98f 100644 --- a/plugins/check_ntp.c +++ b/plugins/check_ntp.c | |||
| @@ -29,6 +29,7 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net"; | |||
| 29 | #include "common.h" | 29 | #include "common.h" |
| 30 | #include "netutils.h" | 30 | #include "netutils.h" |
| 31 | #include "utils.h" | 31 | #include "utils.h" |
| 32 | #include <sys/poll.h> | ||
| 32 | 33 | ||
| 33 | static char *server_address=NULL; | 34 | static char *server_address=NULL; |
| 34 | static int verbose=0; | 35 | static int verbose=0; |
| @@ -64,6 +65,13 @@ typedef struct { | |||
| 64 | uint64_t txts; /* time at which request departed server */ | 65 | uint64_t txts; /* time at which request departed server */ |
| 65 | } ntp_message; | 66 | } ntp_message; |
| 66 | 67 | ||
| 68 | /* this structure holds data about results from querying offset from a peer */ | ||
| 69 | typedef struct { | ||
| 70 | int waiting; /* we set to 1 to signal waiting for a response */ | ||
| 71 | int num_responses; /* number of successfully received responses */ | ||
| 72 | double offset[AVG_NUM]; /* offsets from each response */ | ||
| 73 | } ntp_server_results; | ||
| 74 | |||
| 67 | /* this structure holds everything in an ntp control message as per rfc1305 */ | 75 | /* this structure holds everything in an ntp control message as per rfc1305 */ |
| 68 | typedef struct { | 76 | typedef struct { |
| 69 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | 77 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ |
| @@ -271,38 +279,20 @@ void setup_request(ntp_message *p){ | |||
| 271 | TVtoNTP64(t,p->txts); | 279 | TVtoNTP64(t,p->txts); |
| 272 | } | 280 | } |
| 273 | 281 | ||
| 282 | /* do everything we need to get the total average offset | ||
| 283 | * - we use a certain amount of parallelization with poll() to ensure | ||
| 284 | * we don't waste time sitting around waiting for single packets. | ||
| 285 | * - we also "manually" handle resolving host names and connecting, because | ||
| 286 | * we have to do it in a way that our lazy macros don't handle currently :( */ | ||
| 274 | double offset_request(const char *host){ | 287 | double offset_request(const char *host){ |
| 275 | int i=0, conn=-1; | 288 | int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0; |
| 276 | ntp_message req; | 289 | int servers_completed=0, one_written=0, servers_readable=0, offsets_recvd=0; |
| 277 | double next_offset=0., avg_offset=0.; | 290 | ntp_message *req=NULL; |
| 278 | struct timeval recv_time; | 291 | double avg_offset=0.; |
| 279 | |||
| 280 | for(i=0; i<AVG_NUM; i++){ | ||
| 281 | if(verbose) printf("offset run: %d/%d\n", i+1, AVG_NUM); | ||
| 282 | setup_request(&req); | ||
| 283 | my_udp_connect(server_address, 123, &conn); | ||
| 284 | write(conn, &req, sizeof(ntp_message)); | ||
| 285 | read(conn, &req, sizeof(ntp_message)); | ||
| 286 | gettimeofday(&recv_time, NULL); | ||
| 287 | /* if(verbose) print_packet(&req); */ | ||
| 288 | close(conn); | ||
| 289 | next_offset=calc_offset(&req, &recv_time); | ||
| 290 | if(verbose) printf("offset: %g\n", next_offset); | ||
| 291 | avg_offset+=next_offset; | ||
| 292 | } | ||
| 293 | avg_offset/=AVG_NUM; | ||
| 294 | if(verbose) printf("average offset: %g\n", avg_offset); | ||
| 295 | return avg_offset; | ||
| 296 | } | ||
| 297 | |||
| 298 | |||
| 299 | /* this should behave more like ntpdate, but needs optimisations... */ | ||
| 300 | double offset_request_ntpdate(const char *host){ | ||
| 301 | int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL; | ||
| 302 | ntp_message req; | ||
| 303 | double offset=0., avg_offset=0.; | ||
| 304 | struct timeval recv_time; | 292 | struct timeval recv_time; |
| 305 | struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; | 293 | struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; |
| 294 | struct pollfd *ufds=NULL; | ||
| 295 | ntp_server_results *servers=NULL; | ||
| 306 | 296 | ||
| 307 | /* setup hints to only return results from getaddrinfo that we'd like */ | 297 | /* setup hints to only return results from getaddrinfo that we'd like */ |
| 308 | memset(&hints, 0, sizeof(struct addrinfo)); | 298 | memset(&hints, 0, sizeof(struct addrinfo)); |
| @@ -310,24 +300,26 @@ double offset_request_ntpdate(const char *host){ | |||
| 310 | hints.ai_protocol = IPPROTO_UDP; | 300 | hints.ai_protocol = IPPROTO_UDP; |
| 311 | hints.ai_socktype = SOCK_DGRAM; | 301 | hints.ai_socktype = SOCK_DGRAM; |
| 312 | 302 | ||
| 313 | /* XXX better error handling here... */ | 303 | /* fill in ai with the list of hosts resolved by the host name */ |
| 314 | ga_result = getaddrinfo(host, "123", &hints, &ai); | 304 | ga_result = getaddrinfo(host, "123", &hints, &ai); |
| 315 | if(ga_result!=0){ | 305 | if(ga_result!=0){ |
| 316 | fprintf(stderr, "error getting address for %s: %s\n", | 306 | die(STATE_UNKNOWN, "error getting address for %s: %s\n", |
| 317 | host, gai_strerror(ga_result)); | 307 | host, gai_strerror(ga_result)); |
| 318 | return -1.0; | ||
| 319 | } | 308 | } |
| 320 | 309 | ||
| 321 | /* count te number of returned hosts, and allocate an array of sockets */ | 310 | /* count the number of returned hosts, and allocate stuff accordingly */ |
| 322 | ai_tmp=ai; | 311 | for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; } |
| 323 | while(ai_tmp){ | 312 | req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts); |
| 324 | ai_tmp = ai_tmp->ai_next; | 313 | if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array"); |
| 325 | num_hosts++; | ||
| 326 | } | ||
| 327 | socklist=(int*)malloc(sizeof(int)*num_hosts); | 314 | socklist=(int*)malloc(sizeof(int)*num_hosts); |
| 328 | if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | 315 | if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); |
| 316 | ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts); | ||
| 317 | if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | ||
| 318 | servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts); | ||
| 319 | if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array"); | ||
| 320 | memset(servers, 0, sizeof(ntp_server_results)*num_hosts); | ||
| 329 | 321 | ||
| 330 | /* setup each socket for writing */ | 322 | /* setup each socket for writing, and the corresponding struct pollfd */ |
| 331 | ai_tmp=ai; | 323 | ai_tmp=ai; |
| 332 | for(i=0;ai_tmp;i++){ | 324 | for(i=0;ai_tmp;i++){ |
| 333 | socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); | 325 | socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); |
| @@ -337,37 +329,88 @@ double offset_request_ntpdate(const char *host){ | |||
| 337 | } | 329 | } |
| 338 | if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ | 330 | if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ |
| 339 | die(STATE_UNKNOWN, "can't create socket connection"); | 331 | die(STATE_UNKNOWN, "can't create socket connection"); |
| 332 | } else { | ||
| 333 | ufds[i].fd=socklist[i]; | ||
| 334 | ufds[i].events=POLLIN; | ||
| 335 | ufds[i].revents=0; | ||
| 340 | } | 336 | } |
| 341 | ai_tmp = ai_tmp->ai_next; | 337 | ai_tmp = ai_tmp->ai_next; |
| 342 | } | 338 | } |
| 343 | 339 | ||
| 344 | /* now do AVG_NUM checks to each host. this needs to be optimized | 340 | /* now do AVG_NUM checks to each host. */ |
| 345 | * two ways: | 341 | while(servers_completed<num_hosts){ |
| 346 | * - use some parellization w/poll for much faster results. currently | 342 | |
| 347 | * we do send/recv, send/recv, etc, whereas we could use poll(), to | 343 | /* write to any servers that are free and have done < AVG_NUM reqs */ |
| 348 | * determine when to read and just do a bunch of writing when we | 344 | /* XXX we need some kind of ability to retransmit lost packets. |
| 349 | * have free time. | 345 | * XXX one way would be replace "waiting" with a timestamp and |
| 350 | * - behave like ntpdate and only take the 5 best responses. | 346 | * XXX if the timestamp is old enough the request is re-transmitted. |
| 351 | */ | 347 | * XXX then a certain number of failures could mark a server as |
| 352 | for(i=0; i<AVG_NUM; i++){ | 348 | * XXX bad, which is what i imagine that ntpdate does though |
| 353 | if(verbose) printf("offset calculation run %d/%d\n", i+1, AVG_NUM); | 349 | * XXX i can't confirm it (i think it still only sends a max |
| 354 | for(j=0; j<num_hosts; j++){ | 350 | * XXX of AVG_NUM requests, but what does it do if one fails |
| 355 | if(verbose) printf("peer %d: ", j); | 351 | * XXX but the others succeed? */ |
| 356 | setup_request(&req); | 352 | /* XXX also we need the ability to cut out failed/unresponsive |
| 357 | write(socklist[j], &req, sizeof(ntp_message)); | 353 | * XXX servers. currently after doing all other servers we |
| 358 | read(socklist[j], &req, sizeof(ntp_message)); | 354 | * XXX still wait for them until the bitter end/timeout. */ |
| 359 | gettimeofday(&recv_time, NULL); | 355 | one_written=0; |
| 360 | offset=calc_offset(&req, &recv_time); | 356 | for(i=0; i<num_hosts; i++){ |
| 361 | if(verbose) printf("offset: %g\n", offset); | 357 | if(!servers[i].waiting && servers[i].num_responses<AVG_NUM){ |
| 362 | avg_offset+=offset; | 358 | if(verbose) printf("sending request to peer %d\n", i); |
| 359 | setup_request(&req[i]); | ||
| 360 | write(socklist[i], &req[i], sizeof(ntp_message)); | ||
| 361 | servers[i].waiting=1; | ||
| 362 | one_written=1; | ||
| 363 | break; | ||
| 364 | } | ||
| 363 | } | 365 | } |
| 364 | avg_offset/=num_hosts; | 366 | |
| 367 | /* quickly poll for any sockets with pending data */ | ||
| 368 | servers_readable=poll(ufds, num_hosts, 100); | ||
| 369 | if(servers_readable==-1){ | ||
| 370 | perror("polling ntp sockets"); | ||
| 371 | die(STATE_UNKNOWN, "communication errors"); | ||
| 372 | } | ||
| 373 | |||
| 374 | /* read from any sockets with pending data */ | ||
| 375 | for(i=0; servers_readable && i<num_hosts; i++){ | ||
| 376 | if(ufds[i].revents&POLLIN){ | ||
| 377 | if(verbose) { | ||
| 378 | printf("response from peer %d: ", i); | ||
| 379 | } | ||
| 380 | read(ufds[i].fd, &req[i], sizeof(ntp_message)); | ||
| 381 | gettimeofday(&recv_time, NULL); | ||
| 382 | respnum=servers[i].num_responses++; | ||
| 383 | servers[i].offset[respnum]=calc_offset(&req[i], &recv_time); | ||
| 384 | if(verbose) { | ||
| 385 | printf("offset %g\n", servers[i].offset[respnum]); | ||
| 386 | } | ||
| 387 | servers[i].waiting=0; | ||
| 388 | servers_readable--; | ||
| 389 | if(servers[i].num_responses==AVG_NUM) servers_completed++; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | /* lather, rinse, repeat. */ | ||
| 365 | } | 393 | } |
| 366 | avg_offset/=AVG_NUM; | ||
| 367 | if(verbose) printf("overall average offset: %g\n", avg_offset); | ||
| 368 | 394 | ||
| 395 | /* finally, calculate the average offset */ | ||
| 396 | /* XXX still something about the "top 5" */ | ||
| 397 | for(i=0;i<num_hosts;i++){ | ||
| 398 | for(j=0;j<servers[i].num_responses;j++){ | ||
| 399 | offsets_recvd++; | ||
| 400 | avg_offset+=servers[i].offset[j]; | ||
| 401 | } | ||
| 402 | } | ||
| 403 | avg_offset/=offsets_recvd; | ||
| 404 | |||
| 405 | /* cleanup */ | ||
| 369 | for(j=0; j<num_hosts; j++){ close(socklist[j]); } | 406 | for(j=0; j<num_hosts; j++){ close(socklist[j]); } |
| 407 | free(socklist); | ||
| 408 | free(ufds); | ||
| 409 | free(servers); | ||
| 410 | free(req); | ||
| 370 | freeaddrinfo(ai); | 411 | freeaddrinfo(ai); |
| 412 | |||
| 413 | if(verbose) printf("overall average offset: %g\n", avg_offset); | ||
| 371 | return avg_offset; | 414 | return avg_offset; |
| 372 | } | 415 | } |
| 373 | 416 | ||
