|
Lines 101-106
Link Here
|
| 101 |
#define COOKIE_SIZE 24U /* 8 + 4 + 4 + 8 */ |
101 |
#define COOKIE_SIZE 24U /* 8 + 4 + 4 + 8 */ |
| 102 |
#define ECS_SIZE 20U /* 2 + 1 + 1 + [0..16] */ |
102 |
#define ECS_SIZE 20U /* 2 + 1 + 1 + [0..16] */ |
| 103 |
|
103 |
|
|
|
104 |
#define TCPBUFFERS_FILLCOUNT 1U |
| 105 |
#define TCPBUFFERS_FREEMAX 8U |
| 106 |
|
| 104 |
#define WANTNSID(x) (((x)->attributes & NS_CLIENTATTR_WANTNSID) != 0) |
107 |
#define WANTNSID(x) (((x)->attributes & NS_CLIENTATTR_WANTNSID) != 0) |
| 105 |
#define WANTEXPIRE(x) (((x)->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0) |
108 |
#define WANTEXPIRE(x) (((x)->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0) |
| 106 |
#define WANTPAD(x) (((x)->attributes & NS_CLIENTATTR_WANTPAD) != 0) |
109 |
#define WANTPAD(x) (((x)->attributes & NS_CLIENTATTR_WANTPAD) != 0) |
|
Lines 330-341
client_senddone(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
Link Here
|
| 330 |
NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), |
333 |
NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), |
| 331 |
"send failed: %s", |
334 |
"send failed: %s", |
| 332 |
isc_result_totext(result)); |
335 |
isc_result_totext(result)); |
|
|
336 |
isc_nm_bad_request(handle); |
| 333 |
} |
337 |
} |
| 334 |
} |
338 |
} |
| 335 |
|
339 |
|
| 336 |
isc_nmhandle_detach(&handle); |
340 |
isc_nmhandle_detach(&handle); |
| 337 |
} |
341 |
} |
| 338 |
|
342 |
|
|
|
343 |
static void |
| 344 |
client_setup_tcp_buffer(ns_client_t *client) { |
| 345 |
REQUIRE(client->tcpbuf == NULL); |
| 346 |
|
| 347 |
client->tcpbuf = client->manager->tcp_buffer; |
| 348 |
client->tcpbuf_size = NS_CLIENT_TCP_BUFFER_SIZE; |
| 349 |
} |
| 350 |
|
| 351 |
static void |
| 352 |
client_put_tcp_buffer(ns_client_t *client) { |
| 353 |
if (client->tcpbuf == NULL) { |
| 354 |
return; |
| 355 |
} |
| 356 |
|
| 357 |
if (client->tcpbuf != client->manager->tcp_buffer) { |
| 358 |
isc_mem_put(client->manager->mctx, client->tcpbuf, |
| 359 |
client->tcpbuf_size); |
| 360 |
} |
| 361 |
|
| 362 |
client->tcpbuf = NULL; |
| 363 |
client->tcpbuf_size = 0; |
| 364 |
} |
| 365 |
|
| 339 |
static void |
366 |
static void |
| 340 |
client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, |
367 |
client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, |
| 341 |
unsigned char **datap) { |
368 |
unsigned char **datap) { |
|
Lines 345-356
client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer,
Link Here
|
| 345 |
REQUIRE(datap != NULL); |
372 |
REQUIRE(datap != NULL); |
| 346 |
|
373 |
|
| 347 |
if (TCP_CLIENT(client)) { |
374 |
if (TCP_CLIENT(client)) { |
| 348 |
INSIST(client->tcpbuf == NULL); |
375 |
client_setup_tcp_buffer(client); |
| 349 |
client->tcpbuf = isc_mem_get(client->manager->send_mctx, |
|
|
| 350 |
NS_CLIENT_TCP_BUFFER_SIZE); |
| 351 |
client->tcpbuf_size = NS_CLIENT_TCP_BUFFER_SIZE; |
| 352 |
data = client->tcpbuf; |
376 |
data = client->tcpbuf; |
| 353 |
isc_buffer_init(buffer, data, NS_CLIENT_TCP_BUFFER_SIZE); |
377 |
isc_buffer_init(buffer, data, client->tcpbuf_size); |
| 354 |
} else { |
378 |
} else { |
| 355 |
data = client->sendbuf; |
379 |
data = client->sendbuf; |
| 356 |
if ((client->attributes & NS_CLIENTATTR_HAVECOOKIE) == 0) { |
380 |
if ((client->attributes & NS_CLIENTATTR_HAVECOOKIE) == 0) { |
|
Lines 383-393
client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) {
Link Here
|
| 383 |
|
407 |
|
| 384 |
if (isc_buffer_base(buffer) == client->tcpbuf) { |
408 |
if (isc_buffer_base(buffer) == client->tcpbuf) { |
| 385 |
size_t used = isc_buffer_usedlength(buffer); |
409 |
size_t used = isc_buffer_usedlength(buffer); |
| 386 |
client->tcpbuf = isc_mem_reget(client->manager->send_mctx, |
410 |
INSIST(client->tcpbuf_size == NS_CLIENT_TCP_BUFFER_SIZE); |
| 387 |
client->tcpbuf, |
411 |
|
| 388 |
client->tcpbuf_size, used); |
412 |
/* |
| 389 |
client->tcpbuf_size = used; |
413 |
* Copy the data into a smaller buffer before sending, |
| 390 |
r.base = client->tcpbuf; |
414 |
* and keep the original big TCP send buffer for reuse |
|
|
415 |
* by other clients. |
| 416 |
*/ |
| 417 |
if (used > NS_CLIENT_SEND_BUFFER_SIZE) { |
| 418 |
/* |
| 419 |
* We can save space by allocating a new buffer with a |
| 420 |
* correct size and freeing the big buffer. |
| 421 |
*/ |
| 422 |
unsigned char *new_tcpbuf = |
| 423 |
isc_mem_get(client->manager->mctx, used); |
| 424 |
memmove(new_tcpbuf, buffer->base, used); |
| 425 |
|
| 426 |
/* |
| 427 |
* Put the big buffer so we can replace the pointer |
| 428 |
* and the size with the new ones. |
| 429 |
*/ |
| 430 |
client_put_tcp_buffer(client); |
| 431 |
|
| 432 |
/* |
| 433 |
* Keep the new buffer's information so it can be freed. |
| 434 |
*/ |
| 435 |
client->tcpbuf = new_tcpbuf; |
| 436 |
client->tcpbuf_size = used; |
| 437 |
|
| 438 |
r.base = new_tcpbuf; |
| 439 |
} else { |
| 440 |
/* |
| 441 |
* The data fits in the available space in |
| 442 |
* 'sendbuf', there is no need for a new buffer. |
| 443 |
*/ |
| 444 |
memmove(client->sendbuf, buffer->base, used); |
| 445 |
|
| 446 |
/* |
| 447 |
* Put the big buffer, we don't need a dynamic buffer. |
| 448 |
*/ |
| 449 |
client_put_tcp_buffer(client); |
| 450 |
|
| 451 |
r.base = client->sendbuf; |
| 452 |
} |
| 391 |
r.length = used; |
453 |
r.length = used; |
| 392 |
} else { |
454 |
} else { |
| 393 |
isc_buffer_usedregion(buffer, &r); |
455 |
isc_buffer_usedregion(buffer, &r); |
|
Lines 461-468
ns_client_sendraw(ns_client_t *client, dns_message_t *message) {
Link Here
|
| 461 |
return; |
523 |
return; |
| 462 |
done: |
524 |
done: |
| 463 |
if (client->tcpbuf != NULL) { |
525 |
if (client->tcpbuf != NULL) { |
| 464 |
isc_mem_put(client->manager->send_mctx, client->tcpbuf, |
526 |
client_put_tcp_buffer(client); |
| 465 |
client->tcpbuf_size); |
|
|
| 466 |
} |
527 |
} |
| 467 |
|
528 |
|
| 468 |
ns_client_drop(client, result); |
529 |
ns_client_drop(client, result); |
|
Lines 746-753
renderend:
Link Here
|
| 746 |
|
807 |
|
| 747 |
cleanup: |
808 |
cleanup: |
| 748 |
if (client->tcpbuf != NULL) { |
809 |
if (client->tcpbuf != NULL) { |
| 749 |
isc_mem_put(client->manager->send_mctx, client->tcpbuf, |
810 |
client_put_tcp_buffer(client); |
| 750 |
client->tcpbuf_size); |
|
|
| 751 |
} |
811 |
} |
| 752 |
|
812 |
|
| 753 |
if (cleanup_cctx) { |
813 |
if (cleanup_cctx) { |
|
Lines 1629-1636
ns__client_reset_cb(void *client0) {
Link Here
|
| 1629 |
|
1689 |
|
| 1630 |
ns_client_endrequest(client); |
1690 |
ns_client_endrequest(client); |
| 1631 |
if (client->tcpbuf != NULL) { |
1691 |
if (client->tcpbuf != NULL) { |
| 1632 |
isc_mem_put(client->manager->send_mctx, client->tcpbuf, |
1692 |
client_put_tcp_buffer(client); |
| 1633 |
client->tcpbuf_size); |
|
|
| 1634 |
} |
1693 |
} |
| 1635 |
|
1694 |
|
| 1636 |
if (client->keytag != NULL) { |
1695 |
if (client->keytag != NULL) { |
|
Lines 1661-1668
ns__client_put_cb(void *client0) {
Link Here
|
| 1661 |
client->magic = 0; |
1720 |
client->magic = 0; |
| 1662 |
client->shuttingdown = true; |
1721 |
client->shuttingdown = true; |
| 1663 |
|
1722 |
|
| 1664 |
isc_mem_put(client->manager->send_mctx, client->sendbuf, |
|
|
| 1665 |
NS_CLIENT_SEND_BUFFER_SIZE); |
| 1666 |
if (client->opt != NULL) { |
1723 |
if (client->opt != NULL) { |
| 1667 |
INSIST(dns_rdataset_isassociated(client->opt)); |
1724 |
INSIST(dns_rdataset_isassociated(client->opt)); |
| 1668 |
dns_rdataset_disassociate(client->opt); |
1725 |
dns_rdataset_disassociate(client->opt); |
|
Lines 2339-2346
ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
Link Here
|
| 2339 |
dns_message_create(client->mctx, DNS_MESSAGE_INTENTPARSE, |
2396 |
dns_message_create(client->mctx, DNS_MESSAGE_INTENTPARSE, |
| 2340 |
&client->message); |
2397 |
&client->message); |
| 2341 |
|
2398 |
|
| 2342 |
client->sendbuf = isc_mem_get(client->manager->send_mctx, |
|
|
| 2343 |
NS_CLIENT_SEND_BUFFER_SIZE); |
| 2344 |
/* |
2399 |
/* |
| 2345 |
* Set magic earlier than usual because ns_query_init() |
2400 |
* Set magic earlier than usual because ns_query_init() |
| 2346 |
* and the functions it calls will require it. |
2401 |
* and the functions it calls will require it. |
|
Lines 2357-2363
ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
Link Here
|
| 2357 |
ns_clientmgr_t *oldmgr = client->manager; |
2412 |
ns_clientmgr_t *oldmgr = client->manager; |
| 2358 |
ns_server_t *sctx = client->sctx; |
2413 |
ns_server_t *sctx = client->sctx; |
| 2359 |
isc_task_t *task = client->task; |
2414 |
isc_task_t *task = client->task; |
| 2360 |
unsigned char *sendbuf = client->sendbuf; |
|
|
| 2361 |
dns_message_t *message = client->message; |
2415 |
dns_message_t *message = client->message; |
| 2362 |
isc_mem_t *oldmctx = client->mctx; |
2416 |
isc_mem_t *oldmctx = client->mctx; |
| 2363 |
ns_query_t query = client->query; |
2417 |
ns_query_t query = client->query; |
|
Lines 2372-2378
ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
Link Here
|
| 2372 |
.manager = oldmgr, |
2426 |
.manager = oldmgr, |
| 2373 |
.sctx = sctx, |
2427 |
.sctx = sctx, |
| 2374 |
.task = task, |
2428 |
.task = task, |
| 2375 |
.sendbuf = sendbuf, |
|
|
| 2376 |
.message = message, |
2429 |
.message = message, |
| 2377 |
.query = query, |
2430 |
.query = query, |
| 2378 |
.tid = tid }; |
2431 |
.tid = tid }; |
|
Lines 2397-2404
ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
Link Here
|
| 2397 |
return (ISC_R_SUCCESS); |
2450 |
return (ISC_R_SUCCESS); |
| 2398 |
|
2451 |
|
| 2399 |
cleanup: |
2452 |
cleanup: |
| 2400 |
isc_mem_put(client->manager->send_mctx, client->sendbuf, |
|
|
| 2401 |
NS_CLIENT_SEND_BUFFER_SIZE); |
| 2402 |
dns_message_detach(&client->message); |
2453 |
dns_message_detach(&client->message); |
| 2403 |
isc_task_detach(&client->task); |
2454 |
isc_task_detach(&client->task); |
| 2404 |
ns_clientmgr_detach(&client->manager); |
2455 |
ns_clientmgr_detach(&client->manager); |
|
Lines 2461-2468
clientmgr_destroy(ns_clientmgr_t *manager) {
Link Here
|
| 2461 |
isc_task_detach(&manager->task); |
2512 |
isc_task_detach(&manager->task); |
| 2462 |
ns_server_detach(&manager->sctx); |
2513 |
ns_server_detach(&manager->sctx); |
| 2463 |
|
2514 |
|
| 2464 |
isc_mem_detach(&manager->send_mctx); |
|
|
| 2465 |
|
| 2466 |
isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager)); |
2515 |
isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager)); |
| 2467 |
} |
2516 |
} |
| 2468 |
|
2517 |
|
|
Lines 2499-2559
ns_clientmgr_create(ns_server_t *sctx, isc_taskmgr_t *taskmgr,
Link Here
|
| 2499 |
|
2548 |
|
| 2500 |
ISC_LIST_INIT(manager->recursing); |
2549 |
ISC_LIST_INIT(manager->recursing); |
| 2501 |
|
2550 |
|
| 2502 |
/* |
|
|
| 2503 |
* We create specialised per-worker memory context specifically |
| 2504 |
* dedicated and tuned for allocating send buffers as it is a very |
| 2505 |
* common operation. Not doing so may result in excessive memory |
| 2506 |
* use in certain workloads. |
| 2507 |
* |
| 2508 |
* Please see this thread for more details: |
| 2509 |
* |
| 2510 |
* https://github.com/jemalloc/jemalloc/issues/2483 |
| 2511 |
* |
| 2512 |
* In particular, this information from the jemalloc developers is |
| 2513 |
* of the most interest: |
| 2514 |
* |
| 2515 |
* https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699 |
| 2516 |
* https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849 |
| 2517 |
* |
| 2518 |
* In essence, we use the following memory management strategy: |
| 2519 |
* |
| 2520 |
* 1. We use a per-worker memory arena for send buffers memory |
| 2521 |
* allocation to reduce lock contention (In reality, we create a |
| 2522 |
* per-client manager arena, but we have one client manager per |
| 2523 |
* worker). |
| 2524 |
* |
| 2525 |
* 2. The automatically created arenas settings remain unchanged |
| 2526 |
* and may be controlled by users (e.g. by setting the |
| 2527 |
* "MALLOC_CONF" variable). |
| 2528 |
* |
| 2529 |
* 3. We attune the arenas to not use dirty pages cache as the |
| 2530 |
* cache would have a poor reuse rate, and that is known to |
| 2531 |
* significantly contribute to excessive memory use. |
| 2532 |
* |
| 2533 |
* 4. There is no strict need for the dirty cache, as there is a |
| 2534 |
* per arena bin for each allocation size, so because we initially |
| 2535 |
* allocate strictly 64K per send buffer (enough for a DNS |
| 2536 |
* message), allocations would get directed to one bin (an "object |
| 2537 |
* pool" or a "slab") maintained within an arena. That is, there |
| 2538 |
* is an object pool already, specifically to optimise for the |
| 2539 |
* case of frequent allocations of objects of the given size. The |
| 2540 |
* object pool should suffice our needs, as we will end up |
| 2541 |
* recycling the objects from there without the need to back it by |
| 2542 |
* an additional layer of dirty pages cache. The dirty pages cache |
| 2543 |
* would have worked better in the case when there are more |
| 2544 |
* allocation bins involved due to a higher reuse rate (the case |
| 2545 |
* of a more "generic" memory management). |
| 2546 |
*/ |
| 2547 |
isc_mem_create_arena(&manager->send_mctx); |
| 2548 |
isc_mem_setname(manager->send_mctx, "sendbufs"); |
| 2549 |
(void)isc_mem_arena_set_dirty_decay_ms(manager->send_mctx, 0); |
| 2550 |
/* |
| 2551 |
* Disable muzzy pages cache too, as versions < 5.2.0 have it |
| 2552 |
* enabled by default. The muzzy pages cache goes right below the |
| 2553 |
* dirty pages cache and backs it. |
| 2554 |
*/ |
| 2555 |
(void)isc_mem_arena_set_muzzy_decay_ms(manager->send_mctx, 0); |
| 2556 |
|
| 2557 |
manager->magic = MANAGER_MAGIC; |
2551 |
manager->magic = MANAGER_MAGIC; |
| 2558 |
|
2552 |
|
| 2559 |
MTRACE("create"); |
2553 |
MTRACE("create"); |