From b436dd138bd5fa9ceecc44493827fd7de4230632 Mon Sep 17 00:00:00 2001 From: Gunnar Beutner Date: Wed, 26 May 2021 05:35:05 +0200 Subject: [PATCH] Kernel: Avoid allocations when sending IP packets Previously we'd allocate buffers when sending packets. This patch avoids these allocations by using the NetworkAdapter's packet queue. At the same time this also avoids copying partially constructed packets in order to prepend Ethernet and/or IPv4 headers. It also properly truncates UDP and raw IP packets. --- Kernel/Net/IPv4Socket.cpp | 17 ++++++-- Kernel/Net/NetworkAdapter.cpp | 64 +++++----------------------- Kernel/Net/NetworkAdapter.h | 13 ++++-- Kernel/Net/NetworkTask.cpp | 15 ++++--- Kernel/Net/TCPSocket.cpp | 78 +++++++++++++++++------------------ Kernel/Net/TCPSocket.h | 4 +- Kernel/Net/UDPSocket.cpp | 20 +++++---- 7 files changed, 94 insertions(+), 117 deletions(-) diff --git a/Kernel/Net/IPv4Socket.cpp b/Kernel/Net/IPv4Socket.cpp index 1520649c180..d5038d0a869 100644 --- a/Kernel/Net/IPv4Socket.cpp +++ b/Kernel/Net/IPv4Socket.cpp @@ -218,10 +218,19 @@ KResultOr IPv4Socket::sendto(FileDescription&, const UserOrKernelBuffer& dbgln_if(IPV4_SOCKET_DEBUG, "sendto: destination={}:{}", m_peer_address, m_peer_port); if (type() == SOCK_RAW) { - auto result = routing_decision.adapter->send_ipv4(local_address(), routing_decision.next_hop, - m_peer_address, (IPv4Protocol)protocol(), data, data_length, m_ttl); - if (result.is_error()) - return result; + auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset(); + data_length = min(data_length, routing_decision.adapter->mtu() - ipv4_payload_offset); + auto packet = routing_decision.adapter->acquire_packet_buffer(ipv4_payload_offset + data_length); + if (!packet) + return ENOMEM; + routing_decision.adapter->fill_in_ipv4_header(*packet, local_address(), routing_decision.next_hop, + m_peer_address, (IPv4Protocol)protocol(), data_length, m_ttl); + if (!data.read(packet->buffer.data() + ipv4_payload_offset, 
data_length)) { + routing_decision.adapter->release_packet_buffer(*packet); + return EFAULT; + } + routing_decision.adapter->send_raw({ packet->buffer.data(), packet->buffer.size() }); + routing_decision.adapter->release_packet_buffer(*packet); return data_length; } diff --git a/Kernel/Net/NetworkAdapter.cpp b/Kernel/Net/NetworkAdapter.cpp index 9090e074e04..820995af8af 100644 --- a/Kernel/Net/NetworkAdapter.cpp +++ b/Kernel/Net/NetworkAdapter.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -76,15 +75,15 @@ void NetworkAdapter::send(const MACAddress& destination, const ARPPacket& packet send_raw({ (const u8*)eth, size_in_bytes }); } -KResult NetworkAdapter::send_ipv4(const IPv4Address& source_ipv4, const MACAddress& destination_mac, const IPv4Address& destination_ipv4, IPv4Protocol protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl) +void NetworkAdapter::fill_in_ipv4_header(PacketWithTimestamp& packet, IPv4Address const& source_ipv4, MACAddress const& destination_mac, IPv4Address const& destination_ipv4, IPv4Protocol protocol, size_t payload_size, u8 ttl) { size_t ipv4_packet_size = sizeof(IPv4Packet) + payload_size; - if (ipv4_packet_size > mtu()) - return send_ipv4_fragmented(source_ipv4, destination_mac, destination_ipv4, protocol, payload, payload_size, ttl); + VERIFY(ipv4_packet_size <= mtu()); - size_t ethernet_frame_size = sizeof(EthernetFrameHeader) + sizeof(IPv4Packet) + payload_size; - auto buffer = NetworkByteBuffer::create_zeroed(ethernet_frame_size); - auto& eth = *(EthernetFrameHeader*)buffer.data(); + size_t ethernet_frame_size = ipv4_payload_offset() + payload_size; + VERIFY(packet.buffer.size() == ethernet_frame_size); + memset(packet.buffer.data(), 0, ipv4_payload_offset()); + auto& eth = *(EthernetFrameHeader*)packet.buffer.data(); eth.set_source(mac_address()); eth.set_destination(destination_mac); eth.set_ether_type(EtherType::IPv4); @@ -98,53 +97,6 @@ KResult 
NetworkAdapter::send_ipv4(const IPv4Address& source_ipv4, const MACAddre ipv4.set_ident(1); ipv4.set_ttl(ttl); ipv4.set_checksum(ipv4.compute_checksum()); - m_packets_out++; - m_bytes_out += ethernet_frame_size; - - if (!payload.read(ipv4.payload(), payload_size)) - return EFAULT; - send_raw({ (const u8*)&eth, ethernet_frame_size }); - return KSuccess; -} - -KResult NetworkAdapter::send_ipv4_fragmented(const IPv4Address& source_ipv4, const MACAddress& destination_mac, const IPv4Address& destination_ipv4, IPv4Protocol protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl) -{ - // packets must be split on the 64-bit boundary - auto packet_boundary_size = (mtu() - sizeof(IPv4Packet) - sizeof(EthernetFrameHeader)) & 0xfffffff8; - auto fragment_block_count = (payload_size + packet_boundary_size) / packet_boundary_size; - auto last_block_size = payload_size - packet_boundary_size * (fragment_block_count - 1); - auto number_of_blocks_in_fragment = packet_boundary_size / 8; - - auto identification = get_good_random(); - - size_t ethernet_frame_size = mtu(); - for (size_t packet_index = 0; packet_index < fragment_block_count; ++packet_index) { - auto is_last_block = packet_index + 1 == fragment_block_count; - auto packet_payload_size = is_last_block ? 
last_block_size : packet_boundary_size; - auto buffer = NetworkByteBuffer::create_zeroed(ethernet_frame_size); - auto& eth = *(EthernetFrameHeader*)buffer.data(); - eth.set_source(mac_address()); - eth.set_destination(destination_mac); - eth.set_ether_type(EtherType::IPv4); - auto& ipv4 = *(IPv4Packet*)eth.payload(); - ipv4.set_version(4); - ipv4.set_internet_header_length(5); - ipv4.set_source(source_ipv4); - ipv4.set_destination(destination_ipv4); - ipv4.set_protocol((u8)protocol); - ipv4.set_length(sizeof(IPv4Packet) + packet_payload_size); - ipv4.set_has_more_fragments(!is_last_block); - ipv4.set_ident(identification); - ipv4.set_ttl(ttl); - ipv4.set_fragment_offset(packet_index * number_of_blocks_in_fragment); - ipv4.set_checksum(ipv4.compute_checksum()); - m_packets_out++; - m_bytes_out += ethernet_frame_size; - if (!payload.read(ipv4.payload(), packet_index * packet_boundary_size, packet_payload_size)) - return EFAULT; - send_raw({ (const u8*)&eth, ethernet_frame_size }); - } - return KSuccess; } void NetworkAdapter::did_receive(ReadonlyBytes payload) @@ -195,6 +147,8 @@ RefPtr NetworkAdapter::acquire_packet_buffer(size_t size) if (m_unused_packets.is_empty()) { auto buffer = KBuffer::create_with_size(size, Region::Access::Read | Region::Access::Write, "Packet Buffer", AllocationStrategy::AllocateNow); auto packet = adopt_ref_if_nonnull(new PacketWithTimestamp { move(buffer), kgettimeofday() }); + if (!packet) + return nullptr; packet->buffer.set_size(size); return packet; } @@ -208,6 +162,8 @@ RefPtr NetworkAdapter::acquire_packet_buffer(size_t size) auto buffer = KBuffer::create_with_size(size, Region::Access::Read | Region::Access::Write, "Packet Buffer", AllocationStrategy::AllocateNow); packet = adopt_ref_if_nonnull(new PacketWithTimestamp { move(buffer), kgettimeofday() }); + if (!packet) + return nullptr; packet->buffer.set_size(size); return packet; } diff --git a/Kernel/Net/NetworkAdapter.h b/Kernel/Net/NetworkAdapter.h index 5ca8d54b3a2..35cf665f7e3 
100644 --- a/Kernel/Net/NetworkAdapter.h +++ b/Kernel/Net/NetworkAdapter.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,8 @@ struct PacketWithTimestamp : public RefCounted { IntrusiveListNode> packet_node; }; -class NetworkAdapter : public RefCounted { +class NetworkAdapter : public RefCounted + , public Weakable { public: template static inline void for_each(Callback callback) @@ -67,8 +69,7 @@ public: void set_ipv4_gateway(const IPv4Address&); void send(const MACAddress&, const ARPPacket&); - KResult send_ipv4(const IPv4Address& source_ipv4, const MACAddress&, const IPv4Address&, IPv4Protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl); - KResult send_ipv4_fragmented(const IPv4Address& source_ipv4, const MACAddress&, const IPv4Address&, IPv4Protocol, const UserOrKernelBuffer& payload, size_t payload_size, u8 ttl); + void fill_in_ipv4_header(PacketWithTimestamp&, IPv4Address const&, MACAddress const&, IPv4Address const&, IPv4Protocol, size_t, u8); size_t dequeue_packet(u8* buffer, size_t buffer_size, Time& packet_timestamp); @@ -85,13 +86,17 @@ public: RefPtr acquire_packet_buffer(size_t); void release_packet_buffer(PacketWithTimestamp&); + constexpr size_t layer3_payload_offset() const { return sizeof(EthernetFrameHeader); } + constexpr size_t ipv4_payload_offset() const { return layer3_payload_offset() + sizeof(IPv4Packet); } + + virtual void send_raw(ReadonlyBytes) = 0; + Function on_receive; protected: NetworkAdapter(); void set_interface_name(const PCI::Address&); void set_mac_address(const MACAddress& mac_address) { m_mac_address = mac_address; } - virtual void send_raw(ReadonlyBytes) = 0; void did_receive(ReadonlyBytes); void set_loopback_name(); diff --git a/Kernel/Net/NetworkTask.cpp b/Kernel/Net/NetworkTask.cpp index 87ec1dd4d8d..8db8da6c08c 100644 --- a/Kernel/Net/NetworkTask.cpp +++ b/Kernel/Net/NetworkTask.cpp @@ -246,8 +246,14 @@ void handle_icmp(const EthernetFrameHeader& eth, 
const IPv4Packet& ipv4_packet, dbgln("handle_icmp: EchoRequest packet is too small, ignoring."); return; } - auto buffer = ByteBuffer::create_zeroed(icmp_packet_size); - auto& response = *(ICMPEchoPacket*)buffer.data(); + auto ipv4_payload_offset = adapter->ipv4_payload_offset(); + auto packet = adapter->acquire_packet_buffer(ipv4_payload_offset + icmp_packet_size); + if (!packet) { + dbgln("Could not allocate packet buffer while sending ICMP packet"); + return; + } + adapter->fill_in_ipv4_header(*packet, adapter->ipv4_address(), eth.source(), ipv4_packet.source(), IPv4Protocol::ICMP, icmp_packet_size, 64); + auto& response = *(ICMPEchoPacket*)(packet->buffer.data() + ipv4_payload_offset); response.header.set_type(ICMPType::EchoReply); response.header.set_code(0); response.identifier = request.identifier; @@ -256,9 +262,8 @@ void handle_icmp(const EthernetFrameHeader& eth, const IPv4Packet& ipv4_packet, memcpy(response.payload(), request.payload(), icmp_payload_size); response.header.set_checksum(internet_checksum(&response, icmp_packet_size)); // FIXME: What is the right TTL value here? Is 64 ok? Should we use the same TTL as the echo request? - auto response_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&response); - [[maybe_unused]] auto result = adapter->send_ipv4(adapter->ipv4_address(), eth.source(), - ipv4_packet.source(), IPv4Protocol::ICMP, response_buffer, buffer.size(), 64); + adapter->send_raw({ packet->buffer.data(), packet->buffer.size() }); + adapter->release_packet_buffer(*packet); } } diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index b771bc7586d..e6e3150f3b3 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -188,18 +188,30 @@ KResult TCPSocket::send_ack(bool allow_duplicate) KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size, RoutingDecision* user_routing_decision) { + RoutingDecision routing_decision = user_routing_decision ? 
*user_routing_decision : route_to(peer_address(), local_address(), bound_interface()); + if (routing_decision.is_zero()) + return EHOSTUNREACH; + + auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset(); + const bool has_mss_option = flags == TCPFlags::SYN; const size_t options_size = has_mss_option ? sizeof(TCPOptionMSS) : 0; - const size_t header_size = sizeof(TCPPacket) + options_size; - const size_t buffer_size = header_size + payload_size; - auto buffer = NetworkByteBuffer::create_zeroed(buffer_size); - auto& tcp_packet = *(TCPPacket*)(buffer.data()); + const size_t tcp_header_size = sizeof(TCPPacket) + options_size; + const size_t buffer_size = ipv4_payload_offset + tcp_header_size + payload_size; + auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size); + if (!packet) + return ENOMEM; + routing_decision.adapter->fill_in_ipv4_header(*packet, local_address(), + routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, + buffer_size - ipv4_payload_offset, ttl()); + memset(packet->buffer.data() + ipv4_payload_offset, 0, sizeof(TCPPacket)); + auto& tcp_packet = *(TCPPacket*)(packet->buffer.data() + ipv4_payload_offset); VERIFY(local_port()); tcp_packet.set_source_port(local_port()); tcp_packet.set_destination_port(peer_port()); tcp_packet.set_window_size(NumericLimits::max()); tcp_packet.set_sequence_number(m_sequence_number); - tcp_packet.set_data_offset(header_size / sizeof(u32)); + tcp_packet.set_data_offset(tcp_header_size / sizeof(u32)); tcp_packet.set_flags(flags); if (flags & TCPFlags::ACK) { @@ -217,31 +229,22 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, m_sequence_number += payload_size; } - RoutingDecision routing_decision = user_routing_decision ? 
*user_routing_decision : route_to(peer_address(), local_address(), bound_interface()); - if (routing_decision.is_zero()) - return EHOSTUNREACH; - if (has_mss_option) { u16 mss = routing_decision.adapter->mtu() - sizeof(IPv4Packet) - sizeof(TCPPacket); TCPOptionMSS mss_option { mss }; - VERIFY(buffer.size() >= sizeof(TCPPacket) + sizeof(mss_option)); - memcpy(buffer.data() + sizeof(TCPPacket), &mss_option, sizeof(mss_option)); + VERIFY(packet->buffer.size() >= ipv4_payload_offset + sizeof(TCPPacket) + sizeof(mss_option)); + memcpy(packet->buffer.data() + ipv4_payload_offset + sizeof(TCPPacket), &mss_option, sizeof(mss_option)); } tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size)); - auto packet_buffer = UserOrKernelBuffer::for_kernel_buffer(buffer.data()); - auto result = routing_decision.adapter->send_ipv4( - local_address(), routing_decision.next_hop, peer_address(), IPv4Protocol::TCP, - packet_buffer, buffer_size, ttl()); - if (result.is_error()) - return result; + routing_decision.adapter->send_raw({ packet->buffer.data(), packet->buffer.size() }); m_packets_out++; m_bytes_out += buffer_size; if (tcp_packet.has_syn() || payload_size > 0) { Locker locker(m_not_acked_lock); - m_not_acked.append({ m_sequence_number, move(buffer) }); + m_not_acked.append({ m_sequence_number, move(packet), ipv4_payload_offset, *routing_decision.adapter }); enqueue_for_retransmit(); } @@ -263,6 +266,9 @@ void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size) dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket: iterate: {}", packet.ack_number); if (packet.ack_number <= ack_number) { + auto old_adapter = packet.adapter.strong_ref(); + if (old_adapter) + old_adapter->release_packet_buffer(*packet.buffer); m_not_acked.take_first(); removed++; } else { @@ -531,7 +537,7 @@ void TCPSocket::retransmit_packets() packet.tx_counter++; if constexpr (TCP_SOCKET_DEBUG) { - auto& tcp_packet = *(const TCPPacket*)(packet.buffer.data()); + auto& 
tcp_packet = *(const TCPPacket*)(packet.buffer->buffer.data() + packet.ipv4_payload_offset); dbgln("Sending TCP packet from {}:{} to {}:{} with ({}{}{}{}) seq_no={}, ack_no={}, tx_counter={}", local_address(), local_port(), peer_address(), peer_port(), @@ -544,29 +550,19 @@ void TCPSocket::retransmit_packets() packet.tx_counter); } - auto packet_buffer = UserOrKernelBuffer::for_kernel_buffer(packet.buffer.data()); - int err = routing_decision.adapter->send_ipv4( - local_address(), routing_decision.next_hop, peer_address(), - IPv4Protocol::TCP, packet_buffer, packet.buffer.size(), ttl()); - if (err < 0) { - auto& tcp_packet = *(const TCPPacket*)(packet.buffer.data()); - dmesgln("Error ({}) sending TCP packet from {}:{} to {}:{} with ({}{}{}{}) seq_no={}, ack_no={}, tx_counter={}", - err, - local_address(), - local_port(), - peer_address(), - peer_port(), - (tcp_packet.has_syn() ? "SYN " : ""), - (tcp_packet.has_ack() ? "ACK " : ""), - (tcp_packet.has_fin() ? "FIN " : ""), - (tcp_packet.has_rst() ? "RST " : ""), - tcp_packet.sequence_number(), - tcp_packet.ack_number(), - packet.tx_counter); - } else { - m_packets_out++; - m_bytes_out += packet.buffer.size(); + size_t ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset(); + if (ipv4_payload_offset != packet.ipv4_payload_offset) { + // FIXME: Add support for this. This can happen if after a route change + // we ended up on another adapter which doesn't have the same layer 2 type + // like the previous adapter. 
+ VERIFY_NOT_REACHED(); } + routing_decision.adapter->fill_in_ipv4_header(*packet.buffer, + local_address(), routing_decision.next_hop, peer_address(), + IPv4Protocol::TCP, packet.buffer->buffer.size() - ipv4_payload_offset, ttl()); + routing_decision.adapter->send_raw({ packet.buffer->buffer.data(), packet.buffer->buffer.size() }); + m_packets_out++; + m_bytes_out += packet.buffer->buffer.size(); } } diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index f6c5511b132..7dab47d57f0 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -194,7 +194,9 @@ private: struct OutgoingPacket { u32 ack_number { 0 }; - NetworkByteBuffer buffer; + RefPtr buffer; + size_t ipv4_payload_offset; + WeakPtr adapter; int tx_counter { 0 }; }; diff --git a/Kernel/Net/UDPSocket.cpp b/Kernel/Net/UDPSocket.cpp index 9c81322e707..27ad0bd57ec 100644 --- a/Kernel/Net/UDPSocket.cpp +++ b/Kernel/Net/UDPSocket.cpp @@ -78,19 +78,23 @@ KResultOr UDPSocket::protocol_send(const UserOrKernelBuffer& data, size_ auto routing_decision = route_to(peer_address(), local_address(), bound_interface()); if (routing_decision.is_zero()) return EHOSTUNREACH; - const size_t buffer_size = sizeof(UDPPacket) + data_length; - auto buffer = ByteBuffer::create_zeroed(buffer_size); - auto& udp_packet = *reinterpret_cast(buffer.data()); + auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset(); + data_length = min(data_length, routing_decision.adapter->mtu() - ipv4_payload_offset - sizeof(UDPPacket)); + const size_t udp_buffer_size = sizeof(UDPPacket) + data_length; + auto packet = routing_decision.adapter->acquire_packet_buffer(ipv4_payload_offset + udp_buffer_size); + if (!packet) + return ENOMEM; + memset(packet->buffer.data() + ipv4_payload_offset, 0, sizeof(UDPPacket)); + auto& udp_packet = *reinterpret_cast(packet->buffer.data() + ipv4_payload_offset); udp_packet.set_source_port(local_port()); udp_packet.set_destination_port(peer_port()); - 
udp_packet.set_length(buffer_size); + udp_packet.set_length(udp_buffer_size); if (!data.read(udp_packet.payload(), data_length)) return EFAULT; - auto result = routing_decision.adapter->send_ipv4(local_address(), routing_decision.next_hop, - peer_address(), IPv4Protocol::UDP, UserOrKernelBuffer::for_kernel_buffer(buffer.data()), buffer_size, ttl()); - if (result.is_error()) - return result; + routing_decision.adapter->fill_in_ipv4_header(*packet, local_address(), routing_decision.next_hop, + peer_address(), IPv4Protocol::UDP, udp_buffer_size, ttl()); + routing_decision.adapter->send_raw({ packet->buffer.data(), packet->buffer.size() }); return data_length; }