Debugging aid for "unregister_netdevice: waiting for DEV to become free" refcount
leaks, gated on CONFIG_NET_DEV_REFCNT_TRACKER.

Each dev_hold()/dev_put() saves a (trimmed) stack trace into a fixed pool of
32768 per-trace counters chained on a per-netdev list; when unregistration
stalls, netdev_wait_allrefs_any() dumps every trace whose running balance
changed since the last dump.  trim_netdev_trace() cuts traces at well-known
boundary frames (handle_softirqs / process_one_work / syscall entry helpers),
which is why those functions are forced noinline under this config — so their
symbol names are stable in saved stack traces.

NOTE(review): the pre-insert duplicate scan in save_netdev_trace_buffer() runs
outside netdev_trace_buffer_lock and is not re-checked under the lock, so two
CPUs saving the same new trace concurrently may each allocate an entry; counts
still balance, only output is slightly noisier — acceptable for a debug patch.

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4e6e00bb90a..97862830f7d0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2102,6 +2102,8 @@ enum netdev_reg_state {
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
+ *
+ *	@netdev_trace_buffer_list: Linked list for debugging refcount leak.
  */
 
 struct net_device {
@@ -2257,6 +2259,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_TLS_DEVICE)
 	const struct tlsdev_ops *tlsdev_ops;
 #endif
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	struct list_head netdev_trace_buffer_list;
+#endif
 
 	unsigned int		operstate;
 	unsigned char		link_mode;
@@ -3185,6 +3190,7 @@ enum netdev_cmd {
 	NETDEV_OFFLOAD_XSTATS_REPORT_USED,
 	NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
 	NETDEV_XDP_FEAT_CHANGE,
+	NETDEV_DEBUG_UNREGISTER,
 };
 const char *netdev_cmd_to_name(enum netdev_cmd cmd);
 
@@ -4373,9 +4379,15 @@ static inline bool dev_nit_active(const struct net_device *dev)
 
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
+void save_netdev_trace_buffer(struct net_device *dev, int delta);
+int trim_netdev_trace(unsigned long *entries, int nr_entries);
+
 static inline void __dev_put(struct net_device *dev)
 {
 	if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+		save_netdev_trace_buffer(dev, -1);
+#endif
 #ifdef CONFIG_PCPU_DEV_REFCNT
 		this_cpu_dec(*dev->pcpu_refcnt);
 #else
@@ -4387,6 +4399,9 @@ static inline void __dev_put(struct net_device *dev)
 static inline void __dev_hold(struct net_device *dev)
 {
 	if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+		save_netdev_trace_buffer(dev, 1);
+#endif
 #ifdef CONFIG_PCPU_DEV_REFCNT
 		this_cpu_inc(*dev->pcpu_refcnt);
 #else
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 77198911b8dd..5f435c1e48d8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -576,6 +576,10 @@ static inline bool lockdep_softirq_start(void) { return false; }
 static inline void lockdep_softirq_end(bool in_hardirq) { }
 #endif
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void handle_softirqs(bool ksirqd);
+#endif
+
 static void handle_softirqs(bool ksirqd)
 {
 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index aeaec79bc09c..66cb4d8c00bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3157,6 +3157,10 @@ static bool manage_workers(struct worker *worker)
 	return true;
 }
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void process_one_work(struct worker *worker, struct work_struct *work);
+#endif
+
 /**
  * process_one_work - process single work
  * @worker: self
diff --git a/net/core/dev.c b/net/core/dev.c
index c1a9f7fdcffa..fbc205c82256 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1874,6 +1874,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
 	N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
 	N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
 	N(XDP_FEAT_CHANGE)
+	N(DEBUG_UNREGISTER)
 	}
 #undef N
 	return "UNKNOWN_NETDEV_EVENT";
@@ -11557,6 +11558,14 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static void dump_netdev_trace_buffer(const struct net_device *dev);
+static void erase_netdev_trace_buffer(const struct net_device *dev);
+#else
+static inline void dump_netdev_trace_buffer(const struct net_device *dev) { }
+static inline void erase_netdev_trace_buffer(const struct net_device *dev) { }
+#endif
+
 int netdev_unregister_timeout_secs __read_mostly = 10;
 
 #define WAIT_REFS_MIN_MSECS 1
@@ -11630,11 +11639,16 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
 
 		if (time_after(jiffies, warning_time +
 			       READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
+			rtnl_lock();
 			list_for_each_entry(dev, list, todo_list) {
 				pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 					 dev->name, netdev_refcnt_read(dev));
 				ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+				call_netdevice_notifiers(NETDEV_DEBUG_UNREGISTER, dev);
+				dump_netdev_trace_buffer(dev);
 			}
+			__rtnl_unlock();
+			rcu_barrier();
 
 			warning_time = jiffies;
 		}
@@ -12032,6 +12046,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->priv_len = sizeof_priv;
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	INIT_LIST_HEAD(&dev->netdev_trace_buffer_list);
+#endif
 	ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
@@ -12204,6 +12221,8 @@ void free_netdev(struct net_device *dev)
 
 	mutex_destroy(&dev->lock);
 
+	erase_netdev_trace_buffer(dev);
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED ||
 	    dev->reg_state == NETREG_DUMMY) {
@@ -13306,3 +13325,171 @@ static int __init net_dev_init(void)
 }
 
 subsys_initcall(net_dev_init);
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
+#define NETDEV_TRACE_BUFFER_SIZE 32768
+static struct netdev_trace_buffer {
+	struct list_head list;
+	int prev_count;
+	atomic_t count;
+	int nr_entries;
+	unsigned long entries[20];
+} netdev_trace_buffer[NETDEV_TRACE_BUFFER_SIZE];
+static LIST_HEAD(netdev_trace_buffer_list);
+static DEFINE_SPINLOCK(netdev_trace_buffer_lock);
+static bool netdev_trace_buffer_exhausted;
+
+static int netdev_trace_buffer_init(void)
+{
+	int i;
+
+	for (i = 0; i < NETDEV_TRACE_BUFFER_SIZE; i++)
+		list_add_tail(&netdev_trace_buffer[i].list, &netdev_trace_buffer_list);
+	return 0;
+}
+pure_initcall(netdev_trace_buffer_init);
+
+static void dump_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	int count, balance = 0, pos = 0;
+
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		pos++;
+		count = atomic_read(&ptr->count);
+		balance += count;
+		if (ptr->prev_count == count)
+			continue;
+		ptr->prev_count = count;
+		pr_info("Call trace for %s[%d] %+d at\n", dev->name, pos, count);
+		stack_trace_print(ptr->entries, ptr->nr_entries, 4);
+		cond_resched();
+	}
+	if (!netdev_trace_buffer_exhausted)
+		pr_info("balance as of %s[%d] is %d\n", dev->name, pos, balance);
+}
+
+static void erase_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	while (!list_empty(&dev->netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&dev->netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		list_add_tail(&ptr->list, &netdev_trace_buffer_list);
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+
+int trim_netdev_trace(unsigned long *entries, int nr_entries)
+{
+#ifdef CONFIG_KALLSYMS
+	char buffer[32] = { };
+	char *cp;
+	int i;
+
+	if (in_softirq()) {
+		static unsigned long __data_racy caller;
+
+		if (!caller) {
+			for (i = 0; i < nr_entries; i++) {
+				snprintf(buffer, sizeof(buffer) - 1, "%ps", (void *)entries[i]);
+				cp = strchr(buffer, ' ');
+				if (cp)
+					*cp = '\0';
+				if (!strcmp(buffer, "handle_softirqs")) {
+					caller = entries[i];
+					break;
+				}
+			}
+		}
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller)
+				return i + 1;
+	} else if (current->flags & PF_WQ_WORKER) {
+		static unsigned long __data_racy caller;
+
+		if (!caller) {
+			for (i = 0; i < nr_entries; i++) {
+				snprintf(buffer, sizeof(buffer) - 1, "%ps", (void *)entries[i]);
+				cp = strchr(buffer, ' ');
+				if (cp)
+					*cp = '\0';
+				if (!strcmp(buffer, "process_one_work")) {
+					caller = entries[i];
+					break;
+				}
+			}
+		}
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller)
+				return i + 1;
+	} else {
+		for (i = 0; i < nr_entries; i++) {
+			snprintf(buffer, sizeof(buffer) - 1, "%ps", (void *)entries[i]);
+			cp = strchr(buffer, ' ');
+			if (cp)
+				*cp = '\0';
+			if (buffer[0] == 'k') {
+				if (!strcmp(buffer, "ksys_unshare"))
+					return i + 1;
+			} else if (buffer[0] == 's') {
+				if (!strcmp(buffer, "sock_sendmsg_nosec") ||
+				    !strcmp(buffer, "sock_recvmsg_nosec"))
+					return i + 1;
+			} else if (buffer[0] == '_') {
+				if (!strcmp(buffer, "__sys_bind") ||
+				    !strcmp(buffer, "__sock_release") ||
+				    !strcmp(buffer, "__sys_bpf"))
+					return i + 1;
+			} else {
+				if (!strcmp(buffer, "do_sock_setsockopt"))
+					return i + 1;
+			}
+		}
+	}
+#endif
+	return nr_entries;
+}
+EXPORT_SYMBOL(trim_netdev_trace);
+
+void save_netdev_trace_buffer(struct net_device *dev, int delta)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long entries[ARRAY_SIZE(ptr->entries)];
+	unsigned long nr_entries;
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(ptr->entries), 1);
+	nr_entries = trim_netdev_trace(entries, nr_entries);
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		if (ptr->nr_entries == nr_entries &&
+		    !memcmp(ptr->entries, entries, nr_entries * sizeof(unsigned long))) {
+			atomic_add(delta, &ptr->count);
+			return;
+		}
+	}
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	if (!list_empty(&netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		ptr->prev_count = 0;
+		atomic_set(&ptr->count, delta);
+		ptr->nr_entries = nr_entries;
+		memmove(ptr->entries, entries, nr_entries * sizeof(unsigned long));
+		list_add_tail_rcu(&ptr->list, &dev->netdev_trace_buffer_list);
+	} else {
+		netdev_trace_buffer_exhausted = true;
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+EXPORT_SYMBOL(save_netdev_trace_buffer);
+
+#endif
diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c
index 9e9fb25314b9..78d611bb6d1c 100644
--- a/net/core/lock_debug.c
+++ b/net/core/lock_debug.c
@@ -29,6 +29,7 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
 	case NETDEV_DOWN:
 	case NETDEV_REBOOT:
 	case NETDEV_UNREGISTER:
+	case NETDEV_DEBUG_UNREGISTER:
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGEADDR:
 	case NETDEV_PRE_CHANGEADDR:
diff --git a/net/socket.c b/net/socket.c
index 05952188127f..53c4b1fd3ef7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -650,7 +650,11 @@ struct socket *sock_alloc(void)
 }
 EXPORT_SYMBOL(sock_alloc);
 
-static void __sock_release(struct socket *sock, struct inode *inode)
+static
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+noinline
+#endif
+void __sock_release(struct socket *sock, struct inode *inode)
 {
 	const struct proto_ops *ops = READ_ONCE(sock->ops);
 
@@ -722,7 +726,13 @@ static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
 	trace_sock_send_length(sk, ret, 0);
 }
 
-static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
+static
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+noinline
+#else
+inline
+#endif
+int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 {
 	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
 				     inet_sendmsg, sock, msg,
@@ -1072,8 +1082,13 @@ static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int f
 	trace_sock_recv_length(sk, ret, flags);
 }
 
-static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
-				     int flags)
+static
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+noinline
+#else
+inline
+#endif
+int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags)
 {
 	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
 				     inet6_recvmsg,
@@ -2532,9 +2547,12 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 	return err < 0 ? err : 0;
 }
 
-static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
-			   unsigned int flags, struct used_address *used_address,
-			   unsigned int allowed_msghdr_flags)
+static
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+noinline
+#endif
+int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, unsigned int flags,
+		    struct used_address *used_address, unsigned int allowed_msghdr_flags)
 {
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
 		__aligned(sizeof(__kernel_size_t));