// https://syzkaller.appspot.com/bug?id=09f9deef3f5b37009a62ed8feaef6e47761f9718 // autogenerated by syzkaller (https://github.com/google/syzkaller) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static unsigned long long procid; static __thread int clone_ongoing; static __thread int skip_segv; static __thread jmp_buf segv_env; static void segv_handler(int sig, siginfo_t* info, void* ctx) { if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0) { exit(sig); } uintptr_t addr = (uintptr_t)info->si_addr; const uintptr_t prog_start = 1 << 20; const uintptr_t prog_end = 100 << 20; int skip = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0; int valid = addr < prog_start || addr > prog_end; if (skip && valid) { _longjmp(segv_env, 1); } exit(sig); } static void install_segv_handler(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_IGN; syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8); memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = segv_handler; sa.sa_flags = SA_NODEFER | SA_SIGINFO; sigaction(SIGSEGV, &sa, NULL); sigaction(SIGBUS, &sa, NULL); } #define NONFAILING(...) \ ({ \ int ok = 1; \ __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ if (_setjmp(segv_env) == 0) { \ __VA_ARGS__; \ } else \ ok = 0; \ __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ ok; \ }) static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static void use_temporary_dir(void) { char tmpdir_template[] = "./syzkaller.XXXXXX"; char* tmpdir = mkdtemp(tmpdir_template); if (!tmpdir) exit(1); if (chmod(tmpdir, 0777)) exit(1); if (chdir(tmpdir)) exit(1); } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } struct nlmsg { char* pos; int nesting; struct nlattr* nested[8]; char buf[4096]; }; static void netlink_init(struct nlmsg* nlmsg, int typ, int flags, const void* data, int size) { memset(nlmsg, 0, sizeof(*nlmsg)); struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; hdr->nlmsg_type = typ; hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; memcpy(hdr + 1, data, size); nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size); } static void netlink_attr(struct nlmsg* nlmsg, int typ, const void* data, int size) { struct nlattr* attr = (struct nlattr*)nlmsg->pos; attr->nla_len = sizeof(*attr) + size; attr->nla_type = typ; if (size > 0) memcpy(attr + 1, data, size); nlmsg->pos += NLMSG_ALIGN(attr->nla_len); } static void netlink_nest(struct nlmsg* nlmsg, int typ) { struct nlattr* attr = (struct nlattr*)nlmsg->pos; attr->nla_type = typ; nlmsg->pos += sizeof(*attr); nlmsg->nested[nlmsg->nesting++] = attr; } static void netlink_done(struct nlmsg* nlmsg) { struct nlattr* attr = nlmsg->nested[--nlmsg->nesting]; attr->nla_len = nlmsg->pos - (char*)attr; } static int netlink_send_ext(struct nlmsg* nlmsg, int sock, uint16_t reply_type, int* reply_len, bool dofail) { if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting) exit(1); struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; hdr->nlmsg_len = nlmsg->pos - nlmsg->buf; struct sockaddr_nl addr; memset(&addr, 0, sizeof(addr)); addr.nl_family = AF_NETLINK; ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr)); if (n != (ssize_t)hdr->nlmsg_len) { if (dofail) exit(1); return -1; } n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); if (reply_len) *reply_len = 0; if (n < 0) { if (dofail) exit(1); return -1; } if (n < (ssize_t)sizeof(struct nlmsghdr)) { errno = EINVAL; if (dofail) exit(1); return -1; } if (hdr->nlmsg_type == NLMSG_DONE) return 0; if (reply_len && hdr->nlmsg_type == reply_type) { *reply_len = n; return 0; } if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) { errno = EINVAL; if (dofail) exit(1); return -1; } if (hdr->nlmsg_type != NLMSG_ERROR) { errno = EINVAL; if (dofail) exit(1); return -1; } errno = -((struct nlmsgerr*)(hdr + 1))->error; return -errno; } static int netlink_send(struct nlmsg* nlmsg, int sock) { return netlink_send_ext(nlmsg, sock, 0, NULL, true); } static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = CTRL_CMD_GETFAMILY; netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1); int n = 0; int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail); if (err < 0) { return -1; } uint16_t id = 0; struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { if (attr->nla_type == CTRL_ATTR_FAMILY_ID) { id = *(uint16_t*)(attr + 1); break; } } if (!id) { errno = EINVAL; return -1; } recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); return id; } static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset, unsigned int total_len) { struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset); if (offset == total_len || offset + hdr->nlmsg_len > total_len) return -1; return hdr->nlmsg_len; } static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type, const char* name, bool up) { struct ifinfomsg hdr; memset(&hdr, 0, sizeof(hdr)); if (up) hdr.ifi_flags = hdr.ifi_change = IFF_UP; netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); if (name) netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name)); netlink_nest(nlmsg, IFLA_LINKINFO); netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type)); } static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type, const char* name) { netlink_add_device_impl(nlmsg, type, name, false); netlink_done(nlmsg); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name, const char* peer) { netlink_add_device_impl(nlmsg, "veth", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); netlink_nest(nlmsg, VETH_INFO_PEER); nlmsg->pos += sizeof(struct ifinfomsg); netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer)); netlink_done(nlmsg); netlink_done(nlmsg); netlink_done(nlmsg); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_xfrm(struct nlmsg* nlmsg, int sock, const char* name) { netlink_add_device_impl(nlmsg, "xfrm", name, true); netlink_nest(nlmsg, IFLA_INFO_DATA); int if_id = 1; netlink_attr(nlmsg, 2, &if_id, sizeof(if_id)); netlink_done(nlmsg); netlink_done(nlmsg); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name, const char* slave1, const char* slave2) { netlink_add_device_impl(nlmsg, "hsr", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); int ifindex1 = if_nametoindex(slave1); netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1)); int ifindex2 = if_nametoindex(slave2); netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2)); netlink_done(nlmsg); netlink_done(nlmsg); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link) { netlink_add_device_impl(nlmsg, type, name, false); netlink_done(nlmsg); int ifindex = if_nametoindex(link); netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16_t id, uint16_t proto) { netlink_add_device_impl(nlmsg, "vlan", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id)); netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto)); netlink_done(nlmsg); netlink_done(nlmsg); int ifindex = if_nametoindex(link); netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link) { netlink_add_device_impl(nlmsg, "macvlan", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); uint32_t mode = MACVLAN_MODE_BRIDGE; netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode)); netlink_done(nlmsg); netlink_done(nlmsg); int ifindex = if_nametoindex(link); netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32_t vni, struct in_addr* addr4, struct in6_addr* addr6) { netlink_add_device_impl(nlmsg, "geneve", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni)); if (addr4) netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4)); if (addr6) netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6)); netlink_done(nlmsg); netlink_done(nlmsg); int err = netlink_send(nlmsg, sock); if (err < 0) { } } #define IFLA_IPVLAN_FLAGS 2 #define IPVLAN_MODE_L3S 2 #undef IPVLAN_F_VEPA #define IPVLAN_F_VEPA 2 static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16_t mode, uint16_t flags) { netlink_add_device_impl(nlmsg, "ipvlan", name, false); netlink_nest(nlmsg, IFLA_INFO_DATA); netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode)); netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags)); netlink_done(nlmsg); netlink_done(nlmsg); int ifindex = if_nametoindex(link); netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up, const char* master, const void* mac, int macsize, const char* new_name) { struct ifinfomsg hdr; memset(&hdr, 0, sizeof(hdr)); if (up) hdr.ifi_flags = hdr.ifi_change = IFF_UP; hdr.ifi_index = if_nametoindex(name); netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr)); if (new_name) netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name)); if (master) { int ifindex = if_nametoindex(master); netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex)); } if (macsize) netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev, const void* addr, int addrsize) { struct ifaddrmsg hdr; memset(&hdr, 0, sizeof(hdr)); hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6; hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120; hdr.ifa_scope = RT_SCOPE_UNIVERSE; hdr.ifa_index = if_nametoindex(dev); netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr)); netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize); netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize); return netlink_send(nlmsg, sock); } static void netlink_add_addr4(struct nlmsg* nlmsg, int sock, const char* dev, const char* addr) { struct in_addr in_addr; inet_pton(AF_INET, addr, &in_addr); int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr)); if (err < 0) { } } static void netlink_add_addr6(struct nlmsg* nlmsg, int sock, const char* dev, const char* addr) { struct in6_addr in6_addr; inet_pton(AF_INET6, addr, &in6_addr); int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr)); if (err < 0) { } } static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name, const void* addr, int addrsize, const void* mac, int macsize) { struct ndmsg hdr; memset(&hdr, 0, sizeof(hdr)); hdr.ndm_family = addrsize == 4 ? AF_INET : AF_INET6; hdr.ndm_ifindex = if_nametoindex(name); hdr.ndm_state = NUD_PERMANENT; netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); netlink_attr(nlmsg, NDA_DST, addr, addrsize); netlink_attr(nlmsg, NDA_LLADDR, mac, macsize); int err = netlink_send(nlmsg, sock); if (err < 0) { } } static struct nlmsg nlmsg; static int tunfd = -1; #define TUN_IFACE "syz_tun" #define LOCAL_MAC 0xaaaaaaaaaaaa #define REMOTE_MAC 0xaaaaaaaaaabb #define LOCAL_IPV4 "172.20.20.170" #define REMOTE_IPV4 "172.20.20.187" #define LOCAL_IPV6 "fe80::aa" #define REMOTE_IPV6 "fe80::bb" #define IFF_NAPI 0x0010 static void initialize_tun(void) { tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); if (tunfd == -1) { printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); printf("otherwise fuzzing or reproducing might not work as intended\n"); return; } const int kTunFd = 200; if (dup2(tunfd, kTunFd) < 0) exit(1); close(tunfd); tunfd = kTunFd; struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { exit(1); } char sysctl[64]; sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE); write_file(sysctl, "0"); sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE); write_file(sysctl, "0"); int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) exit(1); netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4); netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6); uint64_t macaddr = REMOTE_MAC; struct in_addr in_addr; inet_pton(AF_INET, REMOTE_IPV4, &in_addr); netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN); struct in6_addr in6_addr; inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr); netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN); macaddr = LOCAL_MAC; netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL); close(sock); } #define DEVLINK_FAMILY_NAME "devlink" #define DEVLINK_CMD_PORT_GET 5 #define DEVLINK_ATTR_BUS_NAME 1 #define DEVLINK_ATTR_DEV_NAME 2 #define DEVLINK_ATTR_NETDEV_NAME 7 static struct nlmsg nlmsg2; static void initialize_devlink_ports(const char* bus_name, const char* dev_name, const char* netdev_prefix) { struct genlmsghdr genlhdr; int len, total_len, id, err, offset; uint16_t netdev_index; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (sock == -1) exit(1); int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (rtsock == -1) exit(1); id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); if (id == -1) goto error; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = DEVLINK_CMD_PORT_GET; netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr)); netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); err = netlink_send_ext(&nlmsg, sock, id, &total_len, true); if (err < 0) { goto error; } offset = 0; netdev_index = 0; while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) { struct nlattr* attr = (struct nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) { char* port_name; char netdev_name[IFNAMSIZ]; port_name = (char*)(attr + 1); snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index); netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name); break; } } offset += len; netdev_index++; } error: close(rtsock); close(sock); } #define WIFI_INITIAL_DEVICE_COUNT 2 #define WIFI_MAC_BASE {0x08, 0x02, 0x11, 0x00, 0x00, 0x00} #define WIFI_IBSS_BSSID {0x50, 0x50, 0x50, 0x50, 0x50, 0x50} #define WIFI_IBSS_SSID {0x10, 0x10, 0x10, 0x10, 0x10, 0x10} #define WIFI_DEFAULT_FREQUENCY 2412 #define WIFI_DEFAULT_SIGNAL 0 #define WIFI_DEFAULT_RX_RATE 1 #define HWSIM_CMD_REGISTER 1 #define HWSIM_CMD_FRAME 2 #define HWSIM_CMD_NEW_RADIO 4 #define HWSIM_ATTR_SUPPORT_P2P_DEVICE 14 #define HWSIM_ATTR_PERM_ADDR 22 #define IF_OPER_UP 6 struct join_ibss_props { int wiphy_freq; bool wiphy_freq_fixed; uint8_t* mac; uint8_t* ssid; int ssid_len; }; static int set_interface_state(const char* interface_name, int on) { struct ifreq ifr; int sock = socket(AF_INET, SOCK_DGRAM, 0); if (sock < 0) { return -1; } memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, interface_name); int ret = ioctl(sock, SIOCGIFFLAGS, &ifr); if (ret < 0) { close(sock); return -1; } if (on) ifr.ifr_flags |= IFF_UP; else ifr.ifr_flags &= ~IFF_UP; ret = ioctl(sock, SIOCSIFFLAGS, &ifr); close(sock); if (ret < 0) { return -1; } return 0; } static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32_t ifindex, uint32_t iftype, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = NL80211_CMD_SET_INTERFACE; netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype)); int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); if (err < 0) { } return err; } static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32_t ifindex, struct join_ibss_props* props, bool dofail) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = NL80211_CMD_JOIN_IBSS; netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len); netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq)); if (props->mac) netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN); if (props->wiphy_freq_fixed) netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0); int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); if (err < 0) { } return err; } static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail) { struct ifinfomsg info; memset(&info, 0, sizeof(info)); info.ifi_family = AF_UNSPEC; info.ifi_index = ifindex; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) { return -1; } netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info)); int n; int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail); close(sock); if (err) { return -1; } struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf)); for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) { if (attr->rta_type == IFLA_OPERSTATE) return *((int32_t*)RTA_DATA(attr)); } return -1; } static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail) { int ifindex = if_nametoindex(interface); while (true) { usleep(1000); int ret = get_ifla_operstate(nlmsg, ifindex, dofail); if (ret < 0) return ret; if (ret == operstate) return 0; } return 0; } static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface, struct join_ibss_props* ibss_props, bool dofail) { int ifindex = if_nametoindex(interface); if (ifindex == 0) { return -1; } int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail); if (ret < 0) { return -1; } ret = set_interface_state(interface, 1); if (ret < 0) { return -1; } ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail); if (ret < 0) { return -1; } return 0; } static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8_t mac_addr[ETH_ALEN]) { struct genlmsghdr genlhdr; memset(&genlhdr, 0, sizeof(genlhdr)); genlhdr.cmd = HWSIM_CMD_NEW_RADIO; netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(nlmsg, HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0); netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN); int err = netlink_send(nlmsg, sock); if (err < 0) { } return err; } static void initialize_wifi_devices(void) { int rfkill = open("/dev/rfkill", O_RDWR); if (rfkill == -1) exit(1); struct rfkill_event event = {0}; event.type = RFKILL_TYPE_ALL; event.op = RFKILL_OP_CHANGE_ALL; if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event))) exit(1); close(rfkill); uint8_t mac_addr[6] = WIFI_MAC_BASE; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (sock < 0) exit(1); int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true); int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true); if (hwsim_family_id < 0 || nl80211_family_id < 0) exit(1); uint8_t ssid[] = WIFI_IBSS_SSID; uint8_t bssid[] = WIFI_IBSS_BSSID; struct join_ibss_props ibss_props = {.wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, .ssid_len = sizeof(ssid)}; for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { mac_addr[5] = device_id; int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr); if (ret < 0) exit(1); char interface[6] = "wlan0"; interface[4] += device_id; if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0) exit(1); } for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { char interface[6] = "wlan0"; interface[4] += device_id; int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true); if (ret < 0) exit(1); } close(sock); } #define DEV_IPV4 "172.20.20.%d" #define DEV_IPV6 "fe80::%02x" #define DEV_MAC 0x00aaaaaaaaaa static void netdevsim_add(unsigned int addr, unsigned int port_count) { write_file("/sys/bus/netdevsim/del_device", "%u", addr); if (write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count)) { char buf[32]; snprintf(buf, sizeof(buf), "netdevsim%d", addr); initialize_devlink_ports("netdevsim", buf, "netdevsim"); } } #define WG_GENL_NAME "wireguard" enum wg_cmd { WG_CMD_GET_DEVICE, WG_CMD_SET_DEVICE, }; enum wgdevice_attribute { WGDEVICE_A_UNSPEC, WGDEVICE_A_IFINDEX, WGDEVICE_A_IFNAME, WGDEVICE_A_PRIVATE_KEY, WGDEVICE_A_PUBLIC_KEY, WGDEVICE_A_FLAGS, WGDEVICE_A_LISTEN_PORT, WGDEVICE_A_FWMARK, WGDEVICE_A_PEERS, }; enum wgpeer_attribute { WGPEER_A_UNSPEC, WGPEER_A_PUBLIC_KEY, WGPEER_A_PRESHARED_KEY, WGPEER_A_FLAGS, WGPEER_A_ENDPOINT, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, WGPEER_A_LAST_HANDSHAKE_TIME, WGPEER_A_RX_BYTES, WGPEER_A_TX_BYTES, WGPEER_A_ALLOWEDIPS, WGPEER_A_PROTOCOL_VERSION, }; enum wgallowedip_attribute { WGALLOWEDIP_A_UNSPEC, WGALLOWEDIP_A_FAMILY, WGALLOWEDIP_A_IPADDR, WGALLOWEDIP_A_CIDR_MASK, }; static void netlink_wireguard_setup(void) { const char ifname_a[] = "wg0"; const char ifname_b[] = "wg1"; const char ifname_c[] = "wg2"; const char private_a[] = "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1" "\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43"; const char private_b[] = "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c" "\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e"; const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15" "\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42"; const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25" "\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c"; const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e" "\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b"; const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c" "\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22"; const uint16_t listen_a = 20001; const uint16_t listen_b = 20002; const uint16_t listen_c = 20003; const uint16_t af_inet = AF_INET; const uint16_t af_inet6 = AF_INET6; const struct sockaddr_in endpoint_b_v4 = { .sin_family = AF_INET, .sin_port = htons(listen_b), .sin_addr = {htonl(INADDR_LOOPBACK)}}; const struct sockaddr_in endpoint_c_v4 = { .sin_family = AF_INET, .sin_port = htons(listen_c), .sin_addr = {htonl(INADDR_LOOPBACK)}}; struct sockaddr_in6 endpoint_a_v6 = {.sin6_family = AF_INET6, .sin6_port = htons(listen_a)}; endpoint_a_v6.sin6_addr = in6addr_loopback; struct sockaddr_in6 endpoint_c_v6 = {.sin6_family = AF_INET6, .sin6_port = htons(listen_c)}; endpoint_c_v6.sin6_addr = in6addr_loopback; const struct in_addr first_half_v4 = {0}; const struct in_addr second_half_v4 = {(uint32_t)htonl(128 << 24)}; const struct in6_addr first_half_v6 = {{{0}}}; const struct in6_addr second_half_v6 = {{{0x80}}}; const uint8_t half_cidr = 1; const uint16_t persistent_keepalives[] = {1, 3, 7, 9, 14, 19}; struct genlmsghdr genlhdr = {.cmd = WG_CMD_SET_DEVICE, .version = 1}; int sock; int id, err; sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (sock == -1) { return; } id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true); if (id == -1) goto error; netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1); netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32); netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[0], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, sizeof(endpoint_c_v6)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[1], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); err = netlink_send(&nlmsg, sock); if (err < 0) { } netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1); netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32); netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, sizeof(endpoint_c_v4)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[3], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); err = netlink_send(&nlmsg, sock); if (err < 0) { } netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1); netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32); netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_c, 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[4], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[5], 2); netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_nest(&nlmsg, NLA_F_NESTED | 0); netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); netlink_done(&nlmsg); err = netlink_send(&nlmsg, sock); if (err < 0) { } error: close(sock); } static void initialize_netdevices(void) { char netdevsim[16]; sprintf(netdevsim, "netdevsim%d", (int)procid); struct { const char* type; const char* dev; } devtypes[] = { {"ip6gretap", "ip6gretap0"}, {"bridge", "bridge0"}, {"vcan", "vcan0"}, {"bond", "bond0"}, {"team", "team0"}, {"dummy", "dummy0"}, {"nlmon", "nlmon0"}, {"caif", "caif0"}, {"batadv", "batadv0"}, {"vxcan", "vxcan1"}, {"veth", 0}, {"wireguard", "wg0"}, {"wireguard", "wg1"}, {"wireguard", "wg2"}, }; const char* devmasters[] = {"bridge", "bond", "team", "batadv"}; struct { const char* name; int macsize; bool noipv6; } devices[] = { {"lo", ETH_ALEN}, {"sit0", 0}, {"bridge0", ETH_ALEN}, {"vcan0", 0, true}, {"tunl0", 0}, {"gre0", 0}, {"gretap0", ETH_ALEN}, {"ip_vti0", 0}, {"ip6_vti0", 0}, {"ip6tnl0", 0}, {"ip6gre0", 0}, {"ip6gretap0", ETH_ALEN}, {"erspan0", ETH_ALEN}, {"bond0", ETH_ALEN}, {"veth0", ETH_ALEN}, {"veth1", ETH_ALEN}, {"team0", ETH_ALEN}, {"veth0_to_bridge", ETH_ALEN}, {"veth1_to_bridge", ETH_ALEN}, {"veth0_to_bond", ETH_ALEN}, {"veth1_to_bond", ETH_ALEN}, {"veth0_to_team", ETH_ALEN}, {"veth1_to_team", ETH_ALEN}, {"veth0_to_hsr", ETH_ALEN}, {"veth1_to_hsr", ETH_ALEN}, {"hsr0", 0}, {"dummy0", ETH_ALEN}, {"nlmon0", 0}, {"vxcan0", 0, true}, {"vxcan1", 0, true}, {"caif0", ETH_ALEN}, {"batadv0", ETH_ALEN}, {netdevsim, ETH_ALEN}, {"xfrm0", ETH_ALEN}, {"veth0_virt_wifi", ETH_ALEN}, {"veth1_virt_wifi", ETH_ALEN}, {"virt_wifi0", ETH_ALEN}, {"veth0_vlan", ETH_ALEN}, {"veth1_vlan", ETH_ALEN}, {"vlan0", ETH_ALEN}, {"vlan1", ETH_ALEN}, {"macvlan0", ETH_ALEN}, {"macvlan1", ETH_ALEN}, {"ipvlan0", ETH_ALEN}, {"ipvlan1", ETH_ALEN}, {"veth0_macvtap", ETH_ALEN}, {"veth1_macvtap", ETH_ALEN}, {"macvtap0", ETH_ALEN}, {"macsec0", ETH_ALEN}, {"veth0_to_batadv", ETH_ALEN}, {"veth1_to_batadv", ETH_ALEN}, {"batadv_slave_0", ETH_ALEN}, {"batadv_slave_1", ETH_ALEN}, {"geneve0", ETH_ALEN}, {"geneve1", ETH_ALEN}, {"wg0", 0}, {"wg1", 0}, {"wg2", 0}, }; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) exit(1); unsigned i; for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev); for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { char master[32], slave0[32], veth0[32], slave1[32], veth1[32]; sprintf(slave0, "%s_slave_0", devmasters[i]); sprintf(veth0, "veth0_to_%s", devmasters[i]); netlink_add_veth(&nlmsg, sock, slave0, veth0); sprintf(slave1, "%s_slave_1", devmasters[i]); sprintf(veth1, "veth1_to_%s", devmasters[i]); netlink_add_veth(&nlmsg, sock, slave1, veth1); sprintf(master, "%s0", devmasters[i]); netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL); netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL); } netlink_add_xfrm(&nlmsg, sock, "xfrm0"); netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL); netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL); netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr"); netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr"); netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1"); netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL); netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL); netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi"); netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi"); netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan"); netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q)); netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD)); netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan"); netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan"); netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0); netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA); netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap"); netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap"); netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap"); char addr[32]; sprintf(addr, DEV_IPV4, 14 + 10); struct in_addr geneve_addr4; if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0) exit(1); struct in6_addr geneve_addr6; if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0) exit(1); netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0); netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6); netdevsim_add((int)procid, 4); netlink_wireguard_setup(); for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) { char addr[32]; sprintf(addr, DEV_IPV4, i + 10); netlink_add_addr4(&nlmsg, sock, devices[i].name, addr); if (!devices[i].noipv6) { sprintf(addr, DEV_IPV6, i + 10); netlink_add_addr6(&nlmsg, sock, devices[i].name, addr); } uint64_t macaddr = DEV_MAC + ((i + 10ull) << 40); netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL); } close(sock); } static void initialize_netdevices_init(void) { int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock == -1) exit(1); struct { const char* type; int macsize; bool noipv6; bool noup; } devtypes[] = { {"nr", 7, true}, {"rose", 5, true, true}, }; unsigned i; for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) { char dev[32], addr[32]; sprintf(dev, "%s%d", devtypes[i].type, (int)procid); sprintf(addr, "172.30.%d.%d", i, (int)procid + 1); netlink_add_addr4(&nlmsg, sock, dev, addr); if (!devtypes[i].noipv6) { sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1); netlink_add_addr6(&nlmsg, sock, dev, addr); } int macsize = devtypes[i].macsize; uint64_t macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) + (procid << (8 * (macsize - 1))); netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL); } close(sock); } static int read_tun(char* data, int size) { if (tunfd < 0) return -1; int rv = read(tunfd, data, size); if (rv < 0) { if (errno == EAGAIN || errno == EBADF || errno == EBADFD) return -1; exit(1); } return rv; } static void flush_tun() { char data[1000]; while (read_tun(&data[0], sizeof(data)) != -1) { } } #define MAX_FDS 30 static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2) { if (a0 == 0xc || a0 == 0xb) { char buf[128]; sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1, (uint8_t)a2); return open(buf, O_RDWR, 0); } else { unsigned long nb = a1; char buf[1024]; char* hash; strncpy(buf, (char*)a0, sizeof(buf) - 1); buf[sizeof(buf) - 1] = 0; while ((hash = strchr(buf, '#'))) { *hash = '0' + (char)(nb % 10); nb /= 10; } return open(buf, a2 & ~O_CREAT, 0); } } #define BTPROTO_HCI 1 #define ACL_LINK 1 #define SCAN_PAGE 2 typedef struct { uint8_t b[6]; } __attribute__((packed)) bdaddr_t; #define HCI_COMMAND_PKT 1 #define HCI_EVENT_PKT 4 #define HCI_VENDOR_PKT 0xff struct hci_command_hdr { uint16_t opcode; uint8_t plen; } __attribute__((packed)); struct hci_event_hdr { uint8_t evt; uint8_t plen; } __attribute__((packed)); #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { uint8_t status; uint16_t handle; bdaddr_t bdaddr; uint8_t link_type; uint8_t encr_mode; } __attribute__((packed)); #define HCI_EV_CONN_REQUEST 0x04 struct hci_ev_conn_request { bdaddr_t bdaddr; uint8_t dev_class[3]; uint8_t link_type; } __attribute__((packed)); #define HCI_EV_REMOTE_FEATURES 0x0b struct hci_ev_remote_features { uint8_t status; uint16_t handle; uint8_t features[8]; } __attribute__((packed)); #define HCI_EV_CMD_COMPLETE 0x0e struct hci_ev_cmd_complete { uint8_t ncmd; uint16_t opcode; } __attribute__((packed)); #define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a #define HCI_OP_READ_BUFFER_SIZE 0x1005 struct hci_rp_read_buffer_size { uint8_t status; uint16_t acl_mtu; uint8_t sco_mtu; uint16_t acl_max_pkt; uint16_t sco_max_pkt; } __attribute__((packed)); #define HCI_OP_READ_BD_ADDR 0x1009 struct hci_rp_read_bd_addr { uint8_t status; bdaddr_t bdaddr; } __attribute__((packed)); #define HCI_EV_LE_META 0x3e struct hci_ev_le_meta { uint8_t subevent; } __attribute__((packed)); #define HCI_EV_LE_CONN_COMPLETE 0x01 struct hci_ev_le_conn_complete { uint8_t status; uint16_t handle; uint8_t role; uint8_t bdaddr_type; bdaddr_t bdaddr; uint16_t interval; uint16_t latency; uint16_t supervision_timeout; uint8_t clk_accurancy; } __attribute__((packed)); struct hci_dev_req { uint16_t dev_id; uint32_t dev_opt; }; struct vhci_vendor_pkt_request { uint8_t type; uint8_t opcode; } __attribute__((packed)); struct vhci_pkt { uint8_t type; union { struct { uint8_t opcode; uint16_t id; } __attribute__((packed)) vendor_pkt; struct hci_command_hdr command_hdr; }; } __attribute__((packed)); #define HCIDEVUP _IOW('H', 201, int) #define HCISETSCAN _IOW('H', 221, int) static int vhci_fd = -1; static void rfkill_unblock_all() { int fd = open("/dev/rfkill", O_WRONLY); if (fd < 0) exit(1); struct rfkill_event event = {0}; event.idx = 0; event.type = RFKILL_TYPE_ALL; event.op = RFKILL_OP_CHANGE_ALL; event.soft = 0; event.hard = 0; if (write(fd, &event, sizeof(event)) < 0) exit(1); close(fd); } static void hci_send_event_packet(int fd, uint8_t evt, void* data, size_t data_len) { struct iovec iv[3]; struct hci_event_hdr hdr; hdr.evt = evt; hdr.plen = data_len; uint8_t type = HCI_EVENT_PKT; iv[0].iov_base = &type; iv[0].iov_len = sizeof(type); iv[1].iov_base = &hdr; iv[1].iov_len = sizeof(hdr); iv[2].iov_base = data; iv[2].iov_len = data_len; if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) exit(1); } static void hci_send_event_cmd_complete(int fd, uint16_t opcode, void* data, size_t data_len) { struct iovec iv[4]; struct hci_event_hdr hdr; hdr.evt = HCI_EV_CMD_COMPLETE; hdr.plen = sizeof(struct hci_ev_cmd_complete) + data_len; struct hci_ev_cmd_complete evt_hdr; evt_hdr.ncmd = 1; evt_hdr.opcode = opcode; uint8_t type = HCI_EVENT_PKT; iv[0].iov_base = &type; iv[0].iov_len = sizeof(type); iv[1].iov_base = &hdr; iv[1].iov_len = sizeof(hdr); iv[2].iov_base = &evt_hdr; iv[2].iov_len = sizeof(evt_hdr); iv[3].iov_base = data; iv[3].iov_len = data_len; if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) exit(1); } static bool process_command_pkt(int fd, char* buf, ssize_t buf_size) { struct hci_command_hdr* hdr = (struct hci_command_hdr*)buf; if (buf_size < (ssize_t)sizeof(struct hci_command_hdr) || hdr->plen != buf_size - sizeof(struct hci_command_hdr)) exit(1); switch (hdr->opcode) { case HCI_OP_WRITE_SCAN_ENABLE: { uint8_t status = 0; hci_send_event_cmd_complete(fd, hdr->opcode, &status, sizeof(status)); return true; } case HCI_OP_READ_BD_ADDR: { struct hci_rp_read_bd_addr rp = {0}; rp.status = 0; memset(&rp.bdaddr, 0xaa, 6); hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp)); return false; } case HCI_OP_READ_BUFFER_SIZE: { struct hci_rp_read_buffer_size rp = {0}; rp.status = 0; rp.acl_mtu = 1021; rp.sco_mtu = 96; rp.acl_max_pkt = 4; rp.sco_max_pkt = 6; hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp)); return false; } } char dummy[0xf9] = {0}; hci_send_event_cmd_complete(fd, hdr->opcode, dummy, sizeof(dummy)); return false; } static void* event_thread(void* arg) { while (1) { char buf[1024] = {0}; ssize_t buf_size = read(vhci_fd, buf, sizeof(buf)); if (buf_size < 0) exit(1); if (buf_size > 0 && buf[0] == HCI_COMMAND_PKT) { if (process_command_pkt(vhci_fd, buf + 1, buf_size - 1)) break; } } return NULL; } #define HCI_HANDLE_1 200 #define HCI_HANDLE_2 201 #define HCI_PRIMARY 0 #define HCI_OP_RESET 0x0c03 static void initialize_vhci() { int hci_sock = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI); if (hci_sock < 0) exit(1); vhci_fd = open("/dev/vhci", O_RDWR); if (vhci_fd == -1) exit(1); const int kVhciFd = 202; if (dup2(vhci_fd, kVhciFd) < 0) exit(1); close(vhci_fd); vhci_fd = kVhciFd; struct vhci_vendor_pkt_request vendor_pkt_req = {HCI_VENDOR_PKT, HCI_PRIMARY}; if (write(vhci_fd, &vendor_pkt_req, sizeof(vendor_pkt_req)) != sizeof(vendor_pkt_req)) exit(1); struct vhci_pkt vhci_pkt; if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt)) exit(1); if (vhci_pkt.type == HCI_COMMAND_PKT && vhci_pkt.command_hdr.opcode == HCI_OP_RESET) { char response[1] = {0}; hci_send_event_cmd_complete(vhci_fd, HCI_OP_RESET, response, sizeof(response)); if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt)) exit(1); } if (vhci_pkt.type != HCI_VENDOR_PKT) exit(1); int dev_id = vhci_pkt.vendor_pkt.id; pthread_t th; if (pthread_create(&th, NULL, event_thread, NULL)) exit(1); int ret = ioctl(hci_sock, HCIDEVUP, dev_id); if (ret) { if (errno == ERFKILL) { rfkill_unblock_all(); ret = ioctl(hci_sock, HCIDEVUP, dev_id); } if (ret && errno != EALREADY) exit(1); } struct hci_dev_req dr = {0}; dr.dev_id = dev_id; dr.dev_opt = SCAN_PAGE; if (ioctl(hci_sock, HCISETSCAN, &dr)) exit(1); struct hci_ev_conn_request request; memset(&request, 0, sizeof(request)); memset(&request.bdaddr, 0xaa, 6); *(uint8_t*)&request.bdaddr.b[5] = 0x10; request.link_type = ACL_LINK; hci_send_event_packet(vhci_fd, HCI_EV_CONN_REQUEST, &request, sizeof(request)); struct hci_ev_conn_complete complete; memset(&complete, 0, sizeof(complete)); complete.status = 0; complete.handle = HCI_HANDLE_1; memset(&complete.bdaddr, 0xaa, 6); *(uint8_t*)&complete.bdaddr.b[5] = 0x10; complete.link_type = ACL_LINK; complete.encr_mode = 0; hci_send_event_packet(vhci_fd, HCI_EV_CONN_COMPLETE, &complete, sizeof(complete)); struct hci_ev_remote_features features; memset(&features, 0, sizeof(features)); features.status = 0; features.handle = HCI_HANDLE_1; hci_send_event_packet(vhci_fd, HCI_EV_REMOTE_FEATURES, &features, sizeof(features)); struct { struct hci_ev_le_meta le_meta; struct hci_ev_le_conn_complete le_conn; } le_conn; memset(&le_conn, 0, sizeof(le_conn)); le_conn.le_meta.subevent = HCI_EV_LE_CONN_COMPLETE; memset(&le_conn.le_conn.bdaddr, 0xaa, 6); *(uint8_t*)&le_conn.le_conn.bdaddr.b[5] = 0x11; le_conn.le_conn.role = 1; le_conn.le_conn.handle = HCI_HANDLE_2; hci_send_event_packet(vhci_fd, HCI_EV_LE_META, &le_conn, sizeof(le_conn)); pthread_join(th, NULL); close(hci_sock); } #define noinline __attribute__((noinline)) #define __no_stack_protector #define __addrspace_guest #define GUEST_CODE \ __attribute__((section("guest"))) __no_stack_protector __addrspace_guest extern char *__start_guest, *__stop_guest; #define X86_ADDR_TEXT 0x0000 #define X86_ADDR_PD_IOAPIC 0x0000 #define X86_ADDR_GDT 0x1000 #define X86_ADDR_LDT 0x1800 #define X86_ADDR_PML4 0x2000 #define X86_ADDR_PDP 0x3000 #define X86_ADDR_PD 0x4000 #define X86_ADDR_STACK0 0x0f80 #define X86_ADDR_VAR_HLT 0x2800 #define X86_ADDR_VAR_SYSRET 0x2808 #define X86_ADDR_VAR_SYSEXIT 0x2810 #define X86_ADDR_VAR_IDT 0x3800 #define X86_ADDR_VAR_TSS64 0x3a00 #define X86_ADDR_VAR_TSS64_CPL3 0x3c00 #define X86_ADDR_VAR_TSS16 0x3d00 #define X86_ADDR_VAR_TSS16_2 0x3e00 #define X86_ADDR_VAR_TSS16_CPL3 0x3f00 #define X86_ADDR_VAR_TSS32 0x4800 #define X86_ADDR_VAR_TSS32_2 0x4a00 #define X86_ADDR_VAR_TSS32_CPL3 0x4c00 #define X86_ADDR_VAR_TSS32_VM86 0x4e00 #define X86_ADDR_VAR_VMXON_PTR 0x5f00 #define X86_ADDR_VAR_VMCS_PTR 0x5f08 #define X86_ADDR_VAR_VMEXIT_PTR 0x5f10 #define X86_ADDR_VAR_VMWRITE_FLD 0x5f18 #define X86_ADDR_VAR_VMWRITE_VAL 0x5f20 #define X86_ADDR_VAR_VMXON 0x6000 #define X86_ADDR_VAR_VMCS 0x7000 #define X86_ADDR_VAR_VMEXIT_CODE 0x9000 #define X86_ADDR_VAR_USER_CODE 0x9100 #define X86_ADDR_VAR_USER_CODE2 0x9120 #define X86_SYZOS_ADDR_ZERO 0x0 #define X86_SYZOS_ADDR_GDT 0x1000 #define X86_SYZOS_ADDR_PML4 0x2000 #define X86_SYZOS_ADDR_PDP 0x3000 #define X86_SYZOS_ADDR_PT_POOL 0x5000 #define X86_SYZOS_ADDR_VAR_IDT 0x25000 #define X86_SYZOS_ADDR_VAR_TSS 0x26000 #define X86_SYZOS_ADDR_SMRAM 0x30000 #define X86_SYZOS_ADDR_EXIT 0x40000 #define X86_SYZOS_ADDR_UEXIT (X86_SYZOS_ADDR_EXIT + 256) #define X86_SYZOS_ADDR_DIRTY_PAGES 0x41000 #define X86_SYZOS_ADDR_USER_CODE 0x50000 #define SYZOS_ADDR_EXECUTOR_CODE 0x54000 #define X86_SYZOS_ADDR_SCRATCH_CODE 0x58000 #define X86_SYZOS_ADDR_STACK_BOTTOM 0x60000 #define X86_SYZOS_ADDR_STACK0 0x60f80 #define X86_SYZOS_PER_VCPU_REGIONS_BASE 0x70000 #define X86_SYZOS_L1_VCPU_REGION_SIZE 0x40000 #define X86_SYZOS_L1_VCPU_OFFSET_VM_ARCH_SPECIFIC 0x0000 #define X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA 0x1000 #define X86_SYZOS_L2_VM_REGION_SIZE 0x8000 #define X86_SYZOS_L2_VM_OFFSET_VMCS_VMCB 0x0000 #define X86_SYZOS_L2_VM_OFFSET_VM_STACK 0x1000 #define X86_SYZOS_L2_VM_OFFSET_VM_CODE 0x2000 #define X86_SYZOS_L2_VM_OFFSET_VM_PGTABLE 0x3000 #define X86_SYZOS_L2_VM_OFFSET_MSR_BITMAP 0x7000 #define X86_SYZOS_ADDR_UNUSED 0x200000 #define X86_SYZOS_ADDR_IOAPIC 0xfec00000 #define X86_SYZOS_ADDR_VMCS_VMCB(cpu, vm) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \ X86_SYZOS_L2_VM_OFFSET_VMCS_VMCB) #define X86_SYZOS_ADDR_VM_CODE(cpu, vm) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \ X86_SYZOS_L2_VM_OFFSET_VM_CODE) #define X86_SYZOS_ADDR_VM_STACK(cpu, vm) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \ X86_SYZOS_L2_VM_OFFSET_VM_STACK) #define X86_SYZOS_ADDR_VM_PGTABLE(cpu, vm) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \ X86_SYZOS_L2_VM_OFFSET_VM_PGTABLE) #define X86_SYZOS_ADDR_MSR_BITMAP(cpu, vm) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA + (vm) * X86_SYZOS_L2_VM_REGION_SIZE + \ X86_SYZOS_L2_VM_OFFSET_MSR_BITMAP) #define X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu) \ (X86_SYZOS_PER_VCPU_REGIONS_BASE + (cpu) * X86_SYZOS_L1_VCPU_REGION_SIZE + \ X86_SYZOS_L1_VCPU_OFFSET_VM_ARCH_SPECIFIC) #define X86_SYZOS_SEL_CODE 0x8 #define X86_SYZOS_SEL_DATA 0x10 #define X86_SYZOS_SEL_TSS64 0x18 #define X86_CR0_PE 1ULL #define X86_CR0_MP (1ULL << 1) #define X86_CR0_EM (1ULL << 2) #define X86_CR0_TS (1ULL << 3) #define X86_CR0_ET (1ULL << 4) #define X86_CR0_NE (1ULL << 5) #define X86_CR0_WP (1ULL << 16) #define X86_CR0_AM (1ULL << 18) #define X86_CR0_NW (1ULL << 29) #define X86_CR0_CD (1ULL << 30) #define X86_CR0_PG (1ULL << 31) #define X86_CR4_VME 1ULL #define X86_CR4_PVI (1ULL << 1) #define X86_CR4_TSD (1ULL << 2) #define X86_CR4_DE (1ULL << 3) #define X86_CR4_PSE (1ULL << 4) #define X86_CR4_PAE (1ULL << 5) #define X86_CR4_MCE (1ULL << 6) #define X86_CR4_PGE (1ULL << 7) #define X86_CR4_PCE (1ULL << 8) #define X86_CR4_OSFXSR (1ULL << 8) #define X86_CR4_OSXMMEXCPT (1ULL << 10) #define X86_CR4_UMIP (1ULL << 11) #define X86_CR4_VMXE (1ULL << 13) #define X86_CR4_SMXE (1ULL << 14) #define X86_CR4_FSGSBASE (1ULL << 16) #define X86_CR4_PCIDE (1ULL << 17) #define X86_CR4_OSXSAVE (1ULL << 18) #define X86_CR4_SMEP (1ULL << 20) #define X86_CR4_SMAP (1ULL << 21) #define X86_CR4_PKE (1ULL << 22) #define X86_EFER_SCE 1ULL #define X86_EFER_LME (1ULL << 8) #define X86_EFER_LMA (1ULL << 10) #define X86_EFER_NXE (1ULL << 11) #define X86_EFER_SVME (1ULL << 12) #define X86_EFER_LMSLE (1ULL << 13) #define X86_EFER_FFXSR (1ULL << 14) #define X86_EFER_TCE (1ULL << 15) #define X86_PDE32_PRESENT 1UL #define X86_PDE32_RW (1UL << 1) #define X86_PDE32_USER (1UL << 2) #define X86_PDE32_PS (1UL << 7) #define X86_PDE64_PRESENT 1 #define X86_PDE64_RW (1ULL << 1) #define X86_PDE64_USER (1ULL << 2) #define X86_PDE64_ACCESSED (1ULL << 5) #define X86_PDE64_DIRTY (1ULL << 6) #define X86_PDE64_PS (1ULL << 7) #define X86_PDE64_G (1ULL << 8) #define EPT_MEMTYPE_WB (6ULL << 3) #define EPT_ACCESSED (1ULL << 8) #define EPT_DIRTY (1ULL << 9) #define X86_SEL_LDT (1 << 3) #define X86_SEL_CS16 (2 << 3) #define X86_SEL_DS16 (3 << 3) #define X86_SEL_CS16_CPL3 ((4 << 3) + 3) #define X86_SEL_DS16_CPL3 ((5 << 3) + 3) #define X86_SEL_CS32 (6 << 3) #define X86_SEL_DS32 (7 << 3) #define X86_SEL_CS32_CPL3 ((8 << 3) + 3) #define X86_SEL_DS32_CPL3 ((9 << 3) + 3) #define X86_SEL_CS64 (10 << 3) #define X86_SEL_DS64 (11 << 3) #define X86_SEL_CS64_CPL3 ((12 << 3) + 3) #define X86_SEL_DS64_CPL3 ((13 << 3) + 3) #define X86_SEL_CGATE16 (14 << 3) #define X86_SEL_TGATE16 (15 << 3) #define X86_SEL_CGATE32 (16 << 3) #define X86_SEL_TGATE32 (17 << 3) #define X86_SEL_CGATE64 (18 << 3) #define X86_SEL_CGATE64_HI (19 << 3) #define X86_SEL_TSS16 (20 << 3) #define X86_SEL_TSS16_2 (21 << 3) #define X86_SEL_TSS16_CPL3 ((22 << 3) + 3) #define X86_SEL_TSS32 (23 << 3) #define X86_SEL_TSS32_2 (24 << 3) #define X86_SEL_TSS32_CPL3 ((25 << 3) + 3) #define X86_SEL_TSS32_VM86 (26 << 3) #define X86_SEL_TSS64 (27 << 3) #define X86_SEL_TSS64_HI (28 << 3) #define X86_SEL_TSS64_CPL3 ((29 << 3) + 3) #define X86_SEL_TSS64_CPL3_HI (30 << 3) #define X86_MSR_IA32_FEATURE_CONTROL 0x3a #define X86_MSR_IA32_VMX_BASIC 0x480 #define X86_MSR_IA32_SMBASE 0x9e #define X86_MSR_IA32_SYSENTER_CS 0x174 #define X86_MSR_IA32_SYSENTER_ESP 0x175 #define X86_MSR_IA32_SYSENTER_EIP 0x176 #define X86_MSR_IA32_CR_PAT 0x277 #define X86_MSR_CORE_PERF_GLOBAL_CTRL 0x38f #define X86_MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x48d #define X86_MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x48e #define X86_MSR_IA32_VMX_TRUE_EXIT_CTLS 0x48f #define X86_MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x490 #define X86_MSR_IA32_EFER 0xc0000080 #define X86_MSR_IA32_STAR 0xC0000081 #define X86_MSR_IA32_LSTAR 0xC0000082 #define X86_MSR_FS_BASE 0xc0000100 #define X86_MSR_GS_BASE 0xc0000101 #define X86_MSR_VM_HSAVE_PA 0xc0010117 #define X86_MSR_IA32_VMX_PROCBASED_CTLS2 0x48B #define RFLAGS_1_BIT (1ULL << 1) #define CPU_BASED_HLT_EXITING (1U << 7) #define CPU_BASED_RDTSC_EXITING (1U << 12) #define AR_TSS_AVAILABLE 0x0089 #define SVM_ATTR_LDTR_UNUSABLE 0x0000 #define VMX_AR_TSS_BUSY 0x008b #define VMX_AR_TSS_AVAILABLE 0x0089 #define VMX_AR_LDTR_UNUSABLE 0x10000 #define VM_ENTRY_IA32E_MODE (1U << 9) #define SECONDARY_EXEC_ENABLE_EPT (1U << 1) #define SECONDARY_EXEC_ENABLE_RDTSCP (1U << 3) #define VM_EXIT_HOST_ADDR_SPACE_SIZE (1U << 9) #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS (1U << 31) #define VMX_ACCESS_RIGHTS_P (1 << 7) #define VMX_ACCESS_RIGHTS_S (1 << 4) #define VMX_ACCESS_RIGHTS_TYPE_A (1 << 0) #define VMX_ACCESS_RIGHTS_TYPE_RW (1 << 1) #define VMX_ACCESS_RIGHTS_TYPE_E (1 << 3) #define VMX_ACCESS_RIGHTS_G (1 << 15) #define VMX_ACCESS_RIGHTS_DB (1 << 14) #define VMX_ACCESS_RIGHTS_L (1 << 13) #define VMX_AR_64BIT_DATA_STACK \ (VMX_ACCESS_RIGHTS_P | VMX_ACCESS_RIGHTS_S | VMX_ACCESS_RIGHTS_TYPE_RW | \ VMX_ACCESS_RIGHTS_TYPE_A | VMX_ACCESS_RIGHTS_G | VMX_ACCESS_RIGHTS_DB) #define VMX_AR_64BIT_CODE \ (VMX_ACCESS_RIGHTS_P | VMX_ACCESS_RIGHTS_S | VMX_ACCESS_RIGHTS_TYPE_E | \ VMX_ACCESS_RIGHTS_TYPE_RW | VMX_ACCESS_RIGHTS_TYPE_A | \ VMX_ACCESS_RIGHTS_G | VMX_ACCESS_RIGHTS_L) #define VMCS_VIRTUAL_PROCESSOR_ID 0x00000000 #define VMCS_POSTED_INTR_NV 0x00000002 #define VMCS_MSR_BITMAP 0x00002004 #define VMCS_VMREAD_BITMAP 0x00002006 #define VMCS_VMWRITE_BITMAP 0x00002008 #define VMCS_EPT_POINTER 0x0000201a #define VMCS_LINK_POINTER 0x00002800 #define VMCS_PIN_BASED_VM_EXEC_CONTROL 0x00004000 #define VMCS_CPU_BASED_VM_EXEC_CONTROL 0x00004002 #define VMCS_EXCEPTION_BITMAP 0x00004004 #define VMCS_PAGE_FAULT_ERROR_CODE_MASK 0x00004006 #define VMCS_PAGE_FAULT_ERROR_CODE_MATCH 0x00004008 #define VMCS_CR3_TARGET_COUNT 0x0000400a #define VMCS_VM_EXIT_CONTROLS 0x0000400c #define VMCS_VM_EXIT_MSR_STORE_COUNT 0x0000400e #define VMCS_VM_EXIT_MSR_LOAD_COUNT 0x00004010 #define VMCS_VM_ENTRY_CONTROLS 0x00004012 #define VMCS_VM_ENTRY_MSR_LOAD_COUNT 0x00004014 #define VMCS_VM_ENTRY_INTR_INFO_FIELD 0x00004016 #define VMCS_TPR_THRESHOLD 0x0000401c #define VMCS_SECONDARY_VM_EXEC_CONTROL 0x0000401e #define VMCS_VM_INSTRUCTION_ERROR 0x00004400 #define VMCS_VM_EXIT_REASON 0x00004402 #define VMCS_VMX_PREEMPTION_TIMER_VALUE 0x0000482e #define VMCS_CR0_GUEST_HOST_MASK 0x00006000 #define VMCS_CR4_GUEST_HOST_MASK 0x00006002 #define VMCS_CR0_READ_SHADOW 0x00006004 #define VMCS_CR4_READ_SHADOW 0x00006006 #define VMCS_HOST_ES_SELECTOR 0x00000c00 #define VMCS_HOST_CS_SELECTOR 0x00000c02 #define VMCS_HOST_SS_SELECTOR 0x00000c04 #define VMCS_HOST_DS_SELECTOR 0x00000c06 #define VMCS_HOST_FS_SELECTOR 0x00000c08 #define VMCS_HOST_GS_SELECTOR 0x00000c0a #define VMCS_HOST_TR_SELECTOR 0x00000c0c #define VMCS_HOST_IA32_PAT 0x00002c00 #define VMCS_HOST_IA32_EFER 0x00002c02 #define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002c04 #define VMCS_HOST_IA32_SYSENTER_CS 0x00004c00 #define VMCS_HOST_CR0 0x00006c00 #define VMCS_HOST_CR3 0x00006c02 #define VMCS_HOST_CR4 0x00006c04 #define VMCS_HOST_FS_BASE 0x00006c06 #define VMCS_HOST_GS_BASE 0x00006c08 #define VMCS_HOST_TR_BASE 0x00006c0a #define VMCS_HOST_GDTR_BASE 0x00006c0c #define VMCS_HOST_IDTR_BASE 0x00006c0e #define VMCS_HOST_IA32_SYSENTER_ESP 0x00006c10 #define VMCS_HOST_IA32_SYSENTER_EIP 0x00006c12 #define VMCS_HOST_RSP 0x00006c14 #define VMCS_HOST_RIP 0x00006c16 #define VMCS_GUEST_INTR_STATUS 0x00000810 #define VMCS_GUEST_PML_INDEX 0x00000812 #define VMCS_GUEST_IA32_DEBUGCTL 0x00002802 #define VMCS_GUEST_IA32_PAT 0x00002804 #define VMCS_GUEST_IA32_EFER 0x00002806 #define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808 #define VMCS_GUEST_ES_SELECTOR 0x00000800 #define VMCS_GUEST_CS_SELECTOR 0x00000802 #define VMCS_GUEST_SS_SELECTOR 0x00000804 #define VMCS_GUEST_DS_SELECTOR 0x00000806 #define VMCS_GUEST_FS_SELECTOR 0x00000808 #define VMCS_GUEST_GS_SELECTOR 0x0000080a #define VMCS_GUEST_LDTR_SELECTOR 0x0000080c #define VMCS_GUEST_TR_SELECTOR 0x0000080e #define VMCS_GUEST_ES_LIMIT 0x00004800 #define VMCS_GUEST_CS_LIMIT 0x00004802 #define VMCS_GUEST_SS_LIMIT 0x00004804 #define VMCS_GUEST_DS_LIMIT 0x00004806 #define VMCS_GUEST_FS_LIMIT 0x00004808 #define VMCS_GUEST_GS_LIMIT 0x0000480a #define VMCS_GUEST_LDTR_LIMIT 0x0000480c #define VMCS_GUEST_TR_LIMIT 0x0000480e #define VMCS_GUEST_GDTR_LIMIT 0x00004810 #define VMCS_GUEST_IDTR_LIMIT 0x00004812 #define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814 #define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816 #define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818 #define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481a #define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481c #define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481e #define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820 #define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822 #define VMCS_GUEST_ACTIVITY_STATE 0x00004824 #define VMCS_GUEST_INTERRUPTIBILITY_INFO 0x00004826 #define VMCS_GUEST_SYSENTER_CS 0x0000482a #define VMCS_GUEST_CR0 0x00006800 #define VMCS_GUEST_CR3 0x00006802 #define VMCS_GUEST_CR4 0x00006804 #define VMCS_GUEST_ES_BASE 0x00006806 #define VMCS_GUEST_CS_BASE 0x00006808 #define VMCS_GUEST_SS_BASE 0x0000680a #define VMCS_GUEST_DS_BASE 0x0000680c #define VMCS_GUEST_FS_BASE 0x0000680e #define VMCS_GUEST_GS_BASE 0x00006810 #define VMCS_GUEST_LDTR_BASE 0x00006812 #define VMCS_GUEST_TR_BASE 0x00006814 #define VMCS_GUEST_GDTR_BASE 0x00006816 #define VMCS_GUEST_IDTR_BASE 0x00006818 #define VMCS_GUEST_DR7 0x0000681a #define VMCS_GUEST_RSP 0x0000681c #define VMCS_GUEST_RIP 0x0000681e #define VMCS_GUEST_RFLAGS 0x00006820 #define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822 #define VMCS_GUEST_SYSENTER_ESP 0x00006824 #define VMCS_GUEST_SYSENTER_EIP 0x00006826 #define VMCB_CTRL_INTERCEPT_VEC3 0x0c #define VMCB_CTRL_INTERCEPT_VEC3_ALL (0xffffffff) #define VMCB_CTRL_INTERCEPT_VEC4 0x10 #define VMCB_CTRL_INTERCEPT_VEC4_ALL (0x3ff) #define VMCB_CTRL_ASID 0x058 #define VMCB_EXIT_CODE 0x070 #define VMCB_CTRL_NP_ENABLE 0x090 #define VMCB_CTRL_NPT_ENABLE_BIT 0 #define VMCB_CTRL_N_CR3 0x0b0 #define VMCB_GUEST_ES_SEL 0x400 #define VMCB_GUEST_ES_ATTR 0x402 #define VMCB_GUEST_ES_LIM 0x404 #define VMCB_GUEST_ES_BASE 0x408 #define VMCB_GUEST_CS_SEL 0x410 #define VMCB_GUEST_CS_ATTR 0x412 #define VMCB_GUEST_CS_LIM 0x414 #define VMCB_GUEST_CS_BASE 0x418 #define VMCB_GUEST_SS_SEL 0x420 #define VMCB_GUEST_SS_ATTR 0x422 #define VMCB_GUEST_SS_LIM 0x424 #define VMCB_GUEST_SS_BASE 0x428 #define VMCB_GUEST_DS_SEL 0x430 #define VMCB_GUEST_DS_ATTR 0x432 #define VMCB_GUEST_DS_LIM 0x434 #define VMCB_GUEST_DS_BASE 0x438 #define VMCB_GUEST_FS_SEL 0x440 #define VMCB_GUEST_FS_ATTR 0x442 #define VMCB_GUEST_FS_LIM 0x444 #define VMCB_GUEST_FS_BASE 0x448 #define VMCB_GUEST_GS_SEL 0x450 #define VMCB_GUEST_GS_ATTR 0x452 #define VMCB_GUEST_GS_LIM 0x454 #define VMCB_GUEST_GS_BASE 0x458 #define VMCB_GUEST_IDTR_SEL 0x480 #define VMCB_GUEST_IDTR_ATTR 0x482 #define VMCB_GUEST_IDTR_LIM 0x484 #define VMCB_GUEST_IDTR_BASE 0x488 #define VMCB_GUEST_GDTR_SEL 0x460 #define VMCB_GUEST_GDTR_ATTR 0x462 #define VMCB_GUEST_GDTR_LIM 0x464 #define VMCB_GUEST_GDTR_BASE 0x468 #define VMCB_GUEST_LDTR_SEL 0x470 #define VMCB_GUEST_LDTR_ATTR 0x472 #define VMCB_GUEST_LDTR_LIM 0x474 #define VMCB_GUEST_LDTR_BASE 0x478 #define VMCB_GUEST_TR_SEL 0x490 #define VMCB_GUEST_TR_ATTR 0x492 #define VMCB_GUEST_TR_LIM 0x494 #define VMCB_GUEST_TR_BASE 0x498 #define VMCB_GUEST_EFER 0x4d0 #define VMCB_GUEST_CR4 0x548 #define VMCB_GUEST_CR3 0x550 #define VMCB_GUEST_CR0 0x558 #define VMCB_GUEST_DR7 0x560 #define VMCB_GUEST_DR6 0x568 #define VMCB_GUEST_RFLAGS 0x570 #define VMCB_GUEST_RIP 0x578 #define VMCB_GUEST_RSP 0x5d8 #define VMCB_GUEST_PAT 0x668 #define VMCB_GUEST_DEBUGCTL 0x670 #define SVM_ATTR_G (1 << 15) #define SVM_ATTR_DB (1 << 14) #define SVM_ATTR_L (1 << 13) #define SVM_ATTR_P (1 << 7) #define SVM_ATTR_S (1 << 4) #define SVM_ATTR_TYPE_A (1 << 0) #define SVM_ATTR_TYPE_RW (1 << 1) #define SVM_ATTR_TYPE_E (1 << 3) #define SVM_ATTR_64BIT_CODE \ (SVM_ATTR_P | SVM_ATTR_S | SVM_ATTR_TYPE_E | SVM_ATTR_TYPE_RW | \ SVM_ATTR_TYPE_A | SVM_ATTR_L | SVM_ATTR_G) #define SVM_ATTR_64BIT_DATA \ (SVM_ATTR_P | SVM_ATTR_S | SVM_ATTR_TYPE_RW | SVM_ATTR_TYPE_A | \ SVM_ATTR_DB | SVM_ATTR_G) #define X86_NEXT_INSN $0xbadc0de #define X86_PREFIX_SIZE 0xba1d #define KVM_MAX_VCPU 4 #define KVM_PAGE_SIZE (1 << 12) #define KVM_GUEST_PAGES 1024 #define KVM_GUEST_MEM_SIZE (KVM_GUEST_PAGES * KVM_PAGE_SIZE) #define SZ_4K 0x00001000 #define SZ_64K 0x00010000 #define GENMASK_ULL(h, l) \ (((~0ULL) - (1ULL << (l)) + 1ULL) & (~0ULL >> (63 - (h)))) extern char* __start_guest; static inline uintptr_t executor_fn_guest_addr(void* fn) { volatile uintptr_t start = (uintptr_t)&__start_guest; volatile uintptr_t offset = SYZOS_ADDR_EXECUTOR_CODE; return (uintptr_t)fn - start + offset; } typedef enum { SYZOS_API_UEXIT = 0, SYZOS_API_CODE = 10, SYZOS_API_CPUID = 100, SYZOS_API_WRMSR = 101, SYZOS_API_RDMSR = 102, SYZOS_API_WR_CRN = 103, SYZOS_API_WR_DRN = 104, SYZOS_API_IN_DX = 105, SYZOS_API_OUT_DX = 106, SYZOS_API_SET_IRQ_HANDLER = 200, SYZOS_API_ENABLE_NESTED = 300, SYZOS_API_NESTED_CREATE_VM = 301, SYZOS_API_NESTED_LOAD_CODE = 302, SYZOS_API_NESTED_VMLAUNCH = 303, SYZOS_API_NESTED_VMRESUME = 304, SYZOS_API_NESTED_INTEL_VMWRITE_MASK = 340, SYZOS_API_NESTED_AMD_VMCB_WRITE_MASK = 380, SYZOS_API_STOP, } syzos_api_id; struct api_call_header { uint64_t call; uint64_t size; }; struct api_call_uexit { struct api_call_header header; uint64_t exit_code; }; struct api_call_code { struct api_call_header header; uint8_t insns[]; }; struct api_call_nested_load_code { struct api_call_header header; uint64_t vm_id; uint8_t insns[]; }; struct api_call_cpuid { struct api_call_header header; uint32_t eax; uint32_t ecx; }; struct api_call_1 { struct api_call_header header; uint64_t arg; }; struct api_call_2 { struct api_call_header header; uint64_t args[2]; }; struct api_call_3 { struct api_call_header header; uint64_t args[3]; }; struct api_call_5 { struct api_call_header header; uint64_t args[5]; }; struct l2_guest_regs { uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; uint64_t r8, r9, r10, r11, r12, r13, r14, r15; }; GUEST_CODE static void guest_uexit(uint64_t exit_code); GUEST_CODE static void nested_vm_exit_handler_intel(uint64_t exit_reason, struct l2_guest_regs* regs); GUEST_CODE static void guest_execute_code(uint8_t* insns, uint64_t size); GUEST_CODE static void guest_handle_cpuid(uint32_t eax, uint32_t ecx); GUEST_CODE static void guest_handle_wrmsr(uint64_t reg, uint64_t val); GUEST_CODE static void guest_handle_rdmsr(uint64_t reg); GUEST_CODE static void guest_handle_wr_crn(struct api_call_2* cmd); GUEST_CODE static void guest_handle_wr_drn(struct api_call_2* cmd); GUEST_CODE static void guest_handle_in_dx(struct api_call_2* cmd); GUEST_CODE static void guest_handle_out_dx(struct api_call_3* cmd); GUEST_CODE static void guest_handle_set_irq_handler(struct api_call_2* cmd); GUEST_CODE static void guest_handle_enable_nested(struct api_call_1* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_create_vm(struct api_call_1* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_vmresume(struct api_call_1* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_intel_vmwrite_mask(struct api_call_5* cmd, uint64_t cpu_id); GUEST_CODE static void guest_handle_nested_amd_vmcb_write_mask(struct api_call_5* cmd, uint64_t cpu_id); typedef enum { UEXIT_END = (uint64_t)-1, UEXIT_IRQ = (uint64_t)-2, UEXIT_ASSERT = (uint64_t)-3, } uexit_code; typedef enum { CPU_VENDOR_INTEL, CPU_VENDOR_AMD, } cpu_vendor_id; __attribute__((naked)) GUEST_CODE static void dummy_null_handler() { asm("iretq"); } __attribute__((naked)) GUEST_CODE static void uexit_irq_handler() { asm volatile(R"( movq $-2, %rdi call guest_uexit iretq )"); } __attribute__((used)) GUEST_CODE static void guest_main(uint64_t size, uint64_t cpu) { uint64_t addr = X86_SYZOS_ADDR_USER_CODE + cpu * KVM_PAGE_SIZE; while (size >= sizeof(struct api_call_header)) { struct api_call_header* cmd = (struct api_call_header*)addr; if (cmd->call >= SYZOS_API_STOP) return; if (cmd->size > size) return; volatile uint64_t call = cmd->call; if (call == SYZOS_API_UEXIT) { struct api_call_uexit* ucmd = (struct api_call_uexit*)cmd; guest_uexit(ucmd->exit_code); } else if (call == SYZOS_API_CODE) { struct api_call_code* ccmd = (struct api_call_code*)cmd; guest_execute_code(ccmd->insns, cmd->size - sizeof(struct api_call_header)); } else if (call == SYZOS_API_CPUID) { struct api_call_cpuid* ccmd = (struct api_call_cpuid*)cmd; guest_handle_cpuid(ccmd->eax, ccmd->ecx); } else if (call == SYZOS_API_WRMSR) { struct api_call_2* ccmd = (struct api_call_2*)cmd; guest_handle_wrmsr(ccmd->args[0], ccmd->args[1]); } else if (call == SYZOS_API_RDMSR) { struct api_call_1* ccmd = (struct api_call_1*)cmd; guest_handle_rdmsr(ccmd->arg); } else if (call == SYZOS_API_WR_CRN) { guest_handle_wr_crn((struct api_call_2*)cmd); } else if (call == SYZOS_API_WR_DRN) { guest_handle_wr_drn((struct api_call_2*)cmd); } else if (call == SYZOS_API_IN_DX) { guest_handle_in_dx((struct api_call_2*)cmd); } else if (call == SYZOS_API_OUT_DX) { guest_handle_out_dx((struct api_call_3*)cmd); } else if (call == SYZOS_API_SET_IRQ_HANDLER) { guest_handle_set_irq_handler((struct api_call_2*)cmd); } else if (call == SYZOS_API_ENABLE_NESTED) { guest_handle_enable_nested((struct api_call_1*)cmd, cpu); } else if (call == SYZOS_API_NESTED_CREATE_VM) { guest_handle_nested_create_vm((struct api_call_1*)cmd, cpu); } else if (call == SYZOS_API_NESTED_LOAD_CODE) { guest_handle_nested_load_code((struct api_call_nested_load_code*)cmd, cpu); } else if (call == SYZOS_API_NESTED_VMLAUNCH) { guest_handle_nested_vmlaunch((struct api_call_1*)cmd, cpu); } else if (call == SYZOS_API_NESTED_VMRESUME) { guest_handle_nested_vmresume((struct api_call_1*)cmd, cpu); } else if (call == SYZOS_API_NESTED_INTEL_VMWRITE_MASK) { guest_handle_nested_intel_vmwrite_mask((struct api_call_5*)cmd, cpu); } else if (call == SYZOS_API_NESTED_AMD_VMCB_WRITE_MASK) { guest_handle_nested_amd_vmcb_write_mask((struct api_call_5*)cmd, cpu); } addr += cmd->size; size -= cmd->size; }; guest_uexit((uint64_t)-1); } GUEST_CODE static noinline void guest_execute_code(uint8_t* insns, uint64_t size) { volatile void (*fn)() = (volatile void (*)())insns; fn(); } __attribute__((used)) GUEST_CODE static noinline void guest_uexit(uint64_t exit_code) { volatile uint64_t* ptr = (volatile uint64_t*)X86_SYZOS_ADDR_UEXIT; *ptr = exit_code; } GUEST_CODE static noinline void guest_handle_cpuid(uint32_t eax, uint32_t ecx) { asm volatile("cpuid\n" : : "a"(eax), "c"(ecx) : "rbx", "rdx"); } GUEST_CODE static noinline void wrmsr(uint64_t reg, uint64_t val) { asm volatile("wrmsr" : : "c"(reg), "a"((uint32_t)val), "d"((uint32_t)(val >> 32)) : "memory"); } GUEST_CODE static noinline void guest_handle_wrmsr(uint64_t reg, uint64_t val) { wrmsr(reg, val); } GUEST_CODE static noinline uint64_t rdmsr(uint64_t msr_id) { uint32_t low = 0, high = 0; asm volatile("rdmsr" : "=a"(low), "=d"(high) : "c"(msr_id)); return ((uint64_t)high << 32) | low; } GUEST_CODE static noinline void guest_handle_rdmsr(uint64_t reg) { (void)rdmsr(reg); } GUEST_CODE static noinline void guest_handle_wr_crn(struct api_call_2* cmd) { uint64_t value = cmd->args[1]; volatile uint64_t reg = cmd->args[0]; if (reg == 0) { asm volatile("movq %0, %%cr0" ::"r"(value) : "memory"); return; } if (reg == 2) { asm volatile("movq %0, %%cr2" ::"r"(value) : "memory"); return; } if (reg == 3) { asm volatile("movq %0, %%cr3" ::"r"(value) : "memory"); return; } if (reg == 4) { asm volatile("movq %0, %%cr4" ::"r"(value) : "memory"); return; } if (reg == 8) { asm volatile("movq %0, %%cr8" ::"r"(value) : "memory"); return; } } GUEST_CODE static noinline void guest_handle_wr_drn(struct api_call_2* cmd) { uint64_t value = cmd->args[1]; volatile uint64_t reg = cmd->args[0]; if (reg == 0) { asm volatile("movq %0, %%dr0" ::"r"(value) : "memory"); return; } if (reg == 1) { asm volatile("movq %0, %%dr1" ::"r"(value) : "memory"); return; } if (reg == 2) { asm volatile("movq %0, %%dr2" ::"r"(value) : "memory"); return; } if (reg == 3) { asm volatile("movq %0, %%dr3" ::"r"(value) : "memory"); return; } if (reg == 4) { asm volatile("movq %0, %%dr4" ::"r"(value) : "memory"); return; } if (reg == 5) { asm volatile("movq %0, %%dr5" ::"r"(value) : "memory"); return; } if (reg == 6) { asm volatile("movq %0, %%dr6" ::"r"(value) : "memory"); return; } if (reg == 7) { asm volatile("movq %0, %%dr7" ::"r"(value) : "memory"); return; } } GUEST_CODE static noinline void guest_handle_in_dx(struct api_call_2* cmd) { uint16_t port = cmd->args[0]; volatile int size = cmd->args[1]; if (size == 1) { uint8_t unused; asm volatile("inb %1, %0" : "=a"(unused) : "d"(port)); return; } if (size == 2) { uint16_t unused; asm volatile("inw %1, %0" : "=a"(unused) : "d"(port)); return; } if (size == 4) { uint32_t unused; asm volatile("inl %1, %0" : "=a"(unused) : "d"(port)); } return; } GUEST_CODE static noinline void guest_handle_out_dx(struct api_call_3* cmd) { uint16_t port = cmd->args[0]; volatile int size = cmd->args[1]; uint32_t data = (uint32_t)cmd->args[2]; if (size == 1) { asm volatile("outb %b0, %w1" ::"a"(data), "d"(port)); return; } if (size == 2) { asm volatile("outw %w0, %w1" ::"a"(data), "d"(port)); return; } if (size == 4) { asm volatile("outl %k0, %w1" ::"a"(data), "d"(port)); return; } } struct idt_entry_64 { uint16_t offset_low; uint16_t selector; uint8_t ist; uint8_t type_attr; uint16_t offset_mid; uint32_t offset_high; uint32_t reserved; } __attribute__((packed)); GUEST_CODE static void set_idt_gate(uint8_t vector, uint64_t handler) { volatile struct idt_entry_64* idt = (volatile struct idt_entry_64*)(X86_SYZOS_ADDR_VAR_IDT); volatile struct idt_entry_64* idt_entry = &idt[vector]; idt_entry->offset_low = (uint16_t)handler; idt_entry->offset_mid = (uint16_t)(handler >> 16); idt_entry->offset_high = (uint32_t)(handler >> 32); idt_entry->selector = X86_SYZOS_SEL_CODE; idt_entry->type_attr = 0x8E; idt_entry->ist = 0; idt_entry->reserved = 0; } GUEST_CODE static noinline void guest_handle_set_irq_handler(struct api_call_2* cmd) { uint8_t vector = (uint8_t)cmd->args[0]; uint64_t type = cmd->args[1]; volatile uint64_t handler_addr = 0; if (type == 1) handler_addr = executor_fn_guest_addr(dummy_null_handler); else if (type == 2) handler_addr = executor_fn_guest_addr(uexit_irq_handler); set_idt_gate(vector, handler_addr); } GUEST_CODE static cpu_vendor_id get_cpu_vendor(void) { uint32_t ebx, eax = 0; asm volatile("cpuid" : "+a"(eax), "=b"(ebx) : : "ecx", "edx"); if (ebx == 0x756e6547) { return CPU_VENDOR_INTEL; } else if (ebx == 0x68747541) { return CPU_VENDOR_AMD; } else { guest_uexit(UEXIT_ASSERT); return CPU_VENDOR_INTEL; } } GUEST_CODE static inline uint64_t read_cr0(void) { uint64_t val; asm volatile("mov %%cr0, %0" : "=r"(val)); return val; } GUEST_CODE static inline uint64_t read_cr3(void) { uint64_t val; asm volatile("mov %%cr3, %0" : "=r"(val)); return val; } GUEST_CODE static inline uint64_t read_cr4(void) { uint64_t val; asm volatile("mov %%cr4, %0" : "=r"(val)); return val; } GUEST_CODE static inline void write_cr4(uint64_t val) { asm volatile("mov %0, %%cr4" : : "r"(val)); } GUEST_CODE static noinline void vmwrite(uint64_t field, uint64_t value) { uint8_t error = 0; asm volatile("vmwrite %%rax, %%rbx; setna %0" : "=q"(error) : "a"(value), "b"(field) : "cc", "memory"); if (error) guest_uexit(UEXIT_ASSERT); } GUEST_CODE static noinline uint64_t vmread(uint64_t field) { uint64_t value; asm volatile("vmread %%rbx, %%rax" : "=a"(value) : "b"(field) : "cc"); return value; } GUEST_CODE static inline void nested_vmptrld(uint64_t cpu_id, uint64_t vm_id) { uint64_t vmcs_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); uint8_t error = 0; asm volatile("vmptrld %1; setna %0" : "=q"(error) : "m"(vmcs_addr) : "memory", "cc"); if (error) guest_uexit(0xE2BAD2); } GUEST_CODE static noinline void vmcb_write16(uint64_t vmcb, uint16_t offset, uint16_t val) { *((volatile uint16_t*)(vmcb + offset)) = val; } GUEST_CODE static noinline void vmcb_write32(uint64_t vmcb, uint16_t offset, uint32_t val) { *((volatile uint32_t*)(vmcb + offset)) = val; } GUEST_CODE static noinline void vmcb_write64(uint64_t vmcb, uint16_t offset, uint64_t val) { *((volatile uint64_t*)(vmcb + offset)) = val; } GUEST_CODE static noinline uint64_t vmcb_read64(volatile uint8_t* vmcb, uint16_t offset) { return *((volatile uint64_t*)(vmcb + offset)); } GUEST_CODE static void guest_memset(void* s, uint8_t c, int size) { volatile uint8_t* p = (volatile uint8_t*)s; for (int i = 0; i < size; i++) p[i] = c; } GUEST_CODE static void guest_memcpy(void* dst, void* src, int size) { volatile uint8_t* d = (volatile uint8_t*)dst; volatile uint8_t* s = (volatile uint8_t*)src; for (int i = 0; i < size; i++) d[i] = s[i]; } GUEST_CODE static noinline void nested_enable_vmx_intel(uint64_t cpu_id) { uint64_t vmxon_addr = X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id); uint64_t cr4 = read_cr4(); cr4 |= X86_CR4_VMXE; write_cr4(cr4); uint64_t feature_control = rdmsr(X86_MSR_IA32_FEATURE_CONTROL); if ((feature_control & 1) == 0) { feature_control |= 0b101; asm volatile("wrmsr" : : "d"(0x0), "c"(X86_MSR_IA32_FEATURE_CONTROL), "A"(feature_control)); } *(uint32_t*)vmxon_addr = rdmsr(X86_MSR_IA32_VMX_BASIC); uint8_t error; asm volatile("vmxon %1; setna %0" : "=q"(error) : "m"(vmxon_addr) : "memory", "cc"); if (error) { guest_uexit(0xE2BAD0); return; } } GUEST_CODE static noinline void nested_enable_svm_amd(uint64_t cpu_id) { uint64_t hsave_addr = X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id); uint64_t efer = rdmsr(X86_MSR_IA32_EFER); efer |= X86_EFER_SVME; wrmsr(X86_MSR_IA32_EFER, efer); wrmsr(X86_MSR_VM_HSAVE_PA, hsave_addr); } GUEST_CODE static noinline void guest_handle_enable_nested(struct api_call_1* cmd, uint64_t cpu_id) { if (get_cpu_vendor() == CPU_VENDOR_INTEL) { nested_enable_vmx_intel(cpu_id); } else { nested_enable_svm_amd(cpu_id); } } GUEST_CODE static noinline void setup_l2_page_tables(cpu_vendor_id vendor, uint64_t cpu_id, uint64_t vm_id) { uint64_t l2_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id); uint64_t l2_pdpt_addr = l2_pml4_addr + KVM_PAGE_SIZE; uint64_t l2_pd_addr = l2_pml4_addr + 2 * KVM_PAGE_SIZE; uint64_t l2_pt_addr = l2_pml4_addr + 3 * KVM_PAGE_SIZE; volatile uint64_t* pml4 = (volatile uint64_t*)l2_pml4_addr; volatile uint64_t* pdpt = (volatile uint64_t*)l2_pdpt_addr; volatile uint64_t* pd = (volatile uint64_t*)l2_pd_addr; volatile uint64_t* pt = (volatile uint64_t*)l2_pt_addr; guest_memset((void*)l2_pml4_addr, 0, KVM_PAGE_SIZE); guest_memset((void*)l2_pdpt_addr, 0, KVM_PAGE_SIZE); guest_memset((void*)l2_pd_addr, 0, KVM_PAGE_SIZE); guest_memset((void*)l2_pt_addr, 0, KVM_PAGE_SIZE); guest_memset((void*)X86_SYZOS_ADDR_MSR_BITMAP(cpu_id, vm_id), 0, KVM_PAGE_SIZE); uint64_t flags = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER; pml4[0] = l2_pdpt_addr | flags; pdpt[0] = l2_pd_addr | flags; pd[0] = l2_pt_addr | flags; uint64_t pt_flags = flags; if (vendor == CPU_VENDOR_INTEL) { pt_flags |= EPT_MEMTYPE_WB | EPT_ACCESSED | EPT_DIRTY; } else { pt_flags |= X86_PDE64_ACCESSED | X86_PDE64_DIRTY; } for (int i = 0; i < 512; i++) pt[i] = (i * KVM_PAGE_SIZE) | pt_flags; } GUEST_CODE static noinline void init_vmcs_control_fields(uint64_t cpu_id, uint64_t vm_id) { uint64_t vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_PINBASED_CTLS); vmwrite(VMCS_PIN_BASED_VM_EXEC_CONTROL, (uint32_t)vmx_msr); vmx_msr = (uint32_t)rdmsr(X86_MSR_IA32_VMX_PROCBASED_CTLS2); vmx_msr |= SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_ENABLE_RDTSCP; vmwrite(VMCS_SECONDARY_VM_EXEC_CONTROL, vmx_msr); vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_PROCBASED_CTLS); vmx_msr |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; vmx_msr |= CPU_BASED_HLT_EXITING | CPU_BASED_RDTSC_EXITING; vmwrite(VMCS_CPU_BASED_VM_EXEC_CONTROL, (uint32_t)vmx_msr); vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_EXIT_CTLS); vmwrite(VMCS_VM_EXIT_CONTROLS, (uint32_t)vmx_msr | VM_EXIT_HOST_ADDR_SPACE_SIZE); vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_ENTRY_CTLS); vmwrite(VMCS_VM_ENTRY_CONTROLS, (uint32_t)vmx_msr | VM_ENTRY_IA32E_MODE); uint64_t eptp = (X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id) & ~0xFFF) | (6 << 0) | (3 << 3); vmwrite(VMCS_EPT_POINTER, eptp); vmwrite(VMCS_CR0_GUEST_HOST_MASK, 0); vmwrite(VMCS_CR4_GUEST_HOST_MASK, 0); vmwrite(VMCS_CR0_READ_SHADOW, read_cr0()); vmwrite(VMCS_CR4_READ_SHADOW, read_cr4()); vmwrite(VMCS_MSR_BITMAP, 0); vmwrite(VMCS_VMREAD_BITMAP, 0); vmwrite(VMCS_VMWRITE_BITMAP, 0); vmwrite(VMCS_EXCEPTION_BITMAP, (1 << 6)); vmwrite(VMCS_VIRTUAL_PROCESSOR_ID, 0); vmwrite(VMCS_POSTED_INTR_NV, 0); vmwrite(VMCS_PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(VMCS_PAGE_FAULT_ERROR_CODE_MATCH, -1); vmwrite(VMCS_CR3_TARGET_COUNT, 0); vmwrite(VMCS_VM_EXIT_MSR_STORE_COUNT, 0); vmwrite(VMCS_VM_EXIT_MSR_LOAD_COUNT, 0); vmwrite(VMCS_VM_ENTRY_MSR_LOAD_COUNT, 0); vmwrite(VMCS_VM_ENTRY_INTR_INFO_FIELD, 0); vmwrite(VMCS_TPR_THRESHOLD, 0); } typedef enum { SYZOS_NESTED_EXIT_REASON_HLT = 1, SYZOS_NESTED_EXIT_REASON_INVD = 2, SYZOS_NESTED_EXIT_REASON_CPUID = 3, SYZOS_NESTED_EXIT_REASON_RDTSC = 4, SYZOS_NESTED_EXIT_REASON_RDTSCP = 5, SYZOS_NESTED_EXIT_REASON_UNKNOWN = 0xFF, } syz_nested_exit_reason; GUEST_CODE static void guest_uexit_l2(uint64_t exit_reason, syz_nested_exit_reason mapped_reason, cpu_vendor_id vendor) { if (mapped_reason != SYZOS_NESTED_EXIT_REASON_UNKNOWN) { guest_uexit(0xe2e20000 | mapped_reason); } else if (vendor == CPU_VENDOR_INTEL) { guest_uexit(0xe2110000 | exit_reason); } else { guest_uexit(0xe2aa0000 | exit_reason); } } #define EXIT_REASON_CPUID 0xa #define EXIT_REASON_HLT 0xc #define EXIT_REASON_INVD 0xd #define EXIT_REASON_RDTSC 0x10 #define EXIT_REASON_RDTSCP 0x33 GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64_t basic_reason) { volatile uint64_t reason = basic_reason; if (reason == EXIT_REASON_HLT) return SYZOS_NESTED_EXIT_REASON_HLT; if (reason == EXIT_REASON_INVD) return SYZOS_NESTED_EXIT_REASON_INVD; if (reason == EXIT_REASON_CPUID) return SYZOS_NESTED_EXIT_REASON_CPUID; if (reason == EXIT_REASON_RDTSC) return SYZOS_NESTED_EXIT_REASON_RDTSC; if (reason == EXIT_REASON_RDTSCP) return SYZOS_NESTED_EXIT_REASON_RDTSCP; return SYZOS_NESTED_EXIT_REASON_UNKNOWN; } GUEST_CODE static void advance_l2_rip_intel(uint64_t basic_reason) { volatile uint64_t reason = basic_reason; uint64_t rip = vmread(VMCS_GUEST_RIP); if ((reason == EXIT_REASON_INVD) || (reason == EXIT_REASON_CPUID) || (reason == EXIT_REASON_RDTSC)) { rip += 2; } else if (reason == EXIT_REASON_RDTSCP) { rip += 3; } vmwrite(VMCS_GUEST_RIP, rip); } __attribute__((used)) GUEST_CODE static void nested_vm_exit_handler_intel(uint64_t exit_reason, struct l2_guest_regs* regs) { uint64_t basic_reason = exit_reason & 0xFFFF; syz_nested_exit_reason mapped_reason = map_intel_exit_reason(basic_reason); guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_INTEL); advance_l2_rip_intel(basic_reason); } extern char after_vmentry_label; __attribute__((naked)) GUEST_CODE static void nested_vm_exit_handler_intel_asm(void) { asm volatile(R"( push %%rax push %%rbx push %%rcx push %%rdx push %%rsi push %%rdi push %%rbp push %%r8 push %%r9 push %%r10 push %%r11 push %%r12 push %%r13 push %%r14 push %%r15 mov %%rsp, %%rsi mov %[vm_exit_reason], %%rbx vmread %%rbx, %%rdi call nested_vm_exit_handler_intel add %[stack_cleanup_size], %%rsp jmp after_vmentry_label )" : : [stack_cleanup_size] "i"(sizeof(struct l2_guest_regs)), [vm_exit_reason] "i"(VMCS_VM_EXIT_REASON) : "memory", "cc", "rbx", "rdi", "rsi"); } #define VMEXIT_RDTSC 0x6e #define VMEXIT_CPUID 0x72 #define VMEXIT_INVD 0x76 #define VMEXIT_HLT 0x78 #define VMEXIT_RDTSCP 0x87 GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64_t basic_reason) { volatile uint64_t reason = basic_reason; if (reason == VMEXIT_HLT) return SYZOS_NESTED_EXIT_REASON_HLT; if (reason == VMEXIT_INVD) return SYZOS_NESTED_EXIT_REASON_INVD; if (reason == VMEXIT_CPUID) return SYZOS_NESTED_EXIT_REASON_CPUID; if (reason == VMEXIT_RDTSC) return SYZOS_NESTED_EXIT_REASON_RDTSC; if (reason == VMEXIT_RDTSCP) return SYZOS_NESTED_EXIT_REASON_RDTSCP; return SYZOS_NESTED_EXIT_REASON_UNKNOWN; } GUEST_CODE static void advance_l2_rip_amd(uint64_t basic_reason, uint64_t cpu_id, uint64_t vm_id) { volatile uint64_t reason = basic_reason; uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); uint64_t rip = vmcb_read64((volatile uint8_t*)vmcb_addr, VMCB_GUEST_RIP); if ((reason == VMEXIT_INVD) || (reason == VMEXIT_CPUID) || (reason == VMEXIT_RDTSC)) { rip += 2; } else if (reason == VMEXIT_RDTSCP) { rip += 3; } vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip); } __attribute__((used)) GUEST_CODE static void nested_vm_exit_handler_amd(uint64_t exit_reason, uint64_t cpu_id, uint64_t vm_id) { volatile uint64_t basic_reason = exit_reason & 0xFFFF; syz_nested_exit_reason mapped_reason = map_amd_exit_reason(basic_reason); guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_AMD); advance_l2_rip_amd(basic_reason, cpu_id, vm_id); } GUEST_CODE static noinline void init_vmcs_host_state(void) { vmwrite(VMCS_HOST_CS_SELECTOR, X86_SYZOS_SEL_CODE); vmwrite(VMCS_HOST_DS_SELECTOR, X86_SYZOS_SEL_DATA); vmwrite(VMCS_HOST_ES_SELECTOR, X86_SYZOS_SEL_DATA); vmwrite(VMCS_HOST_SS_SELECTOR, X86_SYZOS_SEL_DATA); vmwrite(VMCS_HOST_FS_SELECTOR, X86_SYZOS_SEL_DATA); vmwrite(VMCS_HOST_GS_SELECTOR, X86_SYZOS_SEL_DATA); vmwrite(VMCS_HOST_TR_SELECTOR, X86_SYZOS_SEL_TSS64); vmwrite(VMCS_HOST_TR_BASE, 0); vmwrite(VMCS_HOST_GDTR_BASE, X86_SYZOS_ADDR_GDT); vmwrite(VMCS_HOST_IDTR_BASE, X86_SYZOS_ADDR_VAR_IDT); vmwrite(VMCS_HOST_FS_BASE, rdmsr(X86_MSR_FS_BASE)); vmwrite(VMCS_HOST_GS_BASE, rdmsr(X86_MSR_GS_BASE)); uint64_t tmpreg = 0; asm volatile("mov %%rsp, %0" : "=r"(tmpreg)); vmwrite(VMCS_HOST_RSP, tmpreg); vmwrite(VMCS_HOST_RIP, (uintptr_t)nested_vm_exit_handler_intel_asm); vmwrite(VMCS_HOST_CR0, read_cr0()); vmwrite(VMCS_HOST_CR3, read_cr3()); vmwrite(VMCS_HOST_CR4, read_cr4()); vmwrite(VMCS_HOST_IA32_PAT, rdmsr(X86_MSR_IA32_CR_PAT)); vmwrite(VMCS_HOST_IA32_EFER, rdmsr(X86_MSR_IA32_EFER)); vmwrite(VMCS_HOST_IA32_PERF_GLOBAL_CTRL, rdmsr(X86_MSR_CORE_PERF_GLOBAL_CTRL)); vmwrite(VMCS_HOST_IA32_SYSENTER_CS, rdmsr(X86_MSR_IA32_SYSENTER_CS)); vmwrite(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(X86_MSR_IA32_SYSENTER_ESP)); vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, rdmsr(X86_MSR_IA32_SYSENTER_EIP)); } #define COPY_VMCS_FIELD(GUEST_FIELD, HOST_FIELD) \ vmwrite(GUEST_FIELD, vmread(HOST_FIELD)) #define SETUP_L2_SEGMENT(SEG, SELECTOR, BASE, LIMIT, AR) \ vmwrite(VMCS_GUEST_##SEG##_SELECTOR, SELECTOR); \ vmwrite(VMCS_GUEST_##SEG##_BASE, BASE); \ vmwrite(VMCS_GUEST_##SEG##_LIMIT, LIMIT); \ vmwrite(VMCS_GUEST_##SEG##_ACCESS_RIGHTS, AR); GUEST_CODE static noinline void init_vmcs_guest_state(uint64_t cpu_id, uint64_t vm_id) { uint64_t l2_code_addr = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id); uint64_t l2_stack_addr = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id); SETUP_L2_SEGMENT(CS, vmread(VMCS_HOST_CS_SELECTOR), 0, 0xFFFFFFFF, VMX_AR_64BIT_CODE); SETUP_L2_SEGMENT(DS, vmread(VMCS_HOST_DS_SELECTOR), 0, 0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK); SETUP_L2_SEGMENT(ES, vmread(VMCS_HOST_ES_SELECTOR), 0, 0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK); SETUP_L2_SEGMENT(SS, vmread(VMCS_HOST_SS_SELECTOR), 0, 0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK); SETUP_L2_SEGMENT(FS, vmread(VMCS_HOST_FS_SELECTOR), vmread(VMCS_HOST_FS_BASE), 0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK); SETUP_L2_SEGMENT(GS, vmread(VMCS_HOST_GS_SELECTOR), vmread(VMCS_HOST_GS_BASE), 0xFFFFFFFF, VMX_AR_64BIT_DATA_STACK); SETUP_L2_SEGMENT(TR, vmread(VMCS_HOST_TR_SELECTOR), vmread(VMCS_HOST_TR_BASE), 0x67, VMX_AR_TSS_BUSY); SETUP_L2_SEGMENT(LDTR, 0, 0, 0, VMX_AR_LDTR_UNUSABLE); vmwrite(VMCS_GUEST_CR0, vmread(VMCS_HOST_CR0)); vmwrite(VMCS_GUEST_CR3, vmread(VMCS_HOST_CR3)); vmwrite(VMCS_GUEST_CR4, vmread(VMCS_HOST_CR4)); vmwrite(VMCS_GUEST_RIP, l2_code_addr); vmwrite(VMCS_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8); vmwrite(VMCS_GUEST_RFLAGS, RFLAGS_1_BIT); vmwrite(VMCS_GUEST_DR7, 0x400); COPY_VMCS_FIELD(VMCS_GUEST_IA32_EFER, VMCS_HOST_IA32_EFER); COPY_VMCS_FIELD(VMCS_GUEST_IA32_PAT, VMCS_HOST_IA32_PAT); COPY_VMCS_FIELD(VMCS_GUEST_IA32_PERF_GLOBAL_CTRL, VMCS_HOST_IA32_PERF_GLOBAL_CTRL); COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_CS, VMCS_HOST_IA32_SYSENTER_CS); COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_ESP, VMCS_HOST_IA32_SYSENTER_ESP); COPY_VMCS_FIELD(VMCS_GUEST_SYSENTER_EIP, VMCS_HOST_IA32_SYSENTER_EIP); vmwrite(VMCS_GUEST_IA32_DEBUGCTL, 0); vmwrite(VMCS_GUEST_GDTR_BASE, vmread(VMCS_HOST_GDTR_BASE)); vmwrite(VMCS_GUEST_GDTR_LIMIT, 0xffff); vmwrite(VMCS_GUEST_IDTR_BASE, vmread(VMCS_HOST_IDTR_BASE)); vmwrite(VMCS_GUEST_IDTR_LIMIT, 0xffff); vmwrite(VMCS_LINK_POINTER, 0xffffffffffffffff); vmwrite(VMCS_GUEST_ACTIVITY_STATE, 0); vmwrite(VMCS_GUEST_INTERRUPTIBILITY_INFO, 0); vmwrite(VMCS_GUEST_PENDING_DBG_EXCEPTIONS, 0); vmwrite(VMCS_VMX_PREEMPTION_TIMER_VALUE, 0); vmwrite(VMCS_GUEST_INTR_STATUS, 0); vmwrite(VMCS_GUEST_PML_INDEX, 0); } GUEST_CODE static noinline void nested_create_vm_intel(struct api_call_1* cmd, uint64_t cpu_id) { uint64_t vm_id = cmd->arg; uint64_t vmcs_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); uint8_t error = 0; *(uint32_t*)vmcs_addr = rdmsr(X86_MSR_IA32_VMX_BASIC); asm volatile("vmclear %1; setna %0" : "=q"(error) : "m"(vmcs_addr) : "memory", "cc"); if (error) { guest_uexit(0xE2BAD1); return; } nested_vmptrld(cpu_id, vm_id); setup_l2_page_tables(CPU_VENDOR_INTEL, cpu_id, vm_id); init_vmcs_control_fields(cpu_id, vm_id); init_vmcs_host_state(); init_vmcs_guest_state(cpu_id, vm_id); } #define SETUP_L2_SEGMENT_SVM(VMBC_PTR, SEG_NAME, SELECTOR, BASE, LIMIT, ATTR) \ vmcb_write16(VMBC_PTR, VMCB_GUEST_##SEG_NAME##_SEL, SELECTOR); \ vmcb_write16(VMBC_PTR, VMCB_GUEST_##SEG_NAME##_ATTR, ATTR); \ vmcb_write32(VMBC_PTR, VMCB_GUEST_##SEG_NAME##_LIM, LIMIT); \ vmcb_write64(VMBC_PTR, VMCB_GUEST_##SEG_NAME##_BASE, BASE); GUEST_CODE static noinline void init_vmcb_guest_state(uint64_t cpu_id, uint64_t vm_id) { uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); uint64_t l2_code_addr = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id); uint64_t l2_stack_addr = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id); uint64_t npt_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id); SETUP_L2_SEGMENT_SVM(vmcb_addr, CS, X86_SYZOS_SEL_CODE, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_CODE); SETUP_L2_SEGMENT_SVM(vmcb_addr, DS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_DATA); SETUP_L2_SEGMENT_SVM(vmcb_addr, ES, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_DATA); SETUP_L2_SEGMENT_SVM(vmcb_addr, SS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_DATA); SETUP_L2_SEGMENT_SVM(vmcb_addr, FS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_DATA); SETUP_L2_SEGMENT_SVM(vmcb_addr, GS, X86_SYZOS_SEL_DATA, 0, 0xFFFFFFFF, SVM_ATTR_64BIT_DATA); SETUP_L2_SEGMENT_SVM(vmcb_addr, TR, X86_SYZOS_SEL_TSS64, X86_SYZOS_ADDR_VAR_TSS, 0x67, VMX_AR_TSS_AVAILABLE); SETUP_L2_SEGMENT_SVM(vmcb_addr, LDTR, 0, 0, 0, SVM_ATTR_LDTR_UNUSABLE); uint64_t efer = rdmsr(X86_MSR_IA32_EFER); vmcb_write64(vmcb_addr, VMCB_GUEST_CR0, read_cr0() | X86_CR0_WP); vmcb_write64(vmcb_addr, VMCB_GUEST_CR3, read_cr3()); vmcb_write64(vmcb_addr, VMCB_GUEST_CR4, read_cr4()); vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, l2_code_addr); vmcb_write64(vmcb_addr, VMCB_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8); vmcb_write64(vmcb_addr, VMCB_GUEST_RFLAGS, RFLAGS_1_BIT); vmcb_write64(vmcb_addr, VMCB_GUEST_DEBUGCTL, 0); vmcb_write64(vmcb_addr, VMCB_GUEST_DR6, 0x0); vmcb_write64(vmcb_addr, VMCB_GUEST_DR7, 0x0); vmcb_write64(vmcb_addr, VMCB_GUEST_EFER, efer & ~X86_EFER_SCE); vmcb_write64(vmcb_addr, VMCB_GUEST_PAT, rdmsr(X86_MSR_IA32_CR_PAT)); struct { uint16_t limit; uint64_t base; } __attribute__((packed)) gdtr, idtr; asm volatile("sgdt %0" : "=m"(gdtr)); asm volatile("sidt %0" : "=m"(idtr)); vmcb_write64(vmcb_addr, VMCB_GUEST_GDTR_BASE, gdtr.base); vmcb_write32(vmcb_addr, VMCB_GUEST_GDTR_LIM, gdtr.limit); vmcb_write64(vmcb_addr, VMCB_GUEST_IDTR_BASE, idtr.base); vmcb_write32(vmcb_addr, VMCB_GUEST_IDTR_LIM, idtr.limit); vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC3, VMCB_CTRL_INTERCEPT_VEC3_ALL); vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC4, VMCB_CTRL_INTERCEPT_VEC4_ALL); vmcb_write64(vmcb_addr, VMCB_CTRL_NP_ENABLE, (1 << VMCB_CTRL_NPT_ENABLE_BIT)); uint64_t npt_pointer = (npt_pml4_addr & ~0xFFF); vmcb_write64(vmcb_addr, VMCB_CTRL_N_CR3, npt_pointer); vmcb_write32(vmcb_addr, VMCB_CTRL_ASID, 1); } GUEST_CODE static noinline void nested_create_vm_amd(struct api_call_1* cmd, uint64_t cpu_id) { uint64_t vm_id = cmd->arg; uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); guest_memset((void*)vmcb_addr, 0, KVM_PAGE_SIZE); guest_memset((void*)X86_SYZOS_ADDR_VM_ARCH_SPECIFIC(cpu_id), 0, KVM_PAGE_SIZE); setup_l2_page_tables(CPU_VENDOR_AMD, cpu_id, vm_id); init_vmcb_guest_state(cpu_id, vm_id); } GUEST_CODE static noinline void guest_handle_nested_create_vm(struct api_call_1* cmd, uint64_t cpu_id) { if (get_cpu_vendor() == CPU_VENDOR_INTEL) { nested_create_vm_intel(cmd, cpu_id); } else { nested_create_vm_amd(cmd, cpu_id); } } GUEST_CODE static noinline void guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64_t cpu_id) { uint64_t vm_id = cmd->vm_id; uint64_t l2_code_addr = X86_SYZOS_ADDR_VM_CODE(cpu_id, vm_id); uint64_t l2_stack_addr = X86_SYZOS_ADDR_VM_STACK(cpu_id, vm_id); uint64_t l2_code_size = cmd->header.size - sizeof(struct api_call_header) - sizeof(uint64_t); if (l2_code_size > KVM_PAGE_SIZE) l2_code_size = KVM_PAGE_SIZE; guest_memcpy((void*)l2_code_addr, (void*)cmd->insns, l2_code_size); if (get_cpu_vendor() == CPU_VENDOR_INTEL) { nested_vmptrld(cpu_id, vm_id); vmwrite(VMCS_GUEST_RIP, l2_code_addr); vmwrite(VMCS_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8); } else { vmcb_write64(X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id), VMCB_GUEST_RIP, l2_code_addr); vmcb_write64(X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id), VMCB_GUEST_RSP, l2_stack_addr + KVM_PAGE_SIZE - 8); } } GUEST_CODE static noinline void guest_handle_nested_vmentry_intel(uint64_t vm_id, uint64_t cpu_id, bool is_launch) { uint64_t vmx_error_code = 0; uint8_t fail_flag = 0; nested_vmptrld(cpu_id, vm_id); if (is_launch) { asm volatile(R"( vmlaunch setc %%al setz %%bl or %%bl, %%al)" : "=a"(fail_flag) : : "rbx", "cc", "memory"); } else { asm volatile(R"( vmresume setc %%al setz %%bl or %%bl, %%al)" : "=a"(fail_flag) : : "rbx", "cc", "memory"); } asm volatile(".globl after_vmentry_label\nafter_vmentry_label:"); if (fail_flag) { vmx_error_code = vmread(VMCS_VM_INSTRUCTION_ERROR); guest_uexit(0xE2E10000 | (uint32_t)vmx_error_code); return; } } GUEST_CODE static noinline void guest_run_amd_vm(uint64_t cpu_id, uint64_t vm_id) { uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); volatile uint8_t* vmcb_ptr = (volatile uint8_t*)vmcb_addr; uint8_t fail_flag = 0; asm volatile("mov %1, %%rax\n\t" "vmrun\n\t" "setc %0\n\t" : "=q"(fail_flag) : "m"(vmcb_addr) : "rax", "cc", "memory"); if (fail_flag) { guest_uexit(0xE2E10000 | 0xFFFF); return; } uint64_t exit_reason = vmcb_read64(vmcb_ptr, VMCB_EXIT_CODE); nested_vm_exit_handler_amd(exit_reason, cpu_id, vm_id); } GUEST_CODE static noinline void guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64_t cpu_id) { uint64_t vm_id = cmd->arg; if (get_cpu_vendor() == CPU_VENDOR_INTEL) { guest_handle_nested_vmentry_intel(vm_id, cpu_id, true); } else { guest_run_amd_vm(cpu_id, vm_id); } } GUEST_CODE static noinline void guest_handle_nested_vmresume(struct api_call_1* cmd, uint64_t cpu_id) { uint64_t vm_id = cmd->arg; if (get_cpu_vendor() == CPU_VENDOR_INTEL) { guest_handle_nested_vmentry_intel(vm_id, cpu_id, false); } else { guest_run_amd_vm(cpu_id, vm_id); } } GUEST_CODE static noinline void guest_handle_nested_intel_vmwrite_mask(struct api_call_5* cmd, uint64_t cpu_id) { if (get_cpu_vendor() != CPU_VENDOR_INTEL) return; uint64_t vm_id = cmd->args[0]; nested_vmptrld(cpu_id, vm_id); uint64_t field = cmd->args[1]; uint64_t set_mask = cmd->args[2]; uint64_t unset_mask = cmd->args[3]; uint64_t flip_mask = cmd->args[4]; uint64_t current_value = vmread(field); uint64_t new_value = (current_value & ~unset_mask) | set_mask; new_value ^= flip_mask; vmwrite(field, new_value); } GUEST_CODE static noinline void guest_handle_nested_amd_vmcb_write_mask(struct api_call_5* cmd, uint64_t cpu_id) { if (get_cpu_vendor() != CPU_VENDOR_AMD) return; uint64_t vm_id = cmd->args[0]; uint64_t vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id); uint64_t offset = cmd->args[1]; uint64_t set_mask = cmd->args[2]; uint64_t unset_mask = cmd->args[3]; uint64_t flip_mask = cmd->args[4]; uint64_t current_value = vmcb_read64((volatile uint8_t*)vmcb_addr, offset); uint64_t new_value = (current_value & ~unset_mask) | set_mask; new_value ^= flip_mask; vmcb_write64(vmcb_addr, offset, new_value); } const char kvm_asm16_cpl3[] = "\x0f\x20\xc0\x66\x83\xc8\x01\x0f\x22\xc0\xb8\xa0\x00\x0f\x00\xd8\xb8\x2b" "\x00\x8e\xd8\x8e\xc0\x8e\xe0\x8e\xe8\xbc\x00\x01\xc7\x06\x00\x01\x1d\xba" "\xc7\x06\x02\x01\x23\x00\xc7\x06\x04\x01\x00\x01\xc7\x06\x06\x01\x2b\x00" "\xcb"; const char kvm_asm32_paged[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0"; const char kvm_asm32_vm86[] = "\x66\xb8\xb8\x00\x0f\x00\xd8\xea\x00\x00\x00\x00\xd0\x00"; const char kvm_asm32_paged_vm86[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\x66\xb8\xb8\x00\x0f\x00\xd8" "\xea\x00\x00\x00\x00\xd0\x00"; const char kvm_asm64_enable_long[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8"; const char kvm_asm64_init_vm[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc1\x3a\x00\x00\x00\x0f" "\x32\x48\x83\xc8\x05\x0f\x30\x0f\x20\xe0\x48\x0d\x00\x20\x00\x00\x0f\x22" "\xe0\x48\xc7\xc1\x80\x04\x00\x00\x0f\x32\x48\xc7\xc2\x00\x60\x00\x00\x89" "\x02\x48\xc7\xc2\x00\x70\x00\x00\x89\x02\x48\xc7\xc0\x00\x5f\x00\x00\xf3" "\x0f\xc7\x30\x48\xc7\xc0\x08\x5f\x00\x00\x66\x0f\xc7\x30\x0f\xc7\x30\x48" "\xc7\xc1\x81\x04\x00\x00\x0f\x32\x48\x83\xc8\x00\x48\x21\xd0\x48\xc7\xc2" "\x00\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x82\x04\x00\x00\x0f\x32\x48\x83" "\xc8\x00\x48\x21\xd0\x48\xc7\xc2\x02\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x1e\x40\x00\x00\x48\xc7\xc0\x81\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x83" "\x04\x00\x00\x0f\x32\x48\x0d\xff\x6f\x03\x00\x48\x21\xd0\x48\xc7\xc2\x0c" "\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x84\x04\x00\x00\x0f\x32\x48\x0d\xff" "\x17\x00\x00\x48\x21\xd0\x48\xc7\xc2\x12\x40\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x04\x2c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x00\x28\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02" "\x0c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc0\x58\x00" "\x00\x00\x48\xc7\xc2\x00\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x0c\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08" "\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x0c\x00\x00\x0f\x79\xd0\x48\xc7" "\xc0\xd8\x00\x00\x00\x48\xc7\xc2\x0c\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x02\x2c\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00" "\x4c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x6c" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x6c\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00" "\x6c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x6c\x00" "\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x6c\x00\x00\x48" "\x89\xc0\x0f\x79\xd0\x48\xc7\xc2\x06\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0a\x6c\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x0c\x6c\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x0e\x6c\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x14\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x16\x6c\x00\x00\x48\x8b\x04\x25\x10\x5f\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x00\x00\x00\x00\x48\xc7\xc0\x01\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x02\x00\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x00\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02" "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x20" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x20\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x77\x02\x00\x00" "\x0f\x32\x48\xc1\xe2\x20\x48\x09\xd0\x48\xc7\xc2\x00\x2c\x00\x00\x48\x89" "\xc0\x0f\x79\xd0\x48\xc7\xc2\x04\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0a\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x0e\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x10\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x16\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x14\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x00\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2" "\x02\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x1c" "\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x20" "\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x20\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x20\x00\x00" "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x08\x00\x00\x48" "\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x08\x00\x00\x48\xc7" "\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x08\x00\x00\x48\xc7\xc0" "\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x08\x00\x00\x48\xc7\xc0\x58" "\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x08\x00\x00\x48\xc7\xc0\x58\x00" "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x08\x00\x00\x48\xc7\xc0\x58\x00\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x08\x00\x00\x48\xc7\xc0\x00\x00\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x0e\x08\x00\x00\x48\xc7\xc0\xd8\x00\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x12\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x14\x68\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x16\x68\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x18\x68\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x00\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2" "\x02\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x04" "\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x48" "\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x48\x00" "\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x48\x00\x00" "\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x48\x00\x00\x48" "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x48\x00\x00\x48\xc7" "\xc0\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x48\x00\x00\x48\xc7\xc0" "\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x48\x00\x00\x48\xc7\xc0\xff" "\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x48\x00\x00\x48\xc7\xc0\x93\x40" "\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x48\x00\x00\x48\xc7\xc0\x9b\x20\x00" "\x00\x0f\x79\xd0\x48\xc7\xc2\x18\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00" "\x0f\x79\xd0\x48\xc7\xc2\x1a\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f" "\x79\xd0\x48\xc7\xc2\x1c\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79" "\xd0\x48\xc7\xc2\x1e\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0" "\x48\xc7\xc2\x20\x48\x00\x00\x48\xc7\xc0\x82\x00\x00\x00\x0f\x79\xd0\x48" "\xc7\xc2\x22\x48\x00\x00\x48\xc7\xc0\x8b\x00\x00\x00\x0f\x79\xd0\x48\xc7" "\xc2\x1c\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2" "\x1e\x68\x00\x00\x48\xc7\xc0\x00\x91\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20" "\x68\x00\x00\x48\xc7\xc0\x02\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x28" "\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x28\x00" "\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x28\x00\x00" "\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x28\x00\x00\x48" "\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x28\x00\x00\x48\xc7" "\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x68\x00\x00" "\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x68\x00\x00\x48\x89" "\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x68\x00\x00\x48\x89\xc0\x0f" "\x79\xd0\x48\xc7\xc0\x18\x5f\x00\x00\x48\x8b\x10\x48\xc7\xc0\x20\x5f\x00" "\x00\x48\x8b\x08\x48\x31\xc0\x0f\x78\xd0\x48\x31\xc8\x0f\x79\xd0\x0f\x01" "\xc2\x48\xc7\xc2\x00\x44\x00\x00\x0f\x78\xd0\xf4"; const char kvm_asm64_vm_exit[] = "\x48\xc7\xc3\x00\x44\x00\x00\x0f\x78\xda\x48\xc7\xc3\x02\x44\x00\x00\x0f" "\x78\xd9\x48\xc7\xc0\x00\x64\x00\x00\x0f\x78\xc0\x48\xc7\xc3\x1e\x68\x00" "\x00\x0f\x78\xdb\xf4"; const char kvm_asm64_cpl3[] = "\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\xde\xc0\xad\x0b\x50\x00" "\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc0\x6b\x00\x00\x00\x8e" "\xd8\x8e\xc0\x8e\xe0\x8e\xe8\x48\xc7\xc4\x80\x0f\x00\x00\x48\xc7\x04\x24" "\x1d\xba\x00\x00\x48\xc7\x44\x24\x04\x63\x00\x00\x00\x48\xc7\x44\x24\x08" "\x80\x0f\x00\x00\x48\xc7\x44\x24\x0c\x6b\x00\x00\x00\xcb"; #define KVM_SMI _IO(KVMIO, 0xb7) struct tss16 { uint16_t prev; uint16_t sp0; uint16_t ss0; uint16_t sp1; uint16_t ss1; uint16_t sp2; uint16_t ss2; uint16_t ip; uint16_t flags; uint16_t ax; uint16_t cx; uint16_t dx; uint16_t bx; uint16_t sp; uint16_t bp; uint16_t si; uint16_t di; uint16_t es; uint16_t cs; uint16_t ss; uint16_t ds; uint16_t ldt; } __attribute__((packed)); struct tss32 { uint16_t prev, prevh; uint32_t sp0; uint16_t ss0, ss0h; uint32_t sp1; uint16_t ss1, ss1h; uint32_t sp2; uint16_t ss2, ss2h; uint32_t cr3; uint32_t ip; uint32_t flags; uint32_t ax; uint32_t cx; uint32_t dx; uint32_t bx; uint32_t sp; uint32_t bp; uint32_t si; uint32_t di; uint16_t es, esh; uint16_t cs, csh; uint16_t ss, ssh; uint16_t ds, dsh; uint16_t fs, fsh; uint16_t gs, gsh; uint16_t ldt, ldth; uint16_t trace; uint16_t io_bitmap; } __attribute__((packed)); struct tss64 { uint32_t reserved0; uint64_t rsp[3]; uint64_t reserved1; uint64_t ist[7]; uint64_t reserved2; uint16_t reserved3; uint16_t io_bitmap; } __attribute__((packed)); static void fill_segment_descriptor(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg) { uint16_t index = seg->selector >> 3; uint64_t limit = seg->g ? seg->limit >> 12 : seg->limit; uint64_t sd = (limit & 0xffff) | (seg->base & 0xffffff) << 16 | (uint64_t)seg->type << 40 | (uint64_t)seg->s << 44 | (uint64_t)seg->dpl << 45 | (uint64_t)seg->present << 47 | (limit & 0xf0000ULL) << 48 | (uint64_t)seg->avl << 52 | (uint64_t)seg->l << 53 | (uint64_t)seg->db << 54 | (uint64_t)seg->g << 55 | (seg->base & 0xff000000ULL) << 56; dt[index] = sd; lt[index] = sd; } static void fill_segment_descriptor_dword(uint64_t* dt, uint64_t* lt, struct kvm_segment* seg) { fill_segment_descriptor(dt, lt, seg); uint16_t index = seg->selector >> 3; dt[index + 1] = 0; lt[index + 1] = 0; } static void setup_syscall_msrs(int cpufd, uint16_t sel_cs, uint16_t sel_cs_cpl3) { char buf[sizeof(struct kvm_msrs) + 5 * sizeof(struct kvm_msr_entry)]; memset(buf, 0, sizeof(buf)); struct kvm_msrs* msrs = (struct kvm_msrs*)buf; struct kvm_msr_entry* entries = msrs->entries; msrs->nmsrs = 5; entries[0].index = X86_MSR_IA32_SYSENTER_CS; entries[0].data = sel_cs; entries[1].index = X86_MSR_IA32_SYSENTER_ESP; entries[1].data = X86_ADDR_STACK0; entries[2].index = X86_MSR_IA32_SYSENTER_EIP; entries[2].data = X86_ADDR_VAR_SYSEXIT; entries[3].index = X86_MSR_IA32_STAR; entries[3].data = ((uint64_t)sel_cs << 32) | ((uint64_t)sel_cs_cpl3 << 48); entries[4].index = X86_MSR_IA32_LSTAR; entries[4].data = X86_ADDR_VAR_SYSRET; ioctl(cpufd, KVM_SET_MSRS, msrs); } static void setup_32bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem) { sregs->idt.base = guest_mem + X86_ADDR_VAR_IDT; sregs->idt.limit = 0x1ff; uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base); for (int i = 0; i < 32; i++) { struct kvm_segment gate; gate.selector = i << 3; switch (i % 6) { case 0: gate.type = 6; gate.base = X86_SEL_CS16; break; case 1: gate.type = 7; gate.base = X86_SEL_CS16; break; case 2: gate.type = 3; gate.base = X86_SEL_TGATE16; break; case 3: gate.type = 14; gate.base = X86_SEL_CS32; break; case 4: gate.type = 15; gate.base = X86_SEL_CS32; break; case 5: gate.type = 11; gate.base = X86_SEL_TGATE32; break; } gate.limit = guest_mem + X86_ADDR_VAR_USER_CODE2; gate.present = 1; gate.dpl = 0; gate.s = 0; gate.g = 0; gate.db = 0; gate.l = 0; gate.avl = 0; fill_segment_descriptor(idt, idt, &gate); } } static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t guest_mem) { sregs->idt.base = guest_mem + X86_ADDR_VAR_IDT; sregs->idt.limit = 0x1ff; uint64_t* idt = (uint64_t*)(host_mem + sregs->idt.base); for (int i = 0; i < 32; i++) { struct kvm_segment gate; gate.selector = (i * 2) << 3; gate.type = (i & 1) ? 14 : 15; gate.base = X86_SEL_CS64; gate.limit = guest_mem + X86_ADDR_VAR_USER_CODE2; gate.present = 1; gate.dpl = 0; gate.s = 0; gate.g = 0; gate.db = 0; gate.l = 0; gate.avl = 0; fill_segment_descriptor_dword(idt, idt, &gate); } } struct kvm_syz_vm { int vmfd; int next_cpu_id; void* host_mem; size_t total_pages; void* user_text; void* gpa0_mem; }; struct kvm_text { uintptr_t typ; const void* text; uintptr_t size; }; struct kvm_opt { uint64_t typ; uint64_t val; }; #define KVM_SETUP_PAGING (1 << 0) #define KVM_SETUP_PAE (1 << 1) #define KVM_SETUP_PROTECTED (1 << 2) #define KVM_SETUP_CPL3 (1 << 3) #define KVM_SETUP_VIRT86 (1 << 4) #define KVM_SETUP_SMM (1 << 5) #define KVM_SETUP_VM (1 << 6) static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7) { const int vmfd = a0; const int cpufd = a1; char* const host_mem = (char*)a2; const struct kvm_text* const text_array_ptr = (struct kvm_text*)a3; const uintptr_t text_count = a4; const uintptr_t flags = a5; const struct kvm_opt* const opt_array_ptr = (struct kvm_opt*)a6; uintptr_t opt_count = a7; const uintptr_t page_size = 4 << 10; const uintptr_t ioapic_page = 10; const uintptr_t guest_mem_size = 24 * page_size; const uintptr_t guest_mem = 0; (void)text_count; int text_type = text_array_ptr[0].typ; const void* text = text_array_ptr[0].text; uintptr_t text_size = text_array_ptr[0].size; for (uintptr_t i = 0; i < guest_mem_size / page_size; i++) { struct kvm_userspace_memory_region memreg; memreg.slot = i; memreg.flags = 0; memreg.guest_phys_addr = guest_mem + i * page_size; if (i == ioapic_page) memreg.guest_phys_addr = 0xfec00000; memreg.memory_size = page_size; memreg.userspace_addr = (uintptr_t)host_mem + i * page_size; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); } struct kvm_userspace_memory_region memreg; memreg.slot = 1 + (1 << 16); memreg.flags = 0; memreg.guest_phys_addr = 0x30000; memreg.memory_size = 64 << 10; memreg.userspace_addr = (uintptr_t)host_mem; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); struct kvm_sregs sregs; if (ioctl(cpufd, KVM_GET_SREGS, &sregs)) return -1; struct kvm_regs regs; memset(®s, 0, sizeof(regs)); regs.rip = guest_mem + X86_ADDR_TEXT; regs.rsp = X86_ADDR_STACK0; sregs.gdt.base = guest_mem + X86_ADDR_GDT; sregs.gdt.limit = 256 * sizeof(uint64_t) - 1; uint64_t* gdt = (uint64_t*)(host_mem + sregs.gdt.base); struct kvm_segment seg_ldt; memset(&seg_ldt, 0, sizeof(seg_ldt)); seg_ldt.selector = X86_SEL_LDT; seg_ldt.type = 2; seg_ldt.base = guest_mem + X86_ADDR_LDT; seg_ldt.limit = 256 * sizeof(uint64_t) - 1; seg_ldt.present = 1; seg_ldt.dpl = 0; seg_ldt.s = 0; seg_ldt.g = 0; seg_ldt.db = 1; seg_ldt.l = 0; sregs.ldt = seg_ldt; uint64_t* ldt = (uint64_t*)(host_mem + sregs.ldt.base); struct kvm_segment seg_cs16; memset(&seg_cs16, 0, sizeof(seg_cs16)); seg_cs16.selector = X86_SEL_CS16; seg_cs16.type = 11; seg_cs16.base = 0; seg_cs16.limit = 0xfffff; seg_cs16.present = 1; seg_cs16.dpl = 0; seg_cs16.s = 1; seg_cs16.g = 0; seg_cs16.db = 0; seg_cs16.l = 0; struct kvm_segment seg_ds16 = seg_cs16; seg_ds16.selector = X86_SEL_DS16; seg_ds16.type = 3; struct kvm_segment seg_cs16_cpl3 = seg_cs16; seg_cs16_cpl3.selector = X86_SEL_CS16_CPL3; seg_cs16_cpl3.dpl = 3; struct kvm_segment seg_ds16_cpl3 = seg_ds16; seg_ds16_cpl3.selector = X86_SEL_DS16_CPL3; seg_ds16_cpl3.dpl = 3; struct kvm_segment seg_cs32 = seg_cs16; seg_cs32.selector = X86_SEL_CS32; seg_cs32.db = 1; struct kvm_segment seg_ds32 = seg_ds16; seg_ds32.selector = X86_SEL_DS32; seg_ds32.db = 1; struct kvm_segment seg_cs32_cpl3 = seg_cs32; seg_cs32_cpl3.selector = X86_SEL_CS32_CPL3; seg_cs32_cpl3.dpl = 3; struct kvm_segment seg_ds32_cpl3 = seg_ds32; seg_ds32_cpl3.selector = X86_SEL_DS32_CPL3; seg_ds32_cpl3.dpl = 3; struct kvm_segment seg_cs64 = seg_cs16; seg_cs64.selector = X86_SEL_CS64; seg_cs64.l = 1; struct kvm_segment seg_ds64 = seg_ds32; seg_ds64.selector = X86_SEL_DS64; struct kvm_segment seg_cs64_cpl3 = seg_cs64; seg_cs64_cpl3.selector = X86_SEL_CS64_CPL3; seg_cs64_cpl3.dpl = 3; struct kvm_segment seg_ds64_cpl3 = seg_ds64; seg_ds64_cpl3.selector = X86_SEL_DS64_CPL3; seg_ds64_cpl3.dpl = 3; struct kvm_segment seg_tss32; memset(&seg_tss32, 0, sizeof(seg_tss32)); seg_tss32.selector = X86_SEL_TSS32; seg_tss32.type = 9; seg_tss32.base = X86_ADDR_VAR_TSS32; seg_tss32.limit = 0x1ff; seg_tss32.present = 1; seg_tss32.dpl = 0; seg_tss32.s = 0; seg_tss32.g = 0; seg_tss32.db = 0; seg_tss32.l = 0; struct kvm_segment seg_tss32_2 = seg_tss32; seg_tss32_2.selector = X86_SEL_TSS32_2; seg_tss32_2.base = X86_ADDR_VAR_TSS32_2; struct kvm_segment seg_tss32_cpl3 = seg_tss32; seg_tss32_cpl3.selector = X86_SEL_TSS32_CPL3; seg_tss32_cpl3.base = X86_ADDR_VAR_TSS32_CPL3; struct kvm_segment seg_tss32_vm86 = seg_tss32; seg_tss32_vm86.selector = X86_SEL_TSS32_VM86; seg_tss32_vm86.base = X86_ADDR_VAR_TSS32_VM86; struct kvm_segment seg_tss16 = seg_tss32; seg_tss16.selector = X86_SEL_TSS16; seg_tss16.base = X86_ADDR_VAR_TSS16; seg_tss16.limit = 0xff; seg_tss16.type = 1; struct kvm_segment seg_tss16_2 = seg_tss16; seg_tss16_2.selector = X86_SEL_TSS16_2; seg_tss16_2.base = X86_ADDR_VAR_TSS16_2; seg_tss16_2.dpl = 0; struct kvm_segment seg_tss16_cpl3 = seg_tss16; seg_tss16_cpl3.selector = X86_SEL_TSS16_CPL3; seg_tss16_cpl3.base = X86_ADDR_VAR_TSS16_CPL3; seg_tss16_cpl3.dpl = 3; struct kvm_segment seg_tss64 = seg_tss32; seg_tss64.selector = X86_SEL_TSS64; seg_tss64.base = X86_ADDR_VAR_TSS64; seg_tss64.limit = 0x1ff; struct kvm_segment seg_tss64_cpl3 = seg_tss64; seg_tss64_cpl3.selector = X86_SEL_TSS64_CPL3; seg_tss64_cpl3.base = X86_ADDR_VAR_TSS64_CPL3; seg_tss64_cpl3.dpl = 3; struct kvm_segment seg_cgate16; memset(&seg_cgate16, 0, sizeof(seg_cgate16)); seg_cgate16.selector = X86_SEL_CGATE16; seg_cgate16.type = 4; seg_cgate16.base = X86_SEL_CS16 | (2 << 16); seg_cgate16.limit = X86_ADDR_VAR_USER_CODE2; seg_cgate16.present = 1; seg_cgate16.dpl = 0; seg_cgate16.s = 0; seg_cgate16.g = 0; seg_cgate16.db = 0; seg_cgate16.l = 0; seg_cgate16.avl = 0; struct kvm_segment seg_tgate16 = seg_cgate16; seg_tgate16.selector = X86_SEL_TGATE16; seg_tgate16.type = 3; seg_cgate16.base = X86_SEL_TSS16_2; seg_tgate16.limit = 0; struct kvm_segment seg_cgate32 = seg_cgate16; seg_cgate32.selector = X86_SEL_CGATE32; seg_cgate32.type = 12; seg_cgate32.base = X86_SEL_CS32 | (2 << 16); struct kvm_segment seg_tgate32 = seg_cgate32; seg_tgate32.selector = X86_SEL_TGATE32; seg_tgate32.type = 11; seg_tgate32.base = X86_SEL_TSS32_2; seg_tgate32.limit = 0; struct kvm_segment seg_cgate64 = seg_cgate16; seg_cgate64.selector = X86_SEL_CGATE64; seg_cgate64.type = 12; seg_cgate64.base = X86_SEL_CS64; int kvmfd = open("/dev/kvm", O_RDWR); char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)]; memset(buf, 0, sizeof(buf)); struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf; cpuid->nent = 128; ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid); ioctl(cpufd, KVM_SET_CPUID2, cpuid); close(kvmfd); const char* text_prefix = 0; int text_prefix_size = 0; char* host_text = host_mem + X86_ADDR_TEXT; if (text_type == 8) { if (flags & KVM_SETUP_SMM) { if (flags & KVM_SETUP_PROTECTED) { sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; sregs.cr0 |= X86_CR0_PE; } else { sregs.cs.selector = 0; sregs.cs.base = 0; } *(host_mem + X86_ADDR_TEXT) = 0xf4; host_text = host_mem + 0x8000; ioctl(cpufd, KVM_SMI, 0); } else if (flags & KVM_SETUP_VIRT86) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; sregs.cr0 |= X86_CR0_PE; sregs.efer |= X86_EFER_SCE; setup_syscall_msrs(cpufd, X86_SEL_CS32, X86_SEL_CS32_CPL3); setup_32bit_idt(&sregs, host_mem, guest_mem); if (flags & KVM_SETUP_PAGING) { uint64_t pd_addr = guest_mem + X86_ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + X86_ADDR_PD); pd[0] = X86_PDE32_PRESENT | X86_PDE32_RW | X86_PDE32_USER | X86_PDE32_PS; sregs.cr3 = pd_addr; sregs.cr4 |= X86_CR4_PSE; text_prefix = kvm_asm32_paged_vm86; text_prefix_size = sizeof(kvm_asm32_paged_vm86) - 1; } else { text_prefix = kvm_asm32_vm86; text_prefix_size = sizeof(kvm_asm32_vm86) - 1; } } else { sregs.cs.selector = 0; sregs.cs.base = 0; } } else if (text_type == 16) { if (flags & KVM_SETUP_CPL3) { sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; text_prefix = kvm_asm16_cpl3; text_prefix_size = sizeof(kvm_asm16_cpl3) - 1; } else { sregs.cr0 |= X86_CR0_PE; sregs.cs = seg_cs16; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds16; } } else if (text_type == 32) { sregs.cr0 |= X86_CR0_PE; sregs.efer |= X86_EFER_SCE; setup_syscall_msrs(cpufd, X86_SEL_CS32, X86_SEL_CS32_CPL3); setup_32bit_idt(&sregs, host_mem, guest_mem); if (flags & KVM_SETUP_SMM) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; *(host_mem + X86_ADDR_TEXT) = 0xf4; host_text = host_mem + 0x8000; ioctl(cpufd, KVM_SMI, 0); } else if (flags & KVM_SETUP_PAGING) { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; uint64_t pd_addr = guest_mem + X86_ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + X86_ADDR_PD); pd[0] = X86_PDE32_PRESENT | X86_PDE32_RW | X86_PDE32_USER | X86_PDE32_PS; sregs.cr3 = pd_addr; sregs.cr4 |= X86_CR4_PSE; text_prefix = kvm_asm32_paged; text_prefix_size = sizeof(kvm_asm32_paged) - 1; } else if (flags & KVM_SETUP_CPL3) { sregs.cs = seg_cs32_cpl3; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32_cpl3; } else { sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; } } else { sregs.efer |= X86_EFER_LME | X86_EFER_SCE; sregs.cr0 |= X86_CR0_PE; setup_syscall_msrs(cpufd, X86_SEL_CS64, X86_SEL_CS64_CPL3); setup_64bit_idt(&sregs, host_mem, guest_mem); sregs.cs = seg_cs32; sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32; uint64_t pml4_addr = guest_mem + X86_ADDR_PML4; uint64_t* pml4 = (uint64_t*)(host_mem + X86_ADDR_PML4); uint64_t pdpt_addr = guest_mem + X86_ADDR_PDP; uint64_t* pdpt = (uint64_t*)(host_mem + X86_ADDR_PDP); uint64_t pd_addr = guest_mem + X86_ADDR_PD; uint64_t* pd = (uint64_t*)(host_mem + X86_ADDR_PD); pml4[0] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER | pdpt_addr; pdpt[0] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER | pd_addr; pd[0] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER | X86_PDE64_PS; sregs.cr3 = pml4_addr; sregs.cr4 |= X86_CR4_PAE; if (flags & KVM_SETUP_VM) { sregs.cr0 |= X86_CR0_NE; *((uint64_t*)(host_mem + X86_ADDR_VAR_VMXON_PTR)) = X86_ADDR_VAR_VMXON; *((uint64_t*)(host_mem + X86_ADDR_VAR_VMCS_PTR)) = X86_ADDR_VAR_VMCS; memcpy(host_mem + X86_ADDR_VAR_VMEXIT_CODE, kvm_asm64_vm_exit, sizeof(kvm_asm64_vm_exit) - 1); *((uint64_t*)(host_mem + X86_ADDR_VAR_VMEXIT_PTR)) = X86_ADDR_VAR_VMEXIT_CODE; text_prefix = kvm_asm64_init_vm; text_prefix_size = sizeof(kvm_asm64_init_vm) - 1; } else if (flags & KVM_SETUP_CPL3) { text_prefix = kvm_asm64_cpl3; text_prefix_size = sizeof(kvm_asm64_cpl3) - 1; } else { text_prefix = kvm_asm64_enable_long; text_prefix_size = sizeof(kvm_asm64_enable_long) - 1; } } struct tss16 tss16; memset(&tss16, 0, sizeof(tss16)); tss16.ss0 = tss16.ss1 = tss16.ss2 = X86_SEL_DS16; tss16.sp0 = tss16.sp1 = tss16.sp2 = X86_ADDR_STACK0; tss16.ip = X86_ADDR_VAR_USER_CODE2; tss16.flags = (1 << 1); tss16.cs = X86_SEL_CS16; tss16.es = tss16.ds = tss16.ss = X86_SEL_DS16; tss16.ldt = X86_SEL_LDT; struct tss16* tss16_addr = (struct tss16*)(host_mem + seg_tss16_2.base); memcpy(tss16_addr, &tss16, sizeof(tss16)); memset(&tss16, 0, sizeof(tss16)); tss16.ss0 = tss16.ss1 = tss16.ss2 = X86_SEL_DS16; tss16.sp0 = tss16.sp1 = tss16.sp2 = X86_ADDR_STACK0; tss16.ip = X86_ADDR_VAR_USER_CODE2; tss16.flags = (1 << 1); tss16.cs = X86_SEL_CS16_CPL3; tss16.es = tss16.ds = tss16.ss = X86_SEL_DS16_CPL3; tss16.ldt = X86_SEL_LDT; struct tss16* tss16_cpl3_addr = (struct tss16*)(host_mem + seg_tss16_cpl3.base); memcpy(tss16_cpl3_addr, &tss16, sizeof(tss16)); struct tss32 tss32; memset(&tss32, 0, sizeof(tss32)); tss32.ss0 = tss32.ss1 = tss32.ss2 = X86_SEL_DS32; tss32.sp0 = tss32.sp1 = tss32.sp2 = X86_ADDR_STACK0; tss32.ip = X86_ADDR_VAR_USER_CODE; tss32.flags = (1 << 1) | (1 << 17); tss32.ldt = X86_SEL_LDT; tss32.cr3 = sregs.cr3; tss32.io_bitmap = offsetof(struct tss32, io_bitmap); struct tss32* tss32_addr = (struct tss32*)(host_mem + seg_tss32_vm86.base); memcpy(tss32_addr, &tss32, sizeof(tss32)); memset(&tss32, 0, sizeof(tss32)); tss32.ss0 = tss32.ss1 = tss32.ss2 = X86_SEL_DS32; tss32.sp0 = tss32.sp1 = tss32.sp2 = X86_ADDR_STACK0; tss32.ip = X86_ADDR_VAR_USER_CODE; tss32.flags = (1 << 1); tss32.cr3 = sregs.cr3; tss32.es = tss32.ds = tss32.ss = tss32.gs = tss32.fs = X86_SEL_DS32; tss32.cs = X86_SEL_CS32; tss32.ldt = X86_SEL_LDT; tss32.cr3 = sregs.cr3; tss32.io_bitmap = offsetof(struct tss32, io_bitmap); struct tss32* tss32_cpl3_addr = (struct tss32*)(host_mem + seg_tss32_2.base); memcpy(tss32_cpl3_addr, &tss32, sizeof(tss32)); struct tss64 tss64; memset(&tss64, 0, sizeof(tss64)); tss64.rsp[0] = X86_ADDR_STACK0; tss64.rsp[1] = X86_ADDR_STACK0; tss64.rsp[2] = X86_ADDR_STACK0; tss64.io_bitmap = offsetof(struct tss64, io_bitmap); struct tss64* tss64_addr = (struct tss64*)(host_mem + seg_tss64.base); memcpy(tss64_addr, &tss64, sizeof(tss64)); memset(&tss64, 0, sizeof(tss64)); tss64.rsp[0] = X86_ADDR_STACK0; tss64.rsp[1] = X86_ADDR_STACK0; tss64.rsp[2] = X86_ADDR_STACK0; tss64.io_bitmap = offsetof(struct tss64, io_bitmap); struct tss64* tss64_cpl3_addr = (struct tss64*)(host_mem + seg_tss64_cpl3.base); memcpy(tss64_cpl3_addr, &tss64, sizeof(tss64)); if (text_size > 1000) text_size = 1000; if (text_prefix) { memcpy(host_text, text_prefix, text_prefix_size); void* patch = memmem(host_text, text_prefix_size, "\xde\xc0\xad\x0b", 4); if (patch) *((uint32_t*)patch) = guest_mem + X86_ADDR_TEXT + ((char*)patch - host_text) + 6; uint16_t magic = X86_PREFIX_SIZE; patch = memmem(host_text, text_prefix_size, &magic, sizeof(magic)); if (patch) *((uint16_t*)patch) = guest_mem + X86_ADDR_TEXT + text_prefix_size; } memcpy((void*)(host_text + text_prefix_size), text, text_size); *(host_text + text_prefix_size + text_size) = 0xf4; memcpy(host_mem + X86_ADDR_VAR_USER_CODE, text, text_size); *(host_mem + X86_ADDR_VAR_USER_CODE + text_size) = 0xf4; *(host_mem + X86_ADDR_VAR_HLT) = 0xf4; memcpy(host_mem + X86_ADDR_VAR_SYSRET, "\x0f\x07\xf4", 3); memcpy(host_mem + X86_ADDR_VAR_SYSEXIT, "\x0f\x35\xf4", 3); *(uint64_t*)(host_mem + X86_ADDR_VAR_VMWRITE_FLD) = 0; *(uint64_t*)(host_mem + X86_ADDR_VAR_VMWRITE_VAL) = 0; if (opt_count > 2) opt_count = 2; for (uintptr_t i = 0; i < opt_count; i++) { uint64_t typ = opt_array_ptr[i].typ; uint64_t val = opt_array_ptr[i].val; switch (typ % 9) { case 0: sregs.cr0 ^= val & (X86_CR0_MP | X86_CR0_EM | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | X86_CR0_NW | X86_CR0_CD); break; case 1: sregs.cr4 ^= val & (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT | X86_CR4_UMIP | X86_CR4_VMXE | X86_CR4_SMXE | X86_CR4_FSGSBASE | X86_CR4_PCIDE | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); break; case 2: sregs.efer ^= val & (X86_EFER_SCE | X86_EFER_NXE | X86_EFER_SVME | X86_EFER_LMSLE | X86_EFER_FFXSR | X86_EFER_TCE); break; case 3: val &= ((1 << 8) | (1 << 9) | (1 << 10) | (1 << 12) | (1 << 13) | (1 << 14) | (1 << 15) | (1 << 18) | (1 << 19) | (1 << 20) | (1 << 21)); regs.rflags ^= val; tss16_addr->flags ^= val; tss16_cpl3_addr->flags ^= val; tss32_addr->flags ^= val; tss32_cpl3_addr->flags ^= val; break; case 4: seg_cs16.type = val & 0xf; seg_cs32.type = val & 0xf; seg_cs64.type = val & 0xf; break; case 5: seg_cs16_cpl3.type = val & 0xf; seg_cs32_cpl3.type = val & 0xf; seg_cs64_cpl3.type = val & 0xf; break; case 6: seg_ds16.type = val & 0xf; seg_ds32.type = val & 0xf; seg_ds64.type = val & 0xf; break; case 7: seg_ds16_cpl3.type = val & 0xf; seg_ds32_cpl3.type = val & 0xf; seg_ds64_cpl3.type = val & 0xf; break; case 8: *(uint64_t*)(host_mem + X86_ADDR_VAR_VMWRITE_FLD) = (val & 0xffff); *(uint64_t*)(host_mem + X86_ADDR_VAR_VMWRITE_VAL) = (val >> 16); break; default: exit(1); } } regs.rflags |= 2; fill_segment_descriptor(gdt, ldt, &seg_ldt); fill_segment_descriptor(gdt, ldt, &seg_cs16); fill_segment_descriptor(gdt, ldt, &seg_ds16); fill_segment_descriptor(gdt, ldt, &seg_cs16_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds16_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cs32); fill_segment_descriptor(gdt, ldt, &seg_ds32); fill_segment_descriptor(gdt, ldt, &seg_cs32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cs64); fill_segment_descriptor(gdt, ldt, &seg_ds64); fill_segment_descriptor(gdt, ldt, &seg_cs64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_ds64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_tss32); fill_segment_descriptor(gdt, ldt, &seg_tss32_2); fill_segment_descriptor(gdt, ldt, &seg_tss32_cpl3); fill_segment_descriptor(gdt, ldt, &seg_tss32_vm86); fill_segment_descriptor(gdt, ldt, &seg_tss16); fill_segment_descriptor(gdt, ldt, &seg_tss16_2); fill_segment_descriptor(gdt, ldt, &seg_tss16_cpl3); fill_segment_descriptor_dword(gdt, ldt, &seg_tss64); fill_segment_descriptor_dword(gdt, ldt, &seg_tss64_cpl3); fill_segment_descriptor(gdt, ldt, &seg_cgate16); fill_segment_descriptor(gdt, ldt, &seg_tgate16); fill_segment_descriptor(gdt, ldt, &seg_cgate32); fill_segment_descriptor(gdt, ldt, &seg_tgate32); fill_segment_descriptor_dword(gdt, ldt, &seg_cgate64); if (ioctl(cpufd, KVM_SET_SREGS, &sregs)) return -1; if (ioctl(cpufd, KVM_SET_REGS, ®s)) return -1; return 0; } static void setup_gadgetfs(); static void setup_binderfs(); static void setup_fusectl(); static void sandbox_common_mount_tmpfs(void) { write_file("/proc/sys/fs/mount-max", "100000"); if (mkdir("./syz-tmp", 0777)) exit(1); if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) exit(1); if (mkdir("./syz-tmp/newroot", 0777)) exit(1); if (mkdir("./syz-tmp/newroot/dev", 0700)) exit(1); unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL)) exit(1); if (mkdir("./syz-tmp/newroot/proc", 0700)) exit(1); if (mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL)) exit(1); if (mkdir("./syz-tmp/newroot/selinux", 0700)) exit(1); const char* selinux_path = "./syz-tmp/newroot/selinux"; if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) { if (errno != ENOENT) exit(1); if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT) exit(1); } if (mkdir("./syz-tmp/newroot/sys", 0700)) exit(1); if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL)) exit(1); if (mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL, bind_mount_flags, NULL) && errno != ENOENT) exit(1); if (mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL, bind_mount_flags, NULL) && errno != ENOENT) exit(1); if (mount("/proc/sys/fs/binfmt_misc", "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags, NULL) && errno != ENOENT) exit(1); if (mkdir("./syz-tmp/newroot/syz-inputs", 0700)) exit(1); if (mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL, bind_mount_flags | MS_RDONLY, NULL) && errno != ENOENT) exit(1); if (mkdir("./syz-tmp/pivot", 0777)) exit(1); if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { if (chdir("./syz-tmp")) exit(1); } else { if (chdir("/")) exit(1); if (umount2("./pivot", MNT_DETACH)) exit(1); } if (chroot("./newroot")) exit(1); if (chdir("/")) exit(1); setup_gadgetfs(); setup_binderfs(); setup_fusectl(); } static void setup_gadgetfs() { if (mkdir("/dev/gadgetfs", 0777)) { } if (mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL)) { } } static void setup_fusectl() { if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { } } static void setup_binderfs() { if (mkdir("/dev/binderfs", 0777)) { } if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) { } } static void loop(); static void sandbox_common() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); if (getppid() == 1) exit(1); struct rlimit rlim; rlim.rlim_cur = rlim.rlim_max = (200 << 20); setrlimit(RLIMIT_AS, &rlim); rlim.rlim_cur = rlim.rlim_max = 32 << 20; setrlimit(RLIMIT_MEMLOCK, &rlim); rlim.rlim_cur = rlim.rlim_max = 136 << 20; setrlimit(RLIMIT_FSIZE, &rlim); rlim.rlim_cur = rlim.rlim_max = 1 << 20; setrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = rlim.rlim_max = 128 << 20; setrlimit(RLIMIT_CORE, &rlim); rlim.rlim_cur = rlim.rlim_max = 256; setrlimit(RLIMIT_NOFILE, &rlim); if (unshare(CLONE_NEWNS)) { } if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { } if (unshare(CLONE_NEWIPC)) { } if (unshare(0x02000000)) { } if (unshare(CLONE_NEWUTS)) { } if (unshare(CLONE_SYSVSEM)) { } typedef struct { const char* name; const char* value; } sysctl_t; static const sysctl_t sysctls[] = { {"/proc/sys/kernel/shmmax", "16777216"}, {"/proc/sys/kernel/shmall", "536870912"}, {"/proc/sys/kernel/shmmni", "1024"}, {"/proc/sys/kernel/msgmax", "8192"}, {"/proc/sys/kernel/msgmni", "1024"}, {"/proc/sys/kernel/msgmnb", "1024"}, {"/proc/sys/kernel/sem", "1024 1048576 500 1024"}, }; unsigned i; for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++) write_file(sysctls[i].name, sysctls[i].value); } static int wait_for_loop(int pid) { if (pid < 0) exit(1); int status = 0; while (waitpid(-1, &status, __WALL) != pid) { } return WEXITSTATUS(status); } static void drop_caps(void) { struct __user_cap_header_struct cap_hdr = {}; struct __user_cap_data_struct cap_data[2] = {}; cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; cap_hdr.pid = getpid(); if (syscall(SYS_capget, &cap_hdr, &cap_data)) exit(1); const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); cap_data[0].effective &= ~drop; cap_data[0].permitted &= ~drop; cap_data[0].inheritable &= ~drop; if (syscall(SYS_capset, &cap_hdr, &cap_data)) exit(1); } static int do_sandbox_none(void) { if (unshare(CLONE_NEWPID)) { } int pid = fork(); if (pid != 0) return wait_for_loop(pid); initialize_vhci(); sandbox_common(); drop_caps(); initialize_netdevices_init(); if (unshare(CLONE_NEWNET)) { } write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); initialize_tun(); initialize_netdevices(); initialize_wifi_devices(); sandbox_common_mount_tmpfs(); loop(); exit(1); } #define FS_IOC_SETFLAGS _IOW('f', 2, long) static void remove_dir(const char* dir) { int iter = 0; DIR* dp = 0; const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW; retry: while (umount2(dir, umount_flags) == 0) { } dp = opendir(dir); if (dp == NULL) { if (errno == EMFILE) { exit(1); } exit(1); } struct dirent* ep = 0; while ((ep = readdir(dp))) { if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) continue; char filename[FILENAME_MAX]; snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); while (umount2(filename, umount_flags) == 0) { } struct stat st; if (lstat(filename, &st)) exit(1); if (S_ISDIR(st.st_mode)) { remove_dir(filename); continue; } int i; for (i = 0;; i++) { if (unlink(filename) == 0) break; if (errno == EPERM) { int fd = open(filename, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { } close(fd); continue; } } if (errno == EROFS) { break; } if (errno != EBUSY || i > 100) exit(1); if (umount2(filename, umount_flags)) exit(1); } } closedir(dp); for (int i = 0;; i++) { if (rmdir(dir) == 0) break; if (i < 100) { if (errno == EPERM) { int fd = open(dir, O_RDONLY); if (fd != -1) { long flags = 0; if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { } close(fd); continue; } } if (errno == EROFS) { break; } if (errno == EBUSY) { if (umount2(dir, umount_flags)) exit(1); continue; } if (errno == ENOTEMPTY) { if (iter < 100) { iter++; goto retry; } } } exit(1); } } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); for (int i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } static void setup_test() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); write_file("/proc/self/oom_score_adj", "1000"); flush_tun(); if (symlink("/dev/binderfs", "./binderfs")) { } } static void close_fds() { for (int fd = 3; fd < MAX_FDS; fd++) close(fd); } static const char* setup_usb() { if (chmod("/dev/raw-gadget", 0666)) return "failed to chmod /dev/raw-gadget"; return NULL; } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { int iter = 0; for (;; iter++) { char cwdbuf[32]; sprintf(cwdbuf, "./%d", iter); if (mkdir(cwdbuf, 0777)) exit(1); int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { if (chdir(cwdbuf)) exit(1); setup_test(); execute_one(); close_fds(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { sleep_ms(10); if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; if (current_time_ms() - start < 5000) continue; kill_and_wait(pid, &status); break; } remove_dir(cwdbuf); } } uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x0}; void execute_one(void) { intptr_t res = 0; if (write(1, "executing program\n", sizeof("executing program\n") - 1)) { } // openat$kvm arguments: [ // fd: const = 0xffffffffffffff9c (8 bytes) // file: ptr[in, buffer] { // buffer: {2f 64 65 76 2f 6b 76 6d 00} (length 0x9) // } // flags: open_flags = 0x22401 (4 bytes) // mode: const = 0x0 (2 bytes) // ] // returns fd_kvm NONFAILING(memcpy((void*)0x200000000480, "/dev/kvm\000", 9)); res = syscall( __NR_openat, /*fd=*/0xffffffffffffff9cul, /*file=*/0x200000000480ul, /*flags=O_NOFOLLOW|FASYNC|O_APPEND|O_WRONLY*/ 0x22401, /*mode=*/0); if (res != -1) r[0] = res; // ioctl$KVM_CREATE_VM arguments: [ // fd: fd_kvm (resource) // cmd: const = 0xae01 (4 bytes) // type: intptr = 0x0 (8 bytes) // ] // returns fd_kvmvm res = syscall(__NR_ioctl, /*fd=*/r[0], /*cmd=*/0xae01, /*type=*/0ul); if (res != -1) r[1] = res; // ioctl$KVM_CREATE_VCPU arguments: [ // fd: fd_kvmvm (resource) // cmd: const = 0xae41 (4 bytes) // id: intptr = 0x0 (8 bytes) // ] // returns fd_kvmcpu res = syscall(__NR_ioctl, /*fd=*/r[1], /*cmd=*/0xae41, /*id=*/0ul); if (res != -1) r[2] = res; // syz_kvm_setup_cpu$x86 arguments: [ // fd: fd_kvmvm (resource) // cpufd: fd_kvmcpu (resource) // usermem: VMA[0x18000] // text: ptr[in, array[kvm_text_x86]] { // array[kvm_text_x86] { // union kvm_text_x86 { // text16: kvm_text_x86_16 { // typ: const = 0x10 (8 bytes) // text: ptr[in, buffer] { // buffer: {ea 0d 00 b0 00 0f 22 94 64 67 0f c7 9c 81 00 80 00 00 // ba 61 00 ec ba f8 0c 66 b8 ec b1 04 85 66 ef ba fc 0c 66 b8 10 // 6d 7d d1 66 ef f0 86 7d c5 de cb 66 b9 80 00 00 c0 0f 32 66 35 // 00 01 00 00 0f 30 ea 00 00 e5 00 66 b8 01 00 00 00 0f 01 d9} // (length 0x50) // } // size: len = 0x50 (8 bytes) // } // } // } // } // ntext: len = 0x1 (8 bytes) // flags: kvm_setup_flags = 0x43 (8 bytes) // opts: nil // nopt: len = 0x0 (8 bytes) // ] NONFAILING(*(uint64_t*)0x200000000100 = 0x10); NONFAILING(*(uint64_t*)0x200000000108 = 0x200000000200); NONFAILING( memcpy((void*)0x200000000200, "\xea\x0d\x00\xb0\x00\x0f\x22\x94\x64\x67\x0f\xc7\x9c\x81\x00\x80" "\x00\x00\xba\x61\x00\xec\xba\xf8\x0c\x66\xb8\xec\xb1\x04\x85\x66" "\xef\xba\xfc\x0c\x66\xb8\x10\x6d\x7d\xd1\x66\xef\xf0\x86\x7d\xc5" "\xde\xcb\x66\xb9\x80\x00\x00\xc0\x0f\x32\x66\x35\x00\x01\x00\x00" "\x0f\x30\xea\x00\x00\xe5\x00\x66\xb8\x01\x00\x00\x00\x0f\x01\xd9", 80)); NONFAILING(*(uint64_t*)0x200000000110 = 0x50); NONFAILING(syz_kvm_setup_cpu( /*fd=*/-1, /*cpufd=*/r[2], /*usermem=*/0x200000fe8000, /*text=*/0x200000000100, /*ntext=*/1, /*flags=KVM_SETUP_VM|KVM_SETUP_PAE|KVM_SETUP_PAGING*/ 0x43, /*opts=*/0, /*nopt=*/0)); // getpgrp arguments: [ // pid: pid (resource) // ] // returns pid res = syscall(__NR_getpgrp, /*pid=*/0); if (res != -1) r[3] = res; // sched_setaffinity arguments: [ // pid: pid (resource) // cpusetsize: len = 0x8 (8 bytes) // mask: ptr[in, int64] { // int64 = 0x5 (8 bytes) // } // ] NONFAILING(*(uint64_t*)0x200000000040 = 5); syscall(__NR_sched_setaffinity, /*pid=*/r[3], /*cpusetsize=*/8ul, /*mask=*/0x200000000040ul); // prlimit64 arguments: [ // pid: pid (resource) // res: rlimit_type = 0xe (8 bytes) // new: ptr[in, rlimit] { // rlimit { // soft: intptr = 0x8 (8 bytes) // hard: intptr = 0x80000100008b (8 bytes) // } // } // old: nil // ] NONFAILING(*(uint64_t*)0x200000000100 = 8); NONFAILING(*(uint64_t*)0x200000000108 = 0x80000100008b); syscall(__NR_prlimit64, /*pid=*/0, /*res=RLIMIT_RTPRIO*/ 0xeul, /*new=*/0x200000000100ul, /*old=*/0ul); // sched_setscheduler arguments: [ // pid: pid (resource) // policy: sched_policy = 0x1 (8 bytes) // prio: ptr[in, int32] { // int32 = 0x7 (4 bytes) // } // ] NONFAILING(*(uint32_t*)0x200000000300 = 7); syscall(__NR_sched_setscheduler, /*pid=*/0, /*policy=SCHED_FIFO*/ 1ul, /*prio=*/0x200000000300ul); // prctl$PR_SCHED_CORE arguments: [ // option: const = 0x3e (8 bytes) // cmd: intptr = 0x1 (8 bytes) // pid: pid (resource) // type: pid_type = 0x1 (8 bytes) // uaddr: nil // ] syscall(__NR_prctl, /*option=*/0x3eul, /*cmd=*/1ul, /*pid=*/0, /*type=PIDTYPE_TGID*/ 1ul, /*uaddr=*/0ul); // syz_open_dev$MSR arguments: [ // dev: nil // id: intptr = 0x0 (8 bytes) // flags: const = 0x0 (8 bytes) // ] // returns fd_msr NONFAILING(syz_open_dev(/*dev=*/0, /*id=*/0, /*flags=*/0)); // syz_kvm_setup_cpu$x86 arguments: [ // fd: fd_kvmvm (resource) // cpufd: fd_kvmcpu (resource) // usermem: VMA[0x18000] // text: ptr[in, array[kvm_text_x86]] { // array[kvm_text_x86] { // union kvm_text_x86 { // text16: kvm_text_x86_16 { // typ: const = 0x10 (8 bytes) // text: nil // size: len = 0x3f (8 bytes) // } // } // } // } // ntext: len = 0x1 (8 bytes) // flags: kvm_setup_flags = 0x0 (8 bytes) // opts: nil // nopt: len = 0x0 (8 bytes) // ] NONFAILING(*(uint64_t*)0x2000000000c0 = 0x10); NONFAILING(*(uint64_t*)0x2000000000c8 = 0); NONFAILING(*(uint64_t*)0x2000000000d0 = 0x3f); NONFAILING(syz_kvm_setup_cpu(/*fd=*/r[1], /*cpufd=*/-1, /*usermem=*/0x200000fe8000, /*text=*/0x2000000000c0, /*ntext=*/1, /*flags=*/0, /*opts=*/0, /*nopt=*/0)); // ioctl$KVM_RUN arguments: [ // fd: fd_kvmcpu (resource) // cmd: const = 0xae80 (4 bytes) // arg: const = 0x0 (8 bytes) // ] syscall(__NR_ioctl, /*fd=*/r[2], /*cmd=*/0xae80, /*arg=*/0ul); } int main(void) { syscall(__NR_mmap, /*addr=*/0x1ffffffff000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/(intptr_t)-1, /*offset=*/0ul); syscall(__NR_mmap, /*addr=*/0x200000000000ul, /*len=*/0x1000000ul, /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/(intptr_t)-1, /*offset=*/0ul); syscall(__NR_mmap, /*addr=*/0x200001000000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/(intptr_t)-1, /*offset=*/0ul); const char* reason; (void)reason; if ((reason = setup_usb())) printf("the reproducer may not work as expected: USB injection setup " "failed: %s\n", reason); install_segv_handler(); for (procid = 0; procid < 5; procid++) { if (fork() == 0) { use_temporary_dir(); do_sandbox_none(); } } sleep(1000000); return 0; }