Try   HackMD

KVM Host Development Record

Note of kvm-host

The note is available in HackMD.

Resolved Issues

Misconfiguration of Setting Base Address Register (BAR)

This is the original implementation in kvm-host (src/pci.c, commit: 93f1fee):

void pci_set_bar(struct pci_dev *dev,
                 uint8_t bar,
                 uint32_t bar_size,
                 bool is_io_space,
                 dev_io_fn do_io)
{
    /* TODO: mem type, prefetch */
    /* FIXME: bar_size must be power of 2 */
    PCI_HDR_WRITE(dev->hdr, PCI_BAR_OFFSET(bar), is_io_space, 32);
    dev->bar_size[bar] = bar_size;
    dev->bar_is_io_space[bar] = is_io_space;
    dev_init(&dev->space_dev[bar], 0, bar_size, dev, do_io);
}

Usage:

In src/virtio-pci.c:

void virtio_pci_init(struct virtio_pci_dev *dev,
                     struct pci *pci,
                     struct bus *io_bus,
                     struct bus *mmio_bus)
{
    // ...
    pci_set_bar(&dev->pci_dev, 0, 0x100, PCI_BASE_ADDRESS_SPACE_MEMORY,
                virtio_pci_space_io);
    // ...
}

Definitions:

In /usr/include/linux/pci_regs.h:

/*
 * Base addresses specify locations in memory or I/O space.
 * Decoded size can be determined by writing a value of
 * 0xffffffff to the register, and reading it back.  Only
 * 1 bits are decoded.
 */
#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */

#define PCI_BASE_ADDRESS_SPACE         0x01	/* 0 = memory, 1 = I/O */
#define PCI_BASE_ADDRESS_SPACE_IO      0x01
#define PCI_BASE_ADDRESS_SPACE_MEMORY  0x00
#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
#define PCI_BASE_ADDRESS_MEM_TYPE_32   0x00	/* 32 bit address */
#define PCI_BASE_ADDRESS_MEM_TYPE_1M   0x02	/* Below 1M [obsolete] */
#define PCI_BASE_ADDRESS_MEM_TYPE_64   0x04	/* 64 bit address */
#define PCI_BASE_ADDRESS_MEM_PREFETCH  0x08	/* prefetchable? */
#define PCI_BASE_ADDRESS_MEM_MASK      (~0x0fUL)
#define PCI_BASE_ADDRESS_IO_MASK       (~0x03UL)
/* bit 1 is reserved if address_space = 1 */

In src/pci.h:

#define PCI_BAR_OFFSET(bar) (PCI_BASE_ADDRESS_0 + ((bar) << 2))

Therefore, the pci_set_bar() call in virtio_pci_init() does the following:

PCI_HDR_WRITE(dev->pci_dev.hdr, PCI_BAR_OFFSET(0), PCI_BASE_ADDRESS_SPACE_MEMORY, 32);
dev->pci_dev.bar_size[0] = 0x100;  // 256
dev->pci_dev.bar_is_io_space[0] = PCI_BASE_ADDRESS_SPACE_MEMORY;
dev_init(&dev->pci_dev.space_dev[0], 0, 0x100, &dev->pci_dev, virtio_pci_space_io);

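According to the BAR settings, pci_set_bar() has to:

  1. Configure whether the BAR uses the memory-space layout or the I/O-space layout.
  2. Avoid implicitly converting PCI_BASE_ADDRESS_SPACE_MEMORY (0x00) to the bool parameter is_io_space and then storing it in bool bar_is_io_space[6]; a bool can only hold 0 or 1, so the memory-type and prefetch bits (see the TODO in pci_set_bar()) cannot be passed through it.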

The fix applied in commit c4b325e replaces the bool is_io_space parameter with a single uint32_t layout parameter that carries all of the BAR flag bits.

Users are required to indicate the correct flags via layout, as demonstrated below:

diff --git a/src/virtio-pci.c b/src/virtio-pci.c
index 289abb8..f712e8c 100644
--- a/src/virtio-pci.c
+++ b/src/virtio-pci.c
@@ -268,7 +268,9 @@ void virtio_pci_init(struct virtio_pci_dev *dev,
     pci_set_status(&dev->pci_dev, PCI_STATUS_CAP_LIST | PCI_STATUS_INTERRUPT);
-    pci_set_bar(&dev->pci_dev, 0, 0x100, PCI_BASE_ADDRESS_SPACE_MEMORY,
+    pci_set_bar(&dev->pci_dev, 0, 0x100,
+                PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_32
+                /* | PCI_BASE_ADDRESS_MEM_PREFETCH */,
     virtio_pci_set_cap(dev, cap_list);

The pci_set_bar() function writes the layout directly into the BAR and extracts the boolean bar_is_io_space from bit 0 of the layout.

diff --git a/src/pci.c b/src/pci.c
index 2b59ded..aa4dccf 100644
--- a/src/pci.c
+++ b/src/pci.c
@@ -145,14 +145,13 @@ static void pci_mmio_io(void *owner,
 void pci_set_bar(struct pci_dev *dev,
                  uint8_t bar,
                  uint32_t bar_size,
-                 bool is_io_space,
+                 uint32_t layout,
                  dev_io_fn do_io)
 {
-    /* TODO: mem type, prefetch */
     /* FIXME: bar_size must be power of 2 */
-    PCI_HDR_WRITE(dev->hdr, PCI_BAR_OFFSET(bar), is_io_space, 32);
+    PCI_HDR_WRITE(dev->hdr, PCI_BAR_OFFSET(bar), layout, 32);
     dev->bar_size[bar] = bar_size;
-    dev->bar_is_io_space[bar] = is_io_space;
+    dev->bar_is_io_space[bar] = layout & 0x1U;  // Get the bit[0] of layout
     dev_init(&dev->space_dev[bar], 0, bar_size, dev, do_io);
 }

TODO List

Network IRQ Descriptor in arm64

VIRTIO_NET_IRQ is defined in src/arch/x86/desc.h:

#define VIRTIO_NET_IRQ 14
#define VIRTIO_BLK_IRQ 15

but not in arm64.

Below is the compilation error log on the arm64 platform.

  CC    build/virtio-net.o
src/virtio-net.c: In function ‘virtio_net_setup’:
src/virtio-net.c:242:20: error: ‘VIRTIO_NET_IRQ’ undeclared (first use in this function); did you mean ‘VIRTIO_NET_ERR’?
  242 |     dev->irq_num = VIRTIO_NET_IRQ;
      |                    ^~~~~~~~~~~~~~
      |                    VIRTIO_NET_ERR
src/virtio-net.c:242:20: note: each undeclared identifier is reported only once for each function it appears in
make: *** [Makefile:60: build/virtio-net.o] Error 1

TODO: 閱讀清單與目前進度

Others

Linux 核心專題: KVM 在 Arm64 的驗證和調整