本文重点分析中断的处理流程。

(1)pic中断处理流程

为了一窥中断处理的具体流程,这里我们选择最简单模拟串口为例进行分析。qemu作为设备模拟器会模拟很多传统的设备,isa-serial就是其中之一。我们看下串口触发中断时候的调用栈:

#0  0x00005555557dd543 in kvm_set_irq (s=0x5555568f4440, irq=4, level=1) at /home/fang/code/qemu/accel/kvm/kvm-all.c:991
#1  0x0000555555881c0f in kvm_pic_set_irq (opaque=0x0, irq=4, level=1) at /home/fang/code/qemu/hw/i386/kvm/i8259.c:114
#2  0x00005555559cb0aa in qemu_set_irq (irq=0x5555577c9dc0, level=1) at hw/core/irq.c:45
#3  0x0000555555881fda in kvm_pc_gsi_handler (opaque=0x555556b61970, n=4, level=1) at /home/fang/code/qemu/hw/i386/kvm/ioapic.c:55
#4  0x00005555559cb0aa in qemu_set_irq (irq=0x555556b63660, level=1) at hw/core/irq.c:45
#5  0x00005555559c06e7 in qemu_irq_raise (irq=0x555556b63660) at /home/fang/code/qemu/include/hw/irq.h:16
#6  0x00005555559c09b3 in serial_update_irq (s=0x555557b77770) at hw/char/serial.c:145
#7  0x00005555559c138c in serial_ioport_write (opaque=0x555557b77770, addr=1, val=2, size=1) at hw/char/serial.c:404

可以看到qemu用户态有个函数kvm_set_irq,这个函数是用户态通知kvm内核态触发一个中断的入口。函数中通过调用 kvm_vm_ioctl注入一个中断,调用号是 KVM_IRQ_LINE(pic类型中断),入参是一个 kvm_irq_level 的数据结构(传入了irq编号和中断的电平信息)。模拟isa串口是个isa设备使用边沿触发,所以注入中断会调用2次这个函数前后2次电平相反。

int kvm_set_irq(KVMState *s, int irq, int level)
{
    struct kvm_irq_level event;
    int ret;

    assert(kvm_async_interrupts_enabled());

    event.level = level;
    event.irq = irq;
    ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event);
    if (ret < 0) {
        perror("kvm_set_irq");
        abort();
    }

    return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
}

这个ioctl在内核的处理对应到下面这段代码,pic类型中断进而会调用到kvm_vm_ioctl_irq_line函数。

kvm_vm_ioctl
{
    ......
#ifdef __KVM_HAVE_IRQ_LINE
    case KVM_IRQ_LINE_STATUS:
    case KVM_IRQ_LINE: {            /* 处理pic上产生的中断 */
        struct kvm_irq_level irq_event;

        r = -EFAULT;
        if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
            goto out;

        r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
                    ioctl == KVM_IRQ_LINE_STATUS);
        if (r)
            goto out;

        r = -EFAULT;
        if (ioctl == KVM_IRQ_LINE_STATUS) {
            if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
                goto out;
        }

        r = 0;
        break;
    }
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING      /* 处理ioapic的中断 */
    case KVM_SET_GSI_ROUTING: {
        struct kvm_irq_routing routing;
        struct kvm_irq_routing __user *urouting;
        struct kvm_irq_routing_entry *entries = NULL;

        r = -EFAULT;
        if (copy_from_user(&routing, argp, sizeof(routing)))
            goto out;
        r = -EINVAL;
        if (!kvm_arch_can_set_irq_routing(kvm))
            goto out;
        if (routing.nr > KVM_MAX_IRQ_ROUTES)
            goto out;
        if (routing.flags)
            goto out;
        if (routing.nr) {
            r = -ENOMEM;
            entries = vmalloc(routing.nr * sizeof(*entries));
            if (!entries)
                goto out;
            r = -EFAULT;
            urouting = argp;
            if (copy_from_user(entries, urouting->entries,
                    routing.nr * sizeof(*entries)))
                goto out_free_irq_routing;
        }
        r = kvm_set_irq_routing(kvm, entries, routing.nr,
                    routing.flags);
out_free_irq_routing:
        vfree(entries);
        break;
    }
    ......
}

kvm_vm_ioctl_irq_line函数中会进一步调用内核态的kvm_set_irq函数(用户态用同名函数额),这个函数是整个中断处理的入口:

int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
            bool line_status)
{
    if (!irqchip_in_kernel(kvm))
        return -ENXIO;

    irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                    irq_event->irq, irq_event->level,
                    line_status);
    return 0;
}

kvm_set_irq函数的入参有5个,kvm代表某个特性的的虚拟机,irq_source_id可以是KVM_USERSPACE_IRQ_SOURCE_ID或者KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID(这个是irqfd这个我们这里不讨论),irq是传入的设备irq号,对于串口来说第一个port的irq=4而且irq=gsi,level代表电平。kvm_irq_map函数会获取改gsi索引上注册的中断路由项(kvm_kernel_irq_routing_entry),while循环中会挨个调用每个中断路由项上的set方法触发中,在guest中会忽略没有实现的芯片类型发送的中断消息

对于PIC而言,set函数对应于kvm_set_pic_irq函数,对于IOAPIC而言set函数对应于kvm_set_ioapic_irq(不同的chip不一样额)。对于串口而言,我们会进一步调用kvm_pic_set_irq来处理中断。

/*
* Return value:
*  < 0   Interrupt was ignored (masked or not delivered for other reasons)
*  = 0   Interrupt was coalesced (previous irq is still pending)
*  > 0   Number of CPUs interrupt was delivered to
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
        bool line_status)
{
    struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
    int ret = -1, i, idx;

    trace_kvm_set_irq(irq, level, irq_source_id);

    /* Not possible to detect if the guest uses the PIC or the
    * IOAPIC.  So set the bit in both. The guest will ignore
    * writes to the unused one.
    */
    idx = srcu_read_lock(&kvm->irq_srcu);
    i = kvm_irq_map_gsi(kvm, irq_set, irq);
    srcu_read_unlock(&kvm->irq_srcu, idx);

    /* 依次调用同一个gsi上的所有芯片的set方法 */
    while (i--) {
        int r;
        r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
                line_status);
        if (r < 0)
            continue;

        ret = r + ((ret < 0) ? 0 : ret);
    }

    return ret;
}

/* 查询出此gsi号上对应的所有的“中断路由项” */
int kvm_irq_map_gsi(struct kvm *kvm,
            struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
    struct kvm_irq_routing_table *irq_rt;
    struct kvm_kernel_irq_routing_entry *e;
    int n = 0;

    irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
                    lockdep_is_held(&kvm->irq_lock));
    if (irq_rt && gsi < irq_rt->nr_rt_entries) {
        hlist_for_each_entry(e, &irq_rt->map[gsi], link) {  /* 遍历此gsi对应的中断路由项 */
            entries[n] = *e;
            ++n;
        }
    }

    return n;
}

kvm_pic_set_irq 函数中,根据传入的irq编号check下原先的irq_state将本次的level与上次的irq_state进行逻辑“异或”判断是否发生电平跳变从而进行边沿检测(pic_set_irq1)。如果是的话设置IRR对应的bit,然后调用和pic_update_irq更新pic相关的寄存器并唤醒vcpu注入中断。

int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
{
    int ret, irq_level;

    BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);

    pic_lock(s);
    /* irq_level = 1表示该irq引脚有电平跳变,出发中断 */
    irq_level = __kvm_irq_line_state(&s->irq_states[irq],
                    irq_source_id, level);
    /* 一个pic最多8个irq */
    ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
    pic_update_irq(s);
    trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
                s->pics[irq >> 3].imr, ret == 0);
    pic_unlock(s);

    return ret;
}

最后的最后,pic_unlock函数中在如果wakeup为true(又中断产生时)的时候会遍历每个vcpu,在满足条件的情况下调用kvm_make_request为vcpu注入中断,然后kick每个vcpu。

static void pic_unlock(struct kvm_pic *s)
    __releases(&s->lock)
{
    bool wakeup = s->wakeup_needed;
    struct kvm_vcpu *vcpu;
    int i;

    s->wakeup_needed = false;

    spin_unlock(&s->lock);

    if (wakeup) {       /* wakeup在pic_update_irq中被更新 */
        kvm_for_each_vcpu(i, vcpu, s->kvm) {
            if (kvm_apic_accept_pic_intr(vcpu)) {
                /* 中断注入会在kvm_enter_guest时候执行 */
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                return;
            }
        }
    }
}

results matching ""

    No results matching ""