@BABOLO modify 2005-04-26 Original http://www.ambrisko.com/doug/ata/ --- sys/dev/ata/ata-all.c +++ sys/dev/ata/ata-all.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,8 @@ static void ata_change_mode(struct ata_device *, int); static u_int8_t ata_enclosure_sensor(struct ata_device *, int, u_int8_t, u_int8_t); static int ata_enclosure_status(struct ata_device *, int *, int *, int *, int *); +static void ata_work(void); +static void ata_start_work(struct ata_channel *, int); /* sysctl vars */ SYSCTL_NODE(_hw, OID_AUTO, ata, CTLFLAG_RD, 0, "ATA driver parameters"); @@ -98,6 +101,13 @@ /* local vars */ static struct intr_config_hook *ata_delayed_attach = NULL; static MALLOC_DEFINE(M_ATA, "ATA generic", "ATA driver generic layer"); +static int ata_ready = 0; /* DJA check */ + +struct ata_queue { + TAILQ_ENTRY(ata_queue) ata_next; + struct ata_channel *ch; +}; +TAILQ_HEAD(,ata_queue) ata_q; /* misc defines */ #define DEV_ATAPIALL NATAPICD > 0 || NATAPIFD > 0 || \ @@ -105,7 +115,7 @@ /* For Promise SATA controllers */ void -ata_promise_init(struct ata_channel *ch) +promise_init(struct ata_channel *ch) { device_t parent = device_get_parent(ch->dev); struct ata_pci_controller *ctlr = device_get_softc(parent); @@ -180,7 +190,7 @@ case 0x3371105a: /* Promise SATA */ case 0x3373105a: /* Promise SATA */ case 0x3376105a: /* Promise SATA */ - ata_promise_init(ch); + promise_init(ch); break; default: @@ -293,8 +303,9 @@ case 0x24d18086: /* Intel ICH5 SATA */ case 0x24df8086: /* Intel ICH5 SATA RAID */ case 0x26518086: /* Intel ICH6 SATA */ + ata_intel_map_sata_ports(dev, ch); if ((error = bus_setup_intr(dev, ch->r_irq, INTR_TYPE_BIO, - ata_intr, ch, &ch->ih))) { + ata_intel_intr, ch, &ch->ih))) { ata_printf(ch, -1, "unable to setup interrupt\n"); return error; } @@ -351,12 +362,23 @@ ata_detach(device_t dev) { struct ata_channel *ch; - int i, s; + struct ata_queue *ata_op; + int s, i; if (!dev || !(ch = device_get_softc(dev)) || - !ch->r_io || !ch->r_altio || !ch->r_irq) + !ch->r_io[ATA_IDX_DATA].res || !ch->r_altio[ATA_IDX_DATA].res + || !ch->r_irq) return ENXIO; + s = splbio(); + TAILQ_FOREACH(ata_op, &ata_q, ata_next) + if (ch == ata_op->ch) { + ata_op->ch->count = 0; + TAILQ_REMOVE(&ata_q, ata_op, ata_next); + free(ata_op, M_DEVBUF); + } + splx(s); + /* make sure channel is not busy */ ATA_SLEEPLOCK_CH(ch, ATA_CONTROL); @@ -541,6 +563,9 @@ case ATARAIDREBUILD: return ata_raid_rebuild(iocmd->channel); + case ATARAIDREBUILDSTOP: + return ata_raid_rebuild_stop(iocmd->channel); + case ATARAIDCREATE: return ata_raid_create(&iocmd->u.raid_setup); @@ -549,6 +574,9 @@ case ATARAIDSTATUS: return ata_raid_status(iocmd->channel, &iocmd->u.raid_status); + + case ATARAIDADDSPARE: + return ata_raid_addspare(iocmd->channel, iocmd->u.raid_spare.disk); #endif #if DEV_ATAPIALL case ATAPICMD: { @@ -646,14 +674,14 @@ ata_boot_attach(void) { struct ata_channel *ch; - int ctlr, s; + int ctlr; if (ata_delayed_attach) { config_intrhook_disestablish(ata_delayed_attach); free(ata_delayed_attach, M_TEMP); ata_delayed_attach = NULL; } - s = splbio(); + /* s = splbio(); DJA */ /* * run through all ata devices and look for real ATA & ATAPI devices @@ -703,7 +731,7 @@ #endif } #endif - splx(s); + /* splx(s); DJA */ } void @@ -731,17 +759,22 @@ if (ch->status & ATA_S_ERROR) ch->error = ATA_INB(ch->r_io, ATA_ERROR); + if (ch->active & ATA_STATUS_ONLY) { + ch->active &= ~ATA_STATUS_ONLY; + return; + } + /* find & call the responsible driver to process this interrupt */ switch 
(ch->active) { #if NATADISK > 0 case ATA_ACTIVE_ATA: - if (!ch->running || ad_interrupt(ch->running) == ATA_OP_CONTINUES) + if (ch->running && ad_interrupt(ch->running) == ATA_OP_CONTINUES) return; break; #endif #if DEV_ATAPIALL case ATA_ACTIVE_ATAPI: - if (!ch->running || atapi_interrupt(ch->running) == ATA_OP_CONTINUES) + if (ch->running && atapi_interrupt(ch->running) == ATA_OP_CONTINUES) return; break; #endif @@ -775,16 +808,151 @@ break; } ch->active &= ATA_CONTROL; - if (ch->active & ATA_CONTROL) + if (ch->active & ATA_CONTROL) { return; + } ch->running = NULL; + if (ch->count > 0) + ch->count--; + +dprintf(("%s %d ch %p count %d\n",__FUNCTION__,__LINE__,ch,ch->count)); ata_start(ch); return; } +/* + * worker thread to run task without splbio so we can get interrupts + * from things like plug status. If you don't respond to those and + * just do ATA work then it will wedge the system on I/O. Caveat + * is that you need code to read that + */ + +int ata_work_pending = 0; + +int +ata_work_queue_if_needed(struct ata_channel *ch){ + struct ata_queue *ata_op; + int s = splbio(); + + TAILQ_FOREACH(ata_op, &ata_q, ata_next) { + if (ata_op->ch == ch) { + splx(s); + return 1; + } + } + + ata_op = malloc(sizeof(struct ata_queue), M_DEVBUF, M_NOWAIT); + ch->count++; + ata_op->ch = ch; + ata_work_pending++; + TAILQ_INSERT_TAIL(&ata_q, ata_op, ata_next); + + splx(s); + return 0; +} + +void +ata_work(void) +{ + struct ata_queue *ata_op; + struct ata_channel *ch; + int s; + int busy, count; + + TAILQ_INIT(&ata_q); + ata_ready = 1; + for (;;) { + for (;;) { + s = splbio(); + busy = 1; + count = 0; + TAILQ_FOREACH(ata_op, &ata_q, ata_next) { + count++; + if (!ata_op->ch->running) { + busy = 0; + } + } + if (count && busy) { + /* + printf("All work channels busy\n"); + */ + splx(s); + break; + } + ata_op = TAILQ_FIRST(&ata_q); +dsprintf(("do work %d %d %p\n",count, busy, ata_op)); + if (!count || !ata_op) { + splx(s); + break; + } else { + ch = ata_op->ch; + TAILQ_REMOVE(&ata_q, ata_op, ata_next); + ata_work_pending--; + free(ata_op, M_DEVBUF); + splx(s); + + if (ch->running) { + dprintf(("Still busy\n")); + } else { + ata_start_work(ch, 0); + } + + if (ch->count > 0) + ch->count--; +dprintf(("%s %d ch %p count %d\n",__FUNCTION__,__LINE__,ch,ch->count)); + } + } + /* + if (count || ata_work_pending) + printf("wait for work %d %d\n",count,ata_work_pending); + */ + tsleep(&ata_q, PZERO, "ata_wait", 5 * hz); + } +} + +/* put work from interrupt in at the front of the work queue */ +void +ata_resume_op(struct ata_channel *ch) +{ +#if 0 + struct ata_queue *ata_op; + int s; +#endif + + if (!ata_ready) { + ata_start_work(ch, 0); + } else { + printf("RESUME NOT HANDLED\n"); +#if 0 /* Not sure DJA */ + ata_work_queue_if_needed(ch); + wakeup(&ata_q); +#endif + } +} + +/* put channel on the work queue */ void ata_start(struct ata_channel *ch) { + int s, skip_queue = 0; + + s = splbio(); + if (s != splbio()) { + skip_queue = 1; + } + splx(s); + if (skip_queue || !ata_ready || panicstr) { + ata_start_work(ch, 1); + } else { + ata_work_queue_if_needed(ch); + wakeup(&ata_q); + } + wakeup(&ata_q); +} + +void +ata_start_work(struct ata_channel *ch, int now) +{ #ifdef NATADISK struct ad_request *ad_request; #endif @@ -793,6 +961,7 @@ #endif int s; + /* spl_check(); */ if (!ATA_LOCK_CH(ch, ATA_ACTIVE)) return; @@ -805,7 +974,8 @@ if (ch->devices & (ATA_ATA_SLAVE) && ch->device[SLAVE].driver) ad_start(&ch->device[SLAVE]); } - if ((ad_request = TAILQ_FIRST(&ch->ata_queue))) { + if ((now || ch->running == NULL) + && 
(ad_request = TAILQ_FIRST(&ch->ata_queue))) { TAILQ_REMOVE(&ch->ata_queue, ad_request, chain); ch->active = ATA_ACTIVE_ATA; ch->running = ad_request; @@ -818,7 +988,8 @@ #endif #if DEV_ATAPIALL /* find & call the responsible driver if anything on the ATAPI queue */ - if (TAILQ_EMPTY(&ch->atapi_queue)) { + if ((now || ch->running == NULL) + && TAILQ_EMPTY(&ch->atapi_queue)) { if (ch->devices & (ATA_ATAPI_MASTER) && ch->device[MASTER].driver) atapi_start(&ch->device[MASTER]); if (ch->devices & (ATA_ATAPI_SLAVE) && ch->device[SLAVE].driver) @@ -843,7 +1014,40 @@ { u_int8_t lsb, msb, ostat0, ostat1; u_int8_t stat0 = 0, stat1 = 0; - int mask = 0, timeout; + int mask = 0, timeout, s; + struct ata_device *atadev; + + s = splbio(); + + if (ch->devices & ATA_ATA_MASTER) { + atadev = &ch->device[MASTER]; + if (atadev->dmastate.flags & ATA_DS_ACTIVE) { + ata_printf(ch, -1, "Clearing DMA\n"); + ata_dmadone(atadev); + } + } + + if (ch->devices & ATA_ATA_MASTER) { + atadev = &ch->device[MASTER]; + if (atadev->dmastate.flags & ATA_DS_ACTIVE) { + ata_printf(ch, -1, "Clearing DMA\n"); + ata_dmadone(atadev); + } + } + + ata_intel_sata_status(ch); + + switch (ch->chiptype) { + case 0x3318105a: /* Promise SATA */ + case 0x3319105a: /* Promise SATA */ + case 0x3371105a: /* Promise SATA */ + case 0x3373105a: /* Promise SATA */ + case 0x3376105a: /* Promise SATA */ + promise_init(ch); + if(ch->devices & ATA_ATA_MASTER) + ata_dmadone(&ch->device[MASTER]); + break; + } /* do we have any signs of ATA/ATAPI HW being present ? */ ATA_OUTB(ch->r_io, ATA_DRIVE, ATA_D_IBM | ATA_MASTER); @@ -862,6 +1066,8 @@ } ch->devices = 0; + if (ch->active == ATA_DEAD) + return; if (!mask) return; @@ -884,6 +1090,8 @@ DELAY(100000); ATA_INB(ch->r_io, ATA_ERROR); + /* XXX DJA could have troubles here */ + /* wait for BUSY to go inactive */ for (timeout = 0; timeout < 310000; timeout++) { if (stat0 & ATA_S_BUSY) { @@ -964,6 +1172,8 @@ if (lsb != 0x58 && msb == 0xa5) ch->devices |= ATA_ATA_SLAVE; } + + splx(s); if (bootverbose) ata_printf(ch, -1, "devices=%02x\n", ch->devices); } @@ -981,6 +1191,7 @@ devices = ch->devices; ata_printf(ch, -1, "resetting devices .. "); + ch->active &=~ ATA_DEAD; /* assume channel is dead */ ata_reset(ch); if ((misdev = devices & ~ch->devices)) { @@ -1067,43 +1278,59 @@ static int ata_service(struct ata_channel *ch) { + int s = splbio();; + /* do we have a SERVICE request from the drive ? 
*/ if ((ch->status & (ATA_S_SERVICE|ATA_S_ERROR|ATA_S_DRQ)) == ATA_S_SERVICE) { ATA_OUTB(ch->r_bmio, ATA_BMSTAT_PORT, ata_dmastatus(ch) | ATA_BMSTAT_INTERRUPT); #if NATADISK > 0 if ((ATA_INB(ch->r_io, ATA_DRIVE) & ATA_SLAVE) == ATA_MASTER) { - if ((ch->devices & ATA_ATA_MASTER) && ch->device[MASTER].driver) + if ((ch->devices & ATA_ATA_MASTER) && ch->device[MASTER].driver) { + splx(s); return ad_service((struct ad_softc *) ch->device[MASTER].driver, 0); + } } else { - if ((ch->devices & ATA_ATA_SLAVE) && ch->device[SLAVE].driver) + if ((ch->devices & ATA_ATA_SLAVE) && ch->device[SLAVE].driver) { + splx(s); return ad_service((struct ad_softc *) ch->device[SLAVE].driver, 0); + } } #endif } + splx(s); return ATA_OP_FINISHED; } int ata_wait(struct ata_device *atadev, u_int8_t mask) { - int timeout = 0; + struct ata_channel *ch = atadev->channel; + int timeout = 0, s; - DELAY(1); - while (timeout < 5000000) { /* timeout 5 secs */ + s = splbio(); + if(atadev->channel->status == ATA_S_BUSY) atadev->channel->status = ATA_INB(atadev->channel->r_io, ATA_STATUS); + while (timeout < 5000000) { /* timeout 5 secs */ + if (ch->active & ATA_DEAD) { + splx(s); + return -1; + } + /* if drive fails status, reselect the drive just to be sure */ if (atadev->channel->status == 0xff) { ata_prtdev(atadev, "no status, reselecting device\n"); ATA_OUTB(atadev->channel->r_io, ATA_DRIVE, ATA_D_IBM|atadev->unit); DELAY(10); atadev->channel->status = ATA_INB(atadev->channel->r_io,ATA_STATUS); - if (atadev->channel->status == 0xff) + if (atadev->channel->status == 0xff) { + splx(s); return -1; + } } /* are we done ? */ @@ -1118,9 +1345,24 @@ timeout += 10; DELAY(10); } - } + + /* atadev->channel->active |= ATA_STATUS_ONLY; XXX DJA */ + atadev->channel->status = ATA_S_BUSY; + splx(s); + s = splbio(); + if(atadev->channel->status == ATA_S_BUSY) + atadev->channel->status = ATA_INB(atadev->channel->r_io, + ATA_STATUS); + } + splx(s); + s = splbio(); + if (ch->active & ATA_DEAD) { + splx(s); + return -1; + } if (atadev->channel->status & ATA_S_ERROR) atadev->channel->error = ATA_INB(atadev->channel->r_io, ATA_ERROR); + splx(s); if (timeout >= 5000000) return -1; if (!mask) @@ -1129,12 +1371,19 @@ /* Wait 50 msec for bits wanted. */ timeout = 5000; while (timeout--) { - atadev->channel->status = ATA_INB(atadev->channel->r_io, ATA_STATUS); + s = splbio(); /* XXX DJA */ + if (ch->active & ATA_DEAD) { + splx(s); + return -1; + } + if ((atadev->channel->status & mask) == mask) { if (atadev->channel->status & ATA_S_ERROR) atadev->channel->error=ATA_INB(atadev->channel->r_io,ATA_ERROR); return (atadev->channel->status & ATA_S_ERROR); } + atadev->channel->status = ATA_INB(atadev->channel->r_io, ATA_STATUS); + splx(s); DELAY (10); } return -1; @@ -1144,7 +1393,8 @@ ata_command(struct ata_device *atadev, u_int8_t command, u_int64_t lba, u_int16_t count, u_int8_t feature, int flags) { - int error = 0; + struct ata_channel *ch = atadev->channel; + int error = 0, s; #ifdef ATA_DEBUG ata_prtdev(atadev, "ata_command: addr=%04lx, cmd=%02x, " "lba=%lld, count=%d, feature=%d, flags=%02x\n", @@ -1152,6 +1402,11 @@ command, lba, count, feature, flags); #endif + if (ch->active & ATA_DEAD) { + return -1; + } + s = splbio(); + /* select device */ ATA_OUTB(atadev->channel->r_io, ATA_DRIVE, ATA_D_IBM | atadev->unit); @@ -1159,10 +1414,12 @@ if (atadev->channel->flags & ATA_QUEUED) ATA_OUTB(atadev->channel->r_altio, ATA_ALTSTAT, ATA_A_IDS | ATA_A_4BIT); + atadev->channel->status = ATA_S_BUSY; /* ready to issue command ? 
*/ if (ata_wait(atadev, 0) < 0) { ata_prtdev(atadev, "timeout sending command=%02x s=%02x e=%02x\n", command, atadev->channel->status, atadev->channel->error); + splx(s); return -1; } @@ -1203,6 +1460,7 @@ command = ATA_C_FLUSHCACHE48; break; default: ata_prtdev(atadev, "can't translate cmd to 48bit version\n"); + splx(s); return -1; } } @@ -1236,17 +1494,28 @@ /* enable interrupt */ if (atadev->channel->flags & ATA_QUEUED) ATA_OUTB(atadev->channel->r_altio, ATA_ALTSTAT, ATA_A_4BIT); - - if (tsleep((caddr_t)atadev->channel, PRIBIO, "atacmd", 10 * hz)) { - ata_prtdev(atadev, "timeout waiting for interrupt\n"); - atadev->channel->active &= ~ATA_WAIT_INTR; - error = -1; + splx(s); + if (ch->active & ATA_DEAD) { + return -1; } + + if (atadev->channel->status & ATA_S_BUSY) + if (tsleep((caddr_t)atadev->channel, PRIBIO, "atacmd", 10 * hz)) { + ata_prtdev(atadev, "timeout waiting for interrupt\n"); + atadev->channel->active &= ~ATA_WAIT_INTR; + error = -1; + } break; case ATA_WAIT_READY: + if (ch->active & ATA_DEAD) { + splx(s); + return -1; + } + atadev->channel->active |= ATA_WAIT_READY; ATA_OUTB(atadev->channel->r_io, ATA_CMD, command); + atadev->channel->status = ATA_S_BUSY; if (ata_wait(atadev, ATA_S_READY) < 0) { ata_prtdev(atadev, "timeout waiting for cmd=%02x s=%02x e=%02x\n", command, atadev->channel->status,atadev->channel->error); @@ -1255,6 +1524,8 @@ atadev->channel->active &= ~ATA_WAIT_READY; break; } + atadev->channel->status = ATA_S_BUSY; + splx(s); return error; } @@ -1682,6 +1953,10 @@ static void ata_init(void) { + struct proc *pid; + + kthread_create((void (*)(void *))ata_work, NULL, &pid, "ata_work"); + /* register controlling device */ make_dev(&ata_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600, "ata"); --- sys/dev/ata/ata-all.h +++ sys/dev/ata/ata-all.h @@ -28,6 +28,19 @@ * $FreeBSD: src/sys/dev/ata/ata-all.h,v 1.26.2.15 2005/04/20 23:51:31 ambrisko Exp $ */ +#if 1 +#define dprintf(x) +#else +#define dprintf(x) {int s; s=splbio(); printf x; splx(s);} +#endif +#define bprintf(x) dprintf(x) +#define dsprintf(x) dprintf(x) +#define aprintf(x) dprintf(x) + +#define spl_check() {extern unsigned int cpl;\ + if (cpl == bio_imask) { printf("SPL WATCH %s %d %x\n", \ + __FUNCTION__,__LINE__,cpl);}} + /* ATA register defines */ #define ATA_DATA 0x00 /* data register */ #define ATA_ERROR 0x01 /* (R) error register */ @@ -282,9 +295,13 @@ int ata_resume(device_t); void ata_start(struct ata_channel *); -void ata_intr(void *); -void ata_promise_init(struct ata_channel *); +void ata_intr(void *); void ata_promise_intr(void *); +void ata_intel_intr(void *); +void ata_intel_sata_status(struct ata_channel *); +void ata_intel_map_sata_ports(device_t, struct ata_channel *); +void promise_init(struct ata_channel *); +void ata_resume_op(struct ata_channel *ch); void ata_reset(struct ata_channel *); int ata_reinit(struct ata_channel *); int ata_wait(struct ata_device *, u_int8_t); --- sys/dev/ata/ata-disk.c +++ sys/dev/ata/ata-disk.c @@ -52,6 +52,8 @@ #include #include +int ata_bio_pending = 0; + /* device structures */ static d_open_t adopen; static d_close_t adclose; @@ -114,9 +116,14 @@ struct ad_softc *adp; dev_t dev; - if (!(adp = malloc(sizeof(struct ad_softc), M_AD, M_NOWAIT | M_ZERO))) { - ata_prtdev(atadev, "failed to allocate driver storage\n"); - return; + if (atadev->driver) { + /* Re-use old one */ + adp = atadev->driver; + } else { + if (!(adp = malloc(sizeof(struct ad_softc), M_AD, M_NOWAIT | M_ZERO))) { + ata_prtdev(atadev, "failed to allocate driver storage\n"); + return; + } } 
adp->device = atadev; #ifdef ATA_STATIC_ID @@ -250,12 +257,14 @@ /* if this disk belongs to an ATA RAID dont print the probe */ if (ata_raiddisk_attach(adp)) adp->flags |= AD_F_RAID_SUBDISK; - else { - if (atadev->driver) { - ad_print(adp); - ata_enclosure_print(atadev); - } - } +/* DJA HACK + if (atadev->driver) { + ad_print(adp); + ata_enclosure_print(atadev); + } +*/ + ad_print(adp); + ata_enclosure_print(atadev); } void @@ -264,26 +273,45 @@ struct ad_softc *adp = atadev->driver; struct ad_request *request; struct buf *bp; + int s; atadev->flags |= ATA_D_DETACHING; ata_prtdev(atadev, "removed from configuration\n"); ad_invalidatequeue(adp, NULL); + request = atadev->channel->running; + s = splbio(); + if (request) { + untimeout((timeout_t *)ad_timeout, request, request->timeout_handle); + request->bp->b_error = ENXIO; + request->bp->b_flags |= B_ERROR; + ata_bio_pending--; + biodone(request->bp); + ad_free(request); + atadev->channel->running = NULL; +dprintf(("%s %d ch %p count %d\n",__FUNCTION__,__LINE__,atadev->channel,atadev->channel->count)); + atadev->channel->count = 0; + } TAILQ_FOREACH(request, &atadev->channel->ata_queue, chain) { if (request->softc != adp) continue; TAILQ_REMOVE(&atadev->channel->ata_queue, request, chain); request->bp->b_error = ENXIO; request->bp->b_flags |= B_ERROR; + ata_bio_pending--; biodone(request->bp); ad_free(request); } + splx(s); ata_dmafree(atadev); + s = splbio(); while ((bp = bufq_first(&adp->queue))) { bufq_remove(&adp->queue, bp); bp->b_error = ENXIO; bp->b_flags |= B_ERROR; + ata_bio_pending--; biodone(bp); } + splx(s); disk_invalidate(&adp->disk); disk_destroy(adp->dev); devstat_remove_entry(&adp->stats); @@ -293,11 +321,13 @@ } if (adp->flags & AD_F_RAID_SUBDISK) ata_raiddisk_detach(adp); + s = splbio(); ata_free_name(atadev); ata_free_lun(&adp_lun_map, adp->lun); atadev->driver = NULL; atadev->flags = 0; - free(adp, M_AD); + /* Don't free since other things may still use it like RAID */ + splx(s); } static int @@ -334,6 +364,7 @@ biodone(bp); return; } + ata_bio_pending++; s = splbio(); bufqdisksort(&adp->queue, bp); splx(s); @@ -402,6 +433,7 @@ addr += PAGE_SIZE * dumppages; } + adp->device->channel->status = ATA_S_BUSY; if (ata_wait(adp->device, ATA_S_READY | ATA_S_DSC) < 0) ata_prtdev(adp->device, "timeout waiting for final ready\n"); return 0; @@ -413,7 +445,7 @@ struct ad_softc *adp = atadev->driver; struct buf *bp = bufq_first(&adp->queue); struct ad_request *request; - int tag = 0; + int tag = 0, s; if (!bp) return; @@ -426,8 +458,10 @@ return; } + s = splbio(); if (!(request = malloc(sizeof(struct ad_request), M_AD, M_NOWAIT|M_ZERO))) { ata_prtdev(atadev, "out of memory in start\n"); + splx(s); return; } @@ -453,8 +487,16 @@ /* link onto controller queue */ TAILQ_INSERT_TAIL(&atadev->channel->ata_queue, request, chain); + splx(s); } +static int ata_fail_block = 0; +static char ata_fail_name[10] = {""}; +SYSCTL_INT(_hw_ata, OID_AUTO, ata_fail_block, CTLFLAG_RW, &ata_fail_block, 0, + "ATA disk block to fail"); +SYSCTL_STRING(_hw_ata, OID_AUTO, ata_fail_name, CTLFLAG_RW, ata_fail_name, + sizeof(ata_fail_name), "ATA disk name to fail"); + int ad_transfer(struct ad_request *request) { @@ -463,9 +505,14 @@ u_int32_t count, max_count; u_int8_t cmd; int flags = ATA_IMMEDIATE; + struct ata_channel *ch; + request->flags |= ADR_F_NOT_BUSY; /* get request params */ adp = request->softc; + ch = adp->device->channel; + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; /* calculate transfer details */ lba = request->blockaddr + 
(request->donecount / DEV_BSIZE); @@ -502,6 +549,18 @@ devstat_start_transaction(&adp->stats); + /* DJA hack */ + if (lba >= ata_fail_block + && lba < request->currentsize + ata_fail_block) { + if (strcmp(adp->device->name, ata_fail_name) == 0) { + if (request->bp->b_flags & B_READ) + return ATA_OP_CONTINUES; + else + ata_fail_name[0] = '\000'; + } + } + /* DJA hack */ + /* does this drive & transfer work with DMA ? */ request->flags &= ~ADR_F_DMA_USED; if (adp->device->mode >= ATA_DMA && @@ -513,12 +572,15 @@ if (adp->flags & AD_F_TAG_ENABLED) { cmd = (request->flags & ADR_F_READ) ? ATA_C_READ_DMA_QUEUED : ATA_C_WRITE_DMA_QUEUED; + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; if (ata_command(adp->device, cmd, lba, request->tag << 3, count, flags)) { ata_prtdev(adp->device, "error executing command"); goto transfer_failed; } + request->flags &= ~ADR_F_NOT_BUSY; if (ata_wait(adp->device, ATA_S_READY)) { ata_prtdev(adp->device, "timeout waiting for READY\n"); goto transfer_failed; @@ -535,16 +597,22 @@ cmd = (request->flags & ADR_F_READ) ? ATA_C_READ_DMA : ATA_C_WRITE_DMA; + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; + if (ata_command(adp->device, cmd, lba, count, 0, flags)) { ata_prtdev(adp->device, "error executing command"); goto transfer_failed; } + + request->flags &= ~ADR_F_NOT_BUSY; #if 0 /* * wait for data transfer phase * * well this should be here acording to specs, but older * promise controllers doesn't like it, they lockup! + * XXX DJA is this still true now. */ if (ata_wait(adp->device, ATA_S_READY | ATA_S_DRQ)) { ata_prtdev(adp->device, "timeout waiting for data phase\n"); @@ -571,6 +639,7 @@ ata_prtdev(adp->device, "error executing command"); goto transfer_failed; } + request->flags &= ~ADR_F_NOT_BUSY; } /* calculate this transfer length */ @@ -586,6 +655,8 @@ goto transfer_failed; } + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; /* output the data */ if (adp->device->channel->flags & ATA_USE_16BIT) ATA_OUTSW(adp->device->channel->r_io, ATA_DATA, @@ -599,8 +670,11 @@ transfer_failed: untimeout((timeout_t *)ad_timeout, request, request->timeout_handle); + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; ad_invalidatequeue(adp, request); printf(" - resetting\n"); + ata_intel_sata_status(ch); /* if retries still permit, reinject this request */ if (request->retries++ < AD_MAX_RETRIES) @@ -611,6 +685,7 @@ request->bp->b_flags |= B_ERROR; request->bp->b_resid = request->bytecount; devstat_end_transaction_buf(&adp->stats, request->bp); + ata_bio_pending--; biodone(request->bp); ad_free(request); } @@ -624,6 +699,7 @@ struct ad_softc *adp = request->softc; int dma_stat = 0; + adp->device->channel->need_intr = 0; /* finish DMA transfer */ if (request->flags & ADR_F_DMA_USED) dma_stat = ata_dmadone(adp->device); @@ -665,6 +741,13 @@ untimeout((timeout_t *)ad_timeout, request,request->timeout_handle); ad_invalidatequeue(adp, request); ata_dmainit(adp->device, ata_pmode(adp->device->param), -1, -1); + if (adp->device->mode & ~ATA_MODE_MASK) { + /* No PIO fall-back */ + printf(" no PIO mode\n"); + request->bp->b_error = EIO; + request->bp->b_flags |= B_ERROR; + goto failed; + } request->flags |= ADR_F_FORCE_PIO; printf(" trying PIO mode\n"); TAILQ_INSERT_HEAD(&adp->device->channel->ata_queue, request, chain); @@ -694,6 +777,8 @@ request->flags |= ADR_F_ERROR; } else { + if (request->bytecount < request->currentsize) + request->currentsize = request->bytecount; /* data ready, read in */ if (adp->device->channel->flags & ATA_USE_16BIT) 
ATA_INSW(adp->device->channel->r_io, ATA_DATA, @@ -715,17 +800,18 @@ request->bytecount -= request->currentsize; request->donecount += request->currentsize; if (request->bytecount > 0) { - ad_transfer(request); + ata_resume_op(adp->device->channel); return ATA_OP_CONTINUES; } } /* disarm timeout for this transfer */ untimeout((timeout_t *)ad_timeout, request, request->timeout_handle); - +failed: request->bp->b_resid = request->bytecount; devstat_end_transaction_buf(&adp->stats, request->bp); + ata_bio_pending--; biodone(request->bp); ad_free(request); adp->outstanding--; @@ -737,6 +823,12 @@ int ad_service(struct ad_softc *adp, int change) { + struct ata_channel *ch = adp->device->channel; + int s; + + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; + s = splbio(); /* do we have to check the other device on this channel ? */ if (adp->device->channel->flags & ATA_QUEUED && change) { int device = adp->device->unit; @@ -766,7 +858,10 @@ } adp->device->channel->status = ATA_INB(adp->device->channel->r_altio, ATA_ALTSTAT); + splx(s); + if (ch->active & ATA_DEAD) + return ATA_OP_FINISHED; /* do we have a SERVICE request from the drive ? */ if (adp->flags & AD_F_TAG_ENABLED && adp->outstanding > 0 && @@ -799,7 +894,9 @@ ad_invalidatequeue(adp, NULL); return ATA_OP_FINISHED; } + s = splbio(); tag = ATA_INB(adp->device->channel->r_io, ATA_COUNT) >> 3; + splx(s); if (!(request = adp->tags[tag])) { ata_prtdev(adp->device, "no request for tag=%d\n", tag); ad_invalidatequeue(adp, NULL); @@ -834,6 +931,13 @@ static void ad_invalidatequeue(struct ad_softc *adp, struct ad_request *request) { + struct ata_channel *ch = adp->device->channel; + int s; + + if (ch->active & ATA_DEAD) + return; + s = splbio(); + /* if tags used invalidate all other tagged transfers */ if (adp->flags & AD_F_TAG_ENABLED) { struct ad_request *tmpreq; @@ -853,6 +957,7 @@ ata_prtdev(adp->device, "flush queue failed\n"); adp->outstanding = 0; } + splx(s); } static int @@ -890,38 +995,103 @@ return 0; } +void ad_clear_request(struct ad_softc *adp); +void +ad_clear_request(struct ad_softc *adp) +{ + struct ata_device *atadev = adp->device; + struct ad_request *request; + int s = splbio(); + + + request = atadev->channel->running; + + if (request) { + atadev->channel->running = NULL; + untimeout((timeout_t *)ad_timeout, request, request->timeout_handle); + + if (request->flags & ADR_F_DMA_USED) { + ata_dmadone(adp->device); + ad_invalidatequeue(adp, request); + } + + /* retries all used up, return error */ + request->bp->b_error = EIO; + request->bp->b_flags |= B_ERROR; + devstat_end_transaction_buf(&adp->stats, request->bp); + ata_bio_pending--; + biodone(request->bp); + ad_free(request); + } + TAILQ_FOREACH(request, &atadev->channel->ata_queue, chain) { + if (request->softc != adp) + continue; +/* why doesn't this work */ + TAILQ_REMOVE(&atadev->channel->ata_queue, request, chain); + request->bp->b_error = ENXIO; + request->bp->b_flags |= B_ERROR; + ata_bio_pending--; + biodone(request->bp); + ad_free(request); + } + splx(s); +} + static void ad_timeout(struct ad_request *request) { struct ad_softc *adp = request->softc; + int s; + + if(adp->device == NULL || adp->device->channel == NULL) { + request->bp->b_error = EIO; + request->bp->b_flags |= B_ERROR; + devstat_end_transaction_buf(&adp->stats, request->bp); + ata_bio_pending--; + biodone(request->bp); + ad_free(request); + return; + } adp->device->channel->running = NULL; - ata_prtdev(adp->device, "%s command timeout tag=%d serv=%d - resetting\n", +dprintf(("%s %d ch %p count 
%d\n",__FUNCTION__,__LINE__,adp->device->channel,adp->device->channel->count)); + adp->device->channel->count = 0; + ata_prtdev(adp->device, "%s command timeout tag=%d serv=%d bp=%p - resetting\n", (request->flags & ADR_F_READ) ? "READ" : "WRITE", - request->tag, request->serv); + request->tag, request->serv, request->bp); + s = splbio(); if (request->flags & ADR_F_DMA_USED) { ata_dmadone(adp->device); ad_invalidatequeue(adp, request); if (request->retries == AD_MAX_RETRIES) { ata_dmainit(adp->device, ata_pmode(adp->device->param), -1, -1); - ata_prtdev(adp->device, "trying fallback to PIO mode\n"); - request->retries = 0; + if (!(adp->device->mode & ~ATA_MODE_MASK)) { + ata_prtdev(adp->device, "trying fallback to PIO mode\n"); + request->retries = 0; + } } } /* if retries still permit, reinject this request */ if (request->retries++ < AD_MAX_RETRIES) { - TAILQ_INSERT_HEAD(&adp->device->channel->ata_queue, request, chain); + if (request->bp + && (request->bp->b_flags & B_CALL) + && request->bp->b_iodone == (void *)wakeup) { + } else { + TAILQ_INSERT_HEAD(&adp->device->channel->ata_queue, request, chain); + } } else { /* retries all used up, return error */ request->bp->b_error = EIO; request->bp->b_flags |= B_ERROR; devstat_end_transaction_buf(&adp->stats, request->bp); + ata_bio_pending--; biodone(request->bp); ad_free(request); } + splx(s); ata_reinit(adp->device->channel); } --- sys/dev/ata/ata-disk.h +++ sys/dev/ata/ata-disk.h @@ -43,6 +43,7 @@ #define ADR_F_DMA_USED 0x0004 #define ADR_F_QUEUED 0x0008 #define ADR_F_FORCE_PIO 0x0010 +#define ADR_F_NOT_BUSY 0x0020 caddr_t data; /* pointer to data buf */ struct buf *bp; /* associated bio ptr */ --- sys/dev/ata/ata-pci.c 20 Apr 2005 23:51:32 -0000 1.32.2.22 +++ sys/dev/ata/ata-pci.c 26 Apr 2005 13:00:27 -0000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,16 @@ #define ATA_MASTERDEV(dev) ((pci_get_progif(dev) & 0x80) && \ (pci_get_progif(dev) & 0x05) != 0x05) +static int sata_ready = 0; + +struct sata_queue { + TAILQ_ENTRY(sata_queue) sata_next; + struct ata_channel *ch; +}; +TAILQ_HEAD(,sata_queue) sata_q; + +static void sata_watch(void); +static void ata_promise_check_plug_intr(void *data); static void ata_promise_plug_scan_intr(void *data, int plug_only); int @@ -399,6 +413,76 @@ return ENXIO; } +void +ata_intel_map_sata_ports(device_t dev, struct ata_channel *channel){ + device_t parent = device_get_parent(dev); + int unit = channel->unit; + int sata_config; + + switch (channel->chiptype) { + case 0x25a38086: /* Intel 6300ESB SATA */ + case 0x25b08086: /* Intel 6300ESB SATA RAID */ + case 0x24d18086: /* Intel ICH5 SATA */ + case 0x24df8086: /* Intel ICH5 SATA RAID */ + sata_config = pci_read_config(parent, 0x90, 1); + + switch (sata_config & 7) { + case 0: /* SATA P0 on pri master SATA P1 on sec master */ + if (unit == 0) { + channel->sata_master_idx = 1; + device_printf(channel->dev, "SATA P0 primary master\n"); + } + if (unit == 1) { + channel->sata_master_idx = 2; + device_printf(channel->dev, "SATA P1 secondary master\n"); + } + break; + case 1: /* SATA P0 on sec master SATA P1 on pri master */ + if (unit == 0) { + channel->sata_master_idx = 2; + device_printf(channel->dev, "SATA P0 secondary master\n"); + } + if (unit == 1) { + channel->sata_master_idx = 0; + device_printf(channel->dev, "SATA P1 primary master\n"); + } + break; + case 4: /* SATA P0 on pri master SATA P1 on pri slave */ + if (unit == 0) { + channel->sata_master_idx = 1; + channel->sata_slave_idx = 2; + 
device_printf(channel->dev, "SATA P0 primary master\n"); + device_printf(channel->dev, "SATA P1 primary slave\n"); + } + break; + case 5: /* SATA P0 on pri slave SATA P1 on pri master */ + if (unit == 0) { + channel->sata_master_idx = 2; + channel->sata_slave_idx = 1; + device_printf(channel->dev, "SATA P0 primary slave\n"); + device_printf(channel->dev, "SATA P1 primary master\n"); + } + break; + case 6: /* SATA P0 on sec master SATA P1 on sec slave */ + if (unit == 1) { + channel->sata_master_idx = 1; + channel->sata_slave_idx = 2; + device_printf(channel->dev, "SATA P0 secondary master\n"); + device_printf(channel->dev, "SATA P1 secondary slave\n"); + } + break; + case 7: /* SATA P0 on sec slave SATA P1 on sec master */ + if (unit == 1) { + channel->sata_master_idx = 2; + channel->sata_slave_idx = 1; + device_printf(channel->dev, "SATA P0 secondary slave\n"); + device_printf(channel->dev, "SATA P1 secondary master\n"); + } + break; + } + } +} + static int ata_pci_add_child(device_t dev, int unit) { @@ -416,10 +500,42 @@ return 0; } +/* for now we just watch sata plug events. We need to run reinit in + a process context to tsleep etc. */ +static void +sata_watch(void) +{ + struct sata_queue *sata_op; + int s; + + TAILQ_INIT(&sata_q); + sata_ready = 1; + + for(;;) { + for(;;) { + s = splbio(); + sata_op = TAILQ_FIRST(&sata_q); + splx(s); + if (!sata_op) { + break; + } else { + sata_op->ch->active &= ~ATA_DEAD; + ata_reinit(sata_op->ch); + s = splbio(); + TAILQ_REMOVE(&sata_q, sata_op, sata_next); + free(sata_op, M_DEVBUF); + splx(s); + } + } + tsleep(&sata_q, PWAIT, "sata_wait", 0); + } +} + static int ata_pci_attach(device_t dev) { struct ata_pci_controller *controller = device_get_softc(dev); + struct proc *pid; u_int8_t class, subclass; u_int32_t type, cmd; int rid, i, result = 0; @@ -435,6 +551,9 @@ case 0x25a38086: /* Intel 6300ESB SATA150 */ case 0x25b08086: /* Intel 6300ESB SATA150 RAID */ case 0x26518086: /* Intel ICH6 SATA150 */ + if (!sata_ready) + kthread_create((void (*)(void *))sata_watch, NULL, &pid, + "sata_watch"); break; case 0x3318105a: /* Promise SATA150 TX4 */ @@ -468,6 +587,9 @@ ata_pci_add_child(dev, 0); result = bus_generic_attach(dev); } + if (!sata_ready) + kthread_create((void (*)(void *))sata_watch, NULL, &pid, + "sata_watch"); /* DONE */ return result; @@ -735,12 +858,13 @@ case 0x3376105a: /* Promise SATA */ parent = device_get_parent(ch->dev); ctlr = device_get_softc(parent); - ata_promise_plug_scan_intr(ch, 1); + irq_vector = ctlr->irq_vector; if (irq_vector & (1 << (ch->unit + 1))) { ATA_OUTL(ch->r_bmio, ATA_BMDEVSPEC_0, 0x00000001); } + ata_promise_plug_scan_intr(ch, 1); return 0; } @@ -753,23 +877,95 @@ return 0; } +static void +ata_promise_check_plug_intr(void *data) +{ + struct ata_channel *ch = (struct ata_channel *)data; + device_t parent; + struct ata_pci_controller *ctlr; + struct sata_queue *sata_op; + u_int32_t plug; + + parent = device_get_parent(ch->dev); + ctlr = device_get_softc(parent); + + + plug = _ATA_INL(ctlr->bmio[ATA_IDX_DATA].res,0x6c); + if (plug & 1 << ch->unit) { + _ATA_OUTL(ctlr->bmio[ATA_IDX_DATA].res,0x6c, 1 << ch->unit); + + if (ch->devices) { /* TUNING XXX DJA */ + ch->active = ATA_DEAD; + promise_init(ch); + } + + if (sata_ready) { + TAILQ_FOREACH(sata_op, &sata_q, sata_next) + if(sata_op->ch == ch) + break; + if ((sata_op == NULL) || (sata_op->ch != ch)) { + sata_op = malloc(sizeof(struct sata_queue),M_DEVBUF, + M_NOWAIT); + sata_op->ch = ch; + TAILQ_INSERT_TAIL(&sata_q, sata_op, sata_next); + wakeup(&sata_q); + } + } + } + + 
if (plug & 1 << (ch->unit + 4)) { + _ATA_OUTL(ctlr->bmio[ATA_IDX_DATA].res,0x6c, 1 << (ch->unit + 4)); + + if (sata_ready) { + TAILQ_FOREACH(sata_op, &sata_q, sata_next) + if(sata_op->ch == ch) + break; + if ((sata_op == NULL) || (sata_op->ch != ch)) { + sata_op = malloc(sizeof(struct sata_queue),M_DEVBUF, + M_NOWAIT); + sata_op->ch = ch; + TAILQ_INSERT_TAIL(&sata_q, sata_op, sata_next); + wakeup(&sata_q); + } + } + } +} + static void ata_promise_plug_scan_intr(void *data, int plug_only) { struct ata_channel *ch; + struct ata_channel *temp_ch; device_t parent; + u_int32_t plug; + int nchildren; + device_t *children; struct ata_pci_controller *ctlr; - int control; + int i, control; ch = (struct ata_channel *)data; parent = device_get_parent(ch->dev); ctlr = device_get_softc(parent); + plug = _ATA_INL(ctlr->bmio[ATA_IDX_DATA].res,0x6c); + if (plug & 0xff) { + if (device_get_children(parent, &children, &nchildren)) + return; + + for (i = 0; i < nchildren; i++) { + temp_ch=device_get_softc(children[i]); + ata_promise_check_plug_intr(temp_ch); + } + if ((plug & 1 << ch->unit) || (plug & 1 << (ch->unit + 4))) { + return; + } + } + if (!plug_only) { if (ctlr->irq_vector & (1 << (ch->unit + 1))) { control = ATA_INL(ch->r_bmio, ATA_BMDEVSPEC_0); if (control & (1 << (ch->unit + 16))) { - ata_promise_init(ch); + promise_init(ch); } ata_intr(ch); } else { @@ -799,6 +995,160 @@ ata_promise_plug_scan_intr(data, 0); } +static int ata_max_errors = 20; +SYSCTL_DECL(_hw_ata); +SYSCTL_INT(_hw_ata, OID_AUTO, ata_max_errors, CTLFLAG_RW, &ata_max_errors, 0, + "ATA maxium errors"); +static MALLOC_DEFINE(M_ATA, "ATA generic", "ATA driver generic layer"); +void +ata_intel_sata_status(struct ata_channel *ch) +{ + device_t parent; + struct ata_pci_controller *ctlr; + struct sata_queue *sata_op; + u_int32_t sstatus, serror, scontrol; + int index = 0, device, base, type = 0; + int s; + + if (!ch->dev) /* Channel not ready */ + return; + + s=splhigh(); + + untimeout((timeout_t *)ata_intel_sata_status, ch, ch->timeout_handle); + + switch (ch->chiptype) { + case 0x25a38086: /* Intel 6300ESB SATA */ + case 0x25b08086: /* Intel 6300ESB SATA RAID */ + case 0x24d18086: /* Intel ICH5 SATA */ + case 0x24df8086: /* Intel ICH5 SATA RAID */ + parent = device_get_parent(ch->dev); + ctlr = device_get_softc(parent); + + index = ch->sata_master_idx | ch->sata_slave_idx; + + for (device = 1; device <= 2; device++) { + if (index & device) { + if (device == 1) + base = 0x50; + else + base = 0x60; + if (device & ch->sata_master_idx) + type = MASTER; + if (device & ch->sata_slave_idx) + type = SLAVE; + + pci_write_config(parent, 0xa0, base, 4); + sstatus = pci_read_config(parent, 0xa4, 4); + + pci_write_config(parent, 0xa0, base + 4, 4); + serror = pci_read_config(parent, 0xa4, 4); + + pci_write_config(parent, 0xa0, base + 8, 4); + scontrol = pci_read_config(parent, 0xa4, 4); + + if (serror & ~0x80000) { + if (sstatus == 0x05 || sstatus == 0x113) { /* Drive left reset port */ + pci_write_config(parent, 0x92, + pci_read_config(parent, 0x92, 2) & ~device, 2); + pci_write_config(parent, 0x92, + pci_read_config(parent, 0x92, 2) | device, 2); + } + /* Loop until SATA port is okay otherwise we can hang */ + for (; ch->errors[type] < ata_max_errors;) { + ch->errors[type]++; + pci_write_config(parent, 0xa0, base, 4); + sstatus = pci_read_config(parent, 0xa4, 4); + + pci_write_config(parent, 0xa0, base + 4, 4); + serror = pci_read_config(parent, 0xa4, 4); + /* Acknowledge serror */ + pci_write_config(parent, 0xa4, serror, 4); + + 
pci_write_config(parent, 0xa0, base + 8, 4); + scontrol = pci_read_config(parent, 0xa4, 4); + device_printf(ch->dev, + "Intel SATA P%d status %x error %x scontrol %x %d %x\n", + device - 1, sstatus, serror, scontrol, + ch->unit, ch->devices); + if (!(serror & ~0x80000) && sstatus != 5) + break; + + DELAY(1000000); + } + + if (ch->errors[type] >= ata_max_errors && ch->device[type].driver) { + device_printf(ch->dev, "Removing device from system ... giving up\n"); + /* Disable drive */ + sstatus &= ~0x7; + sstatus |= 0x4; + pci_write_config(parent, 0xa0, base + 8, 4); + pci_write_config(parent, 0xa4, sstatus, 4); + + pci_write_config(parent, 0xa0, base, 4); + sstatus = pci_read_config(parent, 0xa4, 4); + + pci_write_config(parent, 0xa0, base + 4, 4); + serror = pci_read_config(parent, 0xa4, 4); + /* Acknowledge serror */ + pci_write_config(parent, 0xa4, serror, 4); + + pci_write_config(parent, 0xa0, base + 8, 4); + scontrol = pci_read_config(parent, 0xa4, 4); + pci_write_config(parent, 0xa0, base + 4, 4); + serror = pci_read_config(parent, 0xa4, 4); + if (type == MASTER && ch->devices & ATA_ATA_MASTER + && ch->device[type].driver) { + ad_detach(&ch->device[type], 0); + if (ch->device[MASTER].param) { + free(ch->device[MASTER].param, M_ATA); + ch->device[MASTER].param = NULL; + } + ch->device[MASTER].driver = NULL; + ch->device[MASTER].mode = 0; + ch->devices &= ~ATA_ATA_MASTER; + } + if (type == SLAVE && ch->devices & ATA_ATA_SLAVE + && ch->device[type].driver) { + ad_detach(&ch->device[type], 0); + if (ch->device[SLAVE].param) { + free(ch->device[SLAVE].param, M_ATA); + ch->device[SLAVE].param = NULL; + } + ch->device[SLAVE].driver = NULL; + ch->device[SLAVE].mode = 0; + ch->devices &= ~ATA_ATA_SLAVE; + } + } + if (sata_ready && !panicstr && ch->errors[type] < ata_max_errors) { + TAILQ_FOREACH(sata_op, &sata_q, sata_next) + if(sata_op->ch == ch) + break; + if ((sata_op == NULL) || (sata_op->ch != ch)) { + sata_op = malloc(sizeof(struct sata_queue),M_DEVBUF, + M_NOWAIT); + sata_op->ch = ch; + TAILQ_INSERT_TAIL(&sata_q, sata_op, sata_next); + wakeup(&sata_q); + } + } + } + } + } + ch->timeout_handle = timeout((timeout_t*)ata_intel_sata_status, + ch, 10 * hz); + } + splx(s); +} + +void +ata_intel_intr(void *data) +{ + ata_intel_sata_status(data); + + ata_intr(data); +} + static int ata_pci_print_child(device_t dev, device_t child) { --- sys/dev/ata/ata-raid.c +++ sys/dev/ata/ata-raid.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,12 @@ #include #include #include +#include +#include +#include +#include #include +#include #include #include #include @@ -49,6 +55,7 @@ /* device structures */ static d_open_t aropen; static d_strategy_t arstrategy; +static d_dump_t ardump; static struct cdevsw ar_cdevsw = { /* open */ aropen, /* close */ nullclose, @@ -60,7 +67,7 @@ /* strategy */ arstrategy, /* name */ "ar", /* maj */ 157, - /* dump */ nodump, + /* dump */ ardump, /* psize */ nopsize, /* flags */ D_DISK, /* bmaj */ -1 @@ -69,19 +76,36 @@ /* prototypes */ static void ar_attach_raid(struct ar_softc *, int); +#define DJA_REWORK_SCHEDULE +#ifdef DJA_REWORK_SCHEDULE static void ar_done(struct buf *); +#endif static void ar_config_changed(struct ar_softc *, int); static int ar_rebuild(struct ar_softc *); static int ar_highpoint_read_conf(struct ad_softc *, struct ar_softc **); static int ar_highpoint_write_conf(struct ar_softc *); static int ar_promise_read_conf(struct ad_softc *, struct ar_softc **, int); static int ar_promise_write_conf(struct ar_softc 
*); +static int ar_intel_read_conf(struct ad_softc *, struct ar_softc **); +static int ar_intel_write_conf(struct ar_softc *); static int ar_rw(struct ad_softc *, u_int32_t, int, caddr_t, int); static struct ata_device *ar_locate_disk(int); +int ar_ready = 0; +#ifdef DJA_REWORK_SCHEDULE +static void ar_work(void); +#endif +static void ar_done_work(struct buf *bp); /* internal vars */ static struct ar_softc **ar_table = NULL; static MALLOC_DEFINE(M_AR, "AR driver", "ATA RAID driver"); +static int ar_rebuild_enable = 1; + +/* sysctl vars */ +TUNABLE_INT("hw.ata.ar_rebuild_enable", &ar_rebuild_enable); +SYSCTL_DECL(_hw_ata); +SYSCTL_INT(_hw_ata, OID_AUTO, ar_rebuild_enable, CTLFLAG_RW, + &ar_rebuild_enable, 0, "AR RAID rebuild control"); int ata_raiddisk_attach(struct ad_softc *adp) @@ -95,23 +119,42 @@ continue; for (disk = 0; disk < rdp->total_disks; disk++) { - if ((rdp->disks[disk].flags & AR_DF_ASSIGNED) && - rdp->disks[disk].device == adp->device) { + /* printf("DEBUG %d %p %p %x\n",disk,rdp->disks[disk].device, + adp->device, rdp->disks[disk].flags); */ +#ifdef DJA + if (rdp->disks[disk].flags == AR_DF_SPARE + && !rdp->disks[disk].device) { + /* DJA HACK XXX CAREFUL */ + printf("Throw this disk in\n"); + rdp->disks[disk].device = adp->device; + } +#endif + if ((rdp->disks[disk].flags & (AR_DF_ASSIGNED | AR_DF_SPARE)) + && rdp->disks[disk].device == adp->device) { ata_prtdev(rdp->disks[disk].device, "inserted into ar%d disk%d as spare\n", array, disk); rdp->disks[disk].flags |= (AR_DF_PRESENT | AR_DF_SPARE); AD_SOFTC(rdp->disks[disk])->flags = AD_F_RAID_SUBDISK; ar_config_changed(rdp, 1); + /* DJA HACK */ + ata_raid_rebuild(array); + /* DJA HACK */ return 1; } } } } - if (!ar_table) + if (!ar_table) { + struct proc *pid; + ar_table = malloc(sizeof(struct ar_soft *) * MAX_ARRAYS, M_AR, M_NOWAIT | M_ZERO); +#ifdef DJA_REWORK_SCHEDULE + kthread_create((void (*)(void *))ar_work, NULL, &pid, "ar_work"); +#endif + } if (!ar_table) { ata_prtdev(adp->device, "no memory for ATA raid array\n"); return 0; @@ -122,12 +165,18 @@ case 0x0d30105a: case 0x4d68105a: case 0x6268105a: case 0x4d69105a: case 0x5275105a: case 0x6269105a: case 0x7275105a: + case 0x3318105a: case 0x3319105a: case 0x3371105a: case 0x3373105a: + case 0x3376105a: /* test RAID bit in PCI reg XXX */ return (ar_promise_read_conf(adp, ar_table, 0)); case 0x00041103: case 0x00051103: case 0x00081103: return (ar_highpoint_read_conf(adp, ar_table)); + + case 0x24df8086: case 0x25b08086: + return (ar_intel_read_conf(adp, ar_table)); + default: return (ar_promise_read_conf(adp, ar_table, 1)); } @@ -147,10 +196,15 @@ for (disk = 0; disk < rdp->total_disks; disk++) { if (rdp->disks[disk].device == adp->device) { ata_prtdev(rdp->disks[disk].device, - "deleted from ar%d disk%d\n", array, disk); + "deleted from ar%d disk %d\n", array, disk); rdp->disks[disk].flags &= ~(AR_DF_PRESENT | AR_DF_ONLINE); + if (rdp->disks[disk].flags & AR_DF_REBUILDING) { + rdp->disks[disk].flags &= ~AR_DF_REBUILDING; + rdp->flags &= ~AR_F_REBUILDING; + } AD_SOFTC(rdp->disks[disk])->flags &= ~AD_F_RAID_SUBDISK; ar_config_changed(rdp, 1); + rdp->disks[disk].device = NULL; return 1; } } @@ -210,6 +264,14 @@ break; case (AR_F_DEGRADED | AR_F_READY): printf("DEGRADED"); + /* DJA HACK */ + if (!rdp->flags & AR_F_REBUILDING) { + printf(" [Rebuilding]"); + rdp->flags |= AR_F_REBUILDING; + kthread_create((void (*)(void *))ar_rebuild, rdp, &rdp->pid, + "rebuilding ar%d", 0); + } + /* DJA HACK */ break; default: printf("BROKEN"); @@ -225,7 +287,6 @@ else printf(" %d FREE ", 
disk); ad_print(AD_SOFTC(rdp->disks[disk])); - printf(" "); ata_enclosure_print(AD_SOFTC(rdp->disks[disk])->device); } else if (rdp->disks[disk].flags & AR_DF_ASSIGNED) @@ -236,12 +297,48 @@ } int +ata_raid_addspare(int array, int disk) +{ + struct ar_softc *rdp; + struct ata_device *atadev; + int i; + + if (!ar_table || !(rdp = ar_table[array])) + return ENXIO; + if (!(rdp->flags & AR_F_RAID1)) + return EPERM; + if (rdp->flags & AR_F_REBUILDING) + return EBUSY; + if (!(rdp->flags & AR_F_DEGRADED) || !(rdp->flags & AR_F_READY)) + return ENXIO; + + for (i = 0; i < rdp->total_disks; i++ ) { + if (((rdp->disks[i].flags & (AR_DF_PRESENT | AR_DF_ONLINE)) == + (AR_DF_PRESENT | AR_DF_ONLINE)) && rdp->disks[i].device) + continue; + if ((atadev = ar_locate_disk(disk))){ + if(((((struct ad_softc *)atadev->driver)->flags & AD_F_RAID_SUBDISK) == 0)) { + rdp->disks[i].device = atadev; + rdp->disks[i].flags |= (AR_DF_PRESENT|AR_DF_ASSIGNED|AR_DF_SPARE); + ((struct ad_softc *)atadev->driver)->flags |= AD_F_RAID_SUBDISK; + ata_prtdev(rdp->disks[i].device, + "inserted into ar%d disk%d as spare\n", array, i); + ar_config_changed(rdp, 1); + ata_raid_rebuild(array); + return 0; + } + } + } + return ENXIO; +} + +int ata_raid_create(struct raid_setup *setup) { struct ata_device *atadev; struct ar_softc *rdp; int array, disk; - int ctlr = 0, disk_size = 0, total_disks = 0; + int ctlr = 0, disk_size, total_disks = 0; if (!ar_table) ar_table = malloc(sizeof(struct ar_soft *) * MAX_ARRAYS, @@ -263,10 +360,11 @@ return ENOMEM; } + disk_size = setup->disk_size; for (disk = 0; disk < setup->total_disks; disk++) { if ((atadev = ar_locate_disk(setup->disks[disk]))) { rdp->disks[disk].device = atadev; - if (AD_SOFTC(rdp->disks[disk])->flags & AD_F_RAID_SUBDISK) { + if (((struct ad_softc *)atadev->driver)->flags & AD_F_RAID_SUBDISK) { setup->disks[disk] = -1; free(rdp, M_AR); return EBUSY; @@ -289,6 +387,12 @@ PR_LBA(AD_SOFTC(rdp->disks[disk])); break; } + switch (rdp->disks[disk].device->channel->chiptype) { + case 0x24df8086: case 0x25b08086: + ctlr = AR_F_INTEL_RAID; + rdp->disks[disk].disk_sectors = + I_LBA(AD_SOFTC(rdp->disks[disk])); + } if ((rdp->flags & (AR_F_PROMISE_RAID|AR_F_HIGHPOINT_RAID)) && (rdp->flags & (AR_F_PROMISE_RAID|AR_F_HIGHPOINT_RAID)) != (ctlr & (AR_F_PROMISE_RAID|AR_F_HIGHPOINT_RAID))) { @@ -341,8 +445,9 @@ break; } - for (disk = 0; disk < total_disks; disk++) - AD_SOFTC(rdp->disks[disk])->flags = AD_F_RAID_SUBDISK; + for (disk = 0; disk < total_disks; disk++){ + ((struct ad_softc *)rdp->disks[disk].device->driver)->flags |= AD_F_RAID_SUBDISK; + } rdp->lun = array; if (rdp->flags & AR_F_RAID0) { @@ -354,6 +459,8 @@ rdp->interleave = min(max(2, 1 << bit), 2048); if (rdp->flags & AR_F_HIGHPOINT_RAID) rdp->interleave = min(max(32, 1 << bit), 128); + if (rdp->flags & AR_F_INTEL_RAID) + rdp->interleave = min(max(16, 1 << bit), 256); } rdp->total_disks = total_disks; rdp->width = total_disks / ((rdp->flags & AR_F_RAID1) ? 
2 : 1); @@ -369,6 +476,11 @@ rdp->offset = HPT_LBA + 1; rdp->reserved = HPT_LBA + 1; } + if (rdp->flags & AR_F_INTEL_RAID) { + rdp->offset = 0; + rdp->reserved = 63; + rdp->total_sectors = (rdp->total_sectors / 512) * 512; + } rdp->lock_start = rdp->lock_end = 0xffffffff; rdp->flags |= AR_F_READY; @@ -391,17 +503,26 @@ if (!(rdp = ar_table[array])) return ENXIO; + if (rdp->pid) { + printf("ar%d: stopping rebuild\n", rdp->lun); + rdp->rebuild_stop = 1; + tsleep(&rdp->pid, PWAIT, "rebuild_wait", 0); + rdp->rebuild_stop = 0; + } rdp->flags &= ~AR_F_READY; for (disk = 0; disk < rdp->total_disks; disk++) { if ((rdp->disks[disk].flags&AR_DF_PRESENT) && rdp->disks[disk].device) { - AD_SOFTC(rdp->disks[disk])->flags &= ~AD_F_RAID_SUBDISK; + ((struct ad_softc *)rdp->disks[disk].device->driver)->flags &= ~AD_F_RAID_SUBDISK; ata_enclosure_leds(rdp->disks[disk].device, ATA_LED_GREEN); rdp->disks[disk].flags = 0; } } if (rdp->flags & AR_F_PROMISE_RAID) ar_promise_write_conf(rdp); - else + else if (rdp->flags & AR_F_INTEL_RAID) { + rdp->flags |= AR_F_DELETE; + ar_intel_write_conf(rdp); + } else ar_highpoint_write_conf(rdp); disk_invalidate(&rdp->disk); disk_destroy(rdp->dev); @@ -435,10 +556,16 @@ } status->total_disks = rdp->total_disks; for (i = 0; i < rdp->total_disks; i++ ) { - if ((rdp->disks[i].flags & AR_DF_PRESENT) && rdp->disks[i].device) + if ((rdp->disks[i].flags & AR_DF_PRESENT) && rdp->disks[i].device + && rdp->disks[i].device->driver + && AD_SOFTC(rdp->disks[i])->flags & AD_F_RAID_SUBDISK) status->disks[i] = AD_SOFTC(rdp->disks[i])->lun; else status->disks[i] = -1; + if (rdp->disks[i].flags & AR_DF_ONLINE) + status->not_okay[i] = 0; + else + status->not_okay[i] = 1; } status->interleave = rdp->interleave; status->status = 0; @@ -462,8 +589,34 @@ return ENXIO; if (rdp->flags & AR_F_REBUILDING) return EBUSY; + rdp->flags |= AR_F_REBUILDING; /* create process here XXX SOS */ - return ar_rebuild(rdp); + printf("ar%d: Starting rebuild\n", rdp->lun); + return kthread_create((void (*)(void *))ar_rebuild, rdp, &rdp->pid, + "rebuilding ar%d", array); +} + +int +ata_raid_rebuild_stop(int array) +{ + struct ar_softc *rdp; + int disk; + + if (!ar_table || !(rdp = ar_table[array])) + return ENXIO; + if ((rdp->flags & AR_F_REBUILDING) == 0) + return ENXIO; + + if (rdp->pid) { + rdp->rebuild_stop = 1; + tsleep(&rdp->pid, PWAIT, "rebuild_wait", 0); + rdp->rebuild_stop = 0; + } + rdp->flags &= ~AR_F_REBUILDING; + for (disk = 0; disk < rdp->total_disks; disk++) + rdp->disks[disk].flags &= ~AR_DF_REBUILDING; + ar_config_changed(rdp, 1); + return 0; } static int @@ -483,6 +636,7 @@ return 0; } +int ata_ar_pending=0; static void arstrategy(struct buf *bp) { @@ -490,6 +644,7 @@ int blkno, count, chunk, lba, lbs, tmplba; int drv = 0, change = 0; caddr_t data; + int s; if (!(rdp->flags & AR_F_READY)) { bp->b_flags |= B_ERROR; @@ -497,7 +652,7 @@ biodone(bp); return; } - + ata_ar_pending++; bp->b_resid = bp->b_bcount; blkno = bp->b_pblkno; data = bp->b_data; @@ -541,10 +696,12 @@ printf("ar%d: unknown array type in arstrategy\n", rdp->lun); bp->b_flags |= B_ERROR; bp->b_error = EIO; + ata_ar_pending--; biodone(bp); return; } + s = splbio(); buf1 = malloc(sizeof(struct ar_buf), M_AR, M_NOWAIT | M_ZERO); BUF_LOCKINIT(&buf1->bp); BUF_LOCK(&buf1->bp, LK_EXCLUSIVE); @@ -555,8 +712,13 @@ buf1->bp.b_bcount = chunk * DEV_BSIZE; buf1->bp.b_data = data; buf1->bp.b_flags = bp->b_flags | B_CALL; +#ifdef DJA_REWORK_SCHEDULE buf1->bp.b_iodone = ar_done; +#else + buf1->bp.b_iodone = ar_done_work; +#endif buf1->org = bp; + 
splx(s); switch (rdp->flags & (AR_F_RAID0 | AR_F_RAID1 | AR_F_SPAN)) { case AR_F_SPAN: @@ -569,6 +731,7 @@ free(buf1, M_AR); bp->b_flags |= B_ERROR; bp->b_error = EIO; + ata_ar_pending--; biodone(bp); return; } @@ -605,6 +768,7 @@ free(buf1, M_AR); bp->b_flags |= B_ERROR; bp->b_error = EIO; + ata_ar_pending--; biodone(bp); return; } @@ -653,11 +817,162 @@ } } +int +ardump(dev_t dev) +{ + struct ar_softc *rdp = dev->si_drv1; + struct ad_softc *adp; + struct ad_request request[10]; /* assume mirror DJA */ + u_int count, blkno, secsize; + vm_paddr_t addr = 0; + long blkcnt; + int dumppages = MAXDUMPPGS; + int error; + int i, disk; + + if ((error = disk_dumpcheck(dev, &count, &blkno, &secsize))) + return error; + + if (!rdp) + return ENXIO; + + if ((rdp->flags & (AR_F_RAID0 | AR_F_RAID1 | AR_F_SPAN)) != AR_F_RAID1) { + printf("Dumps only supported with RAID1 %x\n", rdp->flags); + return ENXIO; + } + + /* force PIO mode for dumps */ + for (disk = 0; disk < rdp->total_disks; disk++) { + if ( !rdp->disks[disk].device ) + continue; + adp = AD_SOFTC(rdp->disks[disk]); + adp->device->mode = ATA_PIO; + ata_reinit(adp->device->channel); + } + + blkcnt = howmany(PAGE_SIZE, secsize); + while (count > 0) { + caddr_t va = NULL; + DELAY(1000); + + if ((count / blkcnt) < dumppages) + dumppages = count / blkcnt; + + for (i = 0; i < dumppages; ++i) { + vm_paddr_t a = addr + (i * PAGE_SIZE); + if (is_physical_memory(a)) + va = pmap_kenter_temporary(trunc_page(a), i); + else + va = pmap_kenter_temporary(trunc_page(0), i); + } + + for (disk = 0; disk < rdp->total_disks; disk++) { + if ( !rdp->disks[disk].device ) + continue; + bzero(&request[disk], sizeof(struct ad_request)); + request[disk].softc = AD_SOFTC(rdp->disks[disk]); + request[disk].blockaddr = blkno; + request[disk].bytecount = PAGE_SIZE * dumppages; + request[disk].data = va; + } + + for (disk = 0; disk < rdp->total_disks; disk++) { + if ( !rdp->disks[disk].device || + rdp->disks[disk].flags & AR_DF_REBUILDING + || !(rdp->disks[disk].flags & AR_DF_ONLINE)) + continue; + while (request[disk].bytecount > 0) { + /* DJA XXX need generic method */ + ata_intel_sata_status(rdp->disks[disk].device->channel); + ad_transfer(&request[disk]); + if (request[disk].flags & ADR_F_ERROR) + return EIO; + request[disk].donecount += request[disk].currentsize; + request[disk].bytecount -= request[disk].currentsize; + DELAY(20); + } + } + + if (dumpstatus(addr, (off_t)count * DEV_BSIZE) < 0) + return EINTR; + + blkno += blkcnt * dumppages; + count -= blkcnt * dumppages; + addr += PAGE_SIZE * dumppages; + } + + for (disk = 0; disk < rdp->total_disks; disk++) { + if ( !rdp->disks[disk].device ) + continue; + adp = AD_SOFTC(rdp->disks[disk]); + adp->device->channel->status = ATA_S_BUSY; + if (ata_wait(adp->device, ATA_S_READY | ATA_S_DSC) < 0) + ata_prtdev(adp->device, "timeout waiting for final ready\n"); + } + return 0; +} + +struct ar_queue { + TAILQ_ENTRY(ar_queue) ar_next; + struct buf *bp; +}; +TAILQ_HEAD(,ar_queue) ar_q; + +#ifdef DJA_REWORK_SCHEDULE +static void +ar_work(void) +{ + struct ar_queue *ar_op; + + int s; + + TAILQ_INIT(&ar_q); + ar_ready = 1; + for (;;) { + s = splhigh(); + ar_op = TAILQ_FIRST(&ar_q); + if (!ar_op) + splx(s); + else { + TAILQ_REMOVE(&ar_q, ar_op, ar_next); + splx(s); + ar_done_work(ar_op->bp); + free(ar_op, M_DEVBUF); + } + s = splhigh(); + if (!TAILQ_FIRST(&ar_q)) { + splx(s); + tsleep(&ar_q, PZERO, "ar_wait", 5 * hz); + } + splx(s); + } +} + + static void ar_done(struct buf *bp) { + int s; + struct ar_queue *ar_op = NULL; + + if 
(ar_ready) { + ar_op = malloc(sizeof(struct ar_queue), M_DEVBUF, M_NOWAIT); + ar_op->bp = bp; + s = splhigh(); + TAILQ_INSERT_HEAD(&ar_q, ar_op, ar_next); + splx(s); + wakeup(&ar_q); + } else + ar_done_work(bp); +} +#endif + +static void +ar_done_work(struct buf *bp) +{ struct ar_softc *rdp = (struct ar_softc *)bp->b_caller1; struct ar_buf *buf = (struct ar_buf *)bp; + int s; switch (rdp->flags & (AR_F_RAID0 | AR_F_RAID1 | AR_F_SPAN)) { case AR_F_SPAN: @@ -667,22 +982,79 @@ ar_config_changed(rdp, 1); buf->org->b_flags |= B_ERROR; buf->org->b_error = EIO; + ata_ar_pending--; biodone(buf->org); } else { buf->org->b_resid -= buf->bp.b_bcount; - if (buf->org->b_resid == 0) + if (buf->org->b_resid == 0) { + ata_ar_pending--; biodone(buf->org); + } } break; case AR_F_RAID1: case AR_F_RAID0 | AR_F_RAID1: if (buf->bp.b_flags & B_ERROR) { + int other_drive = 0; +#ifdef DJA_REWORK_SCHEDULE + /* DJA */ + if (rdp->disks[buf->drive].flags & AR_DF_ONLINE && + rdp->flags & AR_F_RAID1 && buf->bp.b_flags & B_READ) { + printf("ar%d: RAID read failure %d block %d, count %d, " + "flags %x\n", rdp->lun, + buf->drive, + buf->bp.b_pblkno, + (int)buf->bp.b_bcount, + (int)buf->bp.b_flags + ); + for (other_drive = 0; other_drive < rdp->total_disks; + other_drive++) { + if (buf->drive == other_drive) + continue; + printf("ar%d: Checking drive %d flags %x\n", rdp->lun, + other_drive, rdp->disks[other_drive].flags); + if (!rdp->disks[other_drive].flags & AR_DF_ONLINE) + continue; + + printf("ar%d: Found drive %d for %d\n", + rdp->lun, other_drive, buf->drive); + + if (ar_rw(AD_SOFTC(rdp->disks[other_drive]), + buf->bp.b_pblkno, buf->bp.b_bcount, + buf->bp.b_data, AR_READ | AR_WAIT)) { + printf("ar%d: Failed to recover data from other " + "drive\n", rdp->lun); + } else { + printf("ar%d: Recovering data from other drive\n", + rdp->lun); + if (!rdp->disks[buf->drive].flags & AR_DF_PRESENT) { + printf("ar%d: No drive to write data to\n", + rdp->lun); + } else { + if (ar_rw(AD_SOFTC(rdp->disks[buf->drive]), + buf->bp.b_pblkno, buf->bp.b_bcount, + buf->bp.b_data, AR_WRITE | AR_WAIT)) { + printf("ar%d: Failed to write recovered data\n", + rdp->lun); + } else { + printf("ar%d: Succeeded in writing out data\n", + rdp->lun); + buf->bp.b_flags &= ~B_ERROR; + goto recovered; + } + } + } + } + } + /* DJA */ +#endif rdp->disks[buf->drive].flags &= ~AR_DF_ONLINE; ar_config_changed(rdp, 1); if (rdp->flags & AR_F_READY) { if (buf->bp.b_flags & B_READ) { + s = splbio(); if (buf->drive < rdp->width) buf->drive = buf->drive + rdp->width; else @@ -690,14 +1062,17 @@ buf->bp.b_dev = AD_SOFTC(rdp->disks[buf->drive])->dev; buf->bp.b_flags = buf->org->b_flags | B_CALL; buf->bp.b_error = 0; + splx(s); AR_STRATEGY((struct buf *)buf); return; } else { if (buf->flags & AB_F_DONE) { buf->org->b_resid -= buf->bp.b_bcount; - if (buf->org->b_resid == 0) + if (buf->org->b_resid == 0) { + ata_ar_pending--; biodone(buf->org); + } } else buf->mirror->flags |= AB_F_DONE; @@ -706,10 +1081,14 @@ else { buf->org->b_flags |= B_ERROR; buf->org->b_error = EIO; + ata_ar_pending--; biodone(buf->org); } } else { +#ifdef DJA_REWORK_SCHEDULE +recovered: +#endif if (!(buf->bp.b_flags & B_READ)) { if (buf->mirror && !(buf->flags & AB_F_DONE)){ buf->mirror->flags |= AB_F_DONE; @@ -717,8 +1096,10 @@ } } buf->org->b_resid -= buf->bp.b_bcount; - if (buf->org->b_resid == 0) + if (buf->org->b_resid == 0) { + ata_ar_pending--; biodone(buf->org); + } } break; @@ -784,6 +1165,8 @@ ar_promise_write_conf(rdp); if (rdp->flags & AR_F_HIGHPOINT_RAID) ar_highpoint_write_conf(rdp); + 
if (rdp->flags & AR_F_INTEL_RAID) + ar_intel_write_conf(rdp); } } @@ -793,8 +1176,18 @@ int disk, s, count = 0, error = 0; caddr_t buffer; - if ((rdp->flags & (AR_F_READY|AR_F_DEGRADED)) != (AR_F_READY|AR_F_DEGRADED)) - return EEXIST; + if (!ar_rebuild_enable) { + printf("ar%d: Rebuilding disabled via sysctl\n", rdp->lun); + rdp->pid = NULL; + wakeup(&rdp->pid); + kthread_exit(EPERM); + } + if ((rdp->flags & (AR_F_READY|AR_F_DEGRADED)) != (AR_F_READY|AR_F_DEGRADED)) { + printf("ar%d: Failed rebuild\n", rdp->lun); + rdp->pid = NULL; + wakeup(&rdp->pid); + kthread_exit(EEXIST); + } for (disk = 0; disk < rdp->total_disks; disk++) { if (((rdp->disks[disk].flags&(AR_DF_PRESENT|AR_DF_ONLINE|AR_DF_SPARE))== @@ -810,11 +1203,18 @@ continue; } ata_enclosure_leds(rdp->disks[disk].device, ATA_LED_ORANGE); + rdp->disks[disk].flags |= AR_DF_REBUILDING; + printf("ar%d: Rebuilding disk ad%d\n", rdp->lun, + AD_SOFTC(rdp->disks[disk])->lun); count++; } } - if (!count) - return ENODEV; + if (!count) { + printf("ar%d: Failed rebuild -- no suitable drive\n", rdp->lun); + rdp->pid = NULL; + wakeup(&rdp->pid); + kthread_exit(ENODEV); + } /* setup start conditions */ s = splbio(); @@ -831,6 +1231,8 @@ for (disk = 0; disk < rdp->width; disk++) { struct ad_softc *adp; + if (rdp->rebuild_stop) + break; if (((rdp->disks[disk].flags & AR_DF_ONLINE) && (rdp->disks[disk + rdp->width].flags & AR_DF_ONLINE)) || ((rdp->disks[disk].flags & AR_DF_ONLINE) && @@ -839,26 +1241,57 @@ !(rdp->disks[disk].flags & AR_DF_SPARE))) continue; - if (rdp->disks[disk].flags & AR_DF_ONLINE) - adp = AD_SOFTC(rdp->disks[disk]); - else - adp = AD_SOFTC(rdp->disks[disk + rdp->width]); + s = splbio(); + adp = NULL; + if (rdp->disks[disk].flags & AR_DF_ONLINE) { + if (rdp->disks[disk].flags & AR_DF_PRESENT) + adp = AD_SOFTC(rdp->disks[disk]); + } else { + if (rdp->disks[disk + rdp->width].flags & AR_DF_PRESENT) + adp = AD_SOFTC(rdp->disks[disk + rdp->width]); + } + splx(s); + if (!adp) { + error = 1; + break; + } if ((error = ar_rw(adp, rdp->lock_start, size * DEV_BSIZE, buffer, AR_READ | AR_WAIT))) break; - if (rdp->disks[disk].flags & AR_DF_ONLINE) - adp = AD_SOFTC(rdp->disks[disk + rdp->width]); - else - adp = AD_SOFTC(rdp->disks[disk]); + s = splbio(); + adp = NULL; + if (rdp->disks[disk].flags & AR_DF_ONLINE) { + if (rdp->disks[disk + rdp->width].flags & AR_DF_PRESENT) + adp = AD_SOFTC(rdp->disks[disk + rdp->width]); + } else { + if (rdp->disks[disk].flags & AR_DF_PRESENT) + adp = AD_SOFTC(rdp->disks[disk]); + } + splx(s); + if (!adp) { + error = 1; + break; + } if ((error = ar_rw(adp, rdp->lock_start, size * DEV_BSIZE, buffer, AR_WRITE | AR_WAIT))) break; } - if (error) { + if (error || rdp->rebuild_stop) { wakeup(rdp); free(buffer, M_AR); - return error; + s = splbio(); + rdp->lock_start = 0xffffffff; + rdp->lock_end = 0xffffffff; + rdp->flags &= ~AR_F_REBUILDING; + for (disk = 0; disk < rdp->total_disks; disk++) + rdp->disks[disk].flags &= ~AR_DF_REBUILDING; + ar_config_changed(rdp, 1); + splx(s); + printf("ar%d: Failed rebuild -- error %x\n", rdp->lun, error); + rdp->pid = NULL; + wakeup(&rdp->pid); + kthread_exit(error); } s = splbio(); rdp->lock_start = rdp->lock_end; @@ -878,9 +1311,14 @@ rdp->lock_start = 0xffffffff; rdp->lock_end = 0xffffffff; rdp->flags &= ~AR_F_REBUILDING; + for (disk = 0; disk < rdp->total_disks; disk++) + rdp->disks[disk].flags &= ~AR_DF_REBUILDING; + rdp->pid = NULL; splx(s); ar_config_changed(rdp, 1); - return 0; + printf("ar%d: Rebuild successful\n", rdp->lun); + wakeup(&rdp->pid); + kthread_exit(0); } 
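/*
 * Editor's sketch (not part of the patch) of the rebuild strategy used by
 * ar_rebuild() above: the array is copied in fixed-size windows, each window
 * being read from the surviving half and written to the disk being rebuilt,
 * while lock_start/lock_end keep normal array I/O out of the window being
 * copied.  The userland-style illustration below assumes two open file
 * descriptors and plain pread/pwrite in place of ar_rw(); rebuild_copy() is
 * a hypothetical name.
 */
#include <sys/types.h>
#include <unistd.h>

static int
rebuild_copy(int from_fd, int to_fd, off_t bytes, size_t window, char *buf)
{
    off_t off;
    ssize_t n;

    for (off = 0; off < bytes; off += window) {
        if ((n = pread(from_fd, buf, window, off)) <= 0)
            return -1;                  /* read from the surviving half */
        if (pwrite(to_fd, buf, n, off) != n)
            return -1;                  /* write to the half being rebuilt */
    }
    return 0;
}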
static int @@ -1145,7 +1583,40 @@ goto promise_out; } } - +#if 0 + {int i; + printf("Promise id %s\n",info->promise_id); + printf("Promise dummy_0 %x\n",info->dummy_0); + printf("Promise magic_0 %xl\n",info->magic_0); + printf("Promise magic_1 %x\n",info->magic_1); + printf("Promise magic_2 %x\n",info->magic_2); + printf("Promise integrity %x\n",info->raid.integrity); + printf("Promise flags %x\n",info->raid.flags); + printf("Promise disk_number %x\n",info->raid.disk_number); + printf("Promise channel %x\n",info->raid.channel); + printf("Promise device %x\n",info->raid.device); + printf("Promise disk_offset %x\n",info->raid.disk_offset); + printf("Promise disk_sectors %x\n",info->raid.disk_sectors); + printf("Promise rebuild_lba %x\n",info->raid.rebuild_lba); + printf("Promise generation %x\n",info->raid.generation); + printf("Promise status %x\n",info->raid.status); + printf("Promise type %x\n",info->raid.type); + printf("Promise total_disks %x\n",info->raid.total_disks); + printf("Promise stripe_shift %x\n",info->raid.stripe_shift); + printf("Promise array_width %x\n",info->raid.array_width); + printf("Promise array_number %x\n",info->raid.array_number); + printf("Promise total_sectors %x\n",info->raid.total_sectors); + printf("Promise magic_1 %x\n",info->raid.magic_1); + for(i=0;i<8;i++){ + printf("Promise %d flags %x\n",i,info->raid.disk[i].flags); + printf("Promise %d dummy_0 %x\n",i,info->raid.disk[i].dummy_0); + printf("Promise %d channel %x\n",i,info->raid.disk[i].channel); + printf("Promise %d device %x\n",i,info->raid.disk[i].device); + printf("Promise %d magic_0 %x\n",i,info->raid.disk[i].magic_0); + } + printf("Promise checksum %x\n",info->checksum); + } +#endif /* check if the checksum is OK */ for (cksum = 0, ckptr = (int32_t *)info, count = 0; count < 511; count++) cksum += *ckptr++; @@ -1245,6 +1716,9 @@ } if (info->raid.disk[disk].flags & (PR_F_REDIR | PR_F_DOWN)) raid->disks[disk].flags &= ~AR_DF_ONLINE; + if ((raid->disks[disk].flags & AR_DF_ONLINE) == 0) { + raid->disks[disk].flags |= AR_DF_SPARE; + } } if (!disksum) { free(raidp[array], M_AR); @@ -1393,6 +1867,409 @@ return 0; } +static int +ar_intel_read_conf(struct ad_softc *adp, struct ar_softc **raidp) +{ + struct intel_raid_conf *info; + struct intel_raid_config *config; + struct ar_softc *raid = NULL; + int array, disk = 0, retval = 0, size = 1024, i, matched = -1; + u_int32_t count, cksum, cksum_orig, *ckptr; + + if (!(info = (struct intel_raid_conf *) + malloc(size, M_AR, M_NOWAIT | M_ZERO))) + return retval; + + if (ar_rw(adp, I_LBA(adp), size, + (caddr_t)info, AR_READ | AR_WAIT)) { + if (bootverbose) + printf("ar: Intel read conf failed\n"); + goto intel_out; + } + + /* check if this is a Intel RAID struct */ + if (bcmp(info->intel_id, I_MAGIC, sizeof(I_MAGIC) - 1)) { + if (bootverbose) + printf("ar: Intel check1 failed\n"); + goto intel_out; + } + +#if 0 + printf("%s\n", info->intel_id); + printf("checksum %d\n",info->checksum); +#endif + + ckptr = (void *)info; + cksum_orig = info->checksum; + info->checksum = 0; + for (cksum = 0, count = 0; count < info->disk_struct_size / 4; count++) + cksum += *ckptr++; + +#if 0 + printf("calculated checksum %d\n",cksum); +#endif + + if (cksum != cksum_orig) { + if (bootverbose) + printf("ar: Intel checksum failed\n"); + goto intel_out; + } + +#if 0 + printf("disk_struct_size %d\n",info->disk_struct_size); + printf("id %d\n",info->id); + printf("generation %d\n",info->generation); + printf("reserved[0] %d\n",info->reserved[0]); + printf("reserved[1] 
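/*
 * Editor's sketch (not part of the patch): ar_intel_read_conf() above pulls
 * the Intel metadata block from near the end of the disk (I_LBA()), verifies
 * the I_MAGIC signature, and then validates a simple 32-bit word checksum:
 * the stored checksum field is excluded, the first disk_struct_size bytes
 * are summed as u_int32_t words, and the result must equal the stored value.
 * A condensed form of that check, assuming the struct intel_raid_conf layout
 * added to ata-raid.h below; intel_cksum_ok() is a hypothetical name.
 */
static int
intel_cksum_ok(struct intel_raid_conf *info)
{
    u_int32_t sum = 0, want = info->checksum, *p = (u_int32_t *)info;
    u_int32_t i;

    info->checksum = 0;                 /* the field itself is not summed */
    for (i = 0; i < info->disk_struct_size / 4; i++)
        sum += p[i];
    info->checksum = want;              /* restore the stored value */
    return (sum == want);
}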
%d\n",info->reserved[1]);
+    printf("total_disks %d\n",info->total_disks);
+    printf("bootable %d\n",info->bootable);
+#endif
+
+    for (i=0; i < info->total_disks; i++) {
+#if 0
+        printf("serial %s\n",info->disk[i].serial);
+        printf("total_sectors %d\n",info->disk[i].total_sectors);
+        printf("unit_id %d\n",info->disk[i].unit_id);
+        printf("status %d\n",info->disk[i].status);
+#endif
+        if (strncmp(info->disk[i].serial, adp->device->param->serial,
+            sizeof(adp->device->param->serial)) == 0) {
+            matched = i;
+        }
+    }
+
+    config = (struct intel_raid_config*)&info->disk[info->total_disks];
+#if 0
+    printf("volume serial %s\n", config->serial);
+    printf("volume total_sectors_low %d\n", config->total_sectors_low);
+    printf("volume total_sectors_high %d\n", config->total_sectors_high);
+    printf("volume status %d\n", config->status);
+    printf("volume migrating %d\n", config->volume.migrating);
+    printf("volume state %d\n", config->volume.state);
+    printf("volume dirty %d\n", config->volume.dirty);
+
+    printf("map start %d\n", config->map.start);
+    printf("map total_sectors %d\n", config->map.total_sectors);
+    printf("map stripes %d\n", config->map.stripes);
+    printf("map interleave %d\n", config->map.interleave);
+    printf("map state %d\n", config->map.state);
+    printf("map type %d\n", config->map.type);
+    printf("map total_disks %d\n", config->map.total_disks);
+    for(i=0; i < config->map.total_disks; i++){
+        printf("Disk %d %d\n", i, config->map.disk_order[i]);
+    }
+#endif
+
+    if (matched == -1) {
+        if (bootverbose)
+            printf("ar: Intel couldn't match drive to config\n");
+        goto intel_out;
+    }
+
+    for (array = 0; array < MAX_ARRAYS; array++) {
+        if (!raidp[array]) {
+            raidp[array] =
+                (struct ar_softc*)malloc(sizeof(struct ar_softc), M_AR,
+                                         M_NOWAIT | M_ZERO);
+            if (!raidp[array]) {
+                printf("ar%d: failed to allocate raid config storage\n", array);
+                goto intel_out;
+            }
+        }
+        raid = raidp[array];
+        if (raid->flags & AR_F_HIGHPOINT_RAID)
+            continue;
+        if (raid->flags & AR_F_PROMISE_RAID)
+            continue;
+        if ((raid->flags & AR_F_INTEL_RAID) && raid->magic_0 != info->id)
+            continue;
+
+        raid->magic_0 = info->id;
+
+/*printf("HELLO gen %d %d\n",info->generation, raid->generation); */
+
+        if (!info->generation || info->generation > raid->generation) {
+            raid->generation = info->generation;
+            raid->flags = AR_F_INTEL_RAID;
+            raid->lun = array;
+
+            switch (config->volume.state) {
+#if 0
+            case I_VOLUME_DISABLED:
+                raid->flags |= AR_F_DEGRADED;
+                break;
+            case I_VOLUME_DEGRADED:
+                raid->flags |= AR_F_DEGRADED;
+                break;
+            case I_VOLUME_FAILED:
+                raid->flags |= AR_F_DEGRADED;
+                break;
+#endif
+            default:
+                /* printf("Unknown Intel Volume state %x\n", config->volume.state); */
+                break;
+            }
+
+            switch (config->map.state) {
+            case I_MAP_OKAY:
+                raid->flags |= AR_F_READY;
+                break;
+            case I_MAP_DEGRADED:
+                raid->flags |= AR_F_DEGRADED;
+                /* FALLTHROUGH */
+            case I_MAP_FAILED:
+                raid->flags |= AR_F_DEGRADED;
+                i = config->map.filler1[1];
+                if (i == 0xff) {
+                    printf("INTEL BOGUS drive failed\n");
+                } else {
+                    if (raid->disks[i].flags & AR_DF_ONLINE) {
+                        raid->disks[i].flags &= ~AR_DF_ONLINE;
+                    }
+                    raid->disks[i].flags |= AR_DF_SPARE;
+                }
+                break;
+            default:
+                printf("Unknown Intel Map state %x\n", config->map.state);
+            }
+
+            switch (config->map.type) {
+            case I_MAP_RAID0:
+                raid->flags |= AR_F_RAID0;
+                break;
+
+            case I_MAP_RAID1:
+                raid->flags |= AR_F_RAID1;
+                break;
+
+            default:
+                printf("ar%d: Intel unknown RAID type 0x%02x\n", array,
+                       config->map.type);
+                goto intel_out;
+            }
+            raid->width = config->map.total_disks
+                / ((raid->flags & AR_F_RAID1) ? 2 : 1);
+            raid->interleave = config->map.interleave;
+            raid->total_disks = config->map.total_disks;
+
+            raid->heads = 255;
+            raid->sectors = 63;
+            raid->cylinders = config->total_sectors_low / (63 * 255);
+            raid->total_sectors = config->total_sectors_low;
+            raid->offset = 0;
+            raid->reserved = 63;
+            raid->lock_start = raid->lock_end = 0;
+
+            for (disk = 0; disk < config->map.total_disks; disk++) {
+                if (info->disk[disk].status & I_DISK_SPARE) {
+                    raid->disks[disk].flags |= AR_DF_SPARE;
+                }
+                if (info->disk[disk].status & I_DISK_CONFIGURED) {
+                    raid->disks[disk].flags |= AR_DF_ASSIGNED; /*normally set*/
+                }
+                if (info->disk[disk].status & I_DISK_FAILED) {
+                    raid->disks[disk].flags |= AR_DF_SPARE;
+                }
+                if (info->disk[disk].status & I_DISK_USABLE) {
+                    if (!(raid->disks[disk].flags & AR_DF_SPARE)) {
+                        raid->disks[disk].flags |= AR_DF_ONLINE; /* normally set */
+                    }
+                }
+            }
+        }
+
+        if (info->generation == raid->generation
+            && !raid->disks[matched].device) {
+            disk = matched;
+            raid->disks[disk].flags |= AR_DF_PRESENT;
+            raid->disks[disk].device = adp->device;
+            raid->disks[disk].disk_sectors =
+                info->disk[disk].total_sectors;
+            ((struct ad_softc *)adp->device->driver)->flags |= AD_F_RAID_SUBDISK;
+        }
+        if (info->generation < raid->generation) {
+/*printf("HELLO Generation was old\n"); */
+            disk = matched;
+            raid->disks[disk].flags = AR_DF_PRESENT | AR_DF_SPARE;
+            raid->disks[disk].device = adp->device;
+            raid->disks[disk].disk_sectors =
+                info->disk[disk].total_sectors;
+            ((struct ad_softc *)adp->device->driver)->flags |= AD_F_RAID_SUBDISK;
+        }
+
+        for (count = 0, disk = 0; disk < config->map.total_disks; disk++) {
+            if (raid->disks[disk].device)
+                count++;
+        }
+
+#define GEN_MARGIN 15
+#define GEN_MAX 0x80000000
+#define GEN_OVERFLOW(x) ((x + GEN_MARGIN < 0))
+
+        if (count == raid->total_disks
+            && GEN_OVERFLOW(raid->generation))
+            ar_intel_write_conf(raid);
+        if (count == raid->total_disks && (raid->flags & AR_F_DEGRADED))
+            ata_raid_rebuild(array);
+
+        break;
+    }
+
+intel_out:
+    free(info, M_AR);
+
+    return retval;
+}
+
+static int
+ar_intel_write_conf(struct ar_softc *raid)
+{
+    struct intel_raid_conf *info;
+    struct intel_raid_config *config;
+    struct timeval timestamp;
+    u_int32_t cksum, *ckptr;
+    int count, disk, i;
+
+    raid->generation++;
+    if (GEN_OVERFLOW(raid->generation)) /* give us some margin */
+        raid->generation = 1; /* roll generation */
+    microtime(&timestamp);
+
+    for (disk = 0; disk < raid->total_disks; disk++) {
+        if (!(info = (struct intel_raid_conf *)
+              malloc(1024, M_AR, M_NOWAIT))) {
+            printf("ar%d: Intel allocating conf failed\n",
+                   raid->lun);
+            return -1;
+        }
+        bzero(info, 1024);
+        if (raid->flags & AR_F_DELETE)
+            goto intel_write;
+
+        /* need to build critical parts of structure */
+        info->total_disks = raid->total_disks;
+        config = (struct intel_raid_config*)&info->disk[info->total_disks];
+        if (raid->flags & AR_F_RAID0)
+            config->map.type = I_MAP_RAID0;
+        if (raid->flags & AR_F_RAID1)
+            config->map.type = I_MAP_RAID1;
+
+        /* start to fill in */
+        snprintf(info->intel_id, sizeof(info->intel_id), "%s 1.%d.00",
+                 I_MAGIC, config->map.type);
+        info->checksum = 0; /* last */
+        info->disk_struct_size = 480; /* Seems to be constant */
+        if (!raid->magic_0) {
+            raid->magic_0 = 0x8253823c;
+        }
+        info->id = raid->magic_0;
+        info->generation = raid->generation;
+        info->reserved[1] = 0xc0000000; /* Unknown */
+        /* info->total_disks Already done above */
+        info->bootable = 1;
+
+        for (i = 0; i < info->total_disks; i++) {
+            if
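/*
 * Editor's note (illustrative, not part of the patch): GEN_MARGIN and
 * GEN_OVERFLOW() above implement a "newest generation wins" policy.
 * ar_intel_write_conf() bumps the signed counter on every metadata write and
 * rolls it back to 1 shortly before it would wrap negative, while
 * ar_intel_read_conf() keeps the config with the highest generation and
 * demotes a disk carrying an older copy to PRESENT|SPARE.  A condensed form
 * of the roll-over; next_generation() is a hypothetical name.
 */
static int
next_generation(int gen)
{
    gen++;
    if (gen + GEN_MARGIN < 0)   /* the GEN_OVERFLOW() test */
        gen = 1;                /* roll, as ar_intel_write_conf() does */
    return gen;
}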
(raid->disks[i].device) { + bcopy(raid->disks[i].device->param->serial, + info->disk[i].serial, sizeof(info->disk[i].serial)); + info->disk[i].total_sectors + = AD_SOFTC(raid->disks[i])->total_secs; + info->disk[i].unit_id = i * 0x10000; + info->disk[i].status = 0x130; + if (raid->disks[i].flags & AR_DF_SPARE) { + info->disk[i].status |= I_DISK_SPARE; + } + if (raid->disks[i].flags & AR_DF_ASSIGNED) { + info->disk[i].status |= I_DISK_CONFIGURED; /* normally set */ + } + if ((raid->disks[i].flags & AR_DF_ONLINE) + && (raid->disks[i].flags & AR_DF_PRESENT)) { + info->disk[i].status |= I_DISK_USABLE; /* normally set */ + } + /* Other options + info->disk[i].status |= I_DISK_FAILED; + */ + } + } + + snprintf(config->serial, sizeof(config->serial), + "RAID_Volume%d", raid->lun + 1); + config->total_sectors_low = raid->total_sectors; + config->total_sectors_high = 0; + config->status = 0; + config->volume.migrating = 0; +#if 0 /* Not used */ + if (raid->flags & AR_F_DEGRADED) { + config->volume.state = I_VOLUME_DEGRADED; + } + /* Other options: + config->volume.state = I_VOLUME_DISABLED + config->volume.state = I_VOLUME_FAILED + */ +#endif + + config->volume.dirty = 0; /* not used yet */ + + config->map.start = 0; + config->map.state = 0; + if (raid->flags & AR_F_RAID0) { + config->map.total_sectors = raid->total_sectors / 2; + config->map.interleave = raid->interleave; + } + if (raid->flags & AR_F_RAID1) { + config->map.total_sectors = raid->total_sectors; + config->map.interleave = 256; /* DJA ??? */ + } + config->map.stripes = raid->total_sectors / 512; + if (raid->flags & AR_F_READY) { + config->map.state |= I_MAP_OKAY; + } + if (raid->flags & AR_F_DEGRADED) { + config->map.state |= I_MAP_DEGRADED; + } + if (raid->flags & AR_F_REBUILDING) { + config->map.state |= I_MAP_DEGRADED; + } + /* Other options are + config->map.state |= I_MAP_FAILED; + */ +/* config->map.type Already defined */ + config->map.total_disks = info->total_disks; + + config->map.filler1[0] = config->map.type + 1; /* ??? */ + + config->map.filler1[1] = 0xff; /* None failed */ + for (i = 0; i < raid->total_disks; i++) { + if (raid->disks[i].flags & AR_DF_REBUILDING + || !(raid->disks[i].flags & AR_DF_ONLINE)) { + config->map.filler1[1] = i; + } + } + config->map.filler1[2] = 0x1; /* ??? 
*/ + + for (i = 0; i < raid->total_disks; i++) + config->map.disk_order[i]=i; + + ckptr = (void *)info; + for (cksum = 0, count = 0; count < info->disk_struct_size / 4; count++) + cksum += *ckptr++; + info->checksum = cksum; /* last */ + +intel_write: + if (raid->disks[disk].device && raid->disks[disk].device->driver && + !(raid->disks[disk].device->flags & ATA_D_DETACHING)) { + if (ar_rw(AD_SOFTC(raid->disks[disk]), + I_LBA(AD_SOFTC(raid->disks[disk])), + 1024, + (caddr_t)info, AR_WRITE)) { + printf("ar%d: Intel write conf failed\n", + raid->lun); + return -1; + } + } + } + return 0; +} + static void ar_rw_done(struct buf *bp) { @@ -1400,11 +2277,14 @@ free(bp, M_AR); } +extern void ad_clear_request(struct ad_softc *); + static int ar_rw(struct ad_softc *adp, u_int32_t lba, int count, caddr_t data, int flags) { struct buf *bp; int retry = 0, error = 0; + int s = splbio(); if (!(bp = (struct buf *)malloc(sizeof(struct buf), M_AR, M_NOWAIT|M_ZERO))) return ENOMEM; @@ -1424,13 +2304,20 @@ if (flags & AR_WRITE) bp->b_flags |= B_WRITE; + splx(s); + AR_STRATEGY((struct buf *)bp); if (flags & AR_WAIT) { while ((retry++ < (15*hz/10)) && (error = !(bp->b_flags & B_DONE))) error = tsleep(bp, PRIBIO, "arrw", 10); + s = splbio(); if (!error && (bp->b_flags & B_ERROR)) error = bp->b_error; + if (error) { + ad_clear_request(adp); + } + splx(s); free(bp, M_AR); } return error; --- sys/dev/ata/ata-raid.h +++ sys/dev/ata/ata-raid.h @@ -48,6 +48,7 @@ #define AR_DF_ASSIGNED 0x00000002 #define AR_DF_SPARE 0x00000004 #define AR_DF_ONLINE 0x00000008 +#define AR_DF_REBUILDING 0x00000010 }; struct ar_softc { @@ -58,12 +59,14 @@ #define AR_F_RAID0 0x0001 /* STRIPE */ #define AR_F_RAID1 0x0002 /* MIRROR */ #define AR_F_SPAN 0x0004 /* SPAN */ +#define AR_F_DELETE 0x0008 #define AR_F_READY 0x0100 #define AR_F_DEGRADED 0x0200 #define AR_F_REBUILDING 0x0400 #define AR_F_PROMISE_RAID 0x1000 #define AR_F_HIGHPOINT_RAID 0x2000 #define AR_F_FREEBSD_RAID 0x4000 +#define AR_F_INTEL_RAID 0x8000 int total_disks; /* number of disks in this array */ int generation; /* generation of this array */ @@ -80,6 +83,7 @@ u_int64_t lock_end; /* end of locked area for rebuild */ struct disk disk; /* disklabel/slice stuff */ struct proc *pid; + int rebuild_stop; dev_t dev; /* device place holder */ }; @@ -222,6 +226,72 @@ u_int32_t checksum; } __attribute__((packed)); +#define I_LBA(adp) (((adp->total_secs / 2) - 1) * 2) +#define I_MAGIC "Intel Raid ISM Cfg Sig." + +struct intel_raid_conf { + int8_t intel_id[32]; + u_int32_t checksum; + + u_int32_t disk_struct_size; + u_int32_t id; + u_int32_t generation; + u_int32_t reserved[2]; + u_int8_t total_disks; + u_int8_t bootable; + u_int8_t filler1[2]; + u_int32_t filler2[39]; + struct { + int8_t serial[16]; + u_int32_t total_sectors; + u_int32_t unit_id; + u_int32_t status; +#define I_DISK_SPARE 0x1 +#define I_DISK_CONFIGURED 0x2 +#define I_DISK_FAILED 0x4 +#define I_DISK_USABLE 0x8 + u_int32_t filler2[5]; + } disk[10]; +} __attribute__((packed)); + +/* RAID DEVICE CONFIGURATION INFO */ +struct intel_raid_config { + int8_t serial[16]; + u_int32_t total_sectors_low; + u_int32_t total_sectors_high; + u_int32_t status; + u_int32_t reserved; + u_int32_t filler[12]; + struct { + u_int32_t filler[2]; + u_int8_t migrating; + u_int8_t state; +/* ??? 
+#define I_VOLUME_OKAY 0x0 +*/ + u_int8_t dirty; + u_int8_t filler1[1]; + u_int32_t filler2[5]; + } volume; + struct { + u_int32_t start; + u_int32_t total_sectors; + u_int32_t stripes; + u_int16_t interleave; + u_int8_t state; +#define I_MAP_OKAY 0x0 +#define I_MAP_DEGRADED 0x2 +#define I_MAP_FAILED 0x3 + u_int8_t type; +#define I_MAP_RAID0 0 +#define I_MAP_RAID1 1 + u_int8_t total_disks; + u_int8_t filler1[3]; + u_int32_t filler2[7]; + u_int32_t disk_order[10]; + } map; +} __attribute__((packed)); + int ata_raiddisk_probe(struct ad_softc *); int ata_raiddisk_attach(struct ad_softc *); int ata_raiddisk_detach(struct ad_softc *); @@ -229,5 +299,7 @@ int ata_raid_create(struct raid_setup *); int ata_raid_delete(int); int ata_raid_status(int array, struct raid_status *); +int ata_raid_addspare(int, int); int ata_raid_rebuild(int); +int ata_raid_rebuild_stop(int); --- sys/dev/ata/atapi-all.c +++ sys/dev/ata/atapi-all.c @@ -128,6 +128,7 @@ atapi_detach(struct ata_device *atadev) { struct atapi_request *request; + int s; atadev->flags |= ATA_D_DETACHING; ata_prtdev(atadev, "removed from configuration\n"); @@ -150,6 +151,7 @@ default: return; } + s = splbio(); TAILQ_FOREACH(request, &atadev->channel->atapi_queue, chain) { if (request->device != atadev) continue; @@ -164,6 +166,7 @@ } ata_dmafree(atadev); free(atadev->result, M_ATAPI); + splx(s); atadev->driver = NULL; atadev->flags = 0; @@ -610,6 +613,7 @@ atapi_timeout(struct atapi_request *request) { struct ata_device *atadev = request->device; + int s; atadev->channel->running = NULL; ata_prtdev(atadev, "%s command timeout - resetting\n", @@ -626,6 +630,7 @@ } } + s = splbio(); /* if retries still permit, reinject this request */ if (request->retries++ < ATAPI_MAX_RETRIES) { TAILQ_INSERT_HEAD(&atadev->channel->atapi_queue, request, chain); @@ -635,7 +640,8 @@ request->error = EIO; wakeup((caddr_t)request); } - ata_reinit(atadev->channel); + splx(s); + ata_reinit(atadev->channel); } static char * --- sys/dev/ata/atapi-cam.c +++ sys/dev/ata/atapi-cam.c @@ -111,6 +111,7 @@ struct cam_sim *sim = NULL; struct cam_path *path = NULL; int unit; + int s; LIST_FOREACH(scp, &all_buses, chain) { if (scp->ata_ch == ata_ch) @@ -121,10 +122,12 @@ M_ATACAM, M_NOWAIT | M_ZERO)) == NULL) goto error; + s = splbio(); scp->ata_ch = ata_ch; TAILQ_INIT(&scp->pending_hcbs); LIST_INSERT_HEAD(&all_buses, scp, chain); unit = device_get_unit(ata_ch->dev); + splx(s); if ((devq = cam_simq_alloc(16)) == NULL) goto error; @@ -638,17 +641,23 @@ static void free_hcb(struct atapi_hcb *hcb) { + int s; + + s = splbio(); TAILQ_REMOVE(&hcb->softc->pending_hcbs, hcb, chain); if (hcb->dxfer_alloc != NULL) free(hcb->dxfer_alloc, M_ATACAM); free(hcb, M_ATACAM); + splx(s); } static void free_softc(struct atapi_xpt_softc *scp) { struct atapi_hcb *hcb; + int s; + s=splbio(); if (scp != NULL) { TAILQ_FOREACH(hcb, &scp->pending_hcbs, chain) { free_hcb_and_ccb_done(hcb, CAM_UNREC_HBA_ERROR); @@ -671,6 +680,7 @@ LIST_REMOVE(scp, chain); free(scp, M_ATACAM); } + splx(s); } static struct atapi_xpt_softc * --- sys/dev/ata/atapi-cd.c +++ sys/dev/ata/atapi-cd.c @@ -194,7 +194,9 @@ struct acd_devlist *entry; struct buf *bp; int subdev; + int s; + s = splbio(); if (cdp->changer_info) { for (subdev = 0; subdev < cdp->changer_info->slots; subdev++) { if (cdp->driver[subdev] == cdp) @@ -236,15 +238,18 @@ ata_free_lun(&acd_lun_map, cdp->lun); free(cdp, M_ACD); atadev->driver = NULL; + splx(s); } static struct acd_softc * acd_init_lun(struct ata_device *atadev) { struct acd_softc *cdp; + int s; if 
(!(cdp = malloc(sizeof(struct acd_softc), M_ACD, M_NOWAIT | M_ZERO))) return NULL; + s = splbio(); TAILQ_INIT(&cdp->dev_list); bufq_init(&cdp->queue); cdp->device = atadev; @@ -255,8 +260,10 @@ if (!(cdp->stats = malloc(sizeof(struct devstat), M_ACD, M_NOWAIT | M_ZERO))) { free(cdp, M_ACD); + splx(s); return NULL; } + splx(s); return cdp; } @@ -1255,6 +1262,7 @@ int track, ntracks, len; u_int32_t sizes[2]; int8_t ccb[16]; + int s; bzero(&cdp->toc, sizeof(cdp->toc)); bzero(ccb, sizeof(ccb)); @@ -1326,6 +1334,7 @@ cdp->disklabel.d_magic2 = DISKMAGIC; cdp->disklabel.d_checksum = dkcksum(&cdp->disklabel); + s = splbio(); while ((entry = TAILQ_FIRST(&cdp->dev_list))) { destroy_dev(entry->dev); TAILQ_REMOVE(&cdp->dev_list, entry, chain); @@ -1341,6 +1350,7 @@ entry->dev->si_drv1 = cdp->dev->si_drv1; TAILQ_INSERT_TAIL(&cdp->dev_list, entry, chain); } + splx(s); #ifdef ACD_DEBUG if (cdp->disk_size && cdp->toc.hdr.ending_track) { --- sys/sys/ata.h +++ sys/sys/ata.h @@ -238,6 +238,8 @@ #define ATARAIDDELETE 10 #define ATARAIDSTATUS 11 #define ATAENCSTAT 12 +#define ATARAIDADDSPARE 13 +#define ATARAIDREBUILDSTOP 14 union { struct { @@ -258,6 +260,7 @@ int disks[16]; int interleave; int unit; + int disk_size; } raid_setup; struct raid_status { int type; @@ -270,8 +273,12 @@ #define AR_REBUILDING 4 int progress; + int not_okay[16]; } raid_status; struct { + int disk; + } raid_spare; + struct { int fan; int temp; int v05;
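/*
 * Editor's usage sketch (not part of the patch): the new ATARAIDADDSPARE and
 * ATARAIDREBUILDSTOP commands are issued through the same channel-style
 * ioctl interface as the existing ATARAID* commands in ata-all.c, where the
 * "channel" field carries the array number.  This assumes the stock 4.x-era
 * ata(4) control path (/dev/ata, struct ata_cmd, IOCATA); only the
 * u.raid_spare.disk member is taken from the union added above.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/ata.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
    struct ata_cmd iocmd;
    int fd;

    if ((fd = open("/dev/ata", O_RDWR)) < 0)
        return 1;

    /* add disk 2 (ad2) as a spare to array 0 (ar0) */
    memset(&iocmd, 0, sizeof(iocmd));
    iocmd.cmd = ATARAIDADDSPARE;
    iocmd.channel = 0;                  /* array number for the RAID calls */
    iocmd.u.raid_spare.disk = 2;
    if (ioctl(fd, IOCATA, &iocmd) < 0)
        perror("ATARAIDADDSPARE");

    /* stop a rebuild in progress on the same array */
    iocmd.cmd = ATARAIDREBUILDSTOP;
    if (ioctl(fd, IOCATA, &iocmd) < 0)
        perror("ATARAIDREBUILDSTOP");

    close(fd);
    return 0;
}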