| @@ -5297,1999 +5297,1999 @@ spa_vdev_detach(spa_t *spa, uint64_t gui | | | @@ -5297,1999 +5297,1999 @@ spa_vdev_detach(spa_t *spa, uint64_t gui |
5297 | (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); | | 5297 | (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); |
5298 | mutex_enter(&spa_namespace_lock); | | 5298 | mutex_enter(&spa_namespace_lock); |
5299 | spa_close(altspa, FTAG); | | 5299 | spa_close(altspa, FTAG); |
5300 | } | | 5300 | } |
5301 | mutex_exit(&spa_namespace_lock); | | 5301 | mutex_exit(&spa_namespace_lock); |
5302 | | | 5302 | |
5303 | /* search the rest of the vdevs for spares to remove */ | | 5303 | /* search the rest of the vdevs for spares to remove */ |
5304 | spa_vdev_resilver_done(spa); | | 5304 | spa_vdev_resilver_done(spa); |
5305 | } | | 5305 | } |
5306 | | | 5306 | |
5307 | /* all done with the spa; OK to release */ | | 5307 | /* all done with the spa; OK to release */ |
5308 | mutex_enter(&spa_namespace_lock); | | 5308 | mutex_enter(&spa_namespace_lock); |
5309 | spa_close(spa, FTAG); | | 5309 | spa_close(spa, FTAG); |
5310 | mutex_exit(&spa_namespace_lock); | | 5310 | mutex_exit(&spa_namespace_lock); |
5311 | | | 5311 | |
5312 | return (error); | | 5312 | return (error); |
5313 | } | | 5313 | } |
5314 | | | 5314 | |
5315 | /* | | 5315 | /* |
5316 | * Split a set of devices from their mirrors, and create a new pool from them. | | 5316 | * Split a set of devices from their mirrors, and create a new pool from them. |
5317 | */ | | 5317 | */ |
5318 | int | | 5318 | int |
5319 | spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, | | 5319 | spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, |
5320 | nvlist_t *props, boolean_t exp) | | 5320 | nvlist_t *props, boolean_t exp) |
5321 | { | | 5321 | { |
5322 | int error = 0; | | 5322 | int error = 0; |
5323 | uint64_t txg, *glist; | | 5323 | uint64_t txg, *glist; |
5324 | spa_t *newspa; | | 5324 | spa_t *newspa; |
5325 | uint_t c, children, lastlog; | | 5325 | uint_t c, children, lastlog; |
5326 | nvlist_t **child, *nvl, *tmp; | | 5326 | nvlist_t **child, *nvl, *tmp; |
5327 | dmu_tx_t *tx; | | 5327 | dmu_tx_t *tx; |
5328 | char *altroot = NULL; | | 5328 | char *altroot = NULL; |
5329 | vdev_t *rvd, **vml = NULL; /* vdev modify list */ | | 5329 | vdev_t *rvd, **vml = NULL; /* vdev modify list */ |
5330 | boolean_t activate_slog; | | 5330 | boolean_t activate_slog; |
5331 | | | 5331 | |
5332 | ASSERT(spa_writeable(spa)); | | 5332 | ASSERT(spa_writeable(spa)); |
5333 | | | 5333 | |
5334 | txg = spa_vdev_enter(spa); | | 5334 | txg = spa_vdev_enter(spa); |
5335 | | | 5335 | |
5336 | /* clear the log and flush everything up to now */ | | 5336 | /* clear the log and flush everything up to now */ |
5337 | activate_slog = spa_passivate_log(spa); | | 5337 | activate_slog = spa_passivate_log(spa); |
5338 | (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); | | 5338 | (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); |
5339 | error = spa_offline_log(spa); | | 5339 | error = spa_offline_log(spa); |
5340 | txg = spa_vdev_config_enter(spa); | | 5340 | txg = spa_vdev_config_enter(spa); |
5341 | | | 5341 | |
5342 | if (activate_slog) | | 5342 | if (activate_slog) |
5343 | spa_activate_log(spa); | | 5343 | spa_activate_log(spa); |
5344 | | | 5344 | |
5345 | if (error != 0) | | 5345 | if (error != 0) |
5346 | return (spa_vdev_exit(spa, NULL, txg, error)); | | 5346 | return (spa_vdev_exit(spa, NULL, txg, error)); |
5347 | | | 5347 | |
5348 | /* check new spa name before going any further */ | | 5348 | /* check new spa name before going any further */ |
5349 | if (spa_lookup(newname) != NULL) | | 5349 | if (spa_lookup(newname) != NULL) |
5350 | return (spa_vdev_exit(spa, NULL, txg, EEXIST)); | | 5350 | return (spa_vdev_exit(spa, NULL, txg, EEXIST)); |
5351 | | | 5351 | |
5352 | /* | | 5352 | /* |
5353 | * scan through all the children to ensure they're all mirrors | | 5353 | * scan through all the children to ensure they're all mirrors |
5354 | */ | | 5354 | */ |
5355 | if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || | | 5355 | if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || |
5356 | nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, | | 5356 | nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, |
5357 | &children) != 0) | | 5357 | &children) != 0) |
5358 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | | 5358 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); |
5359 | | | 5359 | |
5360 | /* first, check to ensure we've got the right child count */ | | 5360 | /* first, check to ensure we've got the right child count */ |
5361 | rvd = spa->spa_root_vdev; | | 5361 | rvd = spa->spa_root_vdev; |
5362 | lastlog = 0; | | 5362 | lastlog = 0; |
5363 | for (c = 0; c < rvd->vdev_children; c++) { | | 5363 | for (c = 0; c < rvd->vdev_children; c++) { |
5364 | vdev_t *vd = rvd->vdev_child[c]; | | 5364 | vdev_t *vd = rvd->vdev_child[c]; |
5365 | | | 5365 | |
5366 | /* don't count the holes & logs as children */ | | 5366 | /* don't count the holes & logs as children */ |
5367 | if (vd->vdev_islog || vd->vdev_ishole) { | | 5367 | if (vd->vdev_islog || vd->vdev_ishole) { |
5368 | if (lastlog == 0) | | 5368 | if (lastlog == 0) |
5369 | lastlog = c; | | 5369 | lastlog = c; |
5370 | continue; | | 5370 | continue; |
5371 | } | | 5371 | } |
5372 | | | 5372 | |
5373 | lastlog = 0; | | 5373 | lastlog = 0; |
5374 | } | | 5374 | } |
5375 | if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) | | 5375 | if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) |
5376 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | | 5376 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); |
5377 | | | 5377 | |
5378 | /* next, ensure no spare or cache devices are part of the split */ | | 5378 | /* next, ensure no spare or cache devices are part of the split */ |
5379 | if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || | | 5379 | if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || |
5380 | nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) | | 5380 | nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) |
5381 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); | | 5381 | return (spa_vdev_exit(spa, NULL, txg, EINVAL)); |
5382 | | | 5382 | |
5383 | vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); | | 5383 | vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); |
5384 | glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); | | 5384 | glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); |
5385 | | | 5385 | |
5386 | /* then, loop over each vdev and validate it */ | | 5386 | /* then, loop over each vdev and validate it */ |
5387 | for (c = 0; c < children; c++) { | | 5387 | for (c = 0; c < children; c++) { |
5388 | uint64_t is_hole = 0; | | 5388 | uint64_t is_hole = 0; |
5389 | | | 5389 | |
5390 | (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, | | 5390 | (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, |
5391 | &is_hole); | | 5391 | &is_hole); |
5392 | | | 5392 | |
5393 | if (is_hole != 0) { | | 5393 | if (is_hole != 0) { |
5394 | if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || | | 5394 | if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || |
5395 | spa->spa_root_vdev->vdev_child[c]->vdev_islog) { | | 5395 | spa->spa_root_vdev->vdev_child[c]->vdev_islog) { |
5396 | continue; | | 5396 | continue; |
5397 | } else { | | 5397 | } else { |
5398 | error = SET_ERROR(EINVAL); | | 5398 | error = SET_ERROR(EINVAL); |
5399 | break; | | 5399 | break; |
5400 | } | | 5400 | } |
5401 | } | | 5401 | } |
5402 | | | 5402 | |
5403 | /* which disk is going to be split? */ | | 5403 | /* which disk is going to be split? */ |
5404 | if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, | | 5404 | if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, |
5405 | &glist[c]) != 0) { | | 5405 | &glist[c]) != 0) { |
5406 | error = SET_ERROR(EINVAL); | | 5406 | error = SET_ERROR(EINVAL); |
5407 | break; | | 5407 | break; |
5408 | } | | 5408 | } |
5409 | | | 5409 | |
5410 | /* look it up in the spa */ | | 5410 | /* look it up in the spa */ |
5411 | vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); | | 5411 | vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); |
5412 | if (vml[c] == NULL) { | | 5412 | if (vml[c] == NULL) { |
5413 | error = SET_ERROR(ENODEV); | | 5413 | error = SET_ERROR(ENODEV); |
5414 | break; | | 5414 | break; |
5415 | } | | 5415 | } |
5416 | | | 5416 | |
5417 | /* make sure there's nothing stopping the split */ | | 5417 | /* make sure there's nothing stopping the split */ |
5418 | if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || | | 5418 | if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || |
5419 | vml[c]->vdev_islog || | | 5419 | vml[c]->vdev_islog || |
5420 | vml[c]->vdev_ishole || | | 5420 | vml[c]->vdev_ishole || |
5421 | vml[c]->vdev_isspare || | | 5421 | vml[c]->vdev_isspare || |
5422 | vml[c]->vdev_isl2cache || | | 5422 | vml[c]->vdev_isl2cache || |
5423 | !vdev_writeable(vml[c]) || | | 5423 | !vdev_writeable(vml[c]) || |
5424 | vml[c]->vdev_children != 0 || | | 5424 | vml[c]->vdev_children != 0 || |
5425 | vml[c]->vdev_state != VDEV_STATE_HEALTHY || | | 5425 | vml[c]->vdev_state != VDEV_STATE_HEALTHY || |
5426 | c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { | | 5426 | c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { |
5427 | error = SET_ERROR(EINVAL); | | 5427 | error = SET_ERROR(EINVAL); |
5428 | break; | | 5428 | break; |
5429 | } | | 5429 | } |
5430 | | | 5430 | |
5431 | if (vdev_dtl_required(vml[c])) { | | 5431 | if (vdev_dtl_required(vml[c])) { |
5432 | error = SET_ERROR(EBUSY); | | 5432 | error = SET_ERROR(EBUSY); |
5433 | break; | | 5433 | break; |
5434 | } | | 5434 | } |
5435 | | | 5435 | |
5436 | /* we need certain info from the top level */ | | 5436 | /* we need certain info from the top level */ |
5437 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, | | 5437 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, |
5438 | vml[c]->vdev_top->vdev_ms_array) == 0); | | 5438 | vml[c]->vdev_top->vdev_ms_array) == 0); |
5439 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, | | 5439 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, |
5440 | vml[c]->vdev_top->vdev_ms_shift) == 0); | | 5440 | vml[c]->vdev_top->vdev_ms_shift) == 0); |
5441 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, | | 5441 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, |
5442 | vml[c]->vdev_top->vdev_asize) == 0); | | 5442 | vml[c]->vdev_top->vdev_asize) == 0); |
5443 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, | | 5443 | VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, |
5444 | vml[c]->vdev_top->vdev_ashift) == 0); | | 5444 | vml[c]->vdev_top->vdev_ashift) == 0); |
5445 | | | 5445 | |
5446 | /* transfer per-vdev ZAPs */ | | 5446 | /* transfer per-vdev ZAPs */ |
5447 | ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); | | 5447 | ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); |
5448 | VERIFY0(nvlist_add_uint64(child[c], | | 5448 | VERIFY0(nvlist_add_uint64(child[c], |
5449 | ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); | | 5449 | ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); |
5450 | | | 5450 | |
5451 | ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); | | 5451 | ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); |
5452 | VERIFY0(nvlist_add_uint64(child[c], | | 5452 | VERIFY0(nvlist_add_uint64(child[c], |
5453 | ZPOOL_CONFIG_VDEV_TOP_ZAP, | | 5453 | ZPOOL_CONFIG_VDEV_TOP_ZAP, |
5454 | vml[c]->vdev_parent->vdev_top_zap)); | | 5454 | vml[c]->vdev_parent->vdev_top_zap)); |
5455 | } | | 5455 | } |
5456 | | | 5456 | |
5457 | if (error != 0) { | | 5457 | if (error != 0) { |
5458 | kmem_free(vml, children * sizeof (vdev_t *)); | | 5458 | kmem_free(vml, children * sizeof (vdev_t *)); |
5459 | kmem_free(glist, children * sizeof (uint64_t)); | | 5459 | kmem_free(glist, children * sizeof (uint64_t)); |
5460 | return (spa_vdev_exit(spa, NULL, txg, error)); | | 5460 | return (spa_vdev_exit(spa, NULL, txg, error)); |
5461 | } | | 5461 | } |
5462 | | | 5462 | |
5463 | /* stop writers from using the disks */ | | 5463 | /* stop writers from using the disks */ |
5464 | for (c = 0; c < children; c++) { | | 5464 | for (c = 0; c < children; c++) { |
5465 | if (vml[c] != NULL) | | 5465 | if (vml[c] != NULL) |
5466 | vml[c]->vdev_offline = B_TRUE; | | 5466 | vml[c]->vdev_offline = B_TRUE; |
5467 | } | | 5467 | } |
5468 | vdev_reopen(spa->spa_root_vdev); | | 5468 | vdev_reopen(spa->spa_root_vdev); |
5469 | | | 5469 | |
5470 | /* | | 5470 | /* |
5471 | * Temporarily record the splitting vdevs in the spa config. This | | 5471 | * Temporarily record the splitting vdevs in the spa config. This |
5472 | * will disappear once the config is regenerated. | | 5472 | * will disappear once the config is regenerated. |
5473 | */ | | 5473 | */ |
5474 | VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); | | 5474 | VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); |
5475 | VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, | | 5475 | VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, |
5476 | glist, children) == 0); | | 5476 | glist, children) == 0); |
5477 | kmem_free(glist, children * sizeof (uint64_t)); | | 5477 | kmem_free(glist, children * sizeof (uint64_t)); |
5478 | | | 5478 | |
5479 | mutex_enter(&spa->spa_props_lock); | | 5479 | mutex_enter(&spa->spa_props_lock); |
5480 | VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, | | 5480 | VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, |
5481 | nvl) == 0); | | 5481 | nvl) == 0); |
5482 | mutex_exit(&spa->spa_props_lock); | | 5482 | mutex_exit(&spa->spa_props_lock); |
5483 | spa->spa_config_splitting = nvl; | | 5483 | spa->spa_config_splitting = nvl; |
5484 | vdev_config_dirty(spa->spa_root_vdev); | | 5484 | vdev_config_dirty(spa->spa_root_vdev); |
5485 | | | 5485 | |
5486 | /* configure and create the new pool */ | | 5486 | /* configure and create the new pool */ |
5487 | VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); | | 5487 | VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); |
5488 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | | 5488 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, |
5489 | exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); | | 5489 | exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); |
5490 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, | | 5490 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, |
5491 | spa_version(spa)) == 0); | | 5491 | spa_version(spa)) == 0); |
5492 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, | | 5492 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, |
5493 | spa->spa_config_txg) == 0); | | 5493 | spa->spa_config_txg) == 0); |
5494 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, | | 5494 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
5495 | spa_generate_guid(NULL)) == 0); | | 5495 | spa_generate_guid(NULL)) == 0); |
5496 | VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); | | 5496 | VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); |
5497 | (void) nvlist_lookup_string(props, | | 5497 | (void) nvlist_lookup_string(props, |
5498 | zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); | | 5498 | zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); |
5499 | | | 5499 | |
5500 | /* add the new pool to the namespace */ | | 5500 | /* add the new pool to the namespace */ |
5501 | newspa = spa_add(newname, config, altroot); | | 5501 | newspa = spa_add(newname, config, altroot); |
5502 | newspa->spa_avz_action = AVZ_ACTION_REBUILD; | | 5502 | newspa->spa_avz_action = AVZ_ACTION_REBUILD; |
5503 | newspa->spa_config_txg = spa->spa_config_txg; | | 5503 | newspa->spa_config_txg = spa->spa_config_txg; |
5504 | spa_set_log_state(newspa, SPA_LOG_CLEAR); | | 5504 | spa_set_log_state(newspa, SPA_LOG_CLEAR); |
5505 | | | 5505 | |
5506 | /* release the spa config lock, retaining the namespace lock */ | | 5506 | /* release the spa config lock, retaining the namespace lock */ |
5507 | spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); | | 5507 | spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); |
5508 | | | 5508 | |
5509 | if (zio_injection_enabled) | | 5509 | if (zio_injection_enabled) |
5510 | zio_handle_panic_injection(spa, FTAG, 1); | | 5510 | zio_handle_panic_injection(spa, FTAG, 1); |
5511 | | | 5511 | |
5512 | spa_activate(newspa, spa_mode_global); | | 5512 | spa_activate(newspa, spa_mode_global); |
5513 | spa_async_suspend(newspa); | | 5513 | spa_async_suspend(newspa); |
5514 | | | 5514 | |
5515 | #ifndef illumos | | 5515 | #ifndef illumos |
5516 | /* mark that we are creating new spa by splitting */ | | 5516 | /* mark that we are creating new spa by splitting */ |
5517 | newspa->spa_splitting_newspa = B_TRUE; | | 5517 | newspa->spa_splitting_newspa = B_TRUE; |
5518 | #endif | | 5518 | #endif |
5519 | /* create the new pool from the disks of the original pool */ | | 5519 | /* create the new pool from the disks of the original pool */ |
5520 | error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); | | 5520 | error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); |
5521 | #ifndef illumos | | 5521 | #ifndef illumos |
5522 | newspa->spa_splitting_newspa = B_FALSE; | | 5522 | newspa->spa_splitting_newspa = B_FALSE; |
5523 | #endif | | 5523 | #endif |
5524 | if (error) | | 5524 | if (error) |
5525 | goto out; | | 5525 | goto out; |
5526 | | | 5526 | |
5527 | /* if that worked, generate a real config for the new pool */ | | 5527 | /* if that worked, generate a real config for the new pool */ |
5528 | if (newspa->spa_root_vdev != NULL) { | | 5528 | if (newspa->spa_root_vdev != NULL) { |
5529 | VERIFY(nvlist_alloc(&newspa->spa_config_splitting, | | 5529 | VERIFY(nvlist_alloc(&newspa->spa_config_splitting, |
5530 | NV_UNIQUE_NAME, KM_SLEEP) == 0); | | 5530 | NV_UNIQUE_NAME, KM_SLEEP) == 0); |
5531 | VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, | | 5531 | VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, |
5532 | ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); | | 5532 | ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); |
5533 | spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, | | 5533 | spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, |
5534 | B_TRUE)); | | 5534 | B_TRUE)); |
5535 | } | | 5535 | } |
5536 | | | 5536 | |
5537 | /* set the props */ | | 5537 | /* set the props */ |
5538 | if (props != NULL) { | | 5538 | if (props != NULL) { |
5539 | spa_configfile_set(newspa, props, B_FALSE); | | 5539 | spa_configfile_set(newspa, props, B_FALSE); |
5540 | error = spa_prop_set(newspa, props); | | 5540 | error = spa_prop_set(newspa, props); |
5541 | if (error) | | 5541 | if (error) |
5542 | goto out; | | 5542 | goto out; |
5543 | } | | 5543 | } |
5544 | | | 5544 | |
5545 | /* flush everything */ | | 5545 | /* flush everything */ |
5546 | txg = spa_vdev_config_enter(newspa); | | 5546 | txg = spa_vdev_config_enter(newspa); |
5547 | vdev_config_dirty(newspa->spa_root_vdev); | | 5547 | vdev_config_dirty(newspa->spa_root_vdev); |
5548 | (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); | | 5548 | (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); |
5549 | | | 5549 | |
5550 | if (zio_injection_enabled) | | 5550 | if (zio_injection_enabled) |
5551 | zio_handle_panic_injection(spa, FTAG, 2); | | 5551 | zio_handle_panic_injection(spa, FTAG, 2); |
5552 | | | 5552 | |
5553 | spa_async_resume(newspa); | | 5553 | spa_async_resume(newspa); |
5554 | | | 5554 | |
5555 | /* finally, update the original pool's config */ | | 5555 | /* finally, update the original pool's config */ |
5556 | txg = spa_vdev_config_enter(spa); | | 5556 | txg = spa_vdev_config_enter(spa); |
5557 | tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); | | 5557 | tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); |
5558 | error = dmu_tx_assign(tx, TXG_WAIT); | | 5558 | error = dmu_tx_assign(tx, TXG_WAIT); |
5559 | if (error != 0) | | 5559 | if (error != 0) |
5560 | dmu_tx_abort(tx); | | 5560 | dmu_tx_abort(tx); |
5561 | for (c = 0; c < children; c++) { | | 5561 | for (c = 0; c < children; c++) { |
5562 | if (vml[c] != NULL) { | | 5562 | if (vml[c] != NULL) { |
5563 | vdev_split(vml[c]); | | 5563 | vdev_split(vml[c]); |
5564 | if (error == 0) | | 5564 | if (error == 0) |
5565 | spa_history_log_internal(spa, "detach", tx, | | 5565 | spa_history_log_internal(spa, "detach", tx, |
5566 | "vdev=%s", vml[c]->vdev_path); | | 5566 | "vdev=%s", vml[c]->vdev_path); |
5567 | | | 5567 | |
5568 | vdev_free(vml[c]); | | 5568 | vdev_free(vml[c]); |
5569 | } | | 5569 | } |
5570 | } | | 5570 | } |
5571 | spa->spa_avz_action = AVZ_ACTION_REBUILD; | | 5571 | spa->spa_avz_action = AVZ_ACTION_REBUILD; |
5572 | vdev_config_dirty(spa->spa_root_vdev); | | 5572 | vdev_config_dirty(spa->spa_root_vdev); |
5573 | spa->spa_config_splitting = NULL; | | 5573 | spa->spa_config_splitting = NULL; |
5574 | nvlist_free(nvl); | | 5574 | nvlist_free(nvl); |
5575 | if (error == 0) | | 5575 | if (error == 0) |
5576 | dmu_tx_commit(tx); | | 5576 | dmu_tx_commit(tx); |
5577 | (void) spa_vdev_exit(spa, NULL, txg, 0); | | 5577 | (void) spa_vdev_exit(spa, NULL, txg, 0); |
5578 | | | 5578 | |
5579 | if (zio_injection_enabled) | | 5579 | if (zio_injection_enabled) |
5580 | zio_handle_panic_injection(spa, FTAG, 3); | | 5580 | zio_handle_panic_injection(spa, FTAG, 3); |
5581 | | | 5581 | |
5582 | /* split is complete; log a history record */ | | 5582 | /* split is complete; log a history record */ |
5583 | spa_history_log_internal(newspa, "split", NULL, | | 5583 | spa_history_log_internal(newspa, "split", NULL, |
5584 | "from pool %s", spa_name(spa)); | | 5584 | "from pool %s", spa_name(spa)); |
5585 | | | 5585 | |
5586 | kmem_free(vml, children * sizeof (vdev_t *)); | | 5586 | kmem_free(vml, children * sizeof (vdev_t *)); |
5587 | | | 5587 | |
5588 | /* if we're not going to mount the filesystems in userland, export */ | | 5588 | /* if we're not going to mount the filesystems in userland, export */ |
5589 | if (exp) | | 5589 | if (exp) |
5590 | error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, | | 5590 | error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, |
5591 | B_FALSE, B_FALSE); | | 5591 | B_FALSE, B_FALSE); |
5592 | | | 5592 | |
5593 | return (error); | | 5593 | return (error); |
5594 | | | 5594 | |
5595 | out: | | 5595 | out: |
5596 | spa_unload(newspa); | | 5596 | spa_unload(newspa); |
5597 | spa_deactivate(newspa); | | 5597 | spa_deactivate(newspa); |
5598 | spa_remove(newspa); | | 5598 | spa_remove(newspa); |
5599 | | | 5599 | |
5600 | txg = spa_vdev_config_enter(spa); | | 5600 | txg = spa_vdev_config_enter(spa); |
5601 | | | 5601 | |
5602 | /* re-online all offlined disks */ | | 5602 | /* re-online all offlined disks */ |
5603 | for (c = 0; c < children; c++) { | | 5603 | for (c = 0; c < children; c++) { |
5604 | if (vml[c] != NULL) | | 5604 | if (vml[c] != NULL) |
5605 | vml[c]->vdev_offline = B_FALSE; | | 5605 | vml[c]->vdev_offline = B_FALSE; |
5606 | } | | 5606 | } |
5607 | vdev_reopen(spa->spa_root_vdev); | | 5607 | vdev_reopen(spa->spa_root_vdev); |
5608 | | | 5608 | |
5609 | nvlist_free(spa->spa_config_splitting); | | 5609 | nvlist_free(spa->spa_config_splitting); |
5610 | spa->spa_config_splitting = NULL; | | 5610 | spa->spa_config_splitting = NULL; |
5611 | (void) spa_vdev_exit(spa, NULL, txg, error); | | 5611 | (void) spa_vdev_exit(spa, NULL, txg, error); |
5612 | | | 5612 | |
5613 | kmem_free(vml, children * sizeof (vdev_t *)); | | 5613 | kmem_free(vml, children * sizeof (vdev_t *)); |
5614 | return (error); | | 5614 | return (error); |
5615 | } | | 5615 | } |
5616 | | | 5616 | |
5617 | static nvlist_t * | | 5617 | static nvlist_t * |
5618 | spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) | | 5618 | spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) |
5619 | { | | 5619 | { |
5620 | for (int i = 0; i < count; i++) { | | 5620 | for (int i = 0; i < count; i++) { |
5621 | uint64_t guid; | | 5621 | uint64_t guid; |
5622 | | | 5622 | |
5623 | VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, | | 5623 | VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, |
5624 | &guid) == 0); | | 5624 | &guid) == 0); |
5625 | | | 5625 | |
5626 | if (guid == target_guid) | | 5626 | if (guid == target_guid) |
5627 | return (nvpp[i]); | | 5627 | return (nvpp[i]); |
5628 | } | | 5628 | } |
5629 | | | 5629 | |
5630 | return (NULL); | | 5630 | return (NULL); |
5631 | } | | 5631 | } |
5632 | | | 5632 | |
5633 | static void | | 5633 | static void |
5634 | spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, | | 5634 | spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, |
5635 | nvlist_t *dev_to_remove) | | 5635 | nvlist_t *dev_to_remove) |
5636 | { | | 5636 | { |
5637 | nvlist_t **newdev = NULL; | | 5637 | nvlist_t **newdev = NULL; |
5638 | | | 5638 | |
5639 | if (count > 1) | | 5639 | if (count > 1) |
5640 | newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); | | 5640 | newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); |
5641 | | | 5641 | |
5642 | for (int i = 0, j = 0; i < count; i++) { | | 5642 | for (int i = 0, j = 0; i < count; i++) { |
5643 | if (dev[i] == dev_to_remove) | | 5643 | if (dev[i] == dev_to_remove) |
5644 | continue; | | 5644 | continue; |
5645 | VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); | | 5645 | VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); |
5646 | } | | 5646 | } |
5647 | | | 5647 | |
5648 | VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); | | 5648 | VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); |
5649 | VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); | | 5649 | VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); |
5650 | | | 5650 | |
5651 | for (int i = 0; i < count - 1; i++) | | 5651 | for (int i = 0; i < count - 1; i++) |
5652 | nvlist_free(newdev[i]); | | 5652 | nvlist_free(newdev[i]); |
5653 | | | 5653 | |
5654 | if (count > 1) | | 5654 | if (count > 1) |
5655 | kmem_free(newdev, (count - 1) * sizeof (void *)); | | 5655 | kmem_free(newdev, (count - 1) * sizeof (void *)); |
5656 | } | | 5656 | } |
5657 | | | 5657 | |
5658 | /* | | 5658 | /* |
5659 | * Evacuate the device. | | 5659 | * Evacuate the device. |
5660 | */ | | 5660 | */ |
static int
spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
{
	uint64_t txg;
	int error = 0;

	/* caller holds the namespace lock but NOT the config lock as writer */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
	ASSERT(vd == vd->vdev_top);

	/*
	 * Evacuate the device.  We don't hold the config lock as writer
	 * since we need to do I/O but we do keep the
	 * spa_namespace_lock held.  Once this completes the device
	 * should no longer have any blocks allocated on it.
	 */
	if (vd->vdev_islog) {
		/* only drain the log if it still has allocated blocks */
		if (vd->vdev_stat.vs_alloc != 0)
			error = spa_offline_log(spa);
	} else {
		/* only log devices can be evacuated by this path */
		error = SET_ERROR(ENOTSUP);
	}

	if (error)
		return (error);

	/*
	 * The evacuation succeeded.  Remove any remaining MOS metadata
	 * associated with this vdev, and wait for these changes to sync.
	 */
	ASSERT0(vd->vdev_stat.vs_alloc);
	txg = spa_vdev_config_enter(spa);
	vd->vdev_removing = B_TRUE;
	/* dirty the leaves and config so the removal state syncs in txg */
	vdev_dirty_leaves(vd, VDD_DTL, txg);
	vdev_config_dirty(vd);
	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	return (0);
}
5700 | | | 5700 | |
5701 | /* | | 5701 | /* |
5702 | * Complete the removal by cleaning up the namespace. | | 5702 | * Complete the removal by cleaning up the namespace. |
5703 | */ | | 5703 | */ |
static void
spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
{
	vdev_t *rvd = spa->spa_root_vdev;
	uint64_t id = vd->vdev_id;
	/* whether vd is the last child of the root vdev */
	boolean_t last_vdev = (id == (rvd->vdev_children - 1));

	/* caller holds the namespace lock and all config locks as writer */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
	ASSERT(vd == vd->vdev_top);

	/*
	 * Only remove any devices which are empty.
	 */
	if (vd->vdev_stat.vs_alloc != 0)
		return;

	/* wipe the on-disk labels; failure here is non-fatal */
	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/* drop vd from the dirty lists before freeing it */
	if (list_link_active(&vd->vdev_state_dirty_node))
		vdev_state_clean(vd);
	if (list_link_active(&vd->vdev_config_dirty_node))
		vdev_config_clean(vd);

	vdev_free(vd);

	if (last_vdev) {
		/* trailing slot: shrink the root's child array */
		vdev_compact_children(rvd);
	} else {
		/* interior slot: plug the gap with a hole vdev to keep ids stable */
		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
		vdev_add_child(rvd, vd);
	}
	vdev_config_dirty(rvd);

	/*
	 * Reassess the health of our root vdev.
	 */
	vdev_reopen(rvd);
}
5743 | | | 5743 | |
5744 | /* | | 5744 | /* |
5745 | * Remove a device from the pool - | | 5745 | * Remove a device from the pool - |
5746 | * | | 5746 | * |
5747 | * Removing a device from the vdev namespace requires several steps | | 5747 | * Removing a device from the vdev namespace requires several steps |
5748 | * and can take a significant amount of time. As a result we use | | 5748 | * and can take a significant amount of time. As a result we use |
5749 | * the spa_vdev_config_[enter/exit] functions which allow us to | | 5749 | * the spa_vdev_config_[enter/exit] functions which allow us to |
5750 | * grab and release the spa_config_lock while still holding the namespace | | 5750 | * grab and release the spa_config_lock while still holding the namespace |
5751 | * lock. During each step the configuration is synced out. | | 5751 | * lock. During each step the configuration is synced out. |
5752 | * | | 5752 | * |
5753 | * Currently, this supports removing only hot spares, slogs, and level 2 ARC | | 5753 | * Currently, this supports removing only hot spares, slogs, and level 2 ARC |
5754 | * devices. | | 5754 | * devices. |
5755 | */ | | 5755 | */ |
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	sysevent_t *ev = NULL;
	metaslab_group_t *mg;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	/*
	 * If the caller already holds the namespace lock we must not
	 * re-enter the vdev config locks (or exit them) ourselves.
	 */
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	ASSERT(spa_writeable(spa));

	if (!locked)
		txg = spa_vdev_enter(spa);

	/* B_FALSE: don't search the aux (spare/l2cache) vdevs yet. */
	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.  (vd == NULL means the guid was not found
		 * among the in-use vdevs above, so the spare is idle here;
		 * unspare forces removal even if it is active.)
		 */
		if (vd == NULL || unspare) {
			if (vd == NULL)
				vd = spa_lookup_by_guid(spa, guid, B_TRUE);
			ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX);
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = SET_ERROR(EBUSY);
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		vd = spa_lookup_by_guid(spa, guid, B_TRUE);
		ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_AUX);
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		/* Log (slog) device removal requires the full lock dance. */
		ASSERT(!locked);
		ASSERT(vd == vd->vdev_top);

		mg = vd->vdev_mg;

		/*
		 * Stop allocating from this vdev.
		 */
		metaslab_group_passivate(mg);

		/*
		 * Wait for the youngest allocations and frees to sync,
		 * and then wait for the deferral of those frees to finish.
		 */
		spa_vdev_config_exit(spa, NULL,
		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

		/*
		 * Attempt to evacuate the vdev.  The config lock is
		 * deliberately dropped across this call (see above).
		 */
		error = spa_vdev_remove_evacuate(spa, vd);

		txg = spa_vdev_config_enter(spa);

		/*
		 * If we couldn't evacuate the vdev, unwind: resume
		 * allocations from its metaslab group.
		 */
		if (error) {
			metaslab_group_activate(mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}

		/*
		 * Clean up the vdev namespace.
		 */
		ev = spa_event_create(spa, vd, ESC_ZFS_VDEV_REMOVE_DEV);
		spa_vdev_remove_from_namespace(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = SET_ERROR(ENOTSUP);
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = SET_ERROR(ENOENT);
	}

	if (!locked)
		error = spa_vdev_exit(spa, NULL, txg, error);

	/* Post the removal sysevent only after the locks are dropped. */
	if (ev)
		spa_event_post(ev);

	return (error);
}
5866 | | | 5866 | |
5867 | /* | | 5867 | /* |
5868 | * Find any device that's done replacing, or a vdev marked 'unspare' that's | | 5868 | * Find any device that's done replacing, or a vdev marked 'unspare' that's |
5869 | * currently spared, so we can detach it. | | 5869 | * currently spared, so we can detach it. |
5870 | */ | | 5870 | */ |
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/* Depth-first: prefer a match found anywhere below this vdev. */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement. We always consider the first
	 * vdev in the list to be the oldest vdev, and the last one to be
	 * the newest (see spa_vdev_attach() for how that works). In
	 * the case where the newest vdev is faulted, we will not automatically
	 * remove it after a resilver completes. This is OK as it will require
	 * user intervention to determine which disk the admin wishes to keep.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops) {
		ASSERT(vd->vdev_children > 1);

		newvd = vd->vdev_child[vd->vdev_children - 1];
		oldvd = vd->vdev_child[0];

		/*
		 * The old device may be detached only when the new one is
		 * fully resilvered (no missing or outage DTL gaps) and the
		 * old one is no longer required for correctness.
		 */
		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 * Either end of the child list may carry the flag, depending on
	 * which device was spared in.
	 */
	if (vd->vdev_ops == &vdev_spare_ops) {
		vdev_t *first = vd->vdev_child[0];
		vdev_t *last = vd->vdev_child[vd->vdev_children - 1];

		if (last->vdev_unspare) {
			oldvd = first;
			newvd = last;
		} else if (first->vdev_unspare) {
			oldvd = last;
			newvd = first;
		} else {
			oldvd = NULL;
		}

		if (oldvd != NULL &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);

		/*
		 * If there are more than two spares attached to a disk,
		 * and those spares are not required, then we want to
		 * attempt to free them up now so that they can be used
		 * by other pools. Once we're back down to a single
		 * disk+spare, we stop removing them.
		 */
		if (vd->vdev_children > 2) {
			newvd = vd->vdev_child[1];

			if (newvd->vdev_isspare && last->vdev_isspare &&
			    vdev_dtl_empty(last, DTL_MISSING) &&
			    vdev_dtl_empty(last, DTL_OUTAGE) &&
			    !vdev_dtl_required(newvd))
				return (newvd);
		}
	}

	/* Nothing here is ready to be detached. */
	return (NULL);
}
5945 | | | 5945 | |
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Repeatedly hunt from the root for a vdev that is done
	 * resilvering and detach it.  The hunt restarts from the top
	 * each iteration because the config lock is dropped (and the
	 * tree may change) around every spa_vdev_detach() call.
	 */
	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		/*
		 * Capture the guids before dropping the config lock;
		 * the vdev pointers may not be dereferenced afterwards.
		 */
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
		    ppvd->vdev_children == 2) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd));

		spa_config_exit(spa, SCL_ALL, FTAG);
		/* A failed detach ends the sweep; nothing to unwind. */
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}
5983 | | | 5983 | |
5984 | /* | | 5984 | /* |
5985 | * Update the stored path or FRU for this vdev. | | 5985 | * Update the stored path or FRU for this vdev. |
5986 | */ | | 5986 | */ |
5987 | int | | 5987 | int |
5988 | spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, | | 5988 | spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, |
5989 | boolean_t ispath) | | 5989 | boolean_t ispath) |
5990 | { | | 5990 | { |
5991 | vdev_t *vd; | | 5991 | vdev_t *vd; |
5992 | boolean_t sync = B_FALSE; | | 5992 | boolean_t sync = B_FALSE; |
5993 | | | 5993 | |
5994 | ASSERT(spa_writeable(spa)); | | 5994 | ASSERT(spa_writeable(spa)); |
5995 | | | 5995 | |
5996 | spa_vdev_state_enter(spa, SCL_ALL); | | 5996 | spa_vdev_state_enter(spa, SCL_ALL); |
5997 | | | 5997 | |
5998 | if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) | | 5998 | if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) |
5999 | return (spa_vdev_state_exit(spa, NULL, ENOENT)); | | 5999 | return (spa_vdev_state_exit(spa, NULL, ENOENT)); |
6000 | | | 6000 | |
6001 | if (!vd->vdev_ops->vdev_op_leaf) | | 6001 | if (!vd->vdev_ops->vdev_op_leaf) |
6002 | return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); | | 6002 | return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); |
6003 | | | 6003 | |
6004 | if (ispath) { | | 6004 | if (ispath) { |
6005 | if (strcmp(value, vd->vdev_path) != 0) { | | 6005 | if (strcmp(value, vd->vdev_path) != 0) { |
6006 | spa_strfree(vd->vdev_path); | | 6006 | spa_strfree(vd->vdev_path); |
6007 | vd->vdev_path = spa_strdup(value); | | 6007 | vd->vdev_path = spa_strdup(value); |
6008 | sync = B_TRUE; | | 6008 | sync = B_TRUE; |
6009 | } | | 6009 | } |
6010 | } else { | | 6010 | } else { |
6011 | if (vd->vdev_fru == NULL) { | | 6011 | if (vd->vdev_fru == NULL) { |
6012 | vd->vdev_fru = spa_strdup(value); | | 6012 | vd->vdev_fru = spa_strdup(value); |
6013 | sync = B_TRUE; | | 6013 | sync = B_TRUE; |
6014 | } else if (strcmp(value, vd->vdev_fru) != 0) { | | 6014 | } else if (strcmp(value, vd->vdev_fru) != 0) { |
6015 | spa_strfree(vd->vdev_fru); | | 6015 | spa_strfree(vd->vdev_fru); |
6016 | vd->vdev_fru = spa_strdup(value); | | 6016 | vd->vdev_fru = spa_strdup(value); |
6017 | sync = B_TRUE; | | 6017 | sync = B_TRUE; |
6018 | } | | 6018 | } |
6019 | } | | 6019 | } |
6020 | | | 6020 | |
6021 | return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0)); | | 6021 | return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0)); |
6022 | } | | 6022 | } |
6023 | | | 6023 | |
6024 | int | | 6024 | int |
6025 | spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) | | 6025 | spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) |
6026 | { | | 6026 | { |
6027 | return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); | | 6027 | return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); |
6028 | } | | 6028 | } |
6029 | | | 6029 | |
6030 | int | | 6030 | int |
6031 | spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) | | 6031 | spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) |
6032 | { | | 6032 | { |
6033 | return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); | | 6033 | return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); |
6034 | } | | 6034 | } |
6035 | | | 6035 | |
6036 | /* | | 6036 | /* |
6037 | * ========================================================================== | | 6037 | * ========================================================================== |
6038 | * SPA Scanning | | 6038 | * SPA Scanning |
6039 | * ========================================================================== | | 6039 | * ========================================================================== |
6040 | */ | | 6040 | */ |
6041 | | | 6041 | |
6042 | int | | 6042 | int |
6043 | spa_scan_stop(spa_t *spa) | | 6043 | spa_scan_stop(spa_t *spa) |
6044 | { | | 6044 | { |
6045 | ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); | | 6045 | ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); |
6046 | if (dsl_scan_resilvering(spa->spa_dsl_pool)) | | 6046 | if (dsl_scan_resilvering(spa->spa_dsl_pool)) |
6047 | return (SET_ERROR(EBUSY)); | | 6047 | return (SET_ERROR(EBUSY)); |
6048 | return (dsl_scan_cancel(spa->spa_dsl_pool)); | | 6048 | return (dsl_scan_cancel(spa->spa_dsl_pool)); |
6049 | } | | 6049 | } |
6050 | | | 6050 | |
6051 | int | | 6051 | int |
6052 | spa_scan(spa_t *spa, pool_scan_func_t func) | | 6052 | spa_scan(spa_t *spa, pool_scan_func_t func) |
6053 | { | | 6053 | { |
6054 | ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); | | 6054 | ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); |
6055 | | | 6055 | |
6056 | if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) | | 6056 | if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) |
6057 | return (SET_ERROR(ENOTSUP)); | | 6057 | return (SET_ERROR(ENOTSUP)); |
6058 | | | 6058 | |
6059 | /* | | 6059 | /* |
6060 | * If a resilver was requested, but there is no DTL on a | | 6060 | * If a resilver was requested, but there is no DTL on a |
6061 | * writeable leaf device, we have nothing to do. | | 6061 | * writeable leaf device, we have nothing to do. |
6062 | */ | | 6062 | */ |
6063 | if (func == POOL_SCAN_RESILVER && | | 6063 | if (func == POOL_SCAN_RESILVER && |
6064 | !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { | | 6064 | !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { |
6065 | spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); | | 6065 | spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); |
6066 | return (0); | | 6066 | return (0); |
6067 | } | | 6067 | } |
6068 | | | 6068 | |
6069 | return (dsl_scan(spa->spa_dsl_pool, func)); | | 6069 | return (dsl_scan(spa->spa_dsl_pool, func)); |
6070 | } | | 6070 | } |
6071 | | | 6071 | |
6072 | /* | | 6072 | /* |
6073 | * ========================================================================== | | 6073 | * ========================================================================== |
6074 | * SPA async task processing | | 6074 | * SPA async task processing |
6075 | * ========================================================================== | | 6075 | * ========================================================================== |
6076 | */ | | 6076 | */ |
6077 | | | 6077 | |
6078 | static void | | 6078 | static void |
6079 | spa_async_remove(spa_t *spa, vdev_t *vd) | | 6079 | spa_async_remove(spa_t *spa, vdev_t *vd) |
6080 | { | | 6080 | { |
6081 | if (vd->vdev_remove_wanted) { | | 6081 | if (vd->vdev_remove_wanted) { |
6082 | vd->vdev_remove_wanted = B_FALSE; | | 6082 | vd->vdev_remove_wanted = B_FALSE; |
6083 | vd->vdev_delayed_close = B_FALSE; | | 6083 | vd->vdev_delayed_close = B_FALSE; |
6084 | vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); | | 6084 | vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); |
6085 | | | 6085 | |
6086 | /* | | 6086 | /* |
6087 | * We want to clear the stats, but we don't want to do a full | | 6087 | * We want to clear the stats, but we don't want to do a full |
6088 | * vdev_clear() as that will cause us to throw away | | 6088 | * vdev_clear() as that will cause us to throw away |
6089 | * degraded/faulted state as well as attempt to reopen the | | 6089 | * degraded/faulted state as well as attempt to reopen the |
6090 | * device, all of which is a waste. | | 6090 | * device, all of which is a waste. |
6091 | */ | | 6091 | */ |
6092 | vd->vdev_stat.vs_read_errors = 0; | | 6092 | vd->vdev_stat.vs_read_errors = 0; |
6093 | vd->vdev_stat.vs_write_errors = 0; | | 6093 | vd->vdev_stat.vs_write_errors = 0; |
6094 | vd->vdev_stat.vs_checksum_errors = 0; | | 6094 | vd->vdev_stat.vs_checksum_errors = 0; |
6095 | | | 6095 | |
6096 | vdev_state_dirty(vd->vdev_top); | | 6096 | vdev_state_dirty(vd->vdev_top); |
6097 | /* Tell userspace that the vdev is gone. */ | | 6097 | /* Tell userspace that the vdev is gone. */ |
6098 | zfs_post_remove(spa, vd); | | 6098 | zfs_post_remove(spa, vd); |
6099 | } | | 6099 | } |
6100 | | | 6100 | |
6101 | for (int c = 0; c < vd->vdev_children; c++) | | 6101 | for (int c = 0; c < vd->vdev_children; c++) |
6102 | spa_async_remove(spa, vd->vdev_child[c]); | | 6102 | spa_async_remove(spa, vd->vdev_child[c]); |
6103 | } | | 6103 | } |
6104 | | | 6104 | |
6105 | static void | | 6105 | static void |
6106 | spa_async_probe(spa_t *spa, vdev_t *vd) | | 6106 | spa_async_probe(spa_t *spa, vdev_t *vd) |
6107 | { | | 6107 | { |
6108 | if (vd->vdev_probe_wanted) { | | 6108 | if (vd->vdev_probe_wanted) { |
6109 | vd->vdev_probe_wanted = B_FALSE; | | 6109 | vd->vdev_probe_wanted = B_FALSE; |
6110 | vdev_reopen(vd); /* vdev_open() does the actual probe */ | | 6110 | vdev_reopen(vd); /* vdev_open() does the actual probe */ |
6111 | } | | 6111 | } |
6112 | | | 6112 | |
6113 | for (int c = 0; c < vd->vdev_children; c++) | | 6113 | for (int c = 0; c < vd->vdev_children; c++) |
6114 | spa_async_probe(spa, vd->vdev_child[c]); | | 6114 | spa_async_probe(spa, vd->vdev_child[c]); |
6115 | } | | 6115 | } |
6116 | | | 6116 | |
6117 | static void | | 6117 | static void |
6118 | spa_async_autoexpand(spa_t *spa, vdev_t *vd) | | 6118 | spa_async_autoexpand(spa_t *spa, vdev_t *vd) |
6119 | { | | 6119 | { |
6120 | sysevent_id_t eid; | | 6120 | sysevent_id_t eid; |
6121 | nvlist_t *attr; | | 6121 | nvlist_t *attr; |
6122 | char *physpath; | | 6122 | char *physpath; |
6123 | | | 6123 | |
6124 | if (!spa->spa_autoexpand) | | 6124 | if (!spa->spa_autoexpand) |
6125 | return; | | 6125 | return; |
6126 | | | 6126 | |
6127 | for (int c = 0; c < vd->vdev_children; c++) { | | 6127 | for (int c = 0; c < vd->vdev_children; c++) { |
6128 | vdev_t *cvd = vd->vdev_child[c]; | | 6128 | vdev_t *cvd = vd->vdev_child[c]; |
6129 | spa_async_autoexpand(spa, cvd); | | 6129 | spa_async_autoexpand(spa, cvd); |
6130 | } | | 6130 | } |
6131 | | | 6131 | |
6132 | if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) | | 6132 | if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) |
6133 | return; | | 6133 | return; |
6134 | | | 6134 | |
6135 | physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); | | 6135 | physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); |
6136 | (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); | | 6136 | (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); |
6137 | | | 6137 | |
6138 | VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); | | 6138 | VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); |
6139 | VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); | | 6139 | VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); |
6140 | | | 6140 | |
6141 | (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, | | 6141 | (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, |
6142 | ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); | | 6142 | ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); |
6143 | | | 6143 | |
6144 | nvlist_free(attr); | | 6144 | nvlist_free(attr); |
6145 | kmem_free(physpath, MAXPATHLEN); | | 6145 | kmem_free(physpath, MAXPATHLEN); |
6146 | } | | 6146 | } |
6147 | | | 6147 | |
static void
spa_async_thread(void *arg)
{
	spa_t *spa = arg;
	int tasks;

	ASSERT(spa->spa_sync_on);

	/*
	 * Snapshot the pending task bits under the lock, leaving only
	 * SPA_ASYNC_REMOVE behind -- that task is serviced by the
	 * separate spa_async_thread_vd() thread (see below).
	 */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks &= SPA_ASYNC_REMOVE;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_log_internal(spa, "vdev online", NULL,
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to autoexpand; skipped while the pool
	 * is suspended (presumably to avoid touching suspended devices
	 * -- NOTE(review): confirm).
	 */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		dsl_resilver_restart(spa->spa_dsl_pool, 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}
6220 | | | 6220 | |
/*
 * Async worker that services SPA_ASYNC_REMOVE requests for this pool.
 * Runs in its own kthread (spa->spa_async_thread_vd) and loops until no
 * further REMOVE requests are pending, then clears its thread slot,
 * wakes any waiters on spa_async_cv, and exits.
 */
static void
spa_async_thread_vd(void *arg)
{
	spa_t *spa = arg;
	int tasks;

	ASSERT(spa->spa_sync_on);

	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
retry:
	/* Consume the REMOVE request before dropping the lock. */
	spa->spa_async_tasks &= ~SPA_ASYNC_REMOVE;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if any devices need to be marked REMOVED.  The root vdev
	 * tree is scanned, plus the L2ARC and spare aux vdev lists.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * Let the world know that we're done.  A new REMOVE request may
	 * have arrived while the lock was dropped; loop rather than
	 * exiting so it isn't lost.
	 */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	if ((tasks & SPA_ASYNC_REMOVE) != 0)
		goto retry;
	spa->spa_async_thread_vd = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}
6260 | | | 6260 | |
6261 | void | | 6261 | void |
6262 | spa_async_suspend(spa_t *spa) | | 6262 | spa_async_suspend(spa_t *spa) |
6263 | { | | 6263 | { |
6264 | mutex_enter(&spa->spa_async_lock); | | 6264 | mutex_enter(&spa->spa_async_lock); |
6265 | spa->spa_async_suspended++; | | 6265 | spa->spa_async_suspended++; |
6266 | while (spa->spa_async_thread != NULL && | | 6266 | while (spa->spa_async_thread != NULL && |
6267 | spa->spa_async_thread_vd != NULL) | | 6267 | spa->spa_async_thread_vd != NULL) |
6268 | cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); | | 6268 | cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); |
6269 | mutex_exit(&spa->spa_async_lock); | | 6269 | mutex_exit(&spa->spa_async_lock); |
6270 | } | | 6270 | } |
6271 | | | 6271 | |
/*
 * Re-enable async task dispatch after a prior spa_async_suspend().
 * Calls nest; dispatch becomes possible again only once the
 * spa_async_suspended count drops back to zero.
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}
6280 | | | 6280 | |
6281 | static boolean_t | | 6281 | static boolean_t |
6282 | spa_async_tasks_pending(spa_t *spa) | | 6282 | spa_async_tasks_pending(spa_t *spa) |
6283 | { | | 6283 | { |
6284 | uint_t non_config_tasks; | | 6284 | uint_t non_config_tasks; |
6285 | uint_t config_task; | | 6285 | uint_t config_task; |
6286 | boolean_t config_task_suspended; | | 6286 | boolean_t config_task_suspended; |
6287 | | | 6287 | |
6288 | non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE | | | 6288 | non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE | |
6289 | SPA_ASYNC_REMOVE); | | 6289 | SPA_ASYNC_REMOVE); |
6290 | config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; | | 6290 | config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; |
6291 | if (spa->spa_ccw_fail_time == 0) { | | 6291 | if (spa->spa_ccw_fail_time == 0) { |
6292 | config_task_suspended = B_FALSE; | | 6292 | config_task_suspended = B_FALSE; |
6293 | } else { | | 6293 | } else { |
6294 | config_task_suspended = | | 6294 | config_task_suspended = |
6295 | (gethrtime() - spa->spa_ccw_fail_time) < | | 6295 | (gethrtime() - spa->spa_ccw_fail_time) < |
6296 | (zfs_ccw_retry_interval * NANOSEC); | | 6296 | ((hrtime_t)zfs_ccw_retry_interval * NANOSEC); |
6297 | } | | 6297 | } |
6298 | | | 6298 | |
6299 | return (non_config_tasks || (config_task && !config_task_suspended)); | | 6299 | return (non_config_tasks || (config_task && !config_task_suspended)); |
6300 | } | | 6300 | } |
6301 | | | 6301 | |
/*
 * Spawn the general async worker thread if there is work for it to do.
 * No thread is created while async activity is suspended, while a
 * worker is already running, or before the root filesystem is available
 * (rootdir == NULL during early boot).
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa_async_tasks_pending(spa) &&
	    !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL)
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}
6314 | | | 6314 | |
/*
 * Spawn the vdev-removal async worker if a SPA_ASYNC_REMOVE request is
 * pending.  Same guards as spa_async_dispatch(): no new thread while
 * suspended, while one is already running, or before rootdir exists.
 */
static void
spa_async_dispatch_vd(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if ((spa->spa_async_tasks & SPA_ASYNC_REMOVE) != 0 &&
	    !spa->spa_async_suspended &&
	    spa->spa_async_thread_vd == NULL &&
	    rootdir != NULL)
		spa->spa_async_thread_vd = thread_create(NULL, 0,
		    spa_async_thread_vd, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}
6327 | | | 6327 | |
/*
 * Record an async task request for this pool and kick the vdev-removal
 * worker.  Note that only spa_async_dispatch_vd() is called here; the
 * general worker is started via spa_async_dispatch(), presumably from
 * elsewhere (e.g. the txg sync path) -- not visible in this file chunk.
 */
void
spa_async_request(spa_t *spa, int task)
{
	zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task);
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
	spa_async_dispatch_vd(spa);
}
6337 | | | 6337 | |
6338 | /* | | 6338 | /* |
6339 | * ========================================================================== | | 6339 | * ========================================================================== |
6340 | * SPA syncing routines | | 6340 | * SPA syncing routines |
6341 | * ========================================================================== | | 6341 | * ========================================================================== |
6342 | */ | | 6342 | */ |
6343 | | | 6343 | |
6344 | static int | | 6344 | static int |
6345 | bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) | | 6345 | bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) |
6346 | { | | 6346 | { |
6347 | bpobj_t *bpo = arg; | | 6347 | bpobj_t *bpo = arg; |
6348 | bpobj_enqueue(bpo, bp, tx); | | 6348 | bpobj_enqueue(bpo, bp, tx); |
6349 | return (0); | | 6349 | return (0); |
6350 | } | | 6350 | } |
6351 | | | 6351 | |
6352 | static int | | 6352 | static int |
6353 | spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) | | 6353 | spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) |
6354 | { | | 6354 | { |
6355 | zio_t *zio = arg; | | 6355 | zio_t *zio = arg; |
6356 | | | 6356 | |
6357 | zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, | | 6357 | zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, |
6358 | BP_GET_PSIZE(bp), zio->io_flags)); | | 6358 | BP_GET_PSIZE(bp), zio->io_flags)); |
6359 | return (0); | | 6359 | return (0); |
6360 | } | | 6360 | } |
6361 | | | 6361 | |
/*
 * Issue a free for every block on the given bplist and wait for all the
 * frees to complete.
 *
 * Note: this simple function is not inlined to make it easier to dtrace the
 * amount of time spent syncing frees.
 */
static void
spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
{
	zio_t *zio = zio_root(spa, NULL, NULL, 0);
	bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
	VERIFY(zio_wait(zio) == 0);
}
6373 | | | 6373 | |
/*
 * Issue a free for every block recorded in the pool's deferred bpobj
 * and wait for all the frees to complete.
 *
 * Note: this simple function is not inlined to make it easier to dtrace the
 * amount of time spent syncing deferred frees.
 */
static void
spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
{
	zio_t *zio = zio_root(spa, NULL, NULL, 0);
	VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
	    spa_free_sync_cb, zio, tx), ==, 0);
	VERIFY0(zio_wait(zio));
}
6386 | | | 6386 | |
6387 | | | 6387 | |
/*
 * Pack nvlist 'nv' with XDR encoding and write it to packed-nvlist
 * object 'obj' in the MOS, padding the write out to a multiple of
 * SPA_CONFIG_BLOCKSIZE.  The object's bonus buffer records the
 * unpadded packed size so readers know how many bytes to unpack.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information. This avoids the dmu_buf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	/* Zero the tail so the on-disk padding is deterministic. */
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	/* Record the true (unpadded) size in the object's bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}
6419 | | | 6419 | |
/*
 * Sync one aux-vdev list (spares or L2ARC devices) out to its MOS
 * packed-nvlist object.  'config' names the nvlist array to emit and
 * 'entry' is the pool-directory key that points at the object, which is
 * allocated on first use.  No-op unless the list was dirtied (sav_sync).
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		/* Empty list: write an empty nvlist array. */
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		/*
		 * NOTE(review): VDEV_CONFIG_L2CACHE is passed for both the
		 * spare and l2cache lists here -- presumably the fields it
		 * selects are suitable for either aux type; confirm.
		 */
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, VDEV_CONFIG_L2CACHE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}
6465 | | | 6465 | |
/*
 * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t.
 * The all-vdev ZAP must be empty.
 *
 * Recursively walks the vdev tree rooted at 'vd', adding each vdev's
 * top-level and leaf ZAP object numbers (when present) to the AVZ 'avz'.
 */
static void
spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx)
{
	spa_t *spa = vd->vdev_spa;
	if (vd->vdev_top_zap != 0) {
		VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
		    vd->vdev_top_zap, tx));
	}
	if (vd->vdev_leaf_zap != 0) {
		VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
		    vd->vdev_leaf_zap, tx));
	}
	/* Recurse into all children. */
	for (uint64_t i = 0; i < vd->vdev_children; i++) {
		spa_avz_build(vd->vdev_child[i], avz, tx);
	}
}
6486 | | | 6486 | |
/*
 * Sync the pool's config nvlist out to the MOS config object, first
 * reconciling the all-vdev ZAP (AVZ) according to spa_avz_action:
 * rebuild it from the vdev tree, destroy it (and all per-vdev ZAPs),
 * or leave it alone.  Also creates per-vdev ZAPs for any vdevs that
 * lack them, and stashes the generated config in spa_config_syncing.
 */
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
	nvlist_t *config;

	/*
	 * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS,
	 * its config may not be dirty but we still need to build per-vdev ZAPs.
	 * Similarly, if the pool is being assembled (e.g. after a split), we
	 * need to rebuild the AVZ although the config may not be dirty.
	 */
	if (list_is_empty(&spa->spa_config_dirty_list) &&
	    spa->spa_avz_action == AVZ_ACTION_NONE)
		return;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	/* Any REBUILD/DESTROY action implies an existing AVZ to act on. */
	ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE ||
	    spa->spa_all_vdev_zaps != 0);

	if (spa->spa_avz_action == AVZ_ACTION_REBUILD) {
		/* Make and build the new AVZ */
		uint64_t new_avz = zap_create(spa->spa_meta_objset,
		    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
		spa_avz_build(spa->spa_root_vdev, new_avz, tx);

		/* Diff old AVZ with new one */
		zap_cursor_t zc;
		zap_attribute_t za;

		for (zap_cursor_init(&zc, spa->spa_meta_objset,
		    spa->spa_all_vdev_zaps);
		    zap_cursor_retrieve(&zc, &za) == 0;
		    zap_cursor_advance(&zc)) {
			uint64_t vdzap = za.za_first_integer;
			if (zap_lookup_int(spa->spa_meta_objset, new_avz,
			    vdzap) == ENOENT) {
				/*
				 * ZAP is listed in old AVZ but not in new one;
				 * destroy it
				 */
				VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap,
				    tx));
			}
		}

		zap_cursor_fini(&zc);

		/* Destroy the old AVZ */
		VERIFY0(zap_destroy(spa->spa_meta_objset,
		    spa->spa_all_vdev_zaps, tx));

		/* Replace the old AVZ in the dir obj with the new one */
		VERIFY0(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP,
		    sizeof (new_avz), 1, &new_avz, tx));

		spa->spa_all_vdev_zaps = new_avz;
	} else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) {
		zap_cursor_t zc;
		zap_attribute_t za;

		/* Walk through the AVZ and destroy all listed ZAPs */
		for (zap_cursor_init(&zc, spa->spa_meta_objset,
		    spa->spa_all_vdev_zaps);
		    zap_cursor_retrieve(&zc, &za) == 0;
		    zap_cursor_advance(&zc)) {
			uint64_t zap = za.za_first_integer;
			VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx));
		}

		zap_cursor_fini(&zc);

		/* Destroy and unlink the AVZ itself */
		VERIFY0(zap_destroy(spa->spa_meta_objset,
		    spa->spa_all_vdev_zaps, tx));
		VERIFY0(zap_remove(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx));
		spa->spa_all_vdev_zaps = 0;
	}

	/*
	 * After a DESTROY (or on a pre-AVZ pool) there is no AVZ; make a
	 * fresh empty one linked into the pool directory.
	 */
	if (spa->spa_all_vdev_zaps == 0) {
		spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset,
		    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_VDEV_ZAP_MAP, tx);
	}
	spa->spa_avz_action = AVZ_ACTION_NONE;

	/* Create ZAPs for vdevs that don't have them. */
	vdev_construct_zaps(spa->spa_root_vdev, tx);

	config = spa_config_generate(spa, spa->spa_root_vdev,
	    dmu_tx_get_txg(tx), B_FALSE);

	/*
	 * If we're upgrading the spa version then make sure that
	 * the config object gets updated with the correct version.
	 */
	if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version)
		fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
		    spa->spa_uberblock.ub_version);

	spa_config_exit(spa, SCL_STATE, FTAG);

	/* Retire any previously syncing config and adopt the new one. */
	nvlist_free(spa->spa_config_syncing);
	spa->spa_config_syncing = config;

	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}
6596 | | | 6596 | |
6597 | static void | | 6597 | static void |
6598 | spa_sync_version(void *arg, dmu_tx_t *tx) | | 6598 | spa_sync_version(void *arg, dmu_tx_t *tx) |
6599 | { | | 6599 | { |
6600 | uint64_t *versionp = arg; | | 6600 | uint64_t *versionp = arg; |
6601 | uint64_t version = *versionp; | | 6601 | uint64_t version = *versionp; |
6602 | spa_t *spa = dmu_tx_pool(tx)->dp_spa; | | 6602 | spa_t *spa = dmu_tx_pool(tx)->dp_spa; |
6603 | | | 6603 | |
6604 | /* | | 6604 | /* |
6605 | * Setting the version is special cased when first creating the pool. | | 6605 | * Setting the version is special cased when first creating the pool. |
6606 | */ | | 6606 | */ |
6607 | ASSERT(tx->tx_txg != TXG_INITIAL); | | 6607 | ASSERT(tx->tx_txg != TXG_INITIAL); |
6608 | | | 6608 | |
6609 | ASSERT(SPA_VERSION_IS_SUPPORTED(version)); | | 6609 | ASSERT(SPA_VERSION_IS_SUPPORTED(version)); |
6610 | ASSERT(version >= spa_version(spa)); | | 6610 | ASSERT(version >= spa_version(spa)); |
6611 | | | 6611 | |
6612 | spa->spa_uberblock.ub_version = version; | | 6612 | spa->spa_uberblock.ub_version = version; |
6613 | vdev_config_dirty(spa->spa_root_vdev); | | 6613 | vdev_config_dirty(spa->spa_root_vdev); |
6614 | spa_history_log_internal(spa, "set", tx, "version=%lld", version); | | 6614 | spa_history_log_internal(spa, "set", tx, "version=%lld", version); |
6615 | } | | 6615 | } |
6616 | | | 6616 | |
6617 | /* | | 6617 | /* |
6618 | * Set zpool properties. | | 6618 | * Set zpool properties. |
6619 | */ | | 6619 | */ |
6620 | static void | | 6620 | static void |
6621 | spa_sync_props(void *arg, dmu_tx_t *tx) | | 6621 | spa_sync_props(void *arg, dmu_tx_t *tx) |
6622 | { | | 6622 | { |
6623 | nvlist_t *nvp = arg; | | 6623 | nvlist_t *nvp = arg; |
6624 | spa_t *spa = dmu_tx_pool(tx)->dp_spa; | | 6624 | spa_t *spa = dmu_tx_pool(tx)->dp_spa; |
6625 | objset_t *mos = spa->spa_meta_objset; | | 6625 | objset_t *mos = spa->spa_meta_objset; |
6626 | nvpair_t *elem = NULL; | | 6626 | nvpair_t *elem = NULL; |
6627 | | | 6627 | |
6628 | mutex_enter(&spa->spa_props_lock); | | 6628 | mutex_enter(&spa->spa_props_lock); |
6629 | | | 6629 | |
6630 | while ((elem = nvlist_next_nvpair(nvp, elem))) { | | 6630 | while ((elem = nvlist_next_nvpair(nvp, elem))) { |
6631 | uint64_t intval; | | 6631 | uint64_t intval; |
6632 | char *strval, *fname; | | 6632 | char *strval, *fname; |
6633 | zpool_prop_t prop; | | 6633 | zpool_prop_t prop; |
6634 | const char *propname; | | 6634 | const char *propname; |
6635 | zprop_type_t proptype; | | 6635 | zprop_type_t proptype; |
6636 | spa_feature_t fid; | | 6636 | spa_feature_t fid; |
6637 | | | 6637 | |
6638 | switch (prop = zpool_name_to_prop(nvpair_name(elem))) { | | 6638 | switch (prop = zpool_name_to_prop(nvpair_name(elem))) { |
6639 | case ZPROP_INVAL: | | 6639 | case ZPROP_INVAL: |
6640 | /* | | 6640 | /* |
6641 | * We checked this earlier in spa_prop_validate(). | | 6641 | * We checked this earlier in spa_prop_validate(). |
6642 | */ | | 6642 | */ |
6643 | ASSERT(zpool_prop_feature(nvpair_name(elem))); | | 6643 | ASSERT(zpool_prop_feature(nvpair_name(elem))); |
6644 | | | 6644 | |
6645 | fname = strchr(nvpair_name(elem), '@') + 1; | | 6645 | fname = strchr(nvpair_name(elem), '@') + 1; |
6646 | VERIFY0(zfeature_lookup_name(fname, &fid)); | | 6646 | VERIFY0(zfeature_lookup_name(fname, &fid)); |
6647 | | | 6647 | |
6648 | spa_feature_enable(spa, fid, tx); | | 6648 | spa_feature_enable(spa, fid, tx); |
6649 | spa_history_log_internal(spa, "set", tx, | | 6649 | spa_history_log_internal(spa, "set", tx, |
6650 | "%s=enabled", nvpair_name(elem)); | | 6650 | "%s=enabled", nvpair_name(elem)); |
6651 | break; | | 6651 | break; |
6652 | | | 6652 | |
6653 | case ZPOOL_PROP_VERSION: | | 6653 | case ZPOOL_PROP_VERSION: |
6654 | intval = fnvpair_value_uint64(elem); | | 6654 | intval = fnvpair_value_uint64(elem); |
6655 | /* | | 6655 | /* |
6656 | * The version is synced seperatly before other | | 6656 | * The version is synced seperatly before other |
6657 | * properties and should be correct by now. | | 6657 | * properties and should be correct by now. |
6658 | */ | | 6658 | */ |
6659 | ASSERT3U(spa_version(spa), >=, intval); | | 6659 | ASSERT3U(spa_version(spa), >=, intval); |
6660 | break; | | 6660 | break; |
6661 | | | 6661 | |
6662 | case ZPOOL_PROP_ALTROOT: | | 6662 | case ZPOOL_PROP_ALTROOT: |
6663 | /* | | 6663 | /* |
6664 | * 'altroot' is a non-persistent property. It should | | 6664 | * 'altroot' is a non-persistent property. It should |
6665 | * have been set temporarily at creation or import time. | | 6665 | * have been set temporarily at creation or import time. |
6666 | */ | | 6666 | */ |
6667 | ASSERT(spa->spa_root != NULL); | | 6667 | ASSERT(spa->spa_root != NULL); |
6668 | break; | | 6668 | break; |
6669 | | | 6669 | |
6670 | case ZPOOL_PROP_READONLY: | | 6670 | case ZPOOL_PROP_READONLY: |
6671 | case ZPOOL_PROP_CACHEFILE: | | 6671 | case ZPOOL_PROP_CACHEFILE: |
6672 | /* | | 6672 | /* |
6673 | * 'readonly' and 'cachefile' are also non-persistent | | 6673 | * 'readonly' and 'cachefile' are also non-persistent |
6674 | * properties. | | 6674 | * properties. |
6675 | */ | | 6675 | */ |
6676 | break; | | 6676 | break; |
6677 | case ZPOOL_PROP_COMMENT: | | 6677 | case ZPOOL_PROP_COMMENT: |
6678 | strval = fnvpair_value_string(elem); | | 6678 | strval = fnvpair_value_string(elem); |
6679 | if (spa->spa_comment != NULL) | | 6679 | if (spa->spa_comment != NULL) |
6680 | spa_strfree(spa->spa_comment); | | 6680 | spa_strfree(spa->spa_comment); |
6681 | spa->spa_comment = spa_strdup(strval); | | 6681 | spa->spa_comment = spa_strdup(strval); |
6682 | /* | | 6682 | /* |
6683 | * We need to dirty the configuration on all the vdevs | | 6683 | * We need to dirty the configuration on all the vdevs |
6684 | * so that their labels get updated. It's unnecessary | | 6684 | * so that their labels get updated. It's unnecessary |
6685 | * to do this for pool creation since the vdev's | | 6685 | * to do this for pool creation since the vdev's |
6686 | * configuration has already been dirtied. | | 6686 | * configuration has already been dirtied. |
6687 | */ | | 6687 | */ |
6688 | if (tx->tx_txg != TXG_INITIAL) | | 6688 | if (tx->tx_txg != TXG_INITIAL) |
6689 | vdev_config_dirty(spa->spa_root_vdev); | | 6689 | vdev_config_dirty(spa->spa_root_vdev); |
6690 | spa_history_log_internal(spa, "set", tx, | | 6690 | spa_history_log_internal(spa, "set", tx, |
6691 | "%s=%s", nvpair_name(elem), strval); | | 6691 | "%s=%s", nvpair_name(elem), strval); |
6692 | break; | | 6692 | break; |
6693 | default: | | 6693 | default: |
6694 | /* | | 6694 | /* |
6695 | * Set pool property values in the poolprops mos object. | | 6695 | * Set pool property values in the poolprops mos object. |
6696 | */ | | 6696 | */ |
6697 | if (spa->spa_pool_props_object == 0) { | | 6697 | if (spa->spa_pool_props_object == 0) { |
6698 | spa->spa_pool_props_object = | | 6698 | spa->spa_pool_props_object = |
6699 | zap_create_link(mos, DMU_OT_POOL_PROPS, | | 6699 | zap_create_link(mos, DMU_OT_POOL_PROPS, |
6700 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, | | 6700 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, |
6701 | tx); | | 6701 | tx); |
6702 | } | | 6702 | } |
6703 | | | 6703 | |
6704 | /* normalize the property name */ | | 6704 | /* normalize the property name */ |
6705 | propname = zpool_prop_to_name(prop); | | 6705 | propname = zpool_prop_to_name(prop); |
6706 | proptype = zpool_prop_get_type(prop); | | 6706 | proptype = zpool_prop_get_type(prop); |
6707 | | | 6707 | |
6708 | if (nvpair_type(elem) == DATA_TYPE_STRING) { | | 6708 | if (nvpair_type(elem) == DATA_TYPE_STRING) { |
6709 | ASSERT(proptype == PROP_TYPE_STRING); | | 6709 | ASSERT(proptype == PROP_TYPE_STRING); |
6710 | strval = fnvpair_value_string(elem); | | 6710 | strval = fnvpair_value_string(elem); |
6711 | VERIFY0(zap_update(mos, | | 6711 | VERIFY0(zap_update(mos, |
6712 | spa->spa_pool_props_object, propname, | | 6712 | spa->spa_pool_props_object, propname, |
6713 | 1, strlen(strval) + 1, strval, tx)); | | 6713 | 1, strlen(strval) + 1, strval, tx)); |
6714 | spa_history_log_internal(spa, "set", tx, | | 6714 | spa_history_log_internal(spa, "set", tx, |
6715 | "%s=%s", nvpair_name(elem), strval); | | 6715 | "%s=%s", nvpair_name(elem), strval); |
6716 | } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { | | 6716 | } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { |
6717 | intval = fnvpair_value_uint64(elem); | | 6717 | intval = fnvpair_value_uint64(elem); |
6718 | | | 6718 | |
6719 | if (proptype == PROP_TYPE_INDEX) { | | 6719 | if (proptype == PROP_TYPE_INDEX) { |
6720 | const char *unused; | | 6720 | const char *unused; |
6721 | VERIFY0(zpool_prop_index_to_string( | | 6721 | VERIFY0(zpool_prop_index_to_string( |
6722 | prop, intval, &unused)); | | 6722 | prop, intval, &unused)); |
6723 | } | | 6723 | } |
6724 | VERIFY0(zap_update(mos, | | 6724 | VERIFY0(zap_update(mos, |
6725 | spa->spa_pool_props_object, propname, | | 6725 | spa->spa_pool_props_object, propname, |
6726 | 8, 1, &intval, tx)); | | 6726 | 8, 1, &intval, tx)); |
6727 | spa_history_log_internal(spa, "set", tx, | | 6727 | spa_history_log_internal(spa, "set", tx, |
6728 | "%s=%lld", nvpair_name(elem), intval); | | 6728 | "%s=%lld", nvpair_name(elem), intval); |
6729 | } else { | | 6729 | } else { |
6730 | ASSERT(0); /* not allowed */ | | 6730 | ASSERT(0); /* not allowed */ |
6731 | } | | 6731 | } |
6732 | | | 6732 | |
6733 | switch (prop) { | | 6733 | switch (prop) { |
6734 | case ZPOOL_PROP_DELEGATION: | | 6734 | case ZPOOL_PROP_DELEGATION: |
6735 | spa->spa_delegation = intval; | | 6735 | spa->spa_delegation = intval; |
6736 | break; | | 6736 | break; |
6737 | case ZPOOL_PROP_BOOTFS: | | 6737 | case ZPOOL_PROP_BOOTFS: |
6738 | spa->spa_bootfs = intval; | | 6738 | spa->spa_bootfs = intval; |
6739 | break; | | 6739 | break; |
6740 | case ZPOOL_PROP_FAILUREMODE: | | 6740 | case ZPOOL_PROP_FAILUREMODE: |
6741 | spa->spa_failmode = intval; | | 6741 | spa->spa_failmode = intval; |
6742 | break; | | 6742 | break; |
6743 | case ZPOOL_PROP_AUTOEXPAND: | | 6743 | case ZPOOL_PROP_AUTOEXPAND: |
6744 | spa->spa_autoexpand = intval; | | 6744 | spa->spa_autoexpand = intval; |
6745 | if (tx->tx_txg != TXG_INITIAL) | | 6745 | if (tx->tx_txg != TXG_INITIAL) |
6746 | spa_async_request(spa, | | 6746 | spa_async_request(spa, |
6747 | SPA_ASYNC_AUTOEXPAND); | | 6747 | SPA_ASYNC_AUTOEXPAND); |
6748 | break; | | 6748 | break; |
6749 | case ZPOOL_PROP_DEDUPDITTO: | | 6749 | case ZPOOL_PROP_DEDUPDITTO: |
6750 | spa->spa_dedup_ditto = intval; | | 6750 | spa->spa_dedup_ditto = intval; |
6751 | break; | | 6751 | break; |
6752 | default: | | 6752 | default: |
6753 | break; | | 6753 | break; |
6754 | } | | 6754 | } |
6755 | } | | 6755 | } |
6756 | | | 6756 | |
6757 | } | | 6757 | } |
6758 | | | 6758 | |
6759 | mutex_exit(&spa->spa_props_lock); | | 6759 | mutex_exit(&spa->spa_props_lock); |
6760 | } | | 6760 | } |
6761 | | | 6761 | |
6762 | /* | | 6762 | /* |
6763 | * Perform one-time upgrade on-disk changes. spa_version() does not | | 6763 | * Perform one-time upgrade on-disk changes. spa_version() does not |
6764 | * reflect the new version this txg, so there must be no changes this | | 6764 | * reflect the new version this txg, so there must be no changes this |
6765 | * txg to anything that the upgrade code depends on after it executes. | | 6765 | * txg to anything that the upgrade code depends on after it executes. |
6766 | * Therefore this must be called after dsl_pool_sync() does the sync | | 6766 | * Therefore this must be called after dsl_pool_sync() does the sync |
6767 | * tasks. | | 6767 | * tasks. |
6768 | */ | | 6768 | */ |
6769 | static void | | 6769 | static void |
6770 | spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) | | 6770 | spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) |
6771 | { | | 6771 | { |
6772 | dsl_pool_t *dp = spa->spa_dsl_pool; | | 6772 | dsl_pool_t *dp = spa->spa_dsl_pool; |
6773 | | | 6773 | |
6774 | ASSERT(spa->spa_sync_pass == 1); | | 6774 | ASSERT(spa->spa_sync_pass == 1); |
6775 | | | 6775 | |
6776 | rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); | | 6776 | rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); |
6777 | | | 6777 | |
6778 | if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && | | 6778 | if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && |
6779 | spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { | | 6779 | spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { |
6780 | dsl_pool_create_origin(dp, tx); | | 6780 | dsl_pool_create_origin(dp, tx); |
6781 | | | 6781 | |
6782 | /* Keeping the origin open increases spa_minref */ | | 6782 | /* Keeping the origin open increases spa_minref */ |
6783 | spa->spa_minref += 3; | | 6783 | spa->spa_minref += 3; |
6784 | } | | 6784 | } |
6785 | | | 6785 | |
6786 | if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && | | 6786 | if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && |
6787 | spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { | | 6787 | spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { |
6788 | dsl_pool_upgrade_clones(dp, tx); | | 6788 | dsl_pool_upgrade_clones(dp, tx); |
6789 | } | | 6789 | } |
6790 | | | 6790 | |
6791 | if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && | | 6791 | if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && |
6792 | spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { | | 6792 | spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { |
6793 | dsl_pool_upgrade_dir_clones(dp, tx); | | 6793 | dsl_pool_upgrade_dir_clones(dp, tx); |
6794 | | | 6794 | |
6795 | /* Keeping the freedir open increases spa_minref */ | | 6795 | /* Keeping the freedir open increases spa_minref */ |
6796 | spa->spa_minref += 3; | | 6796 | spa->spa_minref += 3; |
6797 | } | | 6797 | } |
6798 | | | 6798 | |
6799 | if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && | | 6799 | if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && |
6800 | spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { | | 6800 | spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { |
6801 | spa_feature_create_zap_objects(spa, tx); | | 6801 | spa_feature_create_zap_objects(spa, tx); |
6802 | } | | 6802 | } |
6803 | | | 6803 | |
6804 | /* | | 6804 | /* |
6805 | * LZ4_COMPRESS feature's behaviour was changed to activate_on_enable | | 6805 | * LZ4_COMPRESS feature's behaviour was changed to activate_on_enable |
6806 | * when possibility to use lz4 compression for metadata was added | | 6806 | * when possibility to use lz4 compression for metadata was added |
6807 | * Old pools that have this feature enabled must be upgraded to have | | 6807 | * Old pools that have this feature enabled must be upgraded to have |
6808 | * this feature active | | 6808 | * this feature active |
6809 | */ | | 6809 | */ |
6810 | if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { | | 6810 | if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { |
6811 | boolean_t lz4_en = spa_feature_is_enabled(spa, | | 6811 | boolean_t lz4_en = spa_feature_is_enabled(spa, |
6812 | SPA_FEATURE_LZ4_COMPRESS); | | 6812 | SPA_FEATURE_LZ4_COMPRESS); |
6813 | boolean_t lz4_ac = spa_feature_is_active(spa, | | 6813 | boolean_t lz4_ac = spa_feature_is_active(spa, |
6814 | SPA_FEATURE_LZ4_COMPRESS); | | 6814 | SPA_FEATURE_LZ4_COMPRESS); |
6815 | | | 6815 | |
6816 | if (lz4_en && !lz4_ac) | | 6816 | if (lz4_en && !lz4_ac) |
6817 | spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx); | | 6817 | spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx); |
6818 | } | | 6818 | } |
6819 | | | 6819 | |
6820 | /* | | 6820 | /* |
6821 | * If we haven't written the salt, do so now. Note that the | | 6821 | * If we haven't written the salt, do so now. Note that the |
6822 | * feature may not be activated yet, but that's fine since | | 6822 | * feature may not be activated yet, but that's fine since |
6823 | * the presence of this ZAP entry is backwards compatible. | | 6823 | * the presence of this ZAP entry is backwards compatible. |
6824 | */ | | 6824 | */ |
6825 | if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, | | 6825 | if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, |
6826 | DMU_POOL_CHECKSUM_SALT) == ENOENT) { | | 6826 | DMU_POOL_CHECKSUM_SALT) == ENOENT) { |
6827 | VERIFY0(zap_add(spa->spa_meta_objset, | | 6827 | VERIFY0(zap_add(spa->spa_meta_objset, |
6828 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, | | 6828 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, |
6829 | sizeof (spa->spa_cksum_salt.zcs_bytes), | | 6829 | sizeof (spa->spa_cksum_salt.zcs_bytes), |
6830 | spa->spa_cksum_salt.zcs_bytes, tx)); | | 6830 | spa->spa_cksum_salt.zcs_bytes, tx)); |
6831 | } | | 6831 | } |
6832 | | | 6832 | |
6833 | rrw_exit(&dp->dp_config_rwlock, FTAG); | | 6833 | rrw_exit(&dp->dp_config_rwlock, FTAG); |
6834 | } | | 6834 | } |
6835 | | | 6835 | |
6836 | /* | | 6836 | /* |
6837 | * Sync the specified transaction group. New blocks may be dirtied as | | 6837 | * Sync the specified transaction group. New blocks may be dirtied as |
6838 | * part of the process, so we iterate until it converges. | | 6838 | * part of the process, so we iterate until it converges. |
6839 | */ | | 6839 | */ |
6840 | | | 6840 | |
6841 | void | | 6841 | void |
6842 | spa_sync(spa_t *spa, uint64_t txg) | | 6842 | spa_sync(spa_t *spa, uint64_t txg) |
6843 | { | | 6843 | { |
6844 | dsl_pool_t *dp = spa->spa_dsl_pool; | | 6844 | dsl_pool_t *dp = spa->spa_dsl_pool; |
6845 | objset_t *mos = spa->spa_meta_objset; | | 6845 | objset_t *mos = spa->spa_meta_objset; |
6846 | bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; | | 6846 | bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; |
6847 | vdev_t *rvd = spa->spa_root_vdev; | | 6847 | vdev_t *rvd = spa->spa_root_vdev; |
6848 | vdev_t *vd; | | 6848 | vdev_t *vd; |
6849 | dmu_tx_t *tx; | | 6849 | dmu_tx_t *tx; |
6850 | int error; | | 6850 | int error; |
6851 | uint32_t max_queue_depth = zfs_vdev_async_write_max_active * | | 6851 | uint32_t max_queue_depth = zfs_vdev_async_write_max_active * |
6852 | zfs_vdev_queue_depth_pct / 100; | | 6852 | zfs_vdev_queue_depth_pct / 100; |
6853 | | | 6853 | |
6854 | VERIFY(spa_writeable(spa)); | | 6854 | VERIFY(spa_writeable(spa)); |
6855 | | | 6855 | |
6856 | /* | | 6856 | /* |
6857 | * Lock out configuration changes. | | 6857 | * Lock out configuration changes. |
6858 | */ | | 6858 | */ |
6859 | spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); | | 6859 | spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); |
6860 | | | 6860 | |
6861 | spa->spa_syncing_txg = txg; | | 6861 | spa->spa_syncing_txg = txg; |
6862 | spa->spa_sync_pass = 0; | | 6862 | spa->spa_sync_pass = 0; |
6863 | | | 6863 | |
6864 | mutex_enter(&spa->spa_alloc_lock); | | 6864 | mutex_enter(&spa->spa_alloc_lock); |
6865 | VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); | | 6865 | VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); |
6866 | mutex_exit(&spa->spa_alloc_lock); | | 6866 | mutex_exit(&spa->spa_alloc_lock); |
6867 | | | 6867 | |
6868 | /* | | 6868 | /* |
6869 | * If there are any pending vdev state changes, convert them | | 6869 | * If there are any pending vdev state changes, convert them |
6870 | * into config changes that go out with this transaction group. | | 6870 | * into config changes that go out with this transaction group. |
6871 | */ | | 6871 | */ |
6872 | spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); | | 6872 | spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); |
6873 | while (list_head(&spa->spa_state_dirty_list) != NULL) { | | 6873 | while (list_head(&spa->spa_state_dirty_list) != NULL) { |
6874 | /* | | 6874 | /* |
6875 | * We need the write lock here because, for aux vdevs, | | 6875 | * We need the write lock here because, for aux vdevs, |
6876 | * calling vdev_config_dirty() modifies sav_config. | | 6876 | * calling vdev_config_dirty() modifies sav_config. |
6877 | * This is ugly and will become unnecessary when we | | 6877 | * This is ugly and will become unnecessary when we |
6878 | * eliminate the aux vdev wart by integrating all vdevs | | 6878 | * eliminate the aux vdev wart by integrating all vdevs |
6879 | * into the root vdev tree. | | 6879 | * into the root vdev tree. |
6880 | */ | | 6880 | */ |
6881 | spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); | | 6881 | spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); |
6882 | spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); | | 6882 | spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); |
6883 | while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { | | 6883 | while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { |
6884 | vdev_state_clean(vd); | | 6884 | vdev_state_clean(vd); |
6885 | vdev_config_dirty(vd); | | 6885 | vdev_config_dirty(vd); |
6886 | } | | 6886 | } |
6887 | spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); | | 6887 | spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); |
6888 | spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); | | 6888 | spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); |
6889 | } | | 6889 | } |
6890 | spa_config_exit(spa, SCL_STATE, FTAG); | | 6890 | spa_config_exit(spa, SCL_STATE, FTAG); |
6891 | | | 6891 | |
6892 | tx = dmu_tx_create_assigned(dp, txg); | | 6892 | tx = dmu_tx_create_assigned(dp, txg); |
6893 | | | 6893 | |
6894 | spa->spa_sync_starttime = gethrtime(); | | 6894 | spa->spa_sync_starttime = gethrtime(); |
6895 | #ifdef illumos | | 6895 | #ifdef illumos |
6896 | VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, | | 6896 | VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, |
6897 | spa->spa_sync_starttime + spa->spa_deadman_synctime)); | | 6897 | spa->spa_sync_starttime + spa->spa_deadman_synctime)); |
6898 | #endif /* illumos */ | | 6898 | #endif /* illumos */ |
6899 | #ifdef __FreeBSD__ | | 6899 | #ifdef __FreeBSD__ |
6900 | #ifdef _KERNEL | | 6900 | #ifdef _KERNEL |
6901 | callout_schedule(&spa->spa_deadman_cycid, | | 6901 | callout_schedule(&spa->spa_deadman_cycid, |
6902 | hz * spa->spa_deadman_synctime / NANOSEC); | | 6902 | hz * spa->spa_deadman_synctime / NANOSEC); |
6903 | #endif | | 6903 | #endif |
6904 | #endif /* __FreeBSD__ */ | | 6904 | #endif /* __FreeBSD__ */ |
6905 | #ifdef __NetBSD__ | | 6905 | #ifdef __NetBSD__ |
6906 | #ifdef _KERNEL | | 6906 | #ifdef _KERNEL |
6907 | callout_schedule(&spa->spa_deadman_cycid, | | 6907 | callout_schedule(&spa->spa_deadman_cycid, |
6908 | hz * spa->spa_deadman_synctime / NANOSEC); | | 6908 | hz * spa->spa_deadman_synctime / NANOSEC); |
6909 | #endif | | 6909 | #endif |
6910 | #endif | | 6910 | #endif |
6911 | | | 6911 | |
6912 | /* | | 6912 | /* |
6913 | * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, | | 6913 | * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, |
6914 | * set spa_deflate if we have no raid-z vdevs. | | 6914 | * set spa_deflate if we have no raid-z vdevs. |
6915 | */ | | 6915 | */ |
6916 | if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && | | 6916 | if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && |
6917 | spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { | | 6917 | spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { |
6918 | int i; | | 6918 | int i; |
6919 | | | 6919 | |
6920 | for (i = 0; i < rvd->vdev_children; i++) { | | 6920 | for (i = 0; i < rvd->vdev_children; i++) { |
6921 | vd = rvd->vdev_child[i]; | | 6921 | vd = rvd->vdev_child[i]; |
6922 | if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) | | 6922 | if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) |
6923 | break; | | 6923 | break; |
6924 | } | | 6924 | } |
6925 | if (i == rvd->vdev_children) { | | 6925 | if (i == rvd->vdev_children) { |
6926 | spa->spa_deflate = TRUE; | | 6926 | spa->spa_deflate = TRUE; |
6927 | VERIFY(0 == zap_add(spa->spa_meta_objset, | | 6927 | VERIFY(0 == zap_add(spa->spa_meta_objset, |
6928 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, | | 6928 | DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, |
6929 | sizeof (uint64_t), 1, &spa->spa_deflate, tx)); | | 6929 | sizeof (uint64_t), 1, &spa->spa_deflate, tx)); |
6930 | } | | 6930 | } |
6931 | } | | 6931 | } |
6932 | | | 6932 | |
6933 | /* | | 6933 | /* |
6934 | * Set the top-level vdev's max queue depth. Evaluate each | | 6934 | * Set the top-level vdev's max queue depth. Evaluate each |
6935 | * top-level's async write queue depth in case it changed. | | 6935 | * top-level's async write queue depth in case it changed. |
6936 | * The max queue depth will not change in the middle of syncing | | 6936 | * The max queue depth will not change in the middle of syncing |
6937 | * out this txg. | | 6937 | * out this txg. |
6938 | */ | | 6938 | */ |
6939 | uint64_t queue_depth_total = 0; | | 6939 | uint64_t queue_depth_total = 0; |
6940 | for (int c = 0; c < rvd->vdev_children; c++) { | | 6940 | for (int c = 0; c < rvd->vdev_children; c++) { |
6941 | vdev_t *tvd = rvd->vdev_child[c]; | | 6941 | vdev_t *tvd = rvd->vdev_child[c]; |
6942 | metaslab_group_t *mg = tvd->vdev_mg; | | 6942 | metaslab_group_t *mg = tvd->vdev_mg; |
6943 | | | 6943 | |
6944 | if (mg == NULL || mg->mg_class != spa_normal_class(spa) || | | 6944 | if (mg == NULL || mg->mg_class != spa_normal_class(spa) || |
6945 | !metaslab_group_initialized(mg)) | | 6945 | !metaslab_group_initialized(mg)) |
6946 | continue; | | 6946 | continue; |
6947 | | | 6947 | |
6948 | /* | | 6948 | /* |
6949 | * It is safe to do a lock-free check here because only async | | 6949 | * It is safe to do a lock-free check here because only async |
6950 | * allocations look at mg_max_alloc_queue_depth, and async | | 6950 | * allocations look at mg_max_alloc_queue_depth, and async |
6951 | * allocations all happen from spa_sync(). | | 6951 | * allocations all happen from spa_sync(). |
6952 | */ | | 6952 | */ |
6953 | ASSERT0(refcount_count(&mg->mg_alloc_queue_depth)); | | 6953 | ASSERT0(refcount_count(&mg->mg_alloc_queue_depth)); |
6954 | mg->mg_max_alloc_queue_depth = max_queue_depth; | | 6954 | mg->mg_max_alloc_queue_depth = max_queue_depth; |
6955 | queue_depth_total += mg->mg_max_alloc_queue_depth; | | 6955 | queue_depth_total += mg->mg_max_alloc_queue_depth; |
6956 | } | | 6956 | } |
6957 | metaslab_class_t *mc = spa_normal_class(spa); | | 6957 | metaslab_class_t *mc = spa_normal_class(spa); |
6958 | ASSERT0(refcount_count(&mc->mc_alloc_slots)); | | 6958 | ASSERT0(refcount_count(&mc->mc_alloc_slots)); |
6959 | mc->mc_alloc_max_slots = queue_depth_total; | | 6959 | mc->mc_alloc_max_slots = queue_depth_total; |
6960 | mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled; | | 6960 | mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled; |
6961 | | | 6961 | |
6962 | ASSERT3U(mc->mc_alloc_max_slots, <=, | | 6962 | ASSERT3U(mc->mc_alloc_max_slots, <=, |
6963 | max_queue_depth * rvd->vdev_children); | | 6963 | max_queue_depth * rvd->vdev_children); |
6964 | | | 6964 | |
6965 | /* | | 6965 | /* |
6966 | * Iterate to convergence. | | 6966 | * Iterate to convergence. |
6967 | */ | | 6967 | */ |
6968 | do { | | 6968 | do { |
6969 | int pass = ++spa->spa_sync_pass; | | 6969 | int pass = ++spa->spa_sync_pass; |
6970 | | | 6970 | |
6971 | spa_sync_config_object(spa, tx); | | 6971 | spa_sync_config_object(spa, tx); |
6972 | spa_sync_aux_dev(spa, &spa->spa_spares, tx, | | 6972 | spa_sync_aux_dev(spa, &spa->spa_spares, tx, |
6973 | ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); | | 6973 | ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); |
6974 | spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, | | 6974 | spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, |
6975 | ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); | | 6975 | ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); |
6976 | spa_errlog_sync(spa, txg); | | 6976 | spa_errlog_sync(spa, txg); |
6977 | dsl_pool_sync(dp, txg); | | 6977 | dsl_pool_sync(dp, txg); |
6978 | | | 6978 | |
6979 | if (pass < zfs_sync_pass_deferred_free) { | | 6979 | if (pass < zfs_sync_pass_deferred_free) { |
6980 | spa_sync_frees(spa, free_bpl, tx); | | 6980 | spa_sync_frees(spa, free_bpl, tx); |
6981 | } else { | | 6981 | } else { |
6982 | /* | | 6982 | /* |
6983 | * We can not defer frees in pass 1, because | | 6983 | * We can not defer frees in pass 1, because |
6984 | * we sync the deferred frees later in pass 1. | | 6984 | * we sync the deferred frees later in pass 1. |
6985 | */ | | 6985 | */ |
6986 | ASSERT3U(pass, >, 1); | | 6986 | ASSERT3U(pass, >, 1); |
6987 | bplist_iterate(free_bpl, bpobj_enqueue_cb, | | 6987 | bplist_iterate(free_bpl, bpobj_enqueue_cb, |
6988 | &spa->spa_deferred_bpobj, tx); | | 6988 | &spa->spa_deferred_bpobj, tx); |
6989 | } | | 6989 | } |
6990 | | | 6990 | |
6991 | ddt_sync(spa, txg); | | 6991 | ddt_sync(spa, txg); |
6992 | dsl_scan_sync(dp, tx); | | 6992 | dsl_scan_sync(dp, tx); |
6993 | | | 6993 | |
6994 | while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) | | 6994 | while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) |
6995 | vdev_sync(vd, txg); | | 6995 | vdev_sync(vd, txg); |
6996 | | | 6996 | |
6997 | if (pass == 1) { | | 6997 | if (pass == 1) { |
6998 | spa_sync_upgrades(spa, tx); | | 6998 | spa_sync_upgrades(spa, tx); |
6999 | ASSERT3U(txg, >=, | | 6999 | ASSERT3U(txg, >=, |
7000 | spa->spa_uberblock.ub_rootbp.blk_birth); | | 7000 | spa->spa_uberblock.ub_rootbp.blk_birth); |
7001 | /* | | 7001 | /* |
7002 | * Note: We need to check if the MOS is dirty | | 7002 | * Note: We need to check if the MOS is dirty |
7003 | * because we could have marked the MOS dirty | | 7003 | * because we could have marked the MOS dirty |
7004 | * without updating the uberblock (e.g. if we | | 7004 | * without updating the uberblock (e.g. if we |
7005 | * have sync tasks but no dirty user data). We | | 7005 | * have sync tasks but no dirty user data). We |
7006 | * need to check the uberblock's rootbp because | | 7006 | * need to check the uberblock's rootbp because |
7007 | * it is updated if we have synced out dirty | | 7007 | * it is updated if we have synced out dirty |
7008 | * data (though in this case the MOS will most | | 7008 | * data (though in this case the MOS will most |
7009 | * likely also be dirty due to second order | | 7009 | * likely also be dirty due to second order |
7010 | * effects, we don't want to rely on that here). | | 7010 | * effects, we don't want to rely on that here). |
7011 | */ | | 7011 | */ |
7012 | if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && | | 7012 | if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && |
7013 | !dmu_objset_is_dirty(mos, txg)) { | | 7013 | !dmu_objset_is_dirty(mos, txg)) { |
7014 | /* | | 7014 | /* |
7015 | * Nothing changed on the first pass, | | 7015 | * Nothing changed on the first pass, |
7016 | * therefore this TXG is a no-op. Avoid | | 7016 | * therefore this TXG is a no-op. Avoid |
7017 | * syncing deferred frees, so that we | | 7017 | * syncing deferred frees, so that we |
7018 | * can keep this TXG as a no-op. | | 7018 | * can keep this TXG as a no-op. |
7019 | */ | | 7019 | */ |
7020 | ASSERT(txg_list_empty(&dp->dp_dirty_datasets, | | 7020 | ASSERT(txg_list_empty(&dp->dp_dirty_datasets, |
7021 | txg)); | | 7021 | txg)); |
7022 | ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | | 7022 | ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); |
7023 | ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); | | 7023 | ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); |
7024 | break; | | 7024 | break; |
7025 | } | | 7025 | } |
7026 | spa_sync_deferred_frees(spa, tx); | | 7026 | spa_sync_deferred_frees(spa, tx); |
7027 | } | | 7027 | } |
7028 | | | 7028 | |
7029 | } while (dmu_objset_is_dirty(mos, txg)); | | 7029 | } while (dmu_objset_is_dirty(mos, txg)); |
7030 | | | 7030 | |
7031 | if (!list_is_empty(&spa->spa_config_dirty_list)) { | | 7031 | if (!list_is_empty(&spa->spa_config_dirty_list)) { |
7032 | /* | | 7032 | /* |
7033 | * Make sure that the number of ZAPs for all the vdevs matches | | 7033 | * Make sure that the number of ZAPs for all the vdevs matches |
7034 | * the number of ZAPs in the per-vdev ZAP list. This only gets | | 7034 | * the number of ZAPs in the per-vdev ZAP list. This only gets |
7035 | * called if the config is dirty; otherwise there may be | | 7035 | * called if the config is dirty; otherwise there may be |
7036 | * outstanding AVZ operations that weren't completed in | | 7036 | * outstanding AVZ operations that weren't completed in |
7037 | * spa_sync_config_object. | | 7037 | * spa_sync_config_object. |
7038 | */ | | 7038 | */ |
7039 | uint64_t all_vdev_zap_entry_count; | | 7039 | uint64_t all_vdev_zap_entry_count; |
7040 | ASSERT0(zap_count(spa->spa_meta_objset, | | 7040 | ASSERT0(zap_count(spa->spa_meta_objset, |
7041 | spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); | | 7041 | spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); |
7042 | ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, | | 7042 | ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, |
7043 | all_vdev_zap_entry_count); | | 7043 | all_vdev_zap_entry_count); |
7044 | } | | 7044 | } |
7045 | | | 7045 | |
7046 | /* | | 7046 | /* |
7047 | * Rewrite the vdev configuration (which includes the uberblock) | | 7047 | * Rewrite the vdev configuration (which includes the uberblock) |
7048 | * to commit the transaction group. | | 7048 | * to commit the transaction group. |
7049 | * | | 7049 | * |
7050 | * If there are no dirty vdevs, we sync the uberblock to a few | | 7050 | * If there are no dirty vdevs, we sync the uberblock to a few |
7051 | * random top-level vdevs that are known to be visible in the | | 7051 | * random top-level vdevs that are known to be visible in the |
7052 | * config cache (see spa_vdev_add() for a complete description). | | 7052 | * config cache (see spa_vdev_add() for a complete description). |
7053 | * If there *are* dirty vdevs, sync the uberblock to all vdevs. | | 7053 | * If there *are* dirty vdevs, sync the uberblock to all vdevs. |
7054 | */ | | 7054 | */ |
7055 | for (;;) { | | 7055 | for (;;) { |
7056 | /* | | 7056 | /* |
7057 | * We hold SCL_STATE to prevent vdev open/close/etc. | | 7057 | * We hold SCL_STATE to prevent vdev open/close/etc. |
7058 | * while we're attempting to write the vdev labels. | | 7058 | * while we're attempting to write the vdev labels. |
7059 | */ | | 7059 | */ |
7060 | spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); | | 7060 | spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); |
7061 | | | 7061 | |
7062 | if (list_is_empty(&spa->spa_config_dirty_list)) { | | 7062 | if (list_is_empty(&spa->spa_config_dirty_list)) { |
7063 | vdev_t *svd[SPA_DVAS_PER_BP]; | | 7063 | vdev_t *svd[SPA_DVAS_PER_BP]; |
7064 | int svdcount = 0; | | 7064 | int svdcount = 0; |
7065 | int children = rvd->vdev_children; | | 7065 | int children = rvd->vdev_children; |
7066 | int c0 = spa_get_random(children); | | 7066 | int c0 = spa_get_random(children); |
7067 | | | 7067 | |
7068 | for (int c = 0; c < children; c++) { | | 7068 | for (int c = 0; c < children; c++) { |
7069 | vd = rvd->vdev_child[(c0 + c) % children]; | | 7069 | vd = rvd->vdev_child[(c0 + c) % children]; |
7070 | if (vd->vdev_ms_array == 0 || vd->vdev_islog) | | 7070 | if (vd->vdev_ms_array == 0 || vd->vdev_islog) |
7071 | continue; | | 7071 | continue; |
7072 | svd[svdcount++] = vd; | | 7072 | svd[svdcount++] = vd; |
7073 | if (svdcount == SPA_DVAS_PER_BP) | | 7073 | if (svdcount == SPA_DVAS_PER_BP) |
7074 | break; | | 7074 | break; |
7075 | } | | 7075 | } |
7076 | error = vdev_config_sync(svd, svdcount, txg); | | 7076 | error = vdev_config_sync(svd, svdcount, txg); |
7077 | } else { | | 7077 | } else { |
7078 | error = vdev_config_sync(rvd->vdev_child, | | 7078 | error = vdev_config_sync(rvd->vdev_child, |
7079 | rvd->vdev_children, txg); | | 7079 | rvd->vdev_children, txg); |
7080 | } | | 7080 | } |
7081 | | | 7081 | |
7082 | if (error == 0) | | 7082 | if (error == 0) |
7083 | spa->spa_last_synced_guid = rvd->vdev_guid; | | 7083 | spa->spa_last_synced_guid = rvd->vdev_guid; |
7084 | | | 7084 | |
7085 | spa_config_exit(spa, SCL_STATE, FTAG); | | 7085 | spa_config_exit(spa, SCL_STATE, FTAG); |
7086 | | | 7086 | |
7087 | if (error == 0) | | 7087 | if (error == 0) |
7088 | break; | | 7088 | break; |
7089 | zio_suspend(spa, NULL); | | 7089 | zio_suspend(spa, NULL); |
7090 | zio_resume_wait(spa); | | 7090 | zio_resume_wait(spa); |
7091 | } | | 7091 | } |
7092 | dmu_tx_commit(tx); | | 7092 | dmu_tx_commit(tx); |
7093 | | | 7093 | |
7094 | #ifdef illumos | | 7094 | #ifdef illumos |
7095 | VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); | | 7095 | VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); |
7096 | #endif /* illumos */ | | 7096 | #endif /* illumos */ |
7097 | #ifdef __FreeBSD__ | | 7097 | #ifdef __FreeBSD__ |
7098 | #ifdef _KERNEL | | 7098 | #ifdef _KERNEL |
7099 | callout_drain(&spa->spa_deadman_cycid); | | 7099 | callout_drain(&spa->spa_deadman_cycid); |
7100 | #endif | | 7100 | #endif |
7101 | #endif /* __FreeBSD__ */ | | 7101 | #endif /* __FreeBSD__ */ |
7102 | #ifdef __NetBSD__ | | 7102 | #ifdef __NetBSD__ |
7103 | #ifdef _KERNEL | | 7103 | #ifdef _KERNEL |
7104 | callout_drain(&spa->spa_deadman_cycid); | | 7104 | callout_drain(&spa->spa_deadman_cycid); |
7105 | #endif | | 7105 | #endif |
7106 | #endif /* __NetBSD__ */ | | 7106 | #endif /* __NetBSD__ */ |
7107 | | | 7107 | |
7108 | /* | | 7108 | /* |
7109 | * Clear the dirty config list. | | 7109 | * Clear the dirty config list. |
7110 | */ | | 7110 | */ |
7111 | while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) | | 7111 | while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) |
7112 | vdev_config_clean(vd); | | 7112 | vdev_config_clean(vd); |
7113 | | | 7113 | |
7114 | /* | | 7114 | /* |
7115 | * Now that the new config has synced transactionally, | | 7115 | * Now that the new config has synced transactionally, |
7116 | * let it become visible to the config cache. | | 7116 | * let it become visible to the config cache. |
7117 | */ | | 7117 | */ |
7118 | if (spa->spa_config_syncing != NULL) { | | 7118 | if (spa->spa_config_syncing != NULL) { |
7119 | spa_config_set(spa, spa->spa_config_syncing); | | 7119 | spa_config_set(spa, spa->spa_config_syncing); |
7120 | spa->spa_config_txg = txg; | | 7120 | spa->spa_config_txg = txg; |
7121 | spa->spa_config_syncing = NULL; | | 7121 | spa->spa_config_syncing = NULL; |
7122 | } | | 7122 | } |
7123 | | | 7123 | |
7124 | dsl_pool_sync_done(dp, txg); | | 7124 | dsl_pool_sync_done(dp, txg); |
7125 | | | 7125 | |
7126 | mutex_enter(&spa->spa_alloc_lock); | | 7126 | mutex_enter(&spa->spa_alloc_lock); |
7127 | VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); | | 7127 | VERIFY0(avl_numnodes(&spa->spa_alloc_tree)); |
7128 | mutex_exit(&spa->spa_alloc_lock); | | 7128 | mutex_exit(&spa->spa_alloc_lock); |
7129 | | | 7129 | |
7130 | /* | | 7130 | /* |
7131 | * Update usable space statistics. | | 7131 | * Update usable space statistics. |
7132 | */ | | 7132 | */ |
7133 | while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) | | 7133 | while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) |
7134 | vdev_sync_done(vd, txg); | | 7134 | vdev_sync_done(vd, txg); |
7135 | | | 7135 | |
7136 | spa_update_dspace(spa); | | 7136 | spa_update_dspace(spa); |
7137 | | | 7137 | |
7138 | /* | | 7138 | /* |
7139 | * It had better be the case that we didn't dirty anything | | 7139 | * It had better be the case that we didn't dirty anything |
7140 | * since vdev_config_sync(). | | 7140 | * since vdev_config_sync(). |
7141 | */ | | 7141 | */ |
7142 | ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); | | 7142 | ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); |
7143 | ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); | | 7143 | ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); |
7144 | ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); | | 7144 | ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); |
7145 | | | 7145 | |
7146 | spa->spa_sync_pass = 0; | | 7146 | spa->spa_sync_pass = 0; |
7147 | | | 7147 | |
7148 | /* | | 7148 | /* |
7149 | * Update the last synced uberblock here. We want to do this at | | 7149 | * Update the last synced uberblock here. We want to do this at |
7150 | * the end of spa_sync() so that consumers of spa_last_synced_txg() | | 7150 | * the end of spa_sync() so that consumers of spa_last_synced_txg() |
7151 | * will be guaranteed that all the processing associated with | | 7151 | * will be guaranteed that all the processing associated with |
7152 | * that txg has been completed. | | 7152 | * that txg has been completed. |
7153 | */ | | 7153 | */ |
7154 | spa->spa_ubsync = spa->spa_uberblock; | | 7154 | spa->spa_ubsync = spa->spa_uberblock; |
7155 | spa_config_exit(spa, SCL_CONFIG, FTAG); | | 7155 | spa_config_exit(spa, SCL_CONFIG, FTAG); |
7156 | | | 7156 | |
7157 | spa_handle_ignored_writes(spa); | | 7157 | spa_handle_ignored_writes(spa); |
7158 | | | 7158 | |
7159 | /* | | 7159 | /* |
7160 | * If any async tasks have been requested, kick them off. | | 7160 | * If any async tasks have been requested, kick them off. |
7161 | */ | | 7161 | */ |
7162 | spa_async_dispatch(spa); | | 7162 | spa_async_dispatch(spa); |
7163 | spa_async_dispatch_vd(spa); | | 7163 | spa_async_dispatch_vd(spa); |
7164 | } | | 7164 | } |
7165 | | | 7165 | |
7166 | /* | | 7166 | /* |
7167 | * Sync all pools. We don't want to hold the namespace lock across these | | 7167 | * Sync all pools. We don't want to hold the namespace lock across these |
7168 | * operations, so we take a reference on the spa_t and drop the lock during the | | 7168 | * operations, so we take a reference on the spa_t and drop the lock during the |
7169 | * sync. | | 7169 | * sync. |
7170 | */ | | 7170 | */ |
7171 | void | | 7171 | void |
7172 | spa_sync_allpools(void) | | 7172 | spa_sync_allpools(void) |
7173 | { | | 7173 | { |
7174 | spa_t *spa = NULL; | | 7174 | spa_t *spa = NULL; |
7175 | mutex_enter(&spa_namespace_lock); | | 7175 | mutex_enter(&spa_namespace_lock); |
7176 | while ((spa = spa_next(spa)) != NULL) { | | 7176 | while ((spa = spa_next(spa)) != NULL) { |
7177 | if (spa_state(spa) != POOL_STATE_ACTIVE || | | 7177 | if (spa_state(spa) != POOL_STATE_ACTIVE || |
7178 | !spa_writeable(spa) || spa_suspended(spa)) | | 7178 | !spa_writeable(spa) || spa_suspended(spa)) |
7179 | continue; | | 7179 | continue; |
7180 | spa_open_ref(spa, FTAG); | | 7180 | spa_open_ref(spa, FTAG); |
7181 | mutex_exit(&spa_namespace_lock); | | 7181 | mutex_exit(&spa_namespace_lock); |
7182 | txg_wait_synced(spa_get_dsl(spa), 0); | | 7182 | txg_wait_synced(spa_get_dsl(spa), 0); |
7183 | mutex_enter(&spa_namespace_lock); | | 7183 | mutex_enter(&spa_namespace_lock); |
7184 | spa_close(spa, FTAG); | | 7184 | spa_close(spa, FTAG); |
7185 | } | | 7185 | } |
7186 | mutex_exit(&spa_namespace_lock); | | 7186 | mutex_exit(&spa_namespace_lock); |
7187 | } | | 7187 | } |
7188 | | | 7188 | |
7189 | /* | | 7189 | /* |
7190 | * ========================================================================== | | 7190 | * ========================================================================== |
7191 | * Miscellaneous routines | | 7191 | * Miscellaneous routines |
7192 | * ========================================================================== | | 7192 | * ========================================================================== |
7193 | */ | | 7193 | */ |
7194 | | | 7194 | |
7195 | /* | | 7195 | /* |
7196 | * Remove all pools in the system. | | 7196 | * Remove all pools in the system. |
7197 | */ | | 7197 | */ |
7198 | void | | 7198 | void |
7199 | spa_evict_all(void) | | 7199 | spa_evict_all(void) |
7200 | { | | 7200 | { |
7201 | spa_t *spa; | | 7201 | spa_t *spa; |
7202 | | | 7202 | |
7203 | /* | | 7203 | /* |
7204 | * Remove all cached state. All pools should be closed now, | | 7204 | * Remove all cached state. All pools should be closed now, |
7205 | * so every spa in the AVL tree should be unreferenced. | | 7205 | * so every spa in the AVL tree should be unreferenced. |
7206 | */ | | 7206 | */ |
7207 | mutex_enter(&spa_namespace_lock); | | 7207 | mutex_enter(&spa_namespace_lock); |
7208 | while ((spa = spa_next(NULL)) != NULL) { | | 7208 | while ((spa = spa_next(NULL)) != NULL) { |
7209 | /* | | 7209 | /* |
7210 | * Stop async tasks. The async thread may need to detach | | 7210 | * Stop async tasks. The async thread may need to detach |
7211 | * a device that's been replaced, which requires grabbing | | 7211 | * a device that's been replaced, which requires grabbing |
7212 | * spa_namespace_lock, so we must drop it here. | | 7212 | * spa_namespace_lock, so we must drop it here. |
7213 | */ | | 7213 | */ |
7214 | spa_open_ref(spa, FTAG); | | 7214 | spa_open_ref(spa, FTAG); |
7215 | mutex_exit(&spa_namespace_lock); | | 7215 | mutex_exit(&spa_namespace_lock); |
7216 | spa_async_suspend(spa); | | 7216 | spa_async_suspend(spa); |
7217 | mutex_enter(&spa_namespace_lock); | | 7217 | mutex_enter(&spa_namespace_lock); |
7218 | spa_close(spa, FTAG); | | 7218 | spa_close(spa, FTAG); |
7219 | | | 7219 | |
7220 | if (spa->spa_state != POOL_STATE_UNINITIALIZED) { | | 7220 | if (spa->spa_state != POOL_STATE_UNINITIALIZED) { |
7221 | spa_unload(spa); | | 7221 | spa_unload(spa); |
7222 | spa_deactivate(spa); | | 7222 | spa_deactivate(spa); |
7223 | } | | 7223 | } |
7224 | spa_remove(spa); | | 7224 | spa_remove(spa); |
7225 | } | | 7225 | } |
7226 | mutex_exit(&spa_namespace_lock); | | 7226 | mutex_exit(&spa_namespace_lock); |
7227 | } | | 7227 | } |
7228 | | | 7228 | |
7229 | vdev_t * | | 7229 | vdev_t * |
7230 | spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) | | 7230 | spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) |
7231 | { | | 7231 | { |
7232 | vdev_t *vd; | | 7232 | vdev_t *vd; |
7233 | int i; | | 7233 | int i; |
7234 | | | 7234 | |
7235 | if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) | | 7235 | if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) |
7236 | return (vd); | | 7236 | return (vd); |
7237 | | | 7237 | |
7238 | if (aux) { | | 7238 | if (aux) { |
7239 | for (i = 0; i < spa->spa_l2cache.sav_count; i++) { | | 7239 | for (i = 0; i < spa->spa_l2cache.sav_count; i++) { |
7240 | vd = spa->spa_l2cache.sav_vdevs[i]; | | 7240 | vd = spa->spa_l2cache.sav_vdevs[i]; |
7241 | if (vd->vdev_guid == guid) | | 7241 | if (vd->vdev_guid == guid) |
7242 | return (vd); | | 7242 | return (vd); |
7243 | } | | 7243 | } |
7244 | | | 7244 | |
7245 | for (i = 0; i < spa->spa_spares.sav_count; i++) { | | 7245 | for (i = 0; i < spa->spa_spares.sav_count; i++) { |
7246 | vd = spa->spa_spares.sav_vdevs[i]; | | 7246 | vd = spa->spa_spares.sav_vdevs[i]; |
7247 | if (vd->vdev_guid == guid) | | 7247 | if (vd->vdev_guid == guid) |
7248 | return (vd); | | 7248 | return (vd); |
7249 | } | | 7249 | } |
7250 | } | | 7250 | } |
7251 | | | 7251 | |
7252 | return (NULL); | | 7252 | return (NULL); |
7253 | } | | 7253 | } |
7254 | | | 7254 | |
/*
 * Raise the pool's on-disk version to 'version' and wait for the change
 * to be synced out.  The pool must be writable and 'version' must be at
 * least the current uberblock version (downgrades are not allowed).
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	ASSERT(spa_writeable(spa));

	/* Take all config locks as writer while mutating the uberblock. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
	ASSERT3U(version, >=, spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	/* Mark the root vdev dirty so the new version is written to labels. */
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Block until the version bump is durably on disk. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}
7277 | | | 7277 | |
7278 | boolean_t | | 7278 | boolean_t |
7279 | spa_has_spare(spa_t *spa, uint64_t guid) | | 7279 | spa_has_spare(spa_t *spa, uint64_t guid) |
7280 | { | | 7280 | { |
7281 | int i; | | 7281 | int i; |
7282 | uint64_t spareguid; | | 7282 | uint64_t spareguid; |
7283 | spa_aux_vdev_t *sav = &spa->spa_spares; | | 7283 | spa_aux_vdev_t *sav = &spa->spa_spares; |
7284 | | | 7284 | |
7285 | for (i = 0; i < sav->sav_count; i++) | | 7285 | for (i = 0; i < sav->sav_count; i++) |
7286 | if (sav->sav_vdevs[i]->vdev_guid == guid) | | 7286 | if (sav->sav_vdevs[i]->vdev_guid == guid) |
7287 | return (B_TRUE); | | 7287 | return (B_TRUE); |
7288 | | | 7288 | |
7289 | for (i = 0; i < sav->sav_npending; i++) { | | 7289 | for (i = 0; i < sav->sav_npending; i++) { |
7290 | if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, | | 7290 | if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, |
7291 | &spareguid) == 0 && spareguid == guid) | | 7291 | &spareguid) == 0 && spareguid == guid) |
7292 | return (B_TRUE); | | 7292 | return (B_TRUE); |
7293 | } | | 7293 | } |
7294 | | | 7294 | |
7295 | return (B_FALSE); | | 7295 | return (B_FALSE); |