Utility function for distributing computations among a pool of workers for parallel processing.

distribute_load(x, n = get_number_of_threads())

Arguments

x

integer number of items to process.

n

integer number of threads. Defaults to get_number_of_threads().

Value

list object containing an integer vector of job indices for each worker (see Details).

Details

This function returns a list containing an element for each worker. Each element contains an integer vector specifying the indices that the worker should process.
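
For example, splitting ten jobs between two workers assigns indices 1 to 5 to the first worker and indices 6 to 10 to the second. This is consistent with the final example below; the behavior when x is not a multiple of n is not shown here.

distribute_load(10, n = 2)
#> [[1]]
#> [1] 1 2 3 4 5
#>
#> [[2]]
#> [1]  6  7  8  9 10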

Examples

# imagine that we have 10 jobs that need processing. For simplicity,
# our jobs will involve adding 1 to each element in 1:10.
values <- 1:10

# we could complete this processing using the following vectorized code
result <- 1 + 1:10
print(result)
#> [1]  2  3  4  5  6  7  8  9 10 11
# however, if our jobs were complex then we would be better off using
# functionals
result <- lapply(1:10, function(x) x + 1)
print(result)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 4
#>
#> [[4]]
#> [1] 5
#>
#> [[5]]
#> [1] 6
#>
#> [[6]]
#> [1] 7
#>
#> [[7]]
#> [1] 8
#>
#> [[8]]
#> [1] 9
#>
#> [[9]]
#> [1] 10
#>
#> [[10]]
#> [1] 11
#>
# we could do one better, and use the "plyr" package to handle the
# processing
result <- plyr::llply(1:10, function(x) x + 1)
print(result)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 4
#>
#> [[4]]
#> [1] 5
#>
#> [[5]]
#> [1] 6
#>
#> [[6]]
#> [1] 7
#>
#> [[7]]
#> [1] 8
#>
#> [[8]]
#> [1] 9
#>
#> [[9]]
#> [1] 10
#>
#> [[10]]
#> [1] 11
#>
# we could also use the parallel processing options available through "plyr"
# to use more computation resources to complete the jobs (note that since
# these jobs are very quick to process this is actually slower)
cl <- parallel::makeCluster(2, "PSOCK")
doParallel::registerDoParallel(cl)
result <- plyr::llply(1:10, function(x) x + 1, .parallel = TRUE)
#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
parallel::stopCluster(cl)
print(result)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 4
#>
#> [[4]]
#> [1] 5
#>
#> [[5]]
#> [1] 6
#>
#> [[6]]
#> [1] 7
#>
#> [[7]]
#> [1] 8
#>
#> [[8]]
#> [1] 9
#>
#> [[9]]
#> [1] 10
#>
#> [[10]]
#> [1] 11
#>
# however, this approach iterates over each element individually. We could
# instead use the distribute_load function to split the N jobs up into K
# super jobs, and evaluate each super job using vectorized code
x <- 1:10
cl <- parallel::makeCluster(2, "PSOCK")
parallel::clusterExport(cl, 'x', envir = environment())
doParallel::registerDoParallel(cl)
l <- distribute_load(length(x), n = 2)
result <- plyr::llply(l, function(i) x[i] + 1, .parallel = TRUE)
#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
parallel::stopCluster(cl)
print(result)
#> [[1]]
#> [1] 2 3 4 5 6
#>
#> [[2]]
#> [1]  7  8  9 10 11
#>
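
# as a point of reference, the contiguous chunking shown above can be
# approximated in base R. The sketch below is hypothetical:
# distribute_load_sketch is not part of the package, and the package's
# exact handling of remainders may differ. Note that
# parallel::splitIndices() offers similar functionality.
distribute_load_sketch <- function(x, n) {
  # assign each of the x indices to one of n contiguous bins, then
  # split the indices into a list with one element per bin
  unname(split(seq_len(x), cut(seq_len(x), breaks = n, labels = FALSE)))
}
distribute_load_sketch(10, 2)
#> [[1]]
#> [1] 1 2 3 4 5
#>
#> [[2]]
#> [1]  6  7  8  9 10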