Utility function for distributing computations among a pool of workers for parallel processing.

distribute_load(x, n = 1)

Arguments

x

integer number of items to process.

n

integer number of threads (workers) among which to distribute the items.

Value

list object containing an integer vector of item indices for each worker.

Details

This function returns a list containing an element for each worker. Each element contains an integer vector specifying the indices of the items that the worker should process.
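
For example, a call with ten items and two workers should yield a list of two index vectors. A minimal sketch, inferred from the final example below:

distribute_load(10, n = 2)
#> [[1]]
#> [1] 1 2 3 4 5
#> 
#> [[2]]
#> [1]  6  7  8  9 10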

Examples

# \dontrun{

# imagine that we have 10 jobs that need processing. For simplicity,
# our jobs will involve adding 1 to each element in 1:10.
values <- 1:10

# we could complete this processing using the following vectorized code
result <- 1 + 1:10
print(result)
#>  [1]  2  3  4  5  6  7  8  9 10 11

# however, if our jobs were more complex, we would be better off using
# functionals
result <- lapply(1:10, function(x) x + 1)
print(result)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 4
#> 
#> [[4]]
#> [1] 5
#> 
#> [[5]]
#> [1] 6
#> 
#> [[6]]
#> [1] 7
#> 
#> [[7]]
#> [1] 8
#> 
#> [[8]]
#> [1] 9
#> 
#> [[9]]
#> [1] 10
#> 
#> [[10]]
#> [1] 11
#> 

# we could do one better and use the "plyr" package to handle the
# processing
result <- plyr::llply(1:10, function(x) x + 1)
print(result)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 4
#> 
#> [[4]]
#> [1] 5
#> 
#> [[5]]
#> [1] 6
#> 
#> [[6]]
#> [1] 7
#> 
#> [[7]]
#> [1] 8
#> 
#> [[8]]
#> [1] 9
#> 
#> [[9]]
#> [1] 10
#> 
#> [[10]]
#> [1] 11
#> 

# we could also use the parallel processing options available through "plyr"
# to use more computational resources to complete the jobs (note that since
# these jobs are very quick to process, this is actually slower)
cl <- parallel::makeCluster(2, "PSOCK")
doParallel::registerDoParallel(cl)
result <- plyr::llply(1:10, function(x) x + 1, .parallel = TRUE)
#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
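# (the warnings above are emitted by plyr's parallel backend when it
# inspects the anonymous function; they do not affect the results here)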
parallel::stopCluster(cl)
print(result)
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 4
#> 
#> [[4]]
#> [1] 5
#> 
#> [[5]]
#> [1] 6
#> 
#> [[6]]
#> [1] 7
#> 
#> [[7]]
#> [1] 8
#> 
#> [[8]]
#> [1] 9
#> 
#> [[9]]
#> [1] 10
#> 
#> [[10]]
#> [1] 11
#> 

# however, this approach iterates over each element individually. We could
# instead use the distribute_load() function to split the N jobs into K
# super jobs, and evaluate each super job using vectorized code.
x <- 1:10
cl <- parallel::makeCluster(2, "PSOCK")
parallel::clusterExport(cl, 'x', envir = environment())
doParallel::registerDoParallel(cl)
l <- distribute_load(length(x), n = 2)
result <- plyr::llply(l, function(i) x[i] + 1, .parallel = TRUE)
#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
parallel::stopCluster(cl)
print(result)
#> [[1]]
#> [1] 2 3 4 5 6
#> 
#> [[2]]
#> [1]  7  8  9 10 11
#> 
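
# as an illustrative sanity check (not part of the original example), the
# indices assigned to the workers should cover every job exactly once
stopifnot(setequal(unlist(l), seq_along(x)))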
# }