RandState
// When kernels call curand_init(), its `seed` and `offset` arguments
// come from this struct. All kernels share the same seed and offset.
struct CudaRandState {
  // The default value for the seed is taken from
  // https://github.com/pytorch/pytorch/blob/master/c10/core/GeneratorImpl.h#L56
  //
  // It has a good distribution of 0s and 1s in its bit representation.
  uint64_t seed = 67280421310721u;

  // Passed as the `offset` argument of curand_init(); starts at 0.
  uint64_t offset = 0;
};
// Random state used when generating numbers on the CPU.
struct CpuRandState {
  // Starts as the seed a default-constructed std::mt19937 uses, so it
  // agrees with the initial state of `generator`.
  uint64_t seed = std::mt19937::default_seed;

  // Default-constructed Mersenne Twister engine.
  std::mt19937 generator;
};
GetRandState
/* Return the random state kept for the device associated with `context`.
 *
 * One state per device id is stored in a function-local static array
 * with `kMaxNumGpus` entries.
 *
 * @param [in] context  Its device id selects the state. The id must
 *                      satisfy 0 <= id < kMaxNumGpus, otherwise the
 *                      check fails.
 *
 * @return  Return a reference to the state of that device.
 */
static CudaRandState &GetCudaRandState(ContextPtr context) {
  int32_t device_id = context->GetDeviceId();
  // Lower bound: a negative id would index before the start of
  // `rand_states`.
  K2_CHECK_LT(-1, device_id);
  K2_CHECK_LT(device_id, kMaxNumGpus);

  static CudaRandState rand_states[kMaxNumGpus];
  return rand_states[device_id];
}
RandCpu
template <typename T, typename Distribution>
static void RandCpu(int32\_t dim, T low, T high, T *out) {
Distribution distribution(low, high);
auto &generator = GetCpuRandState().generator;
for (int32\_t i = 0; i != dim; ++i) {
out[i] = distribution(generator);
}
}
Seed
GetSeed
/* Get the current seed of the device associated with `context`.
*
* @param [in] context It specifies the device whose seed is to be returned.
* It can be either a CPU context or a CUDA context.
*
* @return Return the seed of the device associated with the given `context`.
*
* TODO(fangjun): we may not need it.
*/
uint64\_t GetSeed(ContextPtr context) {
DeviceType device_type = context->GetDeviceType();
if (device_type == kCuda) return GetCudaRandState(context).seed;
K2_CHECK_EQ(device_type, kCpu);
return GetCpuRandState().seed;
}
SetSeed
/* Set the seed of the device associated with the given `context`.
*
* @param [in] context It specifies the device whose seed is to be set.
* It can be either a CPU context or a CUDA context.
*
* @param [in] seed The target seed.
*/
void SetSeed(ContextPtr context, uint64\_t seed) {
DeviceType device_type = context->GetDeviceType();
if (device_type == kCuda) {
// TODO(fangjun): we may need a lock here
CudaRandState &state = GetCudaRandState(context);
state.seed = seed;
state.offset = 0;
return;
}
K2_CHECK_EQ(device_type, kCpu);
CpuRandState &state = GetCpuRandState();
state.seed = seed;
state.generator.seed(seed);
}
Rand
/* Fill the given array with random numbers from a uniform distribution on
 * the interval [low, high).
 *
 * low is inclusive and high is exclusive.
 *
 * `T` can be `float`, `double`, or `int32_t`. This is the specialization
 * for `float`.
 *
 * @param [in]  context     It specifies the device on which
 *                          `array_data` resides.
 * @param [in]  low         The lower bound of the interval (inclusive).
 *                          It must be less than `high`.
 * @param [in]  high        The upper bound of the interval (exclusive).
 * @param [in]  dim         Number of elements in the output array.
 *                          Nothing is done if it is 0.
 * @param [out] array_data  Pointer to the beginning of the output array.
 */
template <>
void Rand<float>(ContextPtr context, float low, float high, int32_t dim,
                 float *array_data) {
  K2_CHECK_LT(low, high);
  if (dim == 0) return;

  DeviceType device_type = context->GetDeviceType();
  if (device_type == kCpu) {
    RandCpu<float, std::uniform_real_distribution<float>>(dim, low, high,
                                                          array_data);
    return;
  }

  K2_CHECK_EQ(device_type, kCuda);
#ifdef K2_WITH_CUDA
  CudaRandState &state = GetCudaRandState(context);
  float range = high - low;
  // `[=]` copies the struct referred to by `state` into the closure, so
  // the kernel uses the seed and offset as they are at this point.
  auto generate_rand_lambda_float = [=] __device__(int32_t i) {
    curandStatePhilox4_32_10_t philox_state;
    curand_init(state.seed,
                i,  // sequence
                state.offset, &philox_state);

    float4 r = curand_uniform4(&philox_state);

    // curand_uniform4() returns a number in (0, 1],
    // we want to transform it to [0, 1)
    //
    // CAUTION: `1 - r.x` is not used here as it may be rounded up to 1
    // when `r.x` is close to 0
    float t = (r.x == 1.0f) ? 0.0f : r.x;

    // Even with t < 1, `t * range + low` can round up to `high` in float
    // arithmetic (e.g. low = 1, high = 2, t = 1 - 2^-24). Map such values
    // back to `low` to keep the result inside [low, high).
    float value = t * range + low;
    array_data[i] = (value < high) ? value : low;
  };
  EvalDevice(context, dim, generate_rand_lambda_float);
  // Advance the offset for the next call.
  // NOTE(review): presumably 4 because curand_uniform4() yields four
  // values per call -- confirm.
  state.offset += 4;
#else
  K2_LOG(FATAL) << "Unreachable code";
#endif
}
/* Overwrite every element of `array` with a random number drawn from a
 * uniform distribution on the interval [low, high).
 *
 * low is inclusive and high is exclusive.
 *
 * `T` can be `float`, `double`, or `int32_t`.
 *
 * The work is forwarded to the pointer-based overload of Rand(), using
 * the context, dimension and data pointer of `array`.
 *
 * @param [in]  low    The lower bound of the interval (inclusive).
 * @param [in]  high   The upper bound of the interval (exclusive).
 * @param [out] array  The array is modified in-place.
 */
template <typename T>
void Rand(T low, T high, Array1<T> *array) {
  const auto &array_context = array->Context();
  auto num_elements = array->Dim();
  auto *data = array->Data();
  Rand(array_context, low, high, num_elements, data);
}
/* Returns an array filled with random numbers from a uniform distribution on
 * the interval [low, high).
 *
 * low is inclusive and high is exclusive.
 *
 * `T` can be `float`, `double`, or `int32_t`.
 *
 * A new array of dimension `dim` is created on `context` and then filled
 * in place.
 *
 * @param [in] context  It specifies the device on which the random
 *                      numbers are generated.
 * @param [in] low      The lower bound of the interval (inclusive).
 * @param [in] high     The upper bound of the interval (exclusive).
 * @param [in] dim      The dimension of the returned array.
 *
 * @return  Return the newly created array.
 */
template <typename T>
Array1<T> Rand(ContextPtr context, T low, T high, int32_t dim) {
  Array1<T> ans(context, dim);
  Rand(low, high, &ans);
  return ans;
}
