k2 之 rand

RandState

  
// Random-number state used by CUDA kernels.
//
// When kernels call curand_init(), its `seed` and `offset` arguments are
// taken from this struct. All kernels share the same seed and offset.
struct CudaRandState {
  // The default value for seed is from
  // https://github.com/pytorch/pytorch/blob/master/c10/core/GeneratorImpl.h#L56
  //
  // It has a good distribution of 0s and 1s in its bit representation.
  uint64_t seed = 67280421310721u;

  // Offset passed to curand_init(); it starts at 0.
  uint64_t offset = 0;
};
  
// Random-number state used on the CPU.
struct CpuRandState {
  // Seed recorded for `generator`. The default is the same seed that a
  // default-constructed std::mt19937 uses, so both members start out
  // consistent with each other.
  uint64_t seed = std::mt19937::default_seed;

  // Engine that produces the random numbers on the CPU.
  std::mt19937 generator;
};

GetRandState

  
/* Return the random state of the CUDA device associated with `context`.
 *
 * One CudaRandState per device id is kept in a function-local static array
 * with kMaxNumGpus entries. The returned reference points into that array,
 * so changes made through it are visible to later calls.
 *
 * @param [in] context  Its device id selects the entry to return. The id
 *                      must satisfy 0 <= id < kMaxNumGpus.
 *
 * @return  Return a mutable reference to the state of that device.
 */
static CudaRandState &GetCudaRandState(ContextPtr context) {
  int32_t device_id = context->GetDeviceId();
  // Reject negative ids as well: they would index before the array start.
  K2_CHECK_LT(-1, device_id);
  K2_CHECK_LT(device_id, kMaxNumGpus);

  static CudaRandState rand_states[kMaxNumGpus];
  return rand_states[device_id];
}

RandCpu

  
template <typename T, typename Distribution>  
static void RandCpu(int32\_t dim, T low, T high, T *out) {  
  Distribution distribution(low, high);  
  auto &generator = GetCpuRandState().generator;  
  
  for (int32\_t i = 0; i != dim; ++i) {  
    out[i] = distribution(generator);  
  }  
}  

Seed

GetSeed

  
/* Get the current seed of the device associated with `context`.  
 *  
 * @param [in] context  It specifies the device whose seed is to be returned.  
 *                      It can be either a CPU context or a CUDA context.  
 *  
 * @return  Return the seed of the device associated with the given `context`.  
 *  
 * TODO(fangjun): we may not need it.  
 */  
uint64\_t GetSeed(ContextPtr context) {  
  DeviceType device_type = context->GetDeviceType();  
  if (device_type == kCuda) return GetCudaRandState(context).seed;  
  
  K2_CHECK_EQ(device_type, kCpu);  
  return GetCpuRandState().seed;  
}  

SetSeed

  
/* Set the seed of the device associated with the given `context`.  
 *  
 * @param [in] context  It specifies the device whose seed is to be set.  
 *                      It can be either a CPU context or a CUDA context.  
 *  
 * @param [in] seed     The target seed.  
 */  
void SetSeed(ContextPtr context, uint64\_t seed) {  
  DeviceType device_type = context->GetDeviceType();  
  if (device_type == kCuda) {  
    // TODO(fangjun): we may need a lock here  
    CudaRandState &state = GetCudaRandState(context);  
    state.seed = seed;  
    state.offset = 0;  
    return;  
  }  
  
  K2_CHECK_EQ(device_type, kCpu);  
  CpuRandState &state = GetCpuRandState();  
  state.seed = seed;  
  state.generator.seed(seed);  
}  

Rand

  
/* Fill the given array with random numbers from a uniform distribution on
 * the interval [low, high).
 *
 * low is inclusive and high is exclusive.
 *
 * This is the `float` specialization of `Rand`. For a CPU context the
 * numbers come from `RandCpu` with `std::uniform_real_distribution<float>`.
 * For a CUDA context (only when compiled with K2_WITH_CUDA) element `i` is
 * produced by a Philox generator initialized with the seed of the device,
 * sequence number `i`, and the current offset of the device.
 *
 * @param [in]  context      It specifies the device on which
 *                           `array_data` resides
 * @param [in]  low          The lower bound of the interval (inclusive).
 *                           It must be less than `high`.
 * @param [in]  high         The upper bound of the interval (exclusive).
 * @param [in]  dim          Number of elements in the output array.
 *                           Nothing is done if it is 0.
 * @param [out] array_data   Pointer to the beginning of the output array.
 */
template <>
void Rand<float>(ContextPtr context, float low, float high, int32_t dim,
                 float *array_data) {
  K2_CHECK_LT(low, high);
  if (dim == 0) return;

  DeviceType device_type = context->GetDeviceType();
  if (device_type == kCpu) {
    RandCpu<float, std::uniform_real_distribution<float>>(dim, low, high,
                                                          array_data);
    return;
  }

  K2_CHECK_EQ(device_type, kCuda);
#ifdef K2_WITH_CUDA
  CudaRandState &state = GetCudaRandState(context);
  float range = high - low;
  // `[=]` copies `state`, so the lambda uses the seed and offset as they
  // are at this point; the update of `state.offset` below does not affect it.
  auto generate_rand_lambda_float = [=] __device__(int32_t i) {
    curandStatePhilox4_32_10_t philox_state;
    curand_init(state.seed,
                i,  // sequence
                state.offset, &philox_state);

    // Four values are generated, but only the first one (r.x) is used.
    float4 r = curand_uniform4(&philox_state);

    // curand_uniform4() returns a number in (0, 1],
    // we want to transform it to [0, 1)
    //
    // CAUTION: `1 - r.x` is not used here as it may be rounded up to 1
    // when `r.x` is close to 0
    float t = (r.x == 1.0f) ? 0.0f : r.x;
    array_data[i] = t * range + low;
  };
  EvalDevice(context, dim, generate_rand_lambda_float);
  // Each element drew one float4 (four values), so advance the offset by 4
  // for the next call.
  // NOTE(review): `state` is updated without any locking here; confirm
  // whether concurrent callers on the same device are possible.
  state.offset += 4;
#else
  K2_LOG(FATAL) << "Unreachable code";
#endif
}

  
/* Overwrite every element of `array` with a random number drawn from a
 * uniform distribution on the half-open interval [low, high).
 *
 * The context, the dimension and the data pointer are all taken from
 * `array` and forwarded to the pointer-based overload of `Rand`.
 *
 * `T` can be `float`, `double`, or `int32_t`.
 *
 * @param [in]  low       The lower bound of the interval (inclusive).
 * @param [in]  high      The upper bound of the interval (exclusive).
 * @param [out] array     The array is modified in-place.
 */
template <typename T>
void Rand(T low, T high, Array1<T> *array) {
  auto *data = array->Data();
  const auto num_elements = array->Dim();
  Rand(array->Context(), low, high, num_elements, data);
}

  
/* Returns an array filled with random numbers from a uniform distribution on
 * the interval [low, high).
 *
 * low is inclusive and high is exclusive.
 *
 * `T` can be `float`, `double`, or `int32_t`.
 *
 * @param [in]  context  It specifies the device on which the random
 *                       numbers are generated.
 * @param [in]  low      The lower bound of the interval (inclusive).
 * @param [in]  high     The upper bound of the interval (exclusive).
 * @param [in]  dim      The dimension of the returned array.
 *
 * @return  Return a newly created array of dimension `dim` on `context`,
 *          filled by the in-place overload of `Rand`.
 */
template <typename T>
Array1<T> Rand(ContextPtr context, T low, T high, int32_t dim) {
  Array1<T> ans(context, dim);
  Rand(low, high, &ans);
  return ans;
}

参考文献

picture.image

0
0
0
0
评论
未登录
暂无评论