1) 结果
CPU time (ms): 280000
Processing 134217728 elements...
CPU Processing time: 279.448986 (ms)
GPU time (ms): 0
GPU Processing time: 4.268032 (ms)
Speedup: 65.474903X
Test PASSED

2) 对于 CPU,std::clock() 与 gettimeofday() 的效果差不多(注意:CPU 代码里乘的是 1000000,所以 280000 的实际单位是微秒,即约 280 ms,输出中的 "(ms)" 标签并不准确)。
对于 GPU,std::clock() 的效果不准(显示为 0 ms,而 CUDA event 测得 4.268032 ms),具体原因我不确定;推测是因为 std::clock() 统计的是进程占用的 CPU 时间而不是实际经过的时间,并且在这台机器上计时粒度较粗(CPU 结果 280000 恰好是 10 ms 的整数倍),不足以分辨约 4 ms 的区间。

3) CPU 代码
gettimeofday(&start_time,NULL);
start2 = std::clock();
computeGold( reference, h_data, num_elements);
end2 = std::clock();
duration = 1.0f * (end2 - start2) * 1000000 / (double)CLOCKS_PER_SEC;
std::cout << "CPU time (ms): " << duration << std::endl;
gettimeofday(&end_time,NULL);
printf("Processing %d elements...\n", num_elements);
double start_count = (double) start_time.tv_sec + 1.e-6 * (double) start_time.tv_usec;
double end_count = (double) end_time.tv_sec + 1.e-6 * (double) end_time.tv_usec;
double host_ms = (double)( (end_count - start_count) * 1000);
printf("CPU Processing time: %lf (ms)\n", host_ms);

4) GPU 代码
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
// **===-------------- Modify the body of this function -----------===**
start2 = std::clock();
XXXCPU(d_odata, d_idata, padded_num_elements);
// **===-----------------------------------------------------------===**
cudaEventRecord(stop);
cudaEventSynchronize(stop);
float device_ms = 0;
cudaEventElapsedTime(&device_ms, start, stop);
end2 = std::clock();
duration = 1.0f * (end2 - start2) * 1000 / (double)CLOCKS_PER_SEC;
std::cout << "GPU time (ms): " << duration << std::endl;
printf("GPU Processing time: %f (ms)\n", device_ms);

5) 可见用 std::clock() 给 GPU 部分计时确实不行,应以 cudaEventElapsedTime 得到的结果为准。
说点什么...