21 #define RUNTIME_API_CALL(apiFuncCall) \ 23 cudaError_t _status = apiFuncCall; \ 24 if (_status != cudaSuccess) { \ 25 fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ 26 __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ 31 #define CUPTI_CALL(call) \ 33 CUptiResult _status = call; \ 34 if (_status != CUPTI_SUCCESS) { \ 36 cuptiGetResultString(_status, &errstr); \ 37 fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ 38 __FILE__, __LINE__, #call, errstr); \ 43 #define BUF_SIZE (32 * 16384) 44 #define ALIGN_SIZE (8) 47 static int val[12]={0};
54 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID:
58 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE:
62 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH:
65 return "Instruction fetch";
66 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY:
69 return "Execution dependency";
70 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY:
73 return "Memory dependency";
74 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE:
78 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC:
82 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY:
85 return "Constant memory dependency";
86 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY:
90 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE:
93 return "Memory throttle";
94 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED:
97 return "Warp Not selected";
98 case CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER:
112 switch (record->kind) {
113 case CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR:
115 CUpti_ActivitySourceLocator *sourceLocator = (CUpti_ActivitySourceLocator *)record;
116 printf(
"Source Locator Id %d, File %s Line %d\n", sourceLocator->id, sourceLocator->fileName, sourceLocator->lineNumber);
119 case CUPTI_ACTIVITY_KIND_PC_SAMPLING:
121 CUpti_ActivityPCSampling *psRecord = (CUpti_ActivityPCSampling *)record;
122 printf(
"source %u, functionId %u, pc 0x%x, corr %u, samples %u, stallreason %s\n",
123 psRecord->sourceLocatorId,
124 psRecord->functionId,
126 psRecord->correlationId,
131 case CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO:
133 CUpti_ActivityPCSamplingRecordInfo *pcsriResult =
134 (CUpti_ActivityPCSamplingRecordInfo *)(
void *)record;
136 printf(
"\n\n************** PC_SAMPLING_RECORD_SUMMARY ************************\n");
137 printf(
"corr %u, totalSamples %llu, droppedSamples %llu, sampling period %llu\n",
138 pcsriResult->correlationId,
139 (
unsigned long long)pcsriResult->totalSamples,
140 (
unsigned long long)pcsriResult->droppedSamples,
141 (
unsigned long long)pcsriResult->samplingPeriodInCycles);
144 case CUPTI_ACTIVITY_KIND_FUNCTION:
146 CUpti_ActivityFunction *fResult =
147 (CUpti_ActivityFunction *)record;
149 printf(
"\n\n************************************ ACTIVITY_KIND_FUNCTION_SUMMARY **********************************\n");
150 printf(
"id %u, ctx %u, moduleId %u, functionIndex %u, name %s\n",
154 fResult->functionIndex,
156 printf(
"\n\n\n\n**************************************************************************************************\n");
159 case CUPTI_ACTIVITY_KIND_KERNEL:
161 CUpti_ActivityKernel3 *kernel = (CUpti_ActivityKernel3 *)record;
162 printf(
"\n\n************************************** KERNEL_RECORD_SUMMARY **********************************\n");
163 printf(
"Kernel %s , device %d, context %d, correlation %d, stream %d,[start-end][%ld-%ld]\n\n",kernel->name,
164 kernel->deviceId,kernel->contextId,kernel->correlationId,kernel->streamId,kernel->start,kernel->end);
178 *
buffer = (uint8_t*) calloc(1, *size);
181 printf(
"Error: out of memory\n");
190 CUpti_Activity *record = NULL;
192 status = cuptiActivityGetNextRecord(
buffer, validSize, &record);
193 if(
status == CUPTI_SUCCESS) {
196 else if (
status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
205 CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
207 printf(
"Dropped %u activity records\n", (
unsigned int)dropped);
209 printf(
"\n\n\n\n\n\n");
210 printf(
"************* STALL SUMMARY ********************\n");
215 printf(
"*************************************************\n\n");
230 CUpti_ModuleResourceData *moduleResourceData = (CUpti_ModuleResourceData *)resourceDescriptor;
233 if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
237 pCubin = moduleResourceData->pCubin;
238 cubinSize = moduleResourceData->cubinSize;
241 cubin = fopen(
"sass_source_map.cubin",
"wb");
242 fwrite(pCubin,
sizeof(uint8_t), cubinSize, cubin);
245 }
else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
253 if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
255 }
else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
263 CUpti_CallbackId cbid,
const void *cbdata)
265 if (domain == CUPTI_CB_DOMAIN_RESOURCE) {
275 int deviceId, sampRate;
280 cuCtxCreate(&cuCtx,0,deviceId);
282 CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING));
285 CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
288 CUpti_ActivityPCSamplingConfig config;
290 config.samplingPeriod= sampRate;
291 CUPTI_CALL(cuptiActivityConfigurePCSampling(cuCtx, &config));
static CUpti_SubscriberHandle g_subscriber
__attribute__((constructor))
static void printActivity(CUpti_Activity *record)
void CUPTIAPI dumpCudaModule(CUpti_CallbackId cbid, void *resourceDescriptor)
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
static void CUPTIAPI traceCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata)
static void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
static void handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData)
static const char * getStallReasonString(CUpti_ActivityPCSamplingStallReason reason, unsigned int samples)
static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
CUresult CUDAAPI cuInit(unsigned int myInt)
static char * stall_name[12]