PAPI  5.7.0.0
activity.c
Go to the documentation of this file.
1 /*
2  * Author : Sangamesh Ragate
3  * Date : 18th Nov 2015
4  * ICl-UTK
5  * Description : This is the shared library that sets up the environment
6  * for the CUDA application by creating the context and keeping it ready
7  * to perform PC sampling of the CUDA application as soon as it launches the kernel
8  */
9 
10 
11 
12 #include <cuda.h>
13 #include <cupti.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 
18 static CUpti_SubscriberHandle g_subscriber;
19 
20 
/*
 * Evaluate a CUDA runtime API call exactly once. If the result is not
 * cudaSuccess, report the failing expression, its file/line and the runtime's
 * error string on stderr, then terminate the process with exit(-1).
 *
 * NOTE(review): this relies on cudaError_t / cudaGetErrorString being declared
 * wherever the macro is expanded; it is not expanded in the visible part of
 * this file.
 */
#define RUNTIME_API_CALL(apiFuncCall)                                         \
    do {                                                                      \
        cudaError_t _rtStatus = apiFuncCall;                                  \
        if (_rtStatus != cudaSuccess) {                                       \
            fprintf(stderr,                                                   \
                    "%s:%d: error: function %s failed with error %s.\n",      \
                    __FILE__, __LINE__, #apiFuncCall,                         \
                    cudaGetErrorString(_rtStatus));                           \
            exit(-1);                                                         \
        }                                                                     \
    } while (0)
30 
/*
 * Evaluate a CUPTI call exactly once. If the result is not CUPTI_SUCCESS,
 * look up its description with cuptiGetResultString(), report the failing
 * expression and its file/line on stderr, then terminate the process with
 * exit(-1).
 */
#define CUPTI_CALL(call)                                                      \
    do {                                                                      \
        CUptiResult _cuptiStatus = call;                                      \
        if (_cuptiStatus != CUPTI_SUCCESS) {                                  \
            const char *_cuptiErrStr;                                         \
            cuptiGetResultString(_cuptiStatus, &_cuptiErrStr);                \
            fprintf(stderr,                                                   \
                    "%s:%d: error: function %s failed with error %s.\n",      \
                    __FILE__, __LINE__, #call, _cuptiErrStr);                 \
            exit(-1);                                                         \
        }                                                                     \
    } while (0)
42 
/* Sizes used by bufferRequested(): each activity buffer it allocates is
 * BUF_SIZE + ALIGN_SIZE bytes long. */
#define BUF_SIZE (32 * 16384)
#define ALIGN_SIZE (8)

/*
 * Stall summary tables, filled by getStallReasonString() and printed by
 * bufferCompleted(). Slot i holds the summary name of a stall reason (NULL
 * until that reason has been seen) and the running sample count for it.
 */
static char *stall_name[12];
static int val[12] = {0};
48 
49 
50 static const char *
51 getStallReasonString(CUpti_ActivityPCSamplingStallReason reason,unsigned int samples)
52 {
53  switch (reason) {
54  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID:
55  stall_name[0]="Stall_invalid";
56  val[0] += samples;
57  return "Invalid";
58  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE:
59  stall_name[1]="Stall_none";
60  val[1] += samples;
61  return "Selected";
62  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH:
63  stall_name[2]="Stall_inst_fetch";
64  val[2] += samples;
65  return "Instruction fetch";
66  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY:
67  stall_name[3]="Stall_exec_dependency";
68  val[3] += samples;
69  return "Execution dependency";
70  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY:
71  stall_name[4]="Stall_mem_dependency";
72  val[4] += samples;
73  return "Memory dependency";
74  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE:
75  stall_name[5]="Stall_texture";
76  val[5] += samples;
77  return "Texture";
78  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC:
79  stall_name[6]="Stall_sync";
80  val[6] += samples;
81  return "Sync";
82  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY:
83  stall_name[7]="Stall_const_mem_dependency";
84  val[7] += samples;
85  return "Constant memory dependency";
86  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY:
87  stall_name[8]="Stall_pipe_busy";
88  val[8] += samples;
89  return "Pipe busy";
90  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE:
91  stall_name[9]="Stall_memory_throttle";
92  val[9] += samples;
93  return "Memory throttle";
94  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED:
95  stall_name[10]="Stall_warp_not_selected";
96  val[10] += samples;
97  return "Warp Not selected";
98  case CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER:
99  stall_name[11]="Stall_other";
100  val[11] += samples;
101  return "Other";
102  default:
103  break;
104  }
105 
106  return NULL;
107 }
108 
109 static void
110 printActivity(CUpti_Activity *record)
111 {
112  switch (record->kind) {
113  case CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR:
114  {
115  CUpti_ActivitySourceLocator *sourceLocator = (CUpti_ActivitySourceLocator *)record;
116  printf("Source Locator Id %d, File %s Line %d\n", sourceLocator->id, sourceLocator->fileName, sourceLocator->lineNumber);
117  break;
118  }
119  case CUPTI_ACTIVITY_KIND_PC_SAMPLING:
120  {
121  CUpti_ActivityPCSampling *psRecord = (CUpti_ActivityPCSampling *)record;
122  printf("source %u, functionId %u, pc 0x%x, corr %u, samples %u, stallreason %s\n",
123  psRecord->sourceLocatorId,
124  psRecord->functionId,
125  psRecord->pcOffset,
126  psRecord->correlationId,
127  psRecord->samples,
128  getStallReasonString(psRecord->stallReason,psRecord->samples));
129  break;
130  }
131  case CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO:
132  {
133  CUpti_ActivityPCSamplingRecordInfo *pcsriResult =
134  (CUpti_ActivityPCSamplingRecordInfo *)(void *)record;
135 
136  printf("\n\n************** PC_SAMPLING_RECORD_SUMMARY ************************\n");
137  printf("corr %u, totalSamples %llu, droppedSamples %llu, sampling period %llu\n",
138  pcsriResult->correlationId,
139  (unsigned long long)pcsriResult->totalSamples,
140  (unsigned long long)pcsriResult->droppedSamples,
141  (unsigned long long)pcsriResult->samplingPeriodInCycles);
142  break;
143  }
144  case CUPTI_ACTIVITY_KIND_FUNCTION:
145  {
146  CUpti_ActivityFunction *fResult =
147  (CUpti_ActivityFunction *)record;
148 
149  printf("\n\n************************************ ACTIVITY_KIND_FUNCTION_SUMMARY **********************************\n");
150  printf("id %u, ctx %u, moduleId %u, functionIndex %u, name %s\n",
151  fResult->id,
152  fResult->contextId,
153  fResult->moduleId,
154  fResult->functionIndex,
155  fResult->name);
156  printf("\n\n\n\n**************************************************************************************************\n");
157  break;
158  }
159  case CUPTI_ACTIVITY_KIND_KERNEL:
160  {
161  CUpti_ActivityKernel3 *kernel = (CUpti_ActivityKernel3 *)record;
162  printf("\n\n************************************** KERNEL_RECORD_SUMMARY **********************************\n");
163  printf("Kernel %s , device %d, context %d, correlation %d, stream %d,[start-end][%ld-%ld]\n\n",kernel->name,
164  kernel->deviceId,kernel->contextId,kernel->correlationId,kernel->streamId,kernel->start,kernel->end);
165  break;
166  }
167 
168  default:
169  printf("\n");
170  break;
171  }
172 }
173 
174 static void CUPTIAPI
175 bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
176 {
177  *size = BUF_SIZE + ALIGN_SIZE;
178  *buffer = (uint8_t*) calloc(1, *size);
179  *maxNumRecords = 0;
180  if (*buffer == NULL) {
181  printf("Error: out of memory\n");
182  exit(-1);
183  }
184 }
185 
186 static void CUPTIAPI
187 bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
188 {
189  CUptiResult status;
190  CUpti_Activity *record = NULL;
191  do {
192  status = cuptiActivityGetNextRecord(buffer, validSize, &record);
193  if(status == CUPTI_SUCCESS) {
194  printActivity(record);
195  }
196  else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
197  break;
198  }
199  else {
201  }
202  } while (1);
203 
204  size_t dropped;
205  CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
206  if (dropped != 0) {
207  printf("Dropped %u activity records\n", (unsigned int)dropped);
208  }
209  printf("\n\n\n\n\n\n");
210  printf("************* STALL SUMMARY ********************\n");
211  int i;
212  for(i=0;i<12;++i)
213  if(stall_name[i] != NULL)
214  printf("%s = %d \n",stall_name[i],val[i]);
215  printf("*************************************************\n\n");
216 
217 
218 }
219 
220 #define DUMP_CUBIN 1
221 
222 void CUPTIAPI dumpCudaModule(CUpti_CallbackId cbid, void *resourceDescriptor)
223 {
224 #if DUMP_CUBIN
225  const char *pCubin;
226  size_t cubinSize;
227 
228 
229  //dump the cubin at MODULE_LOADED_STARTING
230  CUpti_ModuleResourceData *moduleResourceData = (CUpti_ModuleResourceData *)resourceDescriptor;
231  #endif
232 
233  if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
234  #if DUMP_CUBIN
235  // You can use nvdisasm to dump the SASS from the cubin.
236  // Try nvdisasm -b -fun <function_id> sass_to_source.cubin
237  pCubin = moduleResourceData->pCubin;
238  cubinSize = moduleResourceData->cubinSize;
239 
240  FILE *cubin;
241  cubin = fopen("sass_source_map.cubin", "wb");
242  fwrite(pCubin, sizeof(uint8_t), cubinSize, cubin);
243  fclose(cubin);
244  #endif
245  }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
246  // You can dump the cubin either at MODULE_LOADED or MODULE_UNLOAD_STARTING
247  }
248 }
249 
250 static void
251 handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData)
252 {
253  if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) {
254  dumpCudaModule(cbid, resourceData->resourceDescriptor);
255  }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) {
256  dumpCudaModule(cbid, resourceData->resourceDescriptor);
257  }
258 }
259 
260 
261 static void CUPTIAPI
262 traceCallback(void *userdata, CUpti_CallbackDomain domain,
263  CUpti_CallbackId cbid, const void *cbdata)
264 {
265  if (domain == CUPTI_CB_DOMAIN_RESOURCE) {
266  handleResource(cbid, (CUpti_ResourceData *)cbdata);
267  }
268 }
269 
270 
271 __attribute__((constructor)) void
272 initTrace()
273 {
274  //get the arguments from the environment variables
275  int deviceId, sampRate;
276 
277  CUcontext cuCtx;
278  deviceId = atoi(getenv("GPU_DEVICE_ID"));
279  cuInit(0);
280  cuCtxCreate(&cuCtx,0,deviceId);
281  CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));
282  CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING));
283  //CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH));
284 
285  CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
286  CUPTI_CALL(cuptiSubscribe(&g_subscriber, (CUpti_CallbackFunc)traceCallback, NULL));
287  CUPTI_CALL(cuptiEnableDomain(1, g_subscriber, CUPTI_CB_DOMAIN_RESOURCE));
288  CUpti_ActivityPCSamplingConfig config;
289  sampRate=atoi(getenv("PC_SAMPLING_RATE"));
290  config.samplingPeriod= sampRate;
291  CUPTI_CALL(cuptiActivityConfigurePCSampling(cuCtx, &config));
292 }
293 
/*
 * Library destructor: asks CUPTI to flush all outstanding activity buffers.
 * A CUPTI failure is reported by CUPTI_CALL, which then exits the process.
 */
__attribute__((destructor)) void
finiTrace()
{
    const unsigned int flushFlag = 0;

    CUPTI_CALL(cuptiActivityFlushAll(flushFlag));
}
300 
static CUpti_SubscriberHandle g_subscriber
Definition: activity.c:18
int atoi()
__attribute__((constructor))
Definition: activity.c:271
static void printActivity(CUpti_Activity *record)
Definition: activity.c:110
char * getenv()
#define CUPTI_CALL(call)
Definition: activity.c:31
void CUPTIAPI dumpCudaModule(CUpti_CallbackId cbid, void *resourceDescriptor)
Definition: activity.c:222
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
Definition: appio.c:387
static void CUPTIAPI traceCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata)
Definition: activity.c:262
static void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
Definition: activity.c:175
static void handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData)
Definition: activity.c:251
static const char * getStallReasonString(CUpti_ActivityPCSamplingStallReason reason, unsigned int samples)
Definition: activity.c:51
#define BUF_SIZE
Definition: activity.c:43
static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
Definition: activity.c:187
CUresult CUDAAPI cuInit(unsigned int myInt)
Definition: benchSANVML.c:48
static char * stall_name[12]
Definition: activity.c:46
char * buffer
Definition: iozone.c:1366
static int val[12]
Definition: activity.c:47
#define ALIGN_SIZE
Definition: activity.c:44
long long status
Definition: iozone.c:1335
void exit()
int i
Definition: fileop.c:140