Blender V4.3
btThreadSupportWin32.cpp
Go to the documentation of this file.
1/*
2Bullet Continuous Collision Detection and Physics Library
3Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
4
5This software is provided 'as-is', without any express or implied warranty.
6In no event will the authors be held liable for any damages arising from the use of this software.
7Permission is granted to anyone to use this software for any purpose,
8including commercial applications, and to alter it and redistribute it freely,
9subject to the following restrictions:
10
111. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
122. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
133. This notice may not be removed or altered from any source distribution.
14*/
15
16#if defined(_WIN32) && BT_THREADSAFE
17
18#include "LinearMath/btScalar.h"
19#include "LinearMath/btMinMax.h"
23#include <windows.h>
24#include <stdio.h>
25
26struct btProcessorInfo
27{
28 int numLogicalProcessors;
29 int numCores;
30 int numNumaNodes;
31 int numL1Cache;
32 int numL2Cache;
33 int numL3Cache;
34 int numPhysicalPackages;
35 static const int maxNumTeamMasks = 32;
36 int numTeamMasks;
37 UINT64 processorTeamMasks[maxNumTeamMasks];
38};
39
40UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId)
41{
42 UINT64 procMask = UINT64(1) << procId;
43 for (int i = 0; i < procInfo.numTeamMasks; ++i)
44 {
45 if (procMask & procInfo.processorTeamMasks[i])
46 {
47 return procInfo.processorTeamMasks[i];
48 }
49 }
50 return 0;
51}
52
53int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId)
54{
55 UINT64 procMask = UINT64(1) << procId;
56 for (int i = 0; i < procInfo.numTeamMasks; ++i)
57 {
58 if (procMask & procInfo.processorTeamMasks[i])
59 {
60 return i;
61 }
62 }
63 return -1;
64}
65
66int countSetBits(ULONG64 bits)
67{
68 int count = 0;
69 while (bits)
70 {
71 if (bits & 1)
72 {
73 count++;
74 }
75 bits >>= 1;
76 }
77 return count;
78}
79
80typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
81
82void getProcessorInformation(btProcessorInfo* procInfo)
83{
84 memset(procInfo, 0, sizeof(*procInfo));
85 Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
86 (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
87 if (getLogicalProcInfo == NULL)
88 {
89 // no info
90 return;
91 }
92 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
93 DWORD bufSize = 0;
94 while (true)
95 {
96 if (getLogicalProcInfo(buf, &bufSize))
97 {
98 break;
99 }
100 else
101 {
102 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
103 {
104 if (buf)
105 {
106 free(buf);
107 }
108 buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize);
109 }
110 }
111 }
112
113 int len = bufSize / sizeof(*buf);
114 for (int i = 0; i < len; ++i)
115 {
116 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
117 switch (info->Relationship)
118 {
119 case RelationNumaNode:
120 procInfo->numNumaNodes++;
121 break;
122
123 case RelationProcessorCore:
124 procInfo->numCores++;
125 procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
126 break;
127
128 case RelationCache:
129 if (info->Cache.Level == 1)
130 {
131 procInfo->numL1Cache++;
132 }
133 else if (info->Cache.Level == 2)
134 {
135 procInfo->numL2Cache++;
136 }
137 else if (info->Cache.Level == 3)
138 {
139 procInfo->numL3Cache++;
140 // processors that share L3 cache are considered to be on the same team
141 // because they can more easily work together on the same data.
142 // Large performance penalties will occur if 2 or more threads from different
143 // teams attempt to frequently read and modify the same cache lines.
144 //
145 // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
146 // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
147 // CCXs are operating on the same data, many cycles will be spent keeping the
148 // two caches coherent.
149 if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
150 {
151 procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
152 procInfo->numTeamMasks++;
153 }
154 }
155 break;
156
157 case RelationProcessorPackage:
158 procInfo->numPhysicalPackages++;
159 break;
160 }
161 }
162 free(buf);
163}
164
166class btThreadSupportWin32 : public btThreadSupportInterface
167{
168public:
169 struct btThreadStatus
170 {
171 int m_taskId;
172 int m_commandId;
173 int m_status;
174
175 ThreadFunc m_userThreadFunc;
176 void* m_userPtr; //for taskDesc etc
177
178 void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
179
180 void* m_eventStartHandle;
181 char m_eventStartHandleName[32];
182
183 void* m_eventCompleteHandle;
184 char m_eventCompleteHandleName[32];
185 };
186
187private:
188 btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
189 btAlignedObjectArray<void*> m_completeHandles;
190 int m_numThreads;
191 DWORD_PTR m_startedThreadMask;
192 btProcessorInfo m_processorInfo;
193
194 void startThreads(const ConstructionInfo& threadInfo);
195 void stopThreads();
196 int waitForResponse();
197
198public:
199 btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
200 virtual ~btThreadSupportWin32();
201
202 virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
203 virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
204 virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
205
206 virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
207 virtual void waitForAllTasks() BT_OVERRIDE;
208
209 virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
210 virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
211};
212
213btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo)
214{
215 startThreads(threadConstructionInfo);
216}
217
218btThreadSupportWin32::~btThreadSupportWin32()
219{
220 stopThreads();
221}
222
223DWORD WINAPI win32threadStartFunc(LPVOID lpParam)
224{
225 btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
226
227 while (1)
228 {
229 WaitForSingleObject(status->m_eventStartHandle, INFINITE);
230 void* userPtr = status->m_userPtr;
231
232 if (userPtr)
233 {
234 btAssert(status->m_status);
235 status->m_userThreadFunc(userPtr);
236 status->m_status = 2;
237 SetEvent(status->m_eventCompleteHandle);
238 }
239 else
240 {
241 //exit Thread
242 status->m_status = 3;
243 printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
244 SetEvent(status->m_eventCompleteHandle);
245 break;
246 }
247 }
248 printf("Thread TERMINATED\n");
249 return 0;
250}
251
252void btThreadSupportWin32::runTask(int threadIndex, void* userData)
253{
254 btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
255 btAssert(threadIndex >= 0);
256 btAssert(int(threadIndex) < m_activeThreadStatus.size());
257
258 threadStatus.m_commandId = 1;
259 threadStatus.m_status = 1;
260 threadStatus.m_userPtr = userData;
261 m_startedThreadMask |= DWORD_PTR(1) << threadIndex;
262
264 SetEvent(threadStatus.m_eventStartHandle);
265}
266
267int btThreadSupportWin32::waitForResponse()
268{
269 btAssert(m_activeThreadStatus.size());
270
271 int last = -1;
272 DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
273 btAssert(res != WAIT_FAILED);
274 last = res - WAIT_OBJECT_0;
275
276 btThreadStatus& threadStatus = m_activeThreadStatus[last];
277 btAssert(threadStatus.m_threadHandle);
278 btAssert(threadStatus.m_eventCompleteHandle);
279
280 //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
281 btAssert(threadStatus.m_status > 1);
282 threadStatus.m_status = 0;
283
285 btAssert(last >= 0);
286 m_startedThreadMask &= ~(DWORD_PTR(1) << last);
287
288 return last;
289}
290
291void btThreadSupportWin32::waitForAllTasks()
292{
293 while (m_startedThreadMask)
294 {
295 waitForResponse();
296 }
297}
298
299void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
300{
301 static int uniqueId = 0;
302 uniqueId++;
303 btProcessorInfo& procInfo = m_processorInfo;
304 getProcessorInformation(&procInfo);
305 DWORD_PTR dwProcessAffinityMask = 0;
306 DWORD_PTR dwSystemAffinityMask = 0;
307 if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask))
308 {
309 dwProcessAffinityMask = 0;
310 }
312 m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
313
314 m_activeThreadStatus.resize(m_numThreads);
315 m_completeHandles.resize(m_numThreads);
316 m_startedThreadMask = 0;
317
318 // set main thread affinity
319 if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0))
320 {
321 SetThreadAffinityMask(GetCurrentThread(), mask);
322 SetThreadIdealProcessor(GetCurrentThread(), 0);
323 }
324
325 for (int i = 0; i < m_numThreads; i++)
326 {
327 printf("starting thread %d\n", i);
328
329 btThreadStatus& threadStatus = m_activeThreadStatus[i];
330
331 LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
332 SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
333 LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
334 LPVOID lpParameter = &threadStatus;
335 DWORD dwCreationFlags = 0;
336 LPDWORD lpThreadId = 0;
337
338 threadStatus.m_userPtr = 0;
339
340 sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
341 threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
342
343 sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
344 threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
345
346 m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
347
348 HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId);
349 //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
350 // highest priority -- can cause erratic performance when numThreads > numCores
351 // we don't want worker threads to be higher priority than the main thread or the main thread could get
352 // totally shut out and unable to tell the workers to stop
353 //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
354
355 {
356 int processorId = i + 1; // leave processor 0 for main thread
357 DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId);
358 if (teamMask)
359 {
360 // bind each thread to only execute on processors of it's assigned team
361 // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
362 // - for multi-socket Intel this will keep threads from migrating from one socket to another
363 // - for AMD Ryzen this will keep threads from migrating from one CCX to another
364 DWORD_PTR mask = teamMask & dwProcessAffinityMask;
365 if (mask)
366 {
367 SetThreadAffinityMask(handle, mask);
368 }
369 }
370 SetThreadIdealProcessor(handle, processorId);
371 }
372
373 threadStatus.m_taskId = i;
374 threadStatus.m_commandId = 0;
375 threadStatus.m_status = 0;
376 threadStatus.m_threadHandle = handle;
377 threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
378
379 printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
380 }
381}
382
384void btThreadSupportWin32::stopThreads()
385{
386 for (int i = 0; i < m_activeThreadStatus.size(); i++)
387 {
388 btThreadStatus& threadStatus = m_activeThreadStatus[i];
389 if (threadStatus.m_status > 0)
390 {
391 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
392 }
393
394 threadStatus.m_userPtr = NULL;
395 SetEvent(threadStatus.m_eventStartHandle);
396 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
397
398 CloseHandle(threadStatus.m_eventCompleteHandle);
399 CloseHandle(threadStatus.m_eventStartHandle);
400 CloseHandle(threadStatus.m_threadHandle);
401 }
402
403 m_activeThreadStatus.clear();
404 m_completeHandles.clear();
405}
406
407class btWin32CriticalSection : public btCriticalSection
408{
409private:
410 CRITICAL_SECTION mCriticalSection;
411
412public:
413 btWin32CriticalSection()
414 {
415 InitializeCriticalSection(&mCriticalSection);
416 }
417
418 ~btWin32CriticalSection()
419 {
420 DeleteCriticalSection(&mCriticalSection);
421 }
422
423 void lock()
424 {
425 EnterCriticalSection(&mCriticalSection);
426 }
427
428 void unlock()
429 {
430 LeaveCriticalSection(&mCriticalSection);
431 }
432};
433
434btCriticalSection* btThreadSupportWin32::createCriticalSection()
435{
436 unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
437 btWin32CriticalSection* cs = new (mem) btWin32CriticalSection();
438 return cs;
439}
440
441void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
442{
443 criticalSection->~btCriticalSection();
444 btAlignedFree(criticalSection);
445}
446
448{
449 return new btThreadSupportWin32(info);
450}
451
452#endif //defined(_WIN32) && BT_THREADSAFE
void BLI_kdtree_nd_ free(KDTree *tree)
#define FALSE
volatile int lock
#define btAlignedFree(ptr)
#define btAlignedAlloc(size, alignment)
SIMD_FORCE_INLINE const T & btMin(const T &a, const T &b)
Definition btMinMax.h:21
static int uniqueId
#define btAssert(x)
Definition btScalar.h:295
#define BT_OVERRIDE
Definition btThreads.h:26
const unsigned int BT_MAX_THREAD_COUNT
Definition btThreads.h:31
SIMD_FORCE_INLINE void clear()
clear the array, deallocated memory. Generally it is better to use array.resize(0),...
SIMD_FORCE_INLINE int size() const
return the number of elements in the array
SIMD_FORCE_INLINE void resize(int newsize, const T &fillData=T())
virtual int getCacheFriendlyNumThreads() const =0
virtual int getLogicalToPhysicalCoreRatio() const =0
virtual void waitForAllTasks()=0
static btThreadSupportInterface * create(const ConstructionInfo &info)
virtual void runTask(int threadIndex, void *userData)=0
virtual int getNumWorkerThreads() const =0
#define printf
#define NULL
int len
int count