Jump to content
559 posts in this topic

Recommended Posts

Posted (edited)

Pros : flash brief restored (in latest commit)

 

Boot-args -v keepsyms=1 debug=0x100 IGLogLevel=8 -NGreenDebug -liludbg liludump=100 ngreen-dmc=skip -allow3d -disablegfxfirmware -ngreenexp -ngreenfullmtlcore -ngreenfullmtldyld ngreenV142=3 -ngreenV142diag2d

 

With -ngreenlegacyown   => HD mouse cursor but no flash brief

With ngreenV142=1 => stable, but submitBlit is circuit-broken for safety; no brief flash (green/yellow/blue screen)

Kernel-2026-05-03-131336.panic Lilu_1.7.2_23.6.txt

Edited by Stezza88
Posted (edited)

I still have to commit and push, but I have now gated the appearance of the brief flash behind two boot-args. I am starting to think that the brief-flash Apple logo is always there, but I cannot see it because it is painted black. So the open questions are:

1) What causes it? => FOUND IT: seeing the logo with both args means the timer-driven PLANE_CTL/STRIDE/SURF rewrite is what restores first light.

2) Why is it stuck at the first frame? => I think that void Gen11::blit3d_initialize_scratch_space(void *that) and uint32_t Gen11::submitBlit(void *that, void *param_1, void *param_2, void *param_3, bool param_4) are mainly responsible for drawing these.

 

(-ngreenexp) -ngreenv79lin -ngreenv60poll

 

Here is the causing code :

// ── V79: Plane monitor / optional linearization ──
	// Keep -ngreenexp diagnostic-only by default. The old linearization writes can
	// produce false first-light by reinterpreting the BIOS boot plane with the wrong
	// layout, which shows up as yellow/blue/green Apple flashes instead of the normal
	// grey splash. Re-enable the mutating path only with explicit -ngreenv79lin.
	if (isExperimentalMonitorEnabled() && isV79PlaneLinearizationEnabled()) {
		if (v60Count <= 5 || v60Count == 10 || v60Count == 20 || v60Count == 30) {
			uint32_t planCtl  = NGreen::callback->readReg32(0x70180); // PLANE_CTL
			uint32_t planStrd = NGreen::callback->readReg32(0x70188); // PLANE_STRIDE
			uint32_t tiling = (planCtl >> 10) & 0x7; // bits[12:10]
			if (tiling != 0) {
				uint32_t newCtl = planCtl & ~(0x7u << 10); // clear tiling -> linear
				uint32_t newStrd = planStrd;
				if (tiling == 1) {
					newStrd = planStrd * 8;
				} else if (tiling == 4) {
					newStrd = planStrd * 16;
				}
				NGreen::callback->writeReg32(0x70180, newCtl);
				NGreen::callback->writeReg32(0x70188, newStrd);
				uint32_t planSurf = NGreen::callback->readReg32(0x7019C);
				NGreen::callback->writeReg32(0x7019C, planSurf);
				SYSLOG("ngreen", "V79[%d]: tiling %d->linear CTL 0x%x->0x%x STRIDE 0x%x->0x%x SURF=0x%x",
					   v60Count, tiling, planCtl, newCtl, planStrd, newStrd, planSurf);
			} else {
				SYSLOG("ngreen", "V79[%d]: already linear CTL=0x%x STRIDE=0x%x", v60Count, planCtl, planStrd);
			}
		}
	}
// V45: Delayed child-check callback, dispatched through thread_call once the
// requested delay has elapsed. Logs the service's state word, every IOService
// client attached to it, and the IOService clients of its provider.
//   p0 - the IOService to inspect (cast back from the thread-call parameter)
//   p1 - the delay in milliseconds, carried in the pointer-sized parameter
static void v45DelayedChildCheck(thread_call_param_t p0, thread_call_param_t p1) {
	if (!isV60V45PollingEnabled())
		return;

	auto *accel = static_cast<IOService *>(p0);
	unsigned delayMs = (unsigned)(uintptr_t)p1;

	// Capture the state word before walking the clients; the summary line below
	// reports this value, not a re-read one.
	uint64_t accelState = accel->getState();
	OSIterator *clients = accel->getClientIterator();
	int childTotal = 0;
	if (clients != nullptr) {
		while (OSObject *entry = clients->getNextObject()) {
			auto *child = OSDynamicCast(IOService, entry);
			if (child == nullptr)
				continue; // only IOService clients are logged and counted

			uint64_t childState = child->getState();
			SYSLOG("ngreen", "V45: T+%ums child[%d]: %s class=%s state=0x%llx",
				   delayMs, childTotal, child->getName(),
				   child->getMetaClass()->getClassName(),
				   (unsigned long long)childState);

			// V55: extra diagnostics (and a forced registerService) are limited to
			// checks scheduled at 3000 ms or later and to a fixed set of client names.
			const char *childName = (delayMs >= 3000) ? child->getName() : nullptr;
			const bool tracked = childName != nullptr && (
					strcmp(childName, "IGAccelDevice") == 0 ||
					strcmp(childName, "IGAccelSharedUserClient") == 0 ||
					strcmp(childName, "IOAccelDisplayPipeUserClient2") == 0 ||
					strcmp(childName, "IGAccelCommandQueue") == 0);
			if (tracked) {
				// Report the client's own provider and open state.
				auto *childProvider = child->getProvider();
				SYSLOG("ngreen", "V55: %s state=0x%llx provider=%s isOpen=%d",
					   childName, (unsigned long long)childState,
					   childProvider ? childProvider->getName() : "NULL",
					   child->isOpen());

				// Report whether the inspected service itself is open.
				SYSLOG("ngreen", "V55: accelerator isOpen=%d", accel->isOpen());

				// A client whose state word is still zero gets an asynchronous
				// registerService(); its state is re-read after a short delay.
				if (childState == 0) {
					SYSLOG("ngreen", "V55: %s at state=0x0 — calling registerService()", childName);
					child->registerService(kIOServiceAsynchronous);
					IODelay(500);
					uint64_t stateAfter = child->getState();
					SYSLOG("ngreen", "V55: %s after registerService → state=0x%llx",
						   childName, (unsigned long long)stateAfter);
				}
			}
			childTotal++;
		}
		clients->release();
	}

	// Open state of the inspected service, sampled after the client walk.
	bool accelOpen = accel->isOpen();

	// Summary: the individual flags are the state bits 0x02/0x04/0x08/0x10/0x01,
	// printed under the labels given in the format string.
	SYSLOG("ngreen", "V45: T+%ums: %d children, state=0x%llx (reg=%d match=%d pub=%d fmatch=%d inact=%d), isOpen=%d",
		   delayMs, childTotal,
		   (unsigned long long)accelState,
		   !!(accelState & 0x02), !!(accelState & 0x04), !!(accelState & 0x08), !!(accelState & 0x10), !!(accelState & 0x01),
		   accelOpen);

	// Finally list the provider's IOService clients (the siblings of this service).
	auto *parent = accel->getProvider();
	if (parent == nullptr)
		return;

	OSIterator *siblings = parent->getClientIterator();
	if (siblings == nullptr)
		return;

	int siblingIdx = 0;
	while (OSObject *entry = siblings->getNextObject()) {
		auto *sibling = OSDynamicCast(IOService, entry);
		if (sibling == nullptr)
			continue;
		SYSLOG("ngreen", "V45: T+%ums provider child[%d]: %s class=%s",
			   delayMs, siblingIdx, sibling->getName(),
			   sibling->getMetaClass()->getClassName());
		siblingIdx++;
	}
	siblings->release();
}

// V45: Helper to schedule a delayed child check
static void v45ScheduleDelayedCheck(void *accelInstance, unsigned delayMs) {
	if (!isV60V45PollingEnabled())
		return;

	thread_call_t tc = thread_call_allocate(v45DelayedChildCheck,
											static_cast<thread_call_param_t>(accelInstance));
	if (tc) {
		uint64_t deadline;
		clock_interval_to_deadline(delayMs, kMillisecondScale, &deadline);
		thread_call_enter1_delayed(tc,
								   (thread_call_param_t)(uintptr_t)delayMs,
								   deadline);
	}
}

// V60: GPU health monitor — 2s interval with V57-proven R/W ERROR_GEN6 clear.
// V59 ReadRegister32 intercept caused regression (0 children). Back to timer-based.
// Also applies EMR mask-all every cycle to prevent Apple from unmasking errors.
void Gen11::v60GpuHealthMonitor(thread_call_param_t param0, thread_call_param_t param1) {
	if (!isV60V45PollingEnabled())
		return;

	static int v60Count = 0;
	static uint32_t v60LastHead = 0xDEAD;
	static uint32_t v60LastTail = 0xDEAD;
	v60Count++;
	
	auto *svc = static_cast<IOService *>(param0);

	// Guard: bail if MMIO mapping has been released (GPU torn down before timer fired).
	// Release the retain we hold — stop the timer loop.
	if (!NGreen::callback->mmioValid()) {
		SYSLOG("ngreen", "v60[%d]: MMIO not ready, stopping health monitor", v60Count);
		svc->release();
		return;
	}

	// 1. Read GPU state via direct MMIO
	uint32_t realErr = NGreen::callback->readReg32(ERROR_GEN6);
	uint32_t rcsHead = NGreen::callback->readReg32(RING_HEAD(RENDER_RING_BASE));
	uint32_t rcsTail = NGreen::callback->readReg32(RING_TAIL(RENDER_RING_BASE));
	uint32_t rcsCtl  = NGreen::callback->readReg32(RING_CTL(RENDER_RING_BASE));
	uint32_t emr     = NGreen::callback->readReg32(RING_EMR(RENDER_RING_BASE));
	
	// 2. ExecList state
	uint32_t execStatus = NGreen::callback->readReg32(RING_EXECLIST_STATUS(RENDER_RING_BASE));
	uint32_t csbPtr = NGreen::callback->readReg32(RING_CONTEXT_STATUS_PTR(RENDER_RING_BASE));
	
	// V63: Additional scheduler/context diagnostics
	uint32_t acthd    = NGreen::callback->readReg32(RING_ACTHD(RENDER_RING_BASE));
	uint32_t ccid     = NGreen::callback->readReg32(RING_CCID(RENDER_RING_BASE));
	uint32_t ringFault = NGreen::callback->readReg32(GEN12_RING_FAULT_REG);
	
	// 3. Track ring HEAD/TAIL movement
	bool headChanged = (rcsHead != v60LastHead);
	bool tailChanged = (rcsTail != v60LastTail);
	
	// 4. Count children + find IGAccelDevice state
	int childCount = 0;
	uint64_t accelDevState = 0xDEAD;
	OSIterator *iter = svc->getClientIterator();
	if (iter) {
		OSObject *obj;
		while ((obj = iter->getNextObject())) {
			auto *child = OSDynamicCast(IOService, obj);
			if (child) {
				const char *cn = child->getName();
				if (cn && strcmp(cn, "IGAccelDevice") == 0) {
					accelDevState = child->getState();
				}
				childCount++;
			}
		}
		iter->release();
	}
	
	// V70: Track child transitions — dump all children when count or accelDev state changes
	{
		static int lastChildCount = -1;
		static uint64_t lastAccelDevState = 0xBEEF;
		if (childCount != lastChildCount || accelDevState != lastAccelDevState) {
			SYSLOG("ngreen", "V70: TRANSITION ch %d->%d dev 0x%llx->0x%llx [iter %d]",
				   lastChildCount, childCount,
				   (unsigned long long)lastAccelDevState, (unsigned long long)accelDevState,
				   v60Count);
			// Dump ALL children at the transition point
			OSIterator *iter2 = svc->getClientIterator();
			if (iter2) {
				int idx = 0;
				OSObject *obj2;
				while ((obj2 = iter2->getNextObject())) {
					auto *child2 = OSDynamicCast(IOService, obj2);
					if (child2) {
						SYSLOG("ngreen", "V70: ch[%d]: %s cls=%s st=0x%llx",
							   idx, child2->getName(),
							   child2->getMetaClass()->getClassName(),
							   (unsigned long long)child2->getState());
						idx++;
					}
				}
				iter2->release();
			}
			// If IGAccelDevice just disappeared, dump GPU error state at that moment
			if (accelDevState == 0xDEAD && lastAccelDevState != 0xDEAD && lastAccelDevState != 0xBEEF) {
				SYSLOG("ngreen", "V70: *** IGAccelDevice LOST! Was dev=0x%llx ***",
					   (unsigned long long)lastAccelDevState);
				uint32_t errAtLoss = NGreen::callback->readReg32(ERROR_GEN6);
				uint32_t tlb0Loss  = NGreen::callback->readReg32(GEN8_FAULT_TLB_DATA0);
				uint32_t tlb1Loss  = NGreen::callback->readReg32(GEN8_FAULT_TLB_DATA1);
				uint32_t faultLoss = NGreen::callback->readReg32(GEN12_RING_FAULT_REG);
				SYSLOG("ngreen", "V70: at loss: ERR=0x%x TLB0=0x%x TLB1=0x%x FAULT=0x%x",
					   errAtLoss, tlb0Loss, tlb1Loss, faultLoss);
				// Dump GGTT PTEs 0-3 to check general GGTT health at this moment
				for (int pg = 0; pg < 4; pg++) {
					uint32_t pteLo = NGreen::callback->readReg32(GGTT_PTE_LO(pg));
					uint32_t pteHi = NGreen::callback->readReg32(GGTT_PTE_HI(pg));
					SYSLOG("ngreen", "V70: loss GGTT[%d]=0x%08x:%08x %s",
						   pg, pteHi, pteLo, (pteLo & 1) ? "V" : "INV");
				}
			}
			lastChildCount = childCount;
			lastAccelDevState = accelDevState;
		}
	}
// Hook for FastWriteRegister32. Rewrites the value of a few specific register
// writes and then forwards the (possibly modified) write to the original routine.
//   that    - driver object passed through untouched
//   param_1 - register offset being written
//   param_2 - value being written
void Gen11::FastWriteRegister32(void *that,unsigned long param_1,uint32_t param_2)
{
	uint32_t value = param_2;

	switch (param_1) {
		// ── V72: EMR write intercept — force all errors masked ──
		case 0x20b4:
		case 0x220b4: {
			if (value == 0xFFFFFFFF)
				break; // already fully masked, nothing to rewrite
			static int v72FastCount = 0;
			if (v72FastCount < 20) {
				// Only the first 20 blocked writes are logged.
				v72FastCount++;
				SYSLOG("ngreen", "V72F[%d]: EMR write blocked @ 0x%lx val=0x%x -> 0xffffffff",
					   v72FastCount, param_1, value);
			}
			value = 0xFFFFFFFF;
			break;
		}

		// PLANE_STRIDE Pipe A Plane 1: the incoming stride is multiplied by 8.
		case 0x70188: {
			const uint32_t linear = value * 8;
			DBGLOG("ngreen", "FastWrite PLANE_STRIDE fixup: 0x%x -> 0x%x (linear)", value, linear);
			value = linear;
			break;
		}

		// PLANE_CTL Pipe A Plane 1: clear tiling bits[12:10] → linear (V80 fix: was <<12)
		case 0x70180:
			value &= ~(0x7u << 10);
			DBGLOG("ngreen", "FastWrite PLANE_CTL fixup: forced linear tiling 0x%x", value);
			break;

		default:
			break;
	}

	return FunctionCast(FastWriteRegister32, callback->oFastWriteRegister32)(that, param_1, value);
}

 

Edited by Stezza88

I’m happy to see your effort and dedication. It’s a shame I can’t join you on this journey I don’t have the appropriate knowledge to build Kexts, and I also don’t have the hardware for testing. But I’m rooting for your success wishing you all the best!

  • Like 2
Posted (edited)
9 minutes ago, Mirone said:

I’m happy to see your effort and dedication. It’s a shame I can’t join you on this journey I don’t have the appropriate knowledge to build Kexts, and I also don’t have the hardware for testing. But I’m rooting for your success wishing you all the best!

 

It's a question of "life or death" XD

 

I still have to study big parts of this code; it is mostly AI-generated.

Edited by Stezza88
  • Like 1
Posted (edited)

Only one thing is clear: this is the GPU (partially) working. This library has been on fire since the good times, my friends; we are not here to sell lies.

IMG_20260503_192045341.jpg

Lilu_1.7.2_23.6.txt Kernel-2026-05-03-192956.panic

Edited by Stezza88
  • Like 3
Posted (edited)

Just for my debug history

// V45: Delayed child-check callback, dispatched through thread_call once the
// requested delay has elapsed. Logs the service's state word, every IOService
// client attached to it, and the IOService clients of its provider.
//   p0 - the IOService to inspect (cast back from the thread-call parameter)
//   p1 - the delay in milliseconds, carried in the pointer-sized parameter
static void v45DelayedChildCheck(thread_call_param_t p0, thread_call_param_t p1) {
	if (!isV60V45PollingEnabled())
		return;

	auto *accel = static_cast<IOService *>(p0);
	unsigned delayMs = (unsigned)(uintptr_t)p1;

	// Capture the state word before walking the clients; the summary line below
	// reports this value, not a re-read one.
	uint64_t accelState = accel->getState();
	OSIterator *clients = accel->getClientIterator();
	int childTotal = 0;
	if (clients != nullptr) {
		while (OSObject *entry = clients->getNextObject()) {
			auto *child = OSDynamicCast(IOService, entry);
			if (child == nullptr)
				continue; // only IOService clients are logged and counted

			uint64_t childState = child->getState();
			SYSLOG("ngreen", "V45: T+%ums child[%d]: %s class=%s state=0x%llx",
				   delayMs, childTotal, child->getName(),
				   child->getMetaClass()->getClassName(),
				   (unsigned long long)childState);

			// V55: extra diagnostics (and a forced registerService) are limited to
			// checks scheduled at 3000 ms or later and to a fixed set of client names.
			const char *childName = (delayMs >= 3000) ? child->getName() : nullptr;
			const bool tracked = childName != nullptr && (
					strcmp(childName, "IGAccelDevice") == 0 ||
					strcmp(childName, "IGAccelSharedUserClient") == 0 ||
					strcmp(childName, "IOAccelDisplayPipeUserClient2") == 0 ||
					strcmp(childName, "IGAccelCommandQueue") == 0);
			if (tracked) {
				// Report the client's own provider and open state.
				auto *childProvider = child->getProvider();
				SYSLOG("ngreen", "V55: %s state=0x%llx provider=%s isOpen=%d",
					   childName, (unsigned long long)childState,
					   childProvider ? childProvider->getName() : "NULL",
					   child->isOpen());

				// Report whether the inspected service itself is open.
				SYSLOG("ngreen", "V55: accelerator isOpen=%d", accel->isOpen());

				// A client whose state word is still zero gets an asynchronous
				// registerService(); its state is re-read after a short delay.
				if (childState == 0) {
					SYSLOG("ngreen", "V55: %s at state=0x0 — calling registerService()", childName);
					child->registerService(kIOServiceAsynchronous);
					IODelay(500);
					uint64_t stateAfter = child->getState();
					SYSLOG("ngreen", "V55: %s after registerService → state=0x%llx",
						   childName, (unsigned long long)stateAfter);
				}
			}
			childTotal++;
		}
		clients->release();
	}

	// Open state of the inspected service, sampled after the client walk.
	bool accelOpen = accel->isOpen();

	// Summary: the individual flags are the state bits 0x02/0x04/0x08/0x10/0x01,
	// printed under the labels given in the format string.
	SYSLOG("ngreen", "V45: T+%ums: %d children, state=0x%llx (reg=%d match=%d pub=%d fmatch=%d inact=%d), isOpen=%d",
		   delayMs, childTotal,
		   (unsigned long long)accelState,
		   !!(accelState & 0x02), !!(accelState & 0x04), !!(accelState & 0x08), !!(accelState & 0x10), !!(accelState & 0x01),
		   accelOpen);

	// Finally list the provider's IOService clients (the siblings of this service).
	auto *parent = accel->getProvider();
	if (parent == nullptr)
		return;

	OSIterator *siblings = parent->getClientIterator();
	if (siblings == nullptr)
		return;

	int siblingIdx = 0;
	while (OSObject *entry = siblings->getNextObject()) {
		auto *sibling = OSDynamicCast(IOService, entry);
		if (sibling == nullptr)
			continue;
		SYSLOG("ngreen", "V45: T+%ums provider child[%d]: %s class=%s",
			   delayMs, siblingIdx, sibling->getName(),
			   sibling->getMetaClass()->getClassName());
		siblingIdx++;
	}
	siblings->release();
}
// Hook for barrierSubmission.
//   Real TGL      -> always forwards to the original implementation.
//   Spoofed (!TGL) -> applies the V130 policy selected through boot-args (see below).
// Returns the original routine's result when it is called, otherwise the
// bypass value (0 or 1) dictated by the selected mode.
uint8_t Gen11::barrierSubmission(void *queue, void *accelerator, void *cmdDesc,
	                             void *event, uint16_t count, const uint16_t *list) {
	if (!NGreen::callback->isRealTGL) {
		// V130: spoof-path guard for deterministic barrierSubmission+0x198 crash.
		// Boot-arg control (value or flags):
		//   ngreenV130=0 or -ngreenV130fail -> bypass and return 0 (default)
		//   ngreenV130=1 or -ngreenV130pass -> bypass and return 1
		//   ngreenV130=2 or -ngreenV130orig -> call original implementation
		//   ngreenV130=3 or -ngreenV130hybrid -> bypass during init, original during render
		// NOTE: on spoofed RPL this is unsafe unless explicitly forced because
		// the original path submits Blit2D/Blit3D barriers through BCS.
		// Use -ngreenV130forceorig only when intentionally validating that path.
		//
		// Values outside 0..3 are normalised to 0. They already behaved as
		// "bypass and return 0", but a negative value used to leave v130Mode < 0,
		// so the boot-args were re-parsed and the INIT line logged on every call.
		static int v130Mode = -1;
		if (v130Mode < 0) {
			int parsed = 0;
			int mode = 0; // default, also used for -ngreenV130fail
			const bool forceOrig = checkKernelArgument("-ngreenV130forceorig");
			if (PE_parse_boot_argn("ngreenV130", &parsed, sizeof(parsed))) {
				if (parsed < 0 || parsed > 3) {
					SYSLOG("ngreen", "V130: ngreenV130=%d is out of range (0..3); using mode=0", parsed);
					parsed = 0;
				}
				mode = parsed;
			} else if (checkKernelArgument("-ngreenV130orig")) {
				mode = 2;
			} else if (checkKernelArgument("-ngreenV130hybrid")) {
				mode = 3;
			} else if (checkKernelArgument("-ngreenV130pass")) {
				mode = 1;
			}

			if (mode == 2 && !forceOrig) {
				SYSLOG("ngreen", "V150: ngreenV130orig requested on spoofed RPL; forcing mode=1 to avoid BCS barrier stall (use -ngreenV130forceorig to override)");
				mode = 1;
			}
			SYSLOG("ngreen", "V130: INIT barrierSubmission spoof mode=%d (0=bypass-ret0, 1=bypass-ret1, 2=call-original, 3=hybrid init-bypass/render-orig) forceOrig=%d",
			       mode, forceOrig ? 1 : 0);
			// Publish only the final, validated mode (always >= 0 from here on).
			v130Mode = mode;
		}

		static int v130CallCount = 0;
		static int v130CallInitPhase = 0;
		static int v130CallRenderPhase = 0;
		static int v130HybridWarmup = -1;
		if (v130HybridWarmup < 0) {
			int parsedWarmup = 12;
			if (PE_parse_boot_argn("ngreenV130warmup", &parsedWarmup, sizeof(parsedWarmup))) {
				if (parsedWarmup < 0) parsedWarmup = 0;
				if (parsedWarmup > 200) parsedWarmup = 200;
			}
			v130HybridWarmup = parsedWarmup;
			if (v130Mode == 3) {
				SYSLOG("ngreen", "V130: hybrid warmup=%d calls (override with ngreenV130warmup=<0..200>)", v130HybridWarmup);
			}
		}

		// Saturate instead of overflowing: signed overflow is undefined, and a
		// wrapped (negative) call count would re-enter the hybrid "init" bypass.
		constexpr int kCounterMax = 0x7FFFFFFF;
		if (v130CallCount < kCounterMax)
			v130CallCount++;

		// V151 DIAGNOSTIC: Enhanced logging to trace call patterns through init vs render phases
		// For hybrid mode, keep bypass window short to avoid WindowServer init starvation.
		const bool isInitPhase = v130CallCount <= v130HybridWarmup;
		if (isInitPhase) {
			v130CallInitPhase++; // bounded by the warmup limit (<= 200)
		} else if (v130CallRenderPhase < kCounterMax) {
			v130CallRenderPhase++;
		}

		// Logging windows: the first 24 calls, calls 101..124, then every 100th call past 1000.
		const bool logEarly  = v130CallCount <= 24;
		const bool logWindow = logEarly || (v130CallCount > 100 && v130CallCount <= 124);

		if (logWindow || (v130CallCount > 1000 && v130CallCount % 100 == 0)) {
			SYSLOG("ngreen", "V130[%d|init=%d|render=%d]: phase=%s q=%p acc=%p cmd=%p evt=%p count=%u mode=%d",
			       v130CallCount, v130CallInitPhase, v130CallRenderPhase,
			       isInitPhase ? "INIT" : "RENDER", queue, accelerator, cmdDesc, event,
			       static_cast<unsigned>(count), v130Mode);
		}

		uint8_t retVal = 0;

		// Mode 3 (hybrid): once the warmup window is over, defer to the original.
		if (v130Mode == 3 && !isInitPhase) {
			if (logWindow) {
				SYSLOG("ngreen", "V130[%d]: HYBRID render phase -> ORIGINAL path", v130CallCount);
			}
			retVal = FunctionCast(barrierSubmission, callback->obarrierSubmission)(queue, accelerator,
			                                                                     cmdDesc, event,
			                                                                     count, list);
			if (logWindow) {
				SYSLOG("ngreen", "V130[%d]: HYBRID original returned %u", v130CallCount, static_cast<unsigned>(retVal));
			}
			return retVal;
		}

		// Mode 2: always call the original (reachable only with -ngreenV130forceorig).
		if (v130Mode == 2) {
			if (logEarly) {
				SYSLOG("ngreen", "V130[%d]: RETURNING ORIGINAL path (mode=2)", v130CallCount);
			}
			retVal = FunctionCast(barrierSubmission, callback->obarrierSubmission)(queue, accelerator,
			                                                                     cmdDesc, event,
			                                                                     count, list);
			if (logEarly) {
				SYSLOG("ngreen", "V130[%d]: ORIGINAL returned %u", v130CallCount, static_cast<unsigned>(retVal));
			}
			return retVal;
		}

		// Bypass: modes 1 and 3 (during warmup) report 1, mode 0 reports 0.
		retVal = static_cast<uint8_t>((v130Mode == 1 || v130Mode == 3) ? 1 : 0);
		if (logEarly) {
			SYSLOG("ngreen", "V130[%d]: RETURNING BYPASS path (mode=%d, ret=%u)", v130CallCount, v130Mode, static_cast<unsigned>(retVal));
		}
		return retVal;
	}
	return FunctionCast(barrierSubmission, callback->obarrierSubmission)(queue, accelerator,
	                                                                     cmdDesc, event,
	                                                                     count, list);
}

 

Edited by Stezza88
  • Like 2
Posted (edited)

Found a big CLUE! Wait for updates...

 

Root cause: DisplayPipeSupported=1 (GPU display pipe active) on non-TGL hardware. WindowServer opens IOAccelDisplayPipeUserClient2, calls DisplaySurface::AccessComplete repeatedly. AccessComplete is stubbed to return 0 (V187) → surface FB Ready signal is never sent → WindowServer hangs waiting for it → watchdogd kills it twice → KP. The V142/submitBlit analysis was a red herring; the GPU rendered fine (as seen in the photo), but the compositor's surface-flip signaling is dead.

The fix: automatically force DisplayPipeSupported=0 for !isRealTGL

Kernel-2026-05-03-235954.panic WindowServer_2026-05-03-235634_MacBook-Pro.userspace_watchdog_timeout.spin

 

 

Edited by Stezza88
  • Like 1
8 hours ago, jalavoui said:

dont mess with this

image.png.a630ffb4276954bfbd756c9e45f66d90.png

 

focus on mtl bundle not loading in wserver log

 

I told the AI that this is not a big problem: MTL loading is confirmed in the Lilu logs, and that is sufficient.

  • Like 1
Posted (edited)

* Big fixes committed and pushed; now I'll test them!

New boot-args using : -v keepsyms=1 debug=0x100 IGLogLevel=8 -NGreenDebug -liludbg liludump=125 ngreen-dmc=skip -allow3d -disablegfxfirmware -ngreenfullmtldyld -ngreenfullmtlcore -ngreendp1 -ngreenexp ngreenV142=3 -ngreenV142diag2d ngreenV130=2 -ngreenv60 (as optional to see mysterious flash brief -ngreenv60rw)

 

Check readme for these args :

-ngreendp0

-ngreendp1
 

// Boot-arg "ngreen-dmc":
// not set or "skip" → safe fallback: passthrough original + AUX only (proven working)
// "tgl" → load TGL DMC v2.12 blob + TGL display engine registers
// + ICL/TGL combo PHY signal levels (PHY_A eDP, PHY_B DP)
// "adlp" → load ADL-P DMC v2.16 blob + ADL-P display engine registers
// + combo PHY signal levels (PHY_A eDP)
// "icl" → passthrough original ICL DMC load + ICL combo PHY signal levels
Edited by Stezza88
  • Like 1
Posted (edited)
// Hook for computeLaneCount. Runs the original, then — on spoofed (non-TGL)
// hardware only — raises *laneCount to the lane count read back from
// DDI_BUF_CTL_A when that is higher than the original's result.
//   laneCount - in/out result pointer; the hook is a no-op when it is null.
void Gen11::computeLaneCount(void *that, const void *timing, unsigned int linkRate, unsigned int bpp, unsigned int *laneCount) {
	if (laneCount == nullptr)
		return;

	// The original runs first on every platform and fills in *laneCount.
	FunctionCast(computeLaneCount, callback->ocomputeLaneCount)(that, timing, linkRate, bpp, laneCount);

	// Real TGL keeps Apple's answer untouched.
	if (NGreen::callback->isRealTGL)
		return;

	// Spoofed path: Apple's DPCD-based result reflects the panel's capability, but
	// UEFI/GOP may have trained the link with a different lane count. Decode the
	// PORT_WIDTH field, bits[3:1] of DDI_BUF_CTL_A:
	//   000 = x1 (1 lane),  001 = x2 (2 lanes),  011 = x4 (4 lanes)
	const uint32_t ddiBufCtl = NGreen::callback->readReg32(0x64000);  // DDI_BUF_CTL_A
	const unsigned int portWidth = (ddiBufCtl >> 1) & 0x7u;

	unsigned int trainedLanes = 1u;
	if (portWidth >= 3) {
		trainedLanes = 4u;
	} else if (portWidth >= 1) {
		trainedLanes = 2u;
	}

	// Log the first 10 decisions only.
	static int v90L4Logs = 0;
	if (v90L4Logs < 10) {
		++v90L4Logs;
		SYSLOG("ngreen", "V90L4[%d]: linkRate=%u bpp=%u appleLC=%u DDI_BUF_CTL_A=0x%x hwLanes=%u",
		       v90L4Logs, linkRate, bpp, *laneCount, ddiBufCtl, trainedLanes);
	}

	// Only ever raise the lane count, never lower it.
	if (*laneCount < trainedLanes)
		*laneCount = trainedLanes;
}
// One-shot flag: non-zero until the first hwSetMode call has completed, so that
// enablePipe is invoked only on that first call.
int hw=1;
// Hook for hwSetMode. On spoofed (non-TGL) hardware it pre-writes
// TRANS_DDI_FUNC_CTL_A, then runs the original, and on the very first call
// additionally invokes enablePipe with the same arguments.
// Returns the original hwSetMode result.
int Gen11::hwSetMode
		  (void *that,void *param_1,
		   void *param_2,void *param_3)
{
	// On RPL-P (spoofed TGL), setPortMode ran just before us and restored TRANS_DDI_FUNC_CTL_A
	// to 0x8A000106 (4-lane eDP, matching UEFI DDI_BUF_CTL_A). paramsFbCompare inside the
	// original hwSetMode reads TRANS_DDI_FUNC_CTL_A and compares it with the target 0x8A010102
	// (2-lane DP SST). Write the correct value NOW so paramsFbCompare sees no lane-count change
	// needed and leaves DDI_BUF_CTL_A alone — preserving the UEFI-trained 4-lane eDP link.
	const bool spoofed = !NGreen::callback->isRealTGL;
	if (spoofed) {
		NGreen::callback->writeReg32(TRANS_DDI_FUNC_CTL_A, 0x8A010102);
		DBGLOG("ngreen", "hwSetMode: pre-write TRANS_DDI_FUNC_CTL_A=0x8A010102 to prevent paramsFbCompare lane reprog");
	}

	const auto status = FunctionCast(hwSetMode, callback->ohwSetMode)(that, param_1, param_2, param_3);

	// First call only: follow the original with enablePipe, then clear the flag.
	if (hw != 0) {
		enablePipe(that, param_1, param_2, param_3);
	}
	hw = 0;

	return status;
}
// Hook for setupOptimalLaneCount. Runs the original, then — on spoofed (non-TGL)
// hardware only — raises the cached lane count stored in the port object (offset
// 0x148) to the lane count read back from DDI_BUF_CTL_A when that is higher.
void Gen11::setupOptimalLaneCount(void *that, const void *timing, unsigned int bpp) {
	// Apple's original always runs first so every other LinkConfig field is populated.
	FunctionCast(setupOptimalLaneCount, callback->osetupOptimalLaneCount)(that, timing, bpp);

	if (NGreen::callback->isRealTGL)
		return;

	// HW-trained lane count: PORT_WIDTH is DDI_BUF_CTL_A bits[3:1] (0=x1, 1=x2, 3=x4).
	const uint32_t ddiBufCtl = NGreen::callback->readReg32(0x64000);
	const unsigned int portWidth = (ddiBufCtl >> 1) & 0x7u;

	unsigned int trainedLanes = 1u;
	if (portWidth >= 3) {
		trainedLanes = 4u;
	} else if (portWidth >= 1) {
		trainedLanes = 2u;
	}

	// The port object stores the cached optimal lane count at a known offset.
	// AppleIntelPort::setupOptimalLaneCount writes fOptimalLaneCount (confirmed
	// by IDA: str result into [x0 + offset] before returning).
	// Patching it after the call overrides the cap-to-DPCD logic.
	// Offset 0x148 is fOptimalLaneCount in AppleIntelPort on this kext version.
	unsigned int &optimalLanes = getMember<unsigned int>(that, 0x148);

	// Log the first 10 invocations only.
	static int v90L5Logs = 0;
	if (v90L5Logs < 10) {
		++v90L5Logs;
		SYSLOG("ngreen", "V90L5[%d]: setupOptimalLC: was=%u DDI_BUF_CTL_A=0x%x hwLanes=%u",
		       v90L5Logs, optimalLanes, ddiBufCtl, trainedLanes);
	}

	// Only ever raise the cached value.
	if (optimalLanes < trainedLanes)
		optimalLanes = trainedLanes;
}

 

Edited by Stezza88
Posted (edited)
// Hook for hwRegsNeedUpdate. On spoofed (non-TGL) hardware with a non-null
// param_3 it edits two 32-bit words of the pending parameter block in place
// (word 1 at +0x04 and word 11 at +0x2C) and then always returns the original
// routine's result.
uint8_t Gen11::hwRegsNeedUpdate
		  (void *that,void *param_1,
		   void *param_2,void *param_3,void *param_4,
		   void *param_5)
{
	// IDA (hwSetMode + hwRegsNeedUpdate) confirms param_3 is the pending CRTCParams
	// built by SetupParams. CRTCParams+0x04 is TRANS_DDI_FUNC_CTL. On spoofed !TGL,
	// clear bit[16] before compare/apply so the full modeset path does not request
	// 0x8a010106 against an already-trained 0x8a000106 link.
	const bool patchPending = !NGreen::callback->isRealTGL && param_3;
	if (patchPending) {
		uint32_t *crtcParams = reinterpret_cast<uint32_t *>(param_3);

		// V97P: clear bit[16] in TRANS_DDI_FUNC_CTL (offset +0x04).
		// Apple's SetupParams sets bit16 as a port-type flag; UEFI/HW never sets it.
		// Without this the DDI FUNC_CTL compare fires and triggers a full modeset
		// that disrupts the already-trained 4-lane eDP link → black screen.
		constexpr uint32_t kPortTypeBit = 1u << 16;
		uint32_t &ddiFuncCtl = crtcParams[1]; // offset +0x04
		if ((ddiFuncCtl & kPortTypeBit) != 0) {
			static int v97PCount = 0;
			if (v97PCount < 12) {
				++v97PCount;
				SYSLOG("ngreen", "V97P[%d]: CRTCParams TRANS_DDI_FUNC_CTL 0x%x -> 0x%x",
				       v97PCount, ddiFuncCtl, ddiFuncCtl & ~kPortTypeBit);
			}
			ddiFuncCtl &= ~kPortTypeBit;
		}

		// V97C: align pending TRANS_CONF (offset +0x2C) with the live HW value.
		// paramsFbCompare logs "TRANS_CONF 0xc0000000->0xc0000024": HW has bits[5,2]
		// clear (UEFI default), Apple wants to set them (interlace/depth config).
		// Writing those bits to an active pipe causes a transient signal disruption
		// that sets the panel's DPCD InterLane Alignment Lost bit (0x202=0x80),
		// which checkLinkStatus detects ~10 s later and tears down the display.
		// Fix: replace the pending TRANS_CONF with the current HW register value so
		// paramsFbCompare sees no change and the partial pipe update is suppressed.
		// PIPE_CONF_A (= TRANS_CONF in ICL+) = 0x70008.
		// NOTE: 0x60008 is TRANS_HSYNC_A (horizontal sync timing) — do NOT use that.
		uint32_t &pendingTransConf = crtcParams[11]; // offset +0x2C
		const uint32_t liveTransConf = NGreen::callback->readReg32(0x70008);
		if (pendingTransConf != liveTransConf) {
			static int v97CCount = 0;
			if (v97CCount < 12) {
				++v97CCount;
				SYSLOG("ngreen", "V97C[%d]: CRTCParams TRANS_CONF 0x%x -> HW 0x%x (suppressed pipe update)",
				       v97CCount, pendingTransConf, liveTransConf);
			}
			pendingTransConf = liveTransConf;
		}
	}

	// Hand back the original result so that register reprogramming proceeds normally.
	// The lane count mismatch (4→2) that previously broke the display is now fixed
	// by the computeLaneCount hook forcing 4 lanes.  Without register updates, the
	// plane surface address and stride never get written, leaving the stale BIOS
	// framebuffer on screen (grey/black vertical bars with only cursor visible).
	return FunctionCast(hwRegsNeedUpdate, callback->ohwRegsNeedUpdate)(that, param_1, param_2, param_3, param_4, param_5);
}
Edited by Stezza88
Posted (edited)
On 5/3/2026 at 5:30 PM, Mirone said:

I’m happy to see your effort and dedication. It’s a shame I can’t join you on this journey I don’t have the appropriate knowledge to build Kexts, and I also don’t have the hardware for testing. But I’m rooting for your success wishing you all the best!


P.S. git is ready to build... just open the root folder in Xcode then clean and build.

Edited by Stezza88
Posted (edited)
// Hook for the ICL AUX read routine. Calls the original, logs the first 40
// reads, and then patches the returned DPCD bytes for a few addresses:
//   0x0100          (spoofed only) clamp byte 0 to 0x14 and force lane count 2 in byte 1
//   0x0202          (spoofed only) clear bit7 of the third byte (DPCD 0x204)
//   0x0000 / 0x2200 capability block: cap the advertised lane count (spoofed only)
//                   and zero maxLinkRate when the DPCD revision is below 0x03
// The buffer is only modified when the original read returned kIOReturnSuccess;
// after a failed read its contents are not valid DPCD data and are left as-is.
// Returns the original routine's result unchanged.
IOReturn Genx::wrapICLReadAUX(void *that, uint32_t address, void *buffer, uint32_t length) {

	IOReturn retVal = FunctionCast(wrapICLReadAUX, callback->orgICLReadAUX)(that, address, buffer, length);

	// Diagnostic trace of the first 40 reads (logged for failed reads too, with ret).
	static int auxLogCount = 0;
	if (auxLogCount < 40) {
		auxLogCount++;
		uint8_t *b = reinterpret_cast<uint8_t *>(buffer);
		if (length >= 2)
			SYSLOG("ngreen", "V97AUX[%d]: addr=0x%04x len=%u ret=0x%x [0]=0x%02x [1]=0x%02x",
			       auxLogCount, address, length, retVal, b ? b[0] : 0xFF, b ? b[1] : 0xFF);
		else
			SYSLOG("ngreen", "V97AUX[%d]: addr=0x%04x len=%u ret=0x%x",
			       auxLogCount, address, length, retVal);
	}

	// Nothing below may run on a failed read or without a buffer: the bytes would
	// be stale or uninitialised, and patching them would only disguise the failure.
	if (retVal != kIOReturnSuccess || buffer == nullptr)
		return retVal;

	const bool spoofed = !NGreen::callback->isRealTGL;
	auto *raw = reinterpret_cast<uint8_t *>(buffer);

	if (spoofed && address == 0x0100 && length >= 1) {
		// V98T: Clamp observed link-training set to HBR2 + 2 lanes on spoofed path.
		// Some panel/driver combinations oscillate with aggressive defaults (HBR3 / 4-lane bits).
		if (raw[0] > 0x14) raw[0] = 0x14;      // LINK_BW_SET <= HBR2
		if (length >= 2) {
			raw[1] = (raw[1] & 0xE0) | 0x02;   // lane count = 2, keep upper feature bits
		}
		static int v98tLogs = 0;
		if (v98tLogs < 10) {
			v98tLogs++;
			if (length >= 2)
				SYSLOG("ngreen", "V98T[%d]: clamped 0x0100 read to bw=0x%02x lanes=0x%02x",
				       v98tLogs, raw[0], raw[1]);
			else
				SYSLOG("ngreen", "V98T[%d]: clamped 0x0100 read to bw=0x%02x (len=1)",
				       v98tLogs, raw[0]);
		}
	}

	// V99: Suppress spurious LINK_STATUS_UPDATED (DPCD[0x204] bit7) on RPL-P.
	// HDCP probing reads DPCD 0x6921d, which causes the eDP panel to assert IRQ_HPD,
	// setting LINK_STATUS_UPDATED=1. Apple's checkLinkStatus then sees
	// INTERLANE_ALIGN_DONE=0 and tears down the display (~10s after boot).
	// The physical link is healthy; only the IRQ flag is spurious.
	// Clearing bit7 of DPCD[0x204] prevents the driver from acting on the IRQ.
	// The read starts at 0x0202, so DPCD 0x204 is the third byte (raw[2]).
	if (spoofed && address == 0x0202 && length >= 3) {
		if (raw[2] & 0x80) {
			static int v99Logs = 0;
			if (v99Logs < 10) {
				v99Logs++;
				SYSLOG("ngreen", "V99[%d]: suppressed DPCD 0x204 LINK_STATUS_UPDATED "
				       "(was 0x%02x, lanes=[0x%02x 0x%02x])",
				       v99Logs, raw[2], raw[0], raw[1]);
			}
			raw[2] &= ~0x80u; // clear LINK_STATUS_UPDATED
		}
	}

	// Everything from here on applies to the capability block reads only.
	if (address != 0x0000 && address != 0x2200) return retVal;

	if (length < sizeof(DPCDCap16))
		return retVal;

	auto caps = reinterpret_cast<DPCDCap16*>(buffer);

	if (spoofed) {
		// V98: Advertise max 2 lanes so Apple's initial computeLaneCount picks the
		// conservative path; our setupOptimalLaneCount + computeLaneCount hooks then
		// override to the HW-trained 4 lanes via DDI_BUF_CTL_A readback.
		// Do NOT cap maxLinkRate — the hardware is trained at its full rate (HBR3,
		// 0x1e) by UEFI. Capping to HBR2 (0x14) caused Apple to compute wrong M/N
		// ratios (linkSymbolClock=540 vs actual 810MHz) → panel loses sync after ~10s.
		caps->maxLaneCount = (caps->maxLaneCount & 0xE0) | 0x02; // advertise max 2 lanes
		static int v98Logs = 0;
		if (v98Logs < 10) {
			v98Logs++;
			SYSLOG("ngreen", "V98[%d]: DPCD caps @0x%04x maxLinkRate=0x%02x (kept) maxLane=0x%02x (capped to 2)",
			       v98Logs, address, caps->maxLinkRate, caps->maxLaneCount);
		}
	}

	// Applies on every platform, real TGL included.
	// NOTE(review): the reason for zeroing maxLinkRate on revisions below 0x03 is
	// not visible in this file — confirm the intent.
	if (caps->revision < 0x03) {
		caps->maxLinkRate=0;
	}

	return retVal;
}

 

There is a lot of hard-coded garbage here... I need to check it later.

Edited by Stezza88
×
×
  • Create New...