Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

n64: improve and extend cache coherency checks #1314

Merged
merged 7 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ares/n64/ai/ai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ auto AI::sample(f64& left, f64& right) -> void {

if(io.dmaLength[0] && io.dmaEnable) {
io.dmaAddress[0].bit(13,23) += io.dmaAddressCarry;
auto data = rdram.ram.read<Word>(io.dmaAddress[0]);
auto data = rdram.ram.read<Word>(io.dmaAddress[0], "AI");
auto l = s16(data >> 16);
auto r = s16(data >> 0);
left = l / 32768.0;
Expand All @@ -50,8 +50,9 @@ auto AI::sample(f64& left, f64& right) -> void {
}
if(!io.dmaLength[0]) {
if(--io.dmaCount) {
io.dmaAddress[0] = io.dmaAddress[1];
io.dmaLength [0] = io.dmaLength [1];
io.dmaAddress[0] = io.dmaAddress[1];
io.dmaLength [0] = io.dmaLength [1];
io.dmaOriginPc[0] = io.dmaOriginPc[1];
mi.raise(MI::IRQ::AI);
}
}
Expand Down
1 change: 1 addition & 0 deletions ares/n64/ai/ai.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct AI : Thread, Memory::RCP<AI> {
n1 dmaAddressCarry;
n18 dmaLength[2];
n2 dmaCount;
u64 dmaOriginPc[2];
n14 dacRate;
n4 bitRate;
} io;
Expand Down
1 change: 1 addition & 0 deletions ares/n64/ai/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
if(io.dmaCount < 2) {
if(io.dmaCount == 0) mi.raise(MI::IRQ::AI);
io.dmaLength[io.dmaCount] = length;
io.dmaOriginPc[io.dmaCount] = cpu.ipu.pc;
io.dmaCount++;
}
}
Expand Down
3 changes: 2 additions & 1 deletion ares/n64/cartridge/flash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ auto Cartridge::Flash::writeWord(u32 address, u64 data) -> void {
}
if(mode == Mode::Write) {
for(u32 index = 0; index < 128; index += 2) {
u16 half = rdram.ram.read<Half>(source + index);
// FIXME: this is obviously wrong, the flash can't access RDRAM
u16 half = rdram.ram.read<Half>(source + index, "Flash");
Memory::Writable::write<Half>(offset + index, half);
}
}
Expand Down
1 change: 1 addition & 0 deletions ares/n64/cpu/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ auto CPU::instruction() -> void {
return exception.nmi();
}
if (scc.sysadFrozen) {
step(1 * 2);
return;
}

Expand Down
4 changes: 3 additions & 1 deletion ares/n64/cpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,11 @@ struct CPU : Thread {
template<u32 Size> auto write(u32 address, u64 data) -> void;

bool valid;
bool dirty;
u16 dirty;
u32 tag;
u16 index;
u64 fillPc;
u64 dirtyPc;
union {
u8 bytes[16];
u16 halfs[8];
Expand Down
12 changes: 7 additions & 5 deletions ares/n64/cpu/dcache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool {

auto CPU::DataCache::Line::fill(u32 address) -> void {
cpu.step(40 * 2);
valid = 1;
dirty = 0;
tag = address & ~0x0000'0fff;
valid = 1;
dirty = 0;
tag = address & ~0x0000'0fff;
fillPc = cpu.ipu.pc;
cpu.busReadBurst<DCache>(tag | index, words);
}

Expand Down Expand Up @@ -41,7 +42,8 @@ auto CPU::DataCache::Line::write(u32 address, u64 data) -> void {
words[address >> 2 & 2 | 0] = data >> 32;
words[address >> 2 & 2 | 1] = data >> 0;
}
dirty = 1;
dirty |= ((1 << Size) - 1) << (address & 0xF);
dirtyPc = cpu.ipu.pc;
}

template<u32 Size>
Expand All @@ -60,7 +62,7 @@ auto CPU::DataCache::readDebug(u32 vaddr, u32 address) -> u8 {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
Thread dummyThread{};
return bus.read<Byte>(address, dummyThread);
return bus.read<Byte>(address, dummyThread, "Ares Debugger");
}
return line.read<Byte>(address);
}
Expand Down
10 changes: 5 additions & 5 deletions ares/n64/cpu/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ auto CPU::devirtualizeDebug(u64 vaddr) -> u64 {

template<u32 Size>
inline auto CPU::busWrite(u32 address, u64 data) -> void {
bus.write<Size>(address, data, *this);
bus.write<Size>(address, data, *this, "CPU");
}

template<u32 Size>
Expand All @@ -156,7 +156,7 @@ inline auto CPU::busWriteBurst(u32 address, u32 *data) -> void {

template<u32 Size>
inline auto CPU::busRead(u32 address) -> u64 {
return bus.read<Size>(address, *this);
return bus.read<Size>(address, *this, "CPU");
}

template<u32 Size>
Expand Down Expand Up @@ -239,17 +239,17 @@ auto CPU::readDebug(u64 vaddr) -> u8 {
case Context::Segment::Mapped:
if(auto match = tlb.load(vaddr, true)) {
if(match.cache) return dcache.readDebug(vaddr, match.address & context.physMask);
return bus.read<Byte>(match.address & context.physMask, dummyThread);
return bus.read<Byte>(match.address & context.physMask, dummyThread, "Ares Debugger");
}
return 0;
case Context::Segment::Cached:
return dcache.readDebug(vaddr, vaddr & 0x1fff'ffff);
case Context::Segment::Cached32:
return dcache.readDebug(vaddr, vaddr & 0xffff'ffff);
case Context::Segment::Direct:
return bus.read<Byte>(vaddr & 0x1fff'ffff, dummyThread);
return bus.read<Byte>(vaddr & 0x1fff'ffff, dummyThread, "Ares Debugger");
case Context::Segment::Direct32:
return bus.read<Byte>(vaddr & 0xffff'ffff, dummyThread);
return bus.read<Byte>(vaddr & 0xffff'ffff, dummyThread, "Ares Debugger");
}

unreachable;
Expand Down
2 changes: 1 addition & 1 deletion ares/n64/cpu/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address, bool singleInstruction) -> Bl
Thread thread;
bool hasBranched = 0;
while(true) {
u32 instruction = bus.read<Word>(address, thread);
u32 instruction = bus.read<Word>(address, thread, "Ares Recompiler");
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&CPU::instructionPrologue);
Expand Down
79 changes: 45 additions & 34 deletions ares/n64/memory/bus.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
template<u32 Size>
inline auto Bus::read(u32 address, Thread& thread) -> u64 {
static constexpr u64 unmapped = 0;
inline auto Bus::read(u32 address, Thread& thread, const char *peripheral) -> u64 {
static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual);

if(address <= 0x007f'ffff) return rdram.ram.read<Size>(address);
if(address <= 0x03ef'ffff) return unmapped;
if(address <= 0x03ef'ffff) return rdram.ram.read<Size>(address, peripheral);
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, thread);
if(Size == Dual) return freezeDualRead(address), 0;
if(address <= 0x0407'ffff) return rsp.read<Size>(address, thread);
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, thread);
if(address <= 0x040b'ffff) return rsp.status.read<Size>(address, thread);
if(address <= 0x040f'ffff) return freezeUnmapped(address), 0;
if(address <= 0x041f'ffff) return rdp.read<Size>(address, thread);
if(address <= 0x042f'ffff) return rdp.io.read<Size>(address, thread);
if(address <= 0x043f'ffff) return mi.read<Size>(address, thread);
Expand All @@ -16,48 +16,49 @@ inline auto Bus::read(u32 address, Thread& thread) -> u64 {
if(address <= 0x046f'ffff) return pi.read<Size>(address, thread);
if(address <= 0x047f'ffff) return ri.read<Size>(address, thread);
if(address <= 0x048f'ffff) return si.read<Size>(address, thread);
if(address <= 0x04ff'ffff) return unmapped;
if(address <= 0x04ff'ffff) return freezeUnmapped(address), 0;
if(address <= 0x1fbf'ffff) return pi.read<Size>(address, thread);
if(address <= 0x1fcf'ffff) return si.read<Size>(address, thread);
if(address <= 0x7fff'ffff) return pi.read<Size>(address, thread);
return unmapped;
return freezeUnmapped(address), 0;
}

template<u32 Size>
inline auto Bus::readBurst(u32 address, u32 *data, Thread& thread) -> void {
static_assert(Size == DCache || Size == ICache);

if(address <= 0x03ef'ffff) return rdram.ram.readBurst<Size>(address, data, "CPU");
if(address <= 0x03ff'ffff) {
data[0] = read<Word>(address | 0x0, thread);
data[1] = read<Word>(address | 0x4, thread);
data[2] = read<Word>(address | 0x8, thread);
data[3] = read<Word>(address | 0xc, thread);
// FIXME: not hardware validated, no idea of the behavior
data[0] = rdram.readWord(address | 0x0, thread);
data[1] = 0;
data[2] = 0;
data[3] = 0;
if constexpr(Size == ICache) {
data[4] = read<Word>(address | 0x10, thread);
data[5] = read<Word>(address | 0x14, thread);
data[6] = read<Word>(address | 0x18, thread);
data[7] = read<Word>(address | 0x1c, thread);
data[4] = 0;
data[5] = 0;
data[6] = 0;
data[7] = 0;
}
return;
}

debug(unusual, "[Bus::readBurst] CPU frozen because of cached read to non-RDRAM area: 0x", hex(address, 8L));
cpu.scc.sysadFrozen = true;
return freezeUncached(address);
}

template<u32 Size>
inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void {
inline auto Bus::write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void {
static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual);
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
cpu.recompiler.invalidate(address + 4);
}

if(address <= 0x007f'ffff) return rdram.ram.write<Size>(address, data);
if(address <= 0x03ef'ffff) return;
if(address <= 0x03ef'ffff) return rdram.ram.write<Size>(address, data, peripheral);
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, thread);
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, thread);
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, thread);
if(address <= 0x040b'ffff) return rsp.status.write<Size>(address, data, thread);
if(address <= 0x040f'ffff) return freezeUnmapped(address);
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data, thread);
if(address <= 0x042f'ffff) return rdp.io.write<Size>(address, data, thread);
if(address <= 0x043f'ffff) return mi.write<Size>(address, data, thread);
Expand All @@ -66,31 +67,41 @@ inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void {
if(address <= 0x046f'ffff) return pi.write<Size>(address, data, thread);
if(address <= 0x047f'ffff) return ri.write<Size>(address, data, thread);
if(address <= 0x048f'ffff) return si.write<Size>(address, data, thread);
if(address <= 0x04ff'ffff) return;
if(address <= 0x04ff'ffff) return freezeUnmapped(address);
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data, thread);
if(address <= 0x1fcf'ffff) return si.write<Size>(address, data, thread);
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data, thread);
return;
return freezeUnmapped(address);
}

template<u32 Size>
inline auto Bus::writeBurst(u32 address, u32 *data, Thread& thread) -> void {
static_assert(Size == DCache || Size == ICache);
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidateRange(address, Size == DCache ? 16 : 32);
}

if(address <= 0x03ef'ffff) return rdram.ram.writeBurst<Size>(address, data, "CPU");
if(address <= 0x03ff'ffff) {
write<Word>(address | 0x0, data[0], thread);
write<Word>(address | 0x4, data[1], thread);
write<Word>(address | 0x8, data[2], thread);
write<Word>(address | 0xc, data[3], thread);
if constexpr(Size == ICache) {
write<Word>(address | 0x10, data[4], thread);
write<Word>(address | 0x14, data[5], thread);
write<Word>(address | 0x18, data[6], thread);
write<Word>(address | 0x1c, data[7], thread);
}
// FIXME: not hardware validated, but a good guess
rdram.writeWord(address | 0x0, data[0], thread);
return;
}

debug(unusual, "[Bus::readBurst] CPU frozen because of cached write to non-RDRAM area: 0x", hex(address, 8L));
return freezeUncached(address);
}

//Logs the offending physical address and marks the CPU's SysAD interface as
//frozen. Bus::read / Bus::write call this for address ranges that fall through
//to no RCP device.
inline auto Bus::freezeUnmapped(u32 address) -> void {
  const auto location = hex(address, 8L);  //8-digit hexadecimal rendering of the address
  debug(unusual, "[Bus::freezeUnmapped] CPU frozen because of access to RCP unmapped area: 0x", location);
  cpu.scc.sysadFrozen = true;
}

//Logs the offending physical address and marks the CPU's SysAD interface as
//frozen. The burst (cache line) accessors call this when the target address
//lies above the RDRAM / RDRAM-register ranges they handle.
inline auto Bus::freezeUncached(u32 address) -> void {
  const auto location = hex(address, 8L);  //8-digit hexadecimal rendering of the address
  debug(unusual, "[Bus::freezeUncached] CPU frozen because of cached access to non-RDRAM area: 0x", location);
  cpu.scc.sysadFrozen = true;
}

//Logs the offending physical address and marks the CPU's SysAD interface as
//frozen. Bus::read calls this for Dual-sized (64-bit) reads that target
//anything above the RDRAM ranges.
inline auto Bus::freezeDualRead(u32 address) -> void {
  //no space after "0x": keeps the printed address in the same 0xXXXXXXXX form
  //as the messages emitted by freezeUnmapped / freezeUncached
  debug(unusual, "[Bus::freezeDualRead] CPU frozen because of 64-bit read from non-RDRAM area: 0x", hex(address, 8L));
  cpu.scc.sysadFrozen = true;
}
8 changes: 6 additions & 2 deletions ares/n64/memory/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ namespace Memory {

struct Bus {
//bus.hpp
template<u32 Size> auto read(u32 address, Thread& thread) -> u64;
template<u32 Size> auto write(u32 address, u64 data, Thread& thread) -> void;
template<u32 Size> auto read(u32 address, Thread& thread, const char *peripheral) -> u64;
template<u32 Size> auto write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void;

template<u32 Size> auto readBurst(u32 address, u32* data, Thread& thread) -> void;
template<u32 Size> auto writeBurst(u32 address, u32* data, Thread& thread) -> void;

auto freezeUnmapped(u32 address) -> void;
auto freezeUncached(u32 address) -> void;
auto freezeDualRead(u32 address) -> void;
};

extern Bus bus;
2 changes: 1 addition & 1 deletion ares/n64/n64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ namespace ares::Nintendo64 {
#include <n64/pif/pif.hpp>
#include <n64/ri/ri.hpp>
#include <n64/si/si.hpp>
#include <n64/rdram/rdram.hpp>
#include <n64/cpu/cpu.hpp>
#include <n64/rdram/rdram.hpp>
#include <n64/rsp/rsp.hpp>
#include <n64/rdp/rdp.hpp>
#include <n64/memory/bus.hpp>
Expand Down
22 changes: 3 additions & 19 deletions ares/n64/pi/dma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,7 @@ auto PI::dmaRead() -> void {

u32 lastCacheline = 0xffff'ffff;
for(u32 address = 0; address < io.readLength; address += 2) {
if (system.homebrewMode && ((io.dramAddress + address) & ~15) != lastCacheline) {
lastCacheline = address & ~15;
auto& line = cpu.dcache.line(io.dramAddress + address);
if (line.hit(io.dramAddress) && line.dirty) {
debug(unusual, "PI DMA reading from cached memory ", hex((io.dramAddress + address) | 0x80000000), " (missing cache writeback?)");
}
}
u16 data = rdram.ram.read<Half>(io.dramAddress + address);
u16 data = rdram.ram.read<Half>(io.dramAddress + address, "PI DMA");
busWrite<Half>(io.pbusAddress + address, data);
}
}
Expand Down Expand Up @@ -49,17 +42,8 @@ auto PI::dmaWrite() -> void {
cpu.recompiler.invalidateRange(io.dramAddress, cur_len);
}

u32 lastCacheline = 0xffff'ffff;
for (u32 i = 0; i < cur_len; i++) {
if (system.homebrewMode && (io.dramAddress & ~15) != lastCacheline) {
lastCacheline = io.dramAddress & ~15;
auto& line = cpu.dcache.line(io.dramAddress);
if (line.hit(io.dramAddress)) {
debug(unusual, "PI DMA writing to cached memory ", hex(io.dramAddress | 0x80000000), " (missing cache invalidation?)");
}
}
rdram.ram.write<Byte>(io.dramAddress++, mem[i]);
}
for (u32 i = 0; i < cur_len; i++)
rdram.ram.write<Byte>(io.dramAddress++, mem[i], "PI DMA");
io.dramAddress = (io.dramAddress + 7) & ~7;

first_block = false;
Expand Down
2 changes: 2 additions & 0 deletions ares/n64/pi/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_READ_LENGTH
io.readLength = n24(data);
io.dmaBusy = 1;
io.originPc = cpu.ipu.pc;
queue.insert(Queue::PI_DMA_Read, dmaDuration(true));
dmaRead();
}
Expand All @@ -106,6 +107,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_WRITE_LENGTH
io.writeLength = n24(data);
io.dmaBusy = 1;
io.originPc = cpu.ipu.pc;
queue.insert(Queue::PI_DMA_Write, dmaDuration(false));
dmaWrite();
}
Expand Down
1 change: 1 addition & 0 deletions ares/n64/pi/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct PI : Memory::RCP<PI> {
n32 readLength;
n32 writeLength;
n32 busLatch;
u64 originPc;
} io;

struct BSD {
Expand Down
4 changes: 2 additions & 2 deletions ares/n64/pif/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ auto PIF::dmaRead(u32 address, u32 ramAddress) -> void {
intA(Read, Size64);
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = readInt(address + offset);
rdram.ram.write<Word>(ramAddress + offset, data);
rdram.ram.write<Word>(ramAddress + offset, data, "SI DMA");
}
}

auto PIF::dmaWrite(u32 address, u32 ramAddress) -> void {
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = rdram.ram.read<Word>(ramAddress + offset);
u32 data = rdram.ram.read<Word>(ramAddress + offset, "SI DMA");
writeInt(address + offset, data);
}
intA(Write, Size64);
Expand Down
Loading
Loading