From 9db292f616eefff1dfa6cf1b8ec28f21eed860c5 Mon Sep 17 00:00:00 2001 From: doyle Date: Tue, 20 Jun 2023 23:19:17 +1000 Subject: [PATCH] perfware/part1: Support listing 0057 --- part1/build.bat | 18 +++ part1/listing_0057_challenge_cycles.txt | 37 +++-- part1/sim8086.c | 179 ++++++++++++++++-------- part1/sim8086.h | 1 + project.rdbg | Bin 4493 -> 4303 bytes 5 files changed, 156 insertions(+), 79 deletions(-) diff --git a/part1/build.bat b/part1/build.bat index 8bb18e3..897e461 100644 --- a/part1/build.bat +++ b/part1/build.bat @@ -287,3 +287,21 @@ nasm %build_dir_listing_0056%_disassembled.asm fc /B %build_dir_listing_0056% %build_dir_listing_0056%_disassembled || exit /b 1 fc /N %build_dir_listing_0056%.txt %build_dir_listing_0056%_disassembled.txt || exit /b 1 + +REM ================================================================================================ +set listing_0057=listing_0057_challenge_cycles +set build_dir_listing_0057=%build_dir%\%listing_0057% + +copy /Y %script_dir%\%listing_0057% %build_dir% 1>NUL +copy /Y %script_dir%\%listing_0057%.txt %build_dir% 1>NUL + +pushd %build_dir% +%build_dir%\sim8086.exe --exec --log-instruction-ptr --log-cycle-counts 8086 %build_dir_listing_0057% > %build_dir_listing_0057%_disassembled.txt +%build_dir%\sim8086.exe %build_dir_listing_0057% > %build_dir_listing_0057%_disassembled.asm +%build_dir%\sim8086.exe --exec --log-instruction-ptr --log-cycle-counts 8088 %build_dir_listing_0057% >> %build_dir_listing_0057%_disassembled.txt +popd + +nasm %build_dir_listing_0057%_disassembled.asm + +fc /B %build_dir_listing_0057% %build_dir_listing_0057%_disassembled || exit /b 1 +fc /N %build_dir_listing_0057%.txt %build_dir_listing_0057%_disassembled.txt || exit /b 1 diff --git a/part1/listing_0057_challenge_cycles.txt b/part1/listing_0057_challenge_cycles.txt index 51cceb4..d480d1c 100644 --- a/part1/listing_0057_challenge_cycles.txt +++ b/part1/listing_0057_challenge_cycles.txt @@ -33,8 +33,6 @@ Final registers: ip: 0x0036 (54) flags: A - - ************** **** 8088 **** ************** @@ -44,23 +42,23 @@ They will be inaccurate, both because the manual clocks are estimates, and becau some of the entries in the manual look highly suspicious and are probably typos. --- test\listing_0057_challenge_cycles execution --- -mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3 -mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6 -mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9 -mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc -mov cx, [bp+di] ; Clocks: +19 = 35 (8 + 7ea + 4p) | ip:0xc->0xe -mov word [bx+si], cx ; Clocks: +20 = 55 (9 + 7ea + 4p) | ip:0xe->0x10 -mov cx, [bp+si] ; Clocks: +20 = 75 (8 + 8ea + 4p) | ip:0x10->0x12 -mov word [bx+di], cx ; Clocks: +21 = 96 (9 + 8ea + 4p) | ip:0x12->0x14 -mov cx, [bp+di+1000] ; Clocks: +23 = 119 (8 + 11ea + 4p) | ip:0x14->0x18 -mov word [bx+si+1000], cx ; Clocks: +24 = 143 (9 + 11ea + 4p) | ip:0x18->0x1c -mov cx, [bp+si+1000] ; Clocks: +24 = 167 (8 + 12ea + 4p) | ip:0x1c->0x20 -mov word [bx+di+1000], cx ; Clocks: +25 = 192 (9 + 12ea + 4p) | ip:0x20->0x24 -add dx, [bp+si+1000] ; Clocks: +25 = 217 (9 + 12ea + 4p) | ip:0x24->0x28 flags:->PZ -add word [bp+si], 76 ; Clocks: +33 = 250 (17 + 8ea + 8p) | ip:0x28->0x2b flags:PZ-> -add dx, [bp+si+1001] ; Clocks: +25 = 275 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ -add word [di+999], dx ; Clocks: +33 = 308 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P -add word [bp+si], 75 ; Clocks: +33 = 341 (17 + 8ea + 8p) | ip:0x33->0x36 flags:P->A +mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3 +mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6 +mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9 +mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc +mov cx, [bp+di] ; Clocks: +19 = 35 (8 + 7ea + 4p) | ip:0xc->0xe +mov word [bx+si], cx ; Clocks: +20 = 55 (9 + 7ea + 4p) | ip:0xe->0x10 +mov cx, [bp+si] ; Clocks: +20 = 75 (8 + 8ea + 4p) | ip:0x10->0x12 +mov word [bx+di], cx ; Clocks: +21 = 96 (9 + 8ea + 4p) | ip:0x12->0x14 +mov cx, [bp+di+1000] ; Clocks: +23 = 119 (8 + 11ea + 4p) | ip:0x14->0x18 +mov word [bx+si+1000], cx ; Clocks: +24 = 143 (9 + 11ea + 4p) | ip:0x18->0x1c +mov cx, [bp+si+1000] ; Clocks: +24 = 167 (8 + 12ea + 4p) | ip:0x1c->0x20 +mov word [bx+di+1000], cx ; Clocks: +25 = 192 (9 + 12ea + 4p) | ip:0x20->0x24 +add dx, [bp+si+1000] ; Clocks: +25 = 217 (9 + 12ea + 4p) | ip:0x24->0x28 flags:->PZ +add word [bp+si], 76 ; Clocks: +33 = 250 (17 + 8ea + 8p) | ip:0x28->0x2b flags:PZ-> +add dx, [bp+si+1001] ; Clocks: +25 = 275 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ +add word [di+999], dx ; Clocks: +33 = 308 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P +add word [bp+si], 75 ; Clocks: +33 = 341 (17 + 8ea + 8p) | ip:0x33->0x36 flags:P->A Final registers: bx: 0x03e8 (1000) @@ -69,3 +67,4 @@ Final registers: di: 0x0fa0 (4000) ip: 0x0036 (54) flags: A + diff --git a/part1/sim8086.c b/part1/sim8086.c index 8c2724a..7c96603 100644 --- a/part1/sim8086.c +++ b/part1/sim8086.c @@ -375,6 +375,8 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it, S86_MnemonicOp *seg_reg, bool cycle_count_8088) { + (void)cycle_count_8088; + size_t buffer_start_index = buffer_it->index; char op_code_bytes[2] = {0}; size_t op_code_size = 0; @@ -422,6 +424,7 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it, S86_ASSERT(op_decode_type != S86_OpDecodeType_Count && "Unknown instruction"); S86_Opcode result = {0}; + result.type = op_decode_type; result.mnemonic = op_decode->mnemonic; result.lock_prefix = *lock_prefix; result.seg_reg_prefix = *seg_reg; @@ -860,44 +863,6 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it, : S86_WordBytePrefix_Byte; } - if ((op_decode_type >= S86_OpDecodeType_MOVRegOrMemToOrFromReg) && - (op_decode_type <= S86_OpDecodeType_MOVSegRegToRegOrMem)) { - if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_Immediate && !result.effective_addr_loads_mem) { - result.base_clocks = 4; - } else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && !result.effective_addr_loads_mem) { - result.base_clocks = 2; - } else if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_DirectAddress && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Src) { - result.base_clocks = 8; - result.effective_address_clocks = 6; - if (cycle_count_8088 && result.wide) { - result.transfer_penalty_clocks = 4; - } - } else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Src) { - result.base_clocks = 8; - result.effective_address_clocks = result.displacement ? 9 : 5; - if (cycle_count_8088 && result.wide) { - result.transfer_penalty_clocks = 4; - } - } else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Dest) { - result.base_clocks = 9; - result.effective_address_clocks = result.displacement ? 9 : 5; - if (cycle_count_8088 && result.wide) { - result.transfer_penalty_clocks = 4; - } - } - } else if (op_decode_type >= S86_OpDecodeType_ADDRegOrMemToOrFromReg && op_decode_type <= S86_OpDecodeType_ADDImmediateToAccum) { - if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr == S86_EffectiveAddress_None) { - result.base_clocks = 3; - } else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr == S86_EffectiveAddress_Dest) { - result.base_clocks = 16; - result.effective_address_clocks = result.displacement ? 9 : 5; - if (cycle_count_8088 && result.wide) { - result.transfer_penalty_clocks = 4 * 2; - } - } else if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_Immediate) { - result.base_clocks = 4; - } - } size_t buffer_end_index = buffer_it->index; result.byte_size = S86_CAST(uint8_t)(buffer_end_index - buffer_start_index); @@ -1373,6 +1338,11 @@ int main(int argc, char **argv) } // NOTE: Simulate instruction ============================================================== + bool cycle_count_8088 = log_cycle_counts == CycleCount_8088; + uint32_t base_clocks = 0; + uint32_t effective_address_clocks = 0; + uint32_t transfer_penalty_clocks = 0; + S86_RegisterFile prev_register_file = register_file; switch (opcode.mnemonic) { case S86_Mnemonic_PUSH: /*FALLTHRU*/ @@ -1543,6 +1513,48 @@ int main(int argc, char **argv) *dest_lo = S86_CAST(uint8_t)(src >> 0); if (dest_hi) *dest_hi = S86_CAST(uint8_t)(src >> 8); + + if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate && !opcode.effective_addr_loads_mem) { + base_clocks = 4; + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && !opcode.effective_addr_loads_mem) { + base_clocks = 2; + } else if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_DirectAddress && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) { + base_clocks = 8; + effective_address_clocks = 6; + if (cycle_count_8088 && opcode.wide) { + transfer_penalty_clocks = 4; + } + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) { + base_clocks = 8; + effective_address_clocks = opcode.displacement ? 9 : 5; + if (cycle_count_8088 && opcode.wide) { + transfer_penalty_clocks = 4; + } + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Dest) { + base_clocks = 9; + effective_address_clocks = opcode.displacement ? 9 : 5; + if (cycle_count_8088 && opcode.wide) { + transfer_penalty_clocks = 4; + } + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsEffectiveAddress(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) { + base_clocks = 8; + if (cycle_count_8088 && opcode.wide) + transfer_penalty_clocks = 4; + if (opcode.src == S86_MnemonicOp_BP_DI || opcode.src == S86_MnemonicOp_BX_SI) { + effective_address_clocks = opcode.displacement ? 11 : 7; + } else if (opcode.src == S86_MnemonicOp_BP_SI || opcode.src == S86_MnemonicOp_BX_DI) { + effective_address_clocks = opcode.displacement ? 12 : 8; + } + } else if (S86_MnemonicOpIsEffectiveAddress(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Dest) { + base_clocks = 9; + if (cycle_count_8088 && opcode.wide) + transfer_penalty_clocks = 4; + if (opcode.dest == S86_MnemonicOp_BP_DI || opcode.dest == S86_MnemonicOp_BX_SI) { + effective_address_clocks = opcode.displacement ? 11 : 7; + } else if (opcode.dest == S86_MnemonicOp_BP_SI || opcode.dest == S86_MnemonicOp_BX_DI) { + effective_address_clocks = opcode.displacement ? 12 : 8; + } + } } break; case S86_Mnemonic_ADD: /*FALLTHRU*/ @@ -1556,10 +1568,11 @@ int main(int argc, char **argv) } S86_ASSERT(dest_map); - bool subtract = opcode.mnemonic != S86_Mnemonic_ADD; - bool byte_op = opcode.dest >= S86_MnemonicOp_AL && opcode.dest <= S86_MnemonicOp_BH; + bool subtract = opcode.mnemonic != S86_Mnemonic_ADD; + bool byte_op = opcode.dest >= S86_MnemonicOp_AL && opcode.dest <= S86_MnemonicOp_BH; - uint16_t src = 0; + uint16_t src = 0; + uint16_t src_address = 0; if (opcode.src == S86_MnemonicOp_Immediate) { if (byte_op) { S86_ASSERT(opcode.immediate < S86_CAST(uint8_t)-1); @@ -1581,21 +1594,20 @@ int main(int argc, char **argv) if ((src_map->mnemonic_op >= S86_MnemonicOp_BX_SI && src_map->mnemonic_op <= S86_MnemonicOp_BP_DI) || (opcode.effective_addr == S86_EffectiveAddress_Src && opcode.effective_addr_loads_mem)) { - uint16_t address = 0; if (src_map->mnemonic_op == S86_MnemonicOp_BX_SI) { - address = src_map->reg->word + register_file.reg.file.si.word; + src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.si.word + opcode.displacement); } else if (src_map->mnemonic_op == S86_MnemonicOp_BX_DI) { - address = src_map->reg->word + register_file.reg.file.di.word; + src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.di.word + opcode.displacement); } else if (src_map->mnemonic_op == S86_MnemonicOp_BP_SI) { - address = src_map->reg->word + register_file.reg.file.si.word; + src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.si.word + opcode.displacement); } else if (src_map->mnemonic_op == S86_MnemonicOp_BP_DI) { - address = src_map->reg->word + register_file.reg.file.di.word; + src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.di.word + opcode.displacement); } else if (opcode.effective_addr == S86_EffectiveAddress_Src) { - address = src_map->reg->word; + src_address = S86_CAST(uint16_t)(src_map->reg->word + opcode.displacement); } else { S86_ASSERT(!"Invalid code path"); } - src = *(uint16_t *)&memory[address]; + src = *(uint16_t *)&memory[src_address]; } else { src = byte_op ? src_map->reg->bytes[src_map->byte] : src_map->reg->word; } @@ -1614,13 +1626,15 @@ int main(int argc, char **argv) if (opcode.effective_addr == S86_EffectiveAddress_Dest && opcode.effective_addr_loads_mem) { uint16_t address = dest_map->reg->word; if (dest_map->mnemonic_op == S86_MnemonicOp_BX_SI) { - address = dest_map->reg->word + register_file.reg.file.si.word; + address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.si.word + opcode.displacement); } else if (dest_map->mnemonic_op == S86_MnemonicOp_BX_DI) { - address = dest_map->reg->word + register_file.reg.file.di.word; + address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.di.word + opcode.displacement); } else if (dest_map->mnemonic_op == S86_MnemonicOp_BP_SI) { - address = dest_map->reg->word + register_file.reg.file.si.word; + address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.si.word + opcode.displacement); } else if (dest_map->mnemonic_op == S86_MnemonicOp_BP_DI) { - address = dest_map->reg->word + register_file.reg.file.di.word; + address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.di.word + opcode.displacement); + } else if (opcode.effective_addr == S86_EffectiveAddress_Dest) { + address = S86_CAST(uint16_t)(dest_map->reg->word + opcode.displacement); } dest_lo = memory + address; dest_hi = byte_op ? NULL : memory + (address + 1); @@ -1705,6 +1719,50 @@ int main(int argc, char **argv) int lo_bit_count = _mm_popcnt_u32(S86_CAST(uint32_t)*dest_lo); register_file.flags.parity = lo_bit_count % 2 == 0; register_file.flags.zero = byte_op ? *dest_lo == 0 : *(uint16_t*)dest_lo == 0; + + if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr == S86_EffectiveAddress_None) { + base_clocks = 3; + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr == S86_EffectiveAddress_Dest) { + base_clocks = 16; + effective_address_clocks = opcode.displacement ? 9 : 5; + if (cycle_count_8088) { + if (opcode.wide) { + transfer_penalty_clocks = 8; + } + } else { + if ((uintptr_t)dest_lo & 1) { + transfer_penalty_clocks = 8; + } + } + } else if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate) { + base_clocks = 4; + } else if (S86_MnemonicOpIsEffectiveAddress(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate && opcode.effective_addr_loads_mem) { + base_clocks = 17; + if (cycle_count_8088 && opcode.wide) + transfer_penalty_clocks = 8; + if (opcode.dest == S86_MnemonicOp_BP_DI || opcode.dest == S86_MnemonicOp_BX_SI) { + effective_address_clocks = opcode.displacement ? 11 : 7; + } else if (opcode.dest == S86_MnemonicOp_BP_SI || opcode.dest == S86_MnemonicOp_BX_DI) { + effective_address_clocks = opcode.displacement ? 12 : 8; + } + } else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsEffectiveAddress(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) { + base_clocks = 9; + if (cycle_count_8088) { + if (opcode.wide) { + transfer_penalty_clocks = 4; + } + } else { + if (src_address & 1) { + transfer_penalty_clocks = 4; + } + } + + if (opcode.src == S86_MnemonicOp_BP_DI || opcode.src == S86_MnemonicOp_BX_SI) { + effective_address_clocks = opcode.displacement ? 11 : 7; + } else if (opcode.src == S86_MnemonicOp_BP_SI || opcode.src == S86_MnemonicOp_BX_DI) { + effective_address_clocks = opcode.displacement ? 12 : 8; + } + } } break; case S86_Mnemonic_JNE_JNZ: { @@ -1739,20 +1797,21 @@ int main(int argc, char **argv) register_file.instruction_ptr += S86_CAST(int16_t)opcode.displacement; } break; } - clocks_counter += opcode.base_clocks + opcode.effective_address_clocks + opcode.transfer_penalty_clocks; + + clocks_counter += base_clocks + effective_address_clocks + transfer_penalty_clocks; // NOTE: Printing ========================================================================== S86_PrintFmt(" ; "); // NOTE: Clocks if (log_cycle_counts) { - S86_PrintFmt("Clocks: +%u = %u", opcode.base_clocks + opcode.effective_address_clocks + opcode.transfer_penalty_clocks, clocks_counter); - if (opcode.effective_address_clocks || opcode.transfer_penalty_clocks) { - S86_PrintFmt(" (%u", opcode.base_clocks); - if (opcode.effective_address_clocks) - S86_PrintFmt(" + %uea", opcode.effective_address_clocks); - if (opcode.transfer_penalty_clocks) - S86_PrintFmt(" + %up", opcode.transfer_penalty_clocks); + S86_PrintFmt("Clocks: +%u = %u", base_clocks + effective_address_clocks + transfer_penalty_clocks, clocks_counter); + if (effective_address_clocks || transfer_penalty_clocks) { + S86_PrintFmt(" (%u", base_clocks); + if (effective_address_clocks) + S86_PrintFmt(" + %uea", effective_address_clocks); + if (transfer_penalty_clocks) + S86_PrintFmt(" + %up", transfer_penalty_clocks); S86_PrintFmt(")"); } S86_PrintFmt(" | "); diff --git a/part1/sim8086.h b/part1/sim8086.h index d91e5b0..b3f12f1 100644 --- a/part1/sim8086.h +++ b/part1/sim8086.h @@ -323,6 +323,7 @@ typedef enum S86_WordBytePrefix { } S86_WordBytePrefix; typedef struct S86_Opcode { + S86_OpDecodeType type; uint8_t byte_size; ///< Number of bytes used to encode this opcode S86_Mnemonic mnemonic; ///< Mnemonic type S86_EffectiveAddress effective_addr; ///< Src/dest op is an effective address calculation diff --git a/project.rdbg b/project.rdbg index 7c059d1b32dd710147fc27e95cd7001cec6a5420..9d77ee46d031e4aa068a85ccbee3049fdeb152be 100644 GIT binary patch delta 226 zcmeBGKCd{TgwbO1MpmQA{EY0(ddc=swg6)an01FqaN-UXM%K+SjJp^axhB`MByzJc zFfbIQ7MJFfuugu+BF@Mg05g^45G*O<9703c%MiAhd u9LVp&4&;df@nl1OVRlAF4j>aOK3Rd4mjx`5C?LLh1OHvd$v_5^sW|}oQ!}jq delta 225 zcmX@F*sDCDgfV5}Mx)7hm;@)AGpbD7;LK<-aib-(UNYC_7{*i_@%