perfware/part1: Support listing 0057

This commit is contained in:
doyle 2023-06-20 23:19:17 +10:00
parent 56cc90c57b
commit 9db292f616
5 changed files with 156 additions and 79 deletions

View File

@ -287,3 +287,21 @@ nasm %build_dir_listing_0056%_disassembled.asm
fc /B %build_dir_listing_0056% %build_dir_listing_0056%_disassembled || exit /b 1
fc /N %build_dir_listing_0056%.txt %build_dir_listing_0056%_disassembled.txt || exit /b 1
REM ================================================================================================
set listing_0057=listing_0057_challenge_cycles
set build_dir_listing_0057=%build_dir%\%listing_0057%
copy /Y %script_dir%\%listing_0057% %build_dir% 1>NUL
copy /Y %script_dir%\%listing_0057%.txt %build_dir% 1>NUL
pushd %build_dir%
%build_dir%\sim8086.exe --exec --log-instruction-ptr --log-cycle-counts 8086 %build_dir_listing_0057% > %build_dir_listing_0057%_disassembled.txt
%build_dir%\sim8086.exe %build_dir_listing_0057% > %build_dir_listing_0057%_disassembled.asm
%build_dir%\sim8086.exe --exec --log-instruction-ptr --log-cycle-counts 8088 %build_dir_listing_0057% >> %build_dir_listing_0057%_disassembled.txt
popd
nasm %build_dir_listing_0057%_disassembled.asm
fc /B %build_dir_listing_0057% %build_dir_listing_0057%_disassembled || exit /b 1
fc /N %build_dir_listing_0057%.txt %build_dir_listing_0057%_disassembled.txt || exit /b 1

View File

@ -33,8 +33,6 @@ Final registers:
ip: 0x0036 (54)
flags: A
**************
**** 8088 ****
**************
@ -44,23 +42,23 @@ They will be inaccurate, both because the manual clocks are estimates, and becau
some of the entries in the manual look highly suspicious and are probably typos.
--- test\listing_0057_challenge_cycles execution ---
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, [bp+di] ; Clocks: +19 = 35 (8 + 7ea + 4p) | ip:0xc->0xe
mov word [bx+si], cx ; Clocks: +20 = 55 (9 + 7ea + 4p) | ip:0xe->0x10
mov cx, [bp+si] ; Clocks: +20 = 75 (8 + 8ea + 4p) | ip:0x10->0x12
mov word [bx+di], cx ; Clocks: +21 = 96 (9 + 8ea + 4p) | ip:0x12->0x14
mov cx, [bp+di+1000] ; Clocks: +23 = 119 (8 + 11ea + 4p) | ip:0x14->0x18
mov word [bx+si+1000], cx ; Clocks: +24 = 143 (9 + 11ea + 4p) | ip:0x18->0x1c
mov cx, [bp+si+1000] ; Clocks: +24 = 167 (8 + 12ea + 4p) | ip:0x1c->0x20
mov word [bx+di+1000], cx ; Clocks: +25 = 192 (9 + 12ea + 4p) | ip:0x20->0x24
add dx, [bp+si+1000] ; Clocks: +25 = 217 (9 + 12ea + 4p) | ip:0x24->0x28 flags:->PZ
add word [bp+si], 76 ; Clocks: +33 = 250 (17 + 8ea + 8p) | ip:0x28->0x2b flags:PZ->
add dx, [bp+si+1001] ; Clocks: +25 = 275 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ
add word [di+999], dx ; Clocks: +33 = 308 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P
add word [bp+si], 75 ; Clocks: +33 = 341 (17 + 8ea + 8p) | ip:0x33->0x36 flags:P->A
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, [bp+di] ; Clocks: +19 = 35 (8 + 7ea + 4p) | ip:0xc->0xe
mov word [bx+si], cx ; Clocks: +20 = 55 (9 + 7ea + 4p) | ip:0xe->0x10
mov cx, [bp+si] ; Clocks: +20 = 75 (8 + 8ea + 4p) | ip:0x10->0x12
mov word [bx+di], cx ; Clocks: +21 = 96 (9 + 8ea + 4p) | ip:0x12->0x14
mov cx, [bp+di+1000] ; Clocks: +23 = 119 (8 + 11ea + 4p) | ip:0x14->0x18
mov word [bx+si+1000], cx ; Clocks: +24 = 143 (9 + 11ea + 4p) | ip:0x18->0x1c
mov cx, [bp+si+1000] ; Clocks: +24 = 167 (8 + 12ea + 4p) | ip:0x1c->0x20
mov word [bx+di+1000], cx ; Clocks: +25 = 192 (9 + 12ea + 4p) | ip:0x20->0x24
add dx, [bp+si+1000] ; Clocks: +25 = 217 (9 + 12ea + 4p) | ip:0x24->0x28 flags:->PZ
add word [bp+si], 76 ; Clocks: +33 = 250 (17 + 8ea + 8p) | ip:0x28->0x2b flags:PZ->
add dx, [bp+si+1001] ; Clocks: +25 = 275 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ
add word [di+999], dx ; Clocks: +33 = 308 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P
add word [bp+si], 75 ; Clocks: +33 = 341 (17 + 8ea + 8p) | ip:0x33->0x36 flags:P->A
Final registers:
bx: 0x03e8 (1000)
@ -69,3 +67,4 @@ Final registers:
di: 0x0fa0 (4000)
ip: 0x0036 (54)
flags: A

View File

@ -375,6 +375,8 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it,
S86_MnemonicOp *seg_reg,
bool cycle_count_8088)
{
(void)cycle_count_8088;
size_t buffer_start_index = buffer_it->index;
char op_code_bytes[2] = {0};
size_t op_code_size = 0;
@ -422,6 +424,7 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it,
S86_ASSERT(op_decode_type != S86_OpDecodeType_Count && "Unknown instruction");
S86_Opcode result = {0};
result.type = op_decode_type;
result.mnemonic = op_decode->mnemonic;
result.lock_prefix = *lock_prefix;
result.seg_reg_prefix = *seg_reg;
@ -860,44 +863,6 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it,
: S86_WordBytePrefix_Byte;
}
if ((op_decode_type >= S86_OpDecodeType_MOVRegOrMemToOrFromReg) &&
(op_decode_type <= S86_OpDecodeType_MOVSegRegToRegOrMem)) {
if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_Immediate && !result.effective_addr_loads_mem) {
result.base_clocks = 4;
} else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && !result.effective_addr_loads_mem) {
result.base_clocks = 2;
} else if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_DirectAddress && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Src) {
result.base_clocks = 8;
result.effective_address_clocks = 6;
if (cycle_count_8088 && result.wide) {
result.transfer_penalty_clocks = 4;
}
} else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Src) {
result.base_clocks = 8;
result.effective_address_clocks = result.displacement ? 9 : 5;
if (cycle_count_8088 && result.wide) {
result.transfer_penalty_clocks = 4;
}
} else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr_loads_mem && result.effective_addr == S86_EffectiveAddress_Dest) {
result.base_clocks = 9;
result.effective_address_clocks = result.displacement ? 9 : 5;
if (cycle_count_8088 && result.wide) {
result.transfer_penalty_clocks = 4;
}
}
} else if (op_decode_type >= S86_OpDecodeType_ADDRegOrMemToOrFromReg && op_decode_type <= S86_OpDecodeType_ADDImmediateToAccum) {
if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr == S86_EffectiveAddress_None) {
result.base_clocks = 3;
} else if (S86_MnemonicOpIsRegister(result.dest) && S86_MnemonicOpIsRegister(result.src) && result.effective_addr == S86_EffectiveAddress_Dest) {
result.base_clocks = 16;
result.effective_address_clocks = result.displacement ? 9 : 5;
if (cycle_count_8088 && result.wide) {
result.transfer_penalty_clocks = 4 * 2;
}
} else if (S86_MnemonicOpIsRegister(result.dest) && result.src == S86_MnemonicOp_Immediate) {
result.base_clocks = 4;
}
}
size_t buffer_end_index = buffer_it->index;
result.byte_size = S86_CAST(uint8_t)(buffer_end_index - buffer_start_index);
@ -1373,6 +1338,11 @@ int main(int argc, char **argv)
}
// NOTE: Simulate instruction ==============================================================
bool cycle_count_8088 = log_cycle_counts == CycleCount_8088;
uint32_t base_clocks = 0;
uint32_t effective_address_clocks = 0;
uint32_t transfer_penalty_clocks = 0;
S86_RegisterFile prev_register_file = register_file;
switch (opcode.mnemonic) {
case S86_Mnemonic_PUSH: /*FALLTHRU*/
@ -1543,6 +1513,48 @@ int main(int argc, char **argv)
*dest_lo = S86_CAST(uint8_t)(src >> 0);
if (dest_hi)
*dest_hi = S86_CAST(uint8_t)(src >> 8);
if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate && !opcode.effective_addr_loads_mem) {
base_clocks = 4;
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && !opcode.effective_addr_loads_mem) {
base_clocks = 2;
} else if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_DirectAddress && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) {
base_clocks = 8;
effective_address_clocks = 6;
if (cycle_count_8088 && opcode.wide) {
transfer_penalty_clocks = 4;
}
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) {
base_clocks = 8;
effective_address_clocks = opcode.displacement ? 9 : 5;
if (cycle_count_8088 && opcode.wide) {
transfer_penalty_clocks = 4;
}
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Dest) {
base_clocks = 9;
effective_address_clocks = opcode.displacement ? 9 : 5;
if (cycle_count_8088 && opcode.wide) {
transfer_penalty_clocks = 4;
}
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsEffectiveAddress(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) {
base_clocks = 8;
if (cycle_count_8088 && opcode.wide)
transfer_penalty_clocks = 4;
if (opcode.src == S86_MnemonicOp_BP_DI || opcode.src == S86_MnemonicOp_BX_SI) {
effective_address_clocks = opcode.displacement ? 11 : 7;
} else if (opcode.src == S86_MnemonicOp_BP_SI || opcode.src == S86_MnemonicOp_BX_DI) {
effective_address_clocks = opcode.displacement ? 12 : 8;
}
} else if (S86_MnemonicOpIsEffectiveAddress(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Dest) {
base_clocks = 9;
if (cycle_count_8088 && opcode.wide)
transfer_penalty_clocks = 4;
if (opcode.dest == S86_MnemonicOp_BP_DI || opcode.dest == S86_MnemonicOp_BX_SI) {
effective_address_clocks = opcode.displacement ? 11 : 7;
} else if (opcode.dest == S86_MnemonicOp_BP_SI || opcode.dest == S86_MnemonicOp_BX_DI) {
effective_address_clocks = opcode.displacement ? 12 : 8;
}
}
} break;
case S86_Mnemonic_ADD: /*FALLTHRU*/
@ -1556,10 +1568,11 @@ int main(int argc, char **argv)
}
S86_ASSERT(dest_map);
bool subtract = opcode.mnemonic != S86_Mnemonic_ADD;
bool byte_op = opcode.dest >= S86_MnemonicOp_AL && opcode.dest <= S86_MnemonicOp_BH;
bool subtract = opcode.mnemonic != S86_Mnemonic_ADD;
bool byte_op = opcode.dest >= S86_MnemonicOp_AL && opcode.dest <= S86_MnemonicOp_BH;
uint16_t src = 0;
uint16_t src = 0;
uint16_t src_address = 0;
if (opcode.src == S86_MnemonicOp_Immediate) {
if (byte_op) {
S86_ASSERT(opcode.immediate < S86_CAST(uint8_t)-1);
@ -1581,21 +1594,20 @@ int main(int argc, char **argv)
if ((src_map->mnemonic_op >= S86_MnemonicOp_BX_SI &&
src_map->mnemonic_op <= S86_MnemonicOp_BP_DI) || (opcode.effective_addr == S86_EffectiveAddress_Src && opcode.effective_addr_loads_mem)) {
uint16_t address = 0;
if (src_map->mnemonic_op == S86_MnemonicOp_BX_SI) {
address = src_map->reg->word + register_file.reg.file.si.word;
src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.si.word + opcode.displacement);
} else if (src_map->mnemonic_op == S86_MnemonicOp_BX_DI) {
address = src_map->reg->word + register_file.reg.file.di.word;
src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.di.word + opcode.displacement);
} else if (src_map->mnemonic_op == S86_MnemonicOp_BP_SI) {
address = src_map->reg->word + register_file.reg.file.si.word;
src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.si.word + opcode.displacement);
} else if (src_map->mnemonic_op == S86_MnemonicOp_BP_DI) {
address = src_map->reg->word + register_file.reg.file.di.word;
src_address = S86_CAST(uint16_t)(src_map->reg->word + register_file.reg.file.di.word + opcode.displacement);
} else if (opcode.effective_addr == S86_EffectiveAddress_Src) {
address = src_map->reg->word;
src_address = S86_CAST(uint16_t)(src_map->reg->word + opcode.displacement);
} else {
S86_ASSERT(!"Invalid code path");
}
src = *(uint16_t *)&memory[address];
src = *(uint16_t *)&memory[src_address];
} else {
src = byte_op ? src_map->reg->bytes[src_map->byte] : src_map->reg->word;
}
@ -1614,13 +1626,15 @@ int main(int argc, char **argv)
if (opcode.effective_addr == S86_EffectiveAddress_Dest && opcode.effective_addr_loads_mem) {
uint16_t address = dest_map->reg->word;
if (dest_map->mnemonic_op == S86_MnemonicOp_BX_SI) {
address = dest_map->reg->word + register_file.reg.file.si.word;
address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.si.word + opcode.displacement);
} else if (dest_map->mnemonic_op == S86_MnemonicOp_BX_DI) {
address = dest_map->reg->word + register_file.reg.file.di.word;
address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.di.word + opcode.displacement);
} else if (dest_map->mnemonic_op == S86_MnemonicOp_BP_SI) {
address = dest_map->reg->word + register_file.reg.file.si.word;
address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.si.word + opcode.displacement);
} else if (dest_map->mnemonic_op == S86_MnemonicOp_BP_DI) {
address = dest_map->reg->word + register_file.reg.file.di.word;
address = S86_CAST(uint16_t)(dest_map->reg->word + register_file.reg.file.di.word + opcode.displacement);
} else if (opcode.effective_addr == S86_EffectiveAddress_Dest) {
address = S86_CAST(uint16_t)(dest_map->reg->word + opcode.displacement);
}
dest_lo = memory + address;
dest_hi = byte_op ? NULL : memory + (address + 1);
@ -1705,6 +1719,50 @@ int main(int argc, char **argv)
int lo_bit_count = _mm_popcnt_u32(S86_CAST(uint32_t)*dest_lo);
register_file.flags.parity = lo_bit_count % 2 == 0;
register_file.flags.zero = byte_op ? *dest_lo == 0 : *(uint16_t*)dest_lo == 0;
if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr == S86_EffectiveAddress_None) {
base_clocks = 3;
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsRegister(opcode.src) && opcode.effective_addr == S86_EffectiveAddress_Dest) {
base_clocks = 16;
effective_address_clocks = opcode.displacement ? 9 : 5;
if (cycle_count_8088) {
if (opcode.wide) {
transfer_penalty_clocks = 8;
}
} else {
if ((uintptr_t)dest_lo & 1) {
transfer_penalty_clocks = 8;
}
}
} else if (S86_MnemonicOpIsRegister(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate) {
base_clocks = 4;
} else if (S86_MnemonicOpIsEffectiveAddress(opcode.dest) && opcode.src == S86_MnemonicOp_Immediate && opcode.effective_addr_loads_mem) {
base_clocks = 17;
if (cycle_count_8088 && opcode.wide)
transfer_penalty_clocks = 8;
if (opcode.dest == S86_MnemonicOp_BP_DI || opcode.dest == S86_MnemonicOp_BX_SI) {
effective_address_clocks = opcode.displacement ? 11 : 7;
} else if (opcode.dest == S86_MnemonicOp_BP_SI || opcode.dest == S86_MnemonicOp_BX_DI) {
effective_address_clocks = opcode.displacement ? 12 : 8;
}
} else if (S86_MnemonicOpIsRegister(opcode.dest) && S86_MnemonicOpIsEffectiveAddress(opcode.src) && opcode.effective_addr_loads_mem && opcode.effective_addr == S86_EffectiveAddress_Src) {
base_clocks = 9;
if (cycle_count_8088) {
if (opcode.wide) {
transfer_penalty_clocks = 4;
}
} else {
if (src_address & 1) {
transfer_penalty_clocks = 4;
}
}
if (opcode.src == S86_MnemonicOp_BP_DI || opcode.src == S86_MnemonicOp_BX_SI) {
effective_address_clocks = opcode.displacement ? 11 : 7;
} else if (opcode.src == S86_MnemonicOp_BP_SI || opcode.src == S86_MnemonicOp_BX_DI) {
effective_address_clocks = opcode.displacement ? 12 : 8;
}
}
} break;
case S86_Mnemonic_JNE_JNZ: {
@ -1739,20 +1797,21 @@ int main(int argc, char **argv)
register_file.instruction_ptr += S86_CAST(int16_t)opcode.displacement;
} break;
}
clocks_counter += opcode.base_clocks + opcode.effective_address_clocks + opcode.transfer_penalty_clocks;
clocks_counter += base_clocks + effective_address_clocks + transfer_penalty_clocks;
// NOTE: Printing ==========================================================================
S86_PrintFmt(" ; ");
// NOTE: Clocks
if (log_cycle_counts) {
S86_PrintFmt("Clocks: +%u = %u", opcode.base_clocks + opcode.effective_address_clocks + opcode.transfer_penalty_clocks, clocks_counter);
if (opcode.effective_address_clocks || opcode.transfer_penalty_clocks) {
S86_PrintFmt(" (%u", opcode.base_clocks);
if (opcode.effective_address_clocks)
S86_PrintFmt(" + %uea", opcode.effective_address_clocks);
if (opcode.transfer_penalty_clocks)
S86_PrintFmt(" + %up", opcode.transfer_penalty_clocks);
S86_PrintFmt("Clocks: +%u = %u", base_clocks + effective_address_clocks + transfer_penalty_clocks, clocks_counter);
if (effective_address_clocks || transfer_penalty_clocks) {
S86_PrintFmt(" (%u", base_clocks);
if (effective_address_clocks)
S86_PrintFmt(" + %uea", effective_address_clocks);
if (transfer_penalty_clocks)
S86_PrintFmt(" + %up", transfer_penalty_clocks);
S86_PrintFmt(")");
}
S86_PrintFmt(" | ");

View File

@ -323,6 +323,7 @@ typedef enum S86_WordBytePrefix {
} S86_WordBytePrefix;
typedef struct S86_Opcode {
S86_OpDecodeType type;
uint8_t byte_size; ///< Number of bytes used to encode this opcode
S86_Mnemonic mnemonic; ///< Mnemonic type
S86_EffectiveAddress effective_addr; ///< Src/dest op is an effective address calculation

Binary file not shown.