perfware/part1: Start to add clock counts for MOV

This commit is contained in:
doyle 2023-06-13 22:14:48 +10:00
parent d888968ce0
commit 16b8483312
10 changed files with 316 additions and 8 deletions

View File

@ -269,3 +269,20 @@ nasm %build_dir_listing_0055%_disassembled.asm
fc /B %build_dir_listing_0055% %build_dir_listing_0055%_disassembled || exit /b 1
fc /N %build_dir_listing_0055%.txt %build_dir_listing_0055%_disassembled.txt || exit /b 1
REM ================================================================================================
REM Listing 56: run the simulator with cycle-count logging and verify its output.
set listing_0056=listing_0056_estimating_cycles
set build_dir_listing_0056=%build_dir%\%listing_0056%
REM Copy the listing binary and its reference output text into the build directory.
copy /Y %script_dir%\%listing_0056% %build_dir% 1>NUL
copy /Y %script_dir%\%listing_0056%.txt %build_dir% 1>NUL
pushd %build_dir%
REM First run: execute the listing and capture the execution log (instruction pointer, cycle counts).
%build_dir%\sim8086.exe --exec --log-instruction-ptr --log-cycle-counts --dump %build_dir_listing_0056% > %build_dir_listing_0056%_disassembled.txt
REM Second run: no --exec, capture the disassembly so it can be reassembled below.
%build_dir%\sim8086.exe %build_dir_listing_0056% > %build_dir_listing_0056%_disassembled.asm
popd
REM Reassemble the disassembly; the result is compared against the original binary.
nasm %build_dir_listing_0056%_disassembled.asm
REM Abort the build if the reassembled binary or the execution log differs from the reference.
fc /B %build_dir_listing_0056% %build_dir_listing_0056%_disassembled || exit /b 1
fc /N %build_dir_listing_0056%.txt %build_dir_listing_0056%_disassembled.txt || exit /b 1

Binary file not shown.

View File

@ -0,0 +1,41 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 56
; ========================================================================
; Straight-line sequence of MOV and ADD forms used as input for cycle estimation.
bits 16
; Load the four registers that serve as base/index addresses below.
mov bx, 1000
mov bp, 2000
mov si, 3000
mov di, 4000
; Register-to-register and immediate-to-register moves (no memory operand).
mov cx, bx
mov dx, 12
; Load from a direct address.
mov dx, [1000]
; Loads and stores through a single base or index register, no displacement.
mov cx, [bx]
mov cx, [bp]
mov [si], cx
mov [di], cx
; The same four forms with a displacement of 1000 added.
mov cx, [bx + 1000]
mov cx, [bp + 1000]
mov [si + 1000], cx
mov [di + 1000], cx
; ADD in register/register, memory/register and register/immediate forms.
add cx, dx
add [di + 1000], cx
add dx, 50

View File

@ -0,0 +1,72 @@
**************
**** 8086 ****
**************
WARNING: Clocks reported by this utility are strictly from the 8086 manual.
They will be inaccurate, both because the manual clocks are estimates, and because
some of the entries in the manual look highly suspicious and are probably typos.
--- test\listing_0056_estimating_cycles execution ---
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, bx ; Clocks: +2 = 18 | cx:0x0->0x3e8 ip:0xc->0xe
mov dx, 12 ; Clocks: +4 = 22 | dx:0x0->0xc ip:0xe->0x11
mov dx, [+1000] ; Clocks: +14 = 36 (8 + 6ea) | dx:0xc->0x0 ip:0x11->0x15
mov cx, [bx] ; Clocks: +13 = 49 (8 + 5ea) | cx:0x3e8->0x0 ip:0x15->0x17
mov cx, [bp] ; Clocks: +13 = 62 (8 + 5ea) | ip:0x17->0x1a
mov word [si], cx ; Clocks: +14 = 76 (9 + 5ea) | ip:0x1a->0x1c
mov word [di], cx ; Clocks: +14 = 90 (9 + 5ea) | ip:0x1c->0x1e
mov cx, [bx+1000] ; Clocks: +17 = 107 (8 + 9ea) | ip:0x1e->0x22
mov cx, [bp+1000] ; Clocks: +17 = 124 (8 + 9ea) | ip:0x22->0x26
mov word [si+1000], cx ; Clocks: +18 = 142 (9 + 9ea) | ip:0x26->0x2a
mov word [di+1000], cx ; Clocks: +18 = 160 (9 + 9ea) | ip:0x2a->0x2e
add cx, dx ; Clocks: +3 = 163 | ip:0x2e->0x30 flags:->PZ
add word [di+1000], cx ; Clocks: +25 = 188 (16 + 9ea) | ip:0x30->0x34
add dx, 50 ; Clocks: +4 = 192 | dx:0x0->0x32 ip:0x34->0x37 flags:PZ->
Final registers:
bx: 0x03e8 (1000)
dx: 0x0032 (50)
bp: 0x07d0 (2000)
si: 0x0bb8 (3000)
di: 0x0fa0 (4000)
ip: 0x0037 (55)
**************
**** 8088 ****
**************
WARNING: Clocks reported by this utility are strictly from the 8086 manual.
They will be inaccurate, both because the manual clocks are estimates, and because
some of the entries in the manual look highly suspicious and are probably typos.
--- test\listing_0056_estimating_cycles execution ---
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, bx ; Clocks: +2 = 18 | cx:0x0->0x3e8 ip:0xc->0xe
mov dx, 12 ; Clocks: +4 = 22 | dx:0x0->0xc ip:0xe->0x11
mov dx, [+1000] ; Clocks: +18 = 40 (8 + 6ea + 4p) | dx:0xc->0x0 ip:0x11->0x15
mov cx, [bx] ; Clocks: +17 = 57 (8 + 5ea + 4p) | cx:0x3e8->0x0 ip:0x15->0x17
mov cx, [bp] ; Clocks: +17 = 74 (8 + 5ea + 4p) | ip:0x17->0x1a
mov word [si], cx ; Clocks: +18 = 92 (9 + 5ea + 4p) | ip:0x1a->0x1c
mov word [di], cx ; Clocks: +18 = 110 (9 + 5ea + 4p) | ip:0x1c->0x1e
mov cx, [bx+1000] ; Clocks: +21 = 131 (8 + 9ea + 4p) | ip:0x1e->0x22
mov cx, [bp+1000] ; Clocks: +21 = 152 (8 + 9ea + 4p) | ip:0x22->0x26
mov word [si+1000], cx ; Clocks: +22 = 174 (9 + 9ea + 4p) | ip:0x26->0x2a
mov word [di+1000], cx ; Clocks: +22 = 196 (9 + 9ea + 4p) | ip:0x2a->0x2e
add cx, dx ; Clocks: +3 = 199 | ip:0x2e->0x30 flags:->PZ
add word [di+1000], cx ; Clocks: +33 = 232 (16 + 9ea + 8p) | ip:0x30->0x34
add dx, 50 ; Clocks: +4 = 236 | dx:0x0->0x32 ip:0x34->0x37 flags:PZ->
Final registers:
bx: 0x03e8 (1000)
dx: 0x0032 (50)
bp: 0x07d0 (2000)
si: 0x0bb8 (3000)
di: 0x0fa0 (4000)
ip: 0x0037 (55)

View File

@ -0,0 +1,2 @@
»иЅРѕё ї  
‰ ‹‹и‰€и‹Љи‰‰и’иѓL’й•зѓK

View File

@ -0,0 +1,42 @@
; ========================================================================
;
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Please see https://computerenhance.com for further information
;
; ========================================================================
; ========================================================================
; LISTING 57
; ========================================================================
; Straight-line sequence of MOV and ADD forms using base+index addressing.
bits 16
; Load the base (bx, bp) and index (si, di) registers combined below.
mov bx, 1000
mov bp, 2000
mov si, 3000
mov di, 4000
; Loads and stores through each base+index pair, no displacement.
mov cx, [bp + di]
mov [bx + si], cx
mov cx, [bp + si]
mov [bx + di], cx
; The same base+index pairs with a displacement of 1000 added.
mov cx, [bp + di + 1000]
mov [bx + si + 1000], cx
mov cx, [bp + si + 1000]
mov [bx + di + 1000], cx
; ADD with memory operands. With the register values loaded above, bp+si+1000 = 6000
; and bp+si = 5000 are even addresses, while bp+si+1001 = 6001 and di+999 = 4999 are odd.
add dx, [bp + si + 1000]
add word [bp + si], 76
add dx, [bp + si + 1001]
add [di + 999], dx
add word [bp + si], 75

View File

@ -0,0 +1,71 @@
**************
**** 8086 ****
**************
WARNING: Clocks reported by this utility are strictly from the 8086 manual.
They will be inaccurate, both because the manual clocks are estimates, and because
some of the entries in the manual look highly suspicious and are probably typos.
--- test\listing_0057_challenge_cycles execution ---
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, [bp+di] ; Clocks: +15 = 31 (8 + 7ea) | ip:0xc->0xe
mov word [bx+si], cx ; Clocks: +16 = 47 (9 + 7ea) | ip:0xe->0x10
mov cx, [bp+si] ; Clocks: +16 = 63 (8 + 8ea) | ip:0x10->0x12
mov word [bx+di], cx ; Clocks: +17 = 80 (9 + 8ea) | ip:0x12->0x14
mov cx, [bp+di+1000] ; Clocks: +19 = 99 (8 + 11ea) | ip:0x14->0x18
mov word [bx+si+1000], cx ; Clocks: +20 = 119 (9 + 11ea) | ip:0x18->0x1c
mov cx, [bp+si+1000] ; Clocks: +20 = 139 (8 + 12ea) | ip:0x1c->0x20
mov word [bx+di+1000], cx ; Clocks: +21 = 160 (9 + 12ea) | ip:0x20->0x24
add dx, [bp+si+1000] ; Clocks: +21 = 181 (9 + 12ea) | ip:0x24->0x28 flags:->PZ
add word [bp+si], 76 ; Clocks: +25 = 206 (17 + 8ea) | ip:0x28->0x2b flags:PZ->
add dx, [bp+si+1001] ; Clocks: +25 = 231 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ
add word [di+999], dx ; Clocks: +33 = 264 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P
add word [bp+si], 75 ; Clocks: +25 = 289 (17 + 8ea) | ip:0x33->0x36 flags:P->A
Final registers:
bx: 0x03e8 (1000)
bp: 0x07d0 (2000)
si: 0x0bb8 (3000)
di: 0x0fa0 (4000)
ip: 0x0036 (54)
flags: A
**************
**** 8088 ****
**************
WARNING: Clocks reported by this utility are strictly from the 8086 manual.
They will be inaccurate, both because the manual clocks are estimates, and because
some of the entries in the manual look highly suspicious and are probably typos.
--- test\listing_0057_challenge_cycles execution ---
mov bx, 1000 ; Clocks: +4 = 4 | bx:0x0->0x3e8 ip:0x0->0x3
mov bp, 2000 ; Clocks: +4 = 8 | bp:0x0->0x7d0 ip:0x3->0x6
mov si, 3000 ; Clocks: +4 = 12 | si:0x0->0xbb8 ip:0x6->0x9
mov di, 4000 ; Clocks: +4 = 16 | di:0x0->0xfa0 ip:0x9->0xc
mov cx, [bp+di] ; Clocks: +19 = 35 (8 + 7ea + 4p) | ip:0xc->0xe
mov word [bx+si], cx ; Clocks: +20 = 55 (9 + 7ea + 4p) | ip:0xe->0x10
mov cx, [bp+si] ; Clocks: +20 = 75 (8 + 8ea + 4p) | ip:0x10->0x12
mov word [bx+di], cx ; Clocks: +21 = 96 (9 + 8ea + 4p) | ip:0x12->0x14
mov cx, [bp+di+1000] ; Clocks: +23 = 119 (8 + 11ea + 4p) | ip:0x14->0x18
mov word [bx+si+1000], cx ; Clocks: +24 = 143 (9 + 11ea + 4p) | ip:0x18->0x1c
mov cx, [bp+si+1000] ; Clocks: +24 = 167 (8 + 12ea + 4p) | ip:0x1c->0x20
mov word [bx+di+1000], cx ; Clocks: +25 = 192 (9 + 12ea + 4p) | ip:0x20->0x24
add dx, [bp+si+1000] ; Clocks: +25 = 217 (9 + 12ea + 4p) | ip:0x24->0x28 flags:->PZ
add word [bp+si], 76 ; Clocks: +33 = 250 (17 + 8ea + 8p) | ip:0x28->0x2b flags:PZ->
add dx, [bp+si+1001] ; Clocks: +25 = 275 (9 + 12ea + 4p) | ip:0x2b->0x2f flags:->PZ
add word [di+999], dx ; Clocks: +33 = 308 (16 + 9ea + 8p) | ip:0x2f->0x33 flags:PZ->P
add word [bp+si], 75 ; Clocks: +33 = 341 (17 + 8ea + 8p) | ip:0x33->0x36 flags:P->A
Final registers:
bx: 0x03e8 (1000)
bp: 0x07d0 (2000)
si: 0x0bb8 (3000)
di: 0x0fa0 (4000)
ip: 0x0036 (54)
flags: A

View File

@ -196,6 +196,21 @@ S86_Str8 S86_MnemonicOpStr8(S86_MnemonicOp type)
return result;
}
// Returns true when `type` is the AX operand or one of its byte forms (AL, AH),
// and false for every other mnemonic operand.
bool S86_MnemonicOpIsAccumulator(S86_MnemonicOp type)
{
    switch (type) {
        case S86_MnemonicOp_AX:
        case S86_MnemonicOp_AL:
        case S86_MnemonicOp_AH:
            return true;
        default:
            return false;
    }
}
// Returns true when `type` lies in either of two inclusive enum ranges:
// [S86_MnemonicOp_AL, S86_MnemonicOp_DI] or [S86_MnemonicOp_ES, S86_MnemonicOp_DS].
// NOTE(review): this relies on the declaration order of S86_MnemonicOp, which is
// not visible here -- presumably every register operand is declared contiguously
// inside those two ranges; confirm against the enum before reordering it.
bool S86_MnemonicOpIsRegister(S86_MnemonicOp type)
{
    if (S86_MnemonicOp_AL <= type && type <= S86_MnemonicOp_DI) {
        return true;
    }

    if (S86_MnemonicOp_ES <= type && type <= S86_MnemonicOp_DS) {
        return true;
    }

    return false;
}
S86_Str8 S86_RegisterFileRegArrayStr8(S86_RegisterFileRegArray type)
{
S86_Str8 result = {0};
@ -398,10 +413,11 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it,
S86_ASSERT(op_code_size > 0 && op_code_size <= S86_ARRAY_UCOUNT(op_code_bytes));
S86_ASSERT(op_decode_type != S86_OpDecodeType_Count && "Unknown instruction");
S86_Opcode result = {0};
result.mnemonic = op_decode->mnemonic;
result.lock_prefix = *lock_prefix;
result.seg_reg_prefix = *seg_reg;
S86_Opcode result = {0};
result.mnemonic = op_decode->mnemonic;
result.lock_prefix = *lock_prefix;
result.seg_reg_prefix = *seg_reg;
S86_ASSERT(*seg_reg == S86_MnemonicOp_Invalid || (*seg_reg >= S86_MnemonicOp_ES && *seg_reg <= S86_MnemonicOp_DS));
switch (op_decode_type) {
// NOTE: Instruction Pattern => [0b0000'0000W | 0bAA00'0CCC | DISP-LO | DISP-HI]
@ -836,6 +852,26 @@ S86_Opcode S86_DecodeOpcode(S86_BufferIterator *buffer_it,
: S86_WordBytePrefix_Byte;
}
if ((op_decode_type >= S86_OpDecodeType_MOVRegOrMemToOrFromReg) &&
(op_decode_type <= S86_OpDecodeType_MOVSegRegToRegOrMem)) {
if (result.src == S86_MnemonicOp_DirectAddress &&
S86_MnemonicOpIsAccumulator(result.dest)) {
result.clocks = 4;
} else if (S86_MnemonicOpIsAccumulator(result.src) &&
result.dest == S86_MnemonicOp_DirectAddress) {
result.clocks = 4;
} else if (S86_MnemonicOpIsRegister(result.src) &&
S86_MnemonicOpIsRegister(result.dest)) {
result.clocks = 2;
} else if (result.src == S86_MnemonicOp_DirectAddress &&
S86_MnemonicOpIsRegister(result.dest)) {
result.clocks = 2;
} else if (result.src == S86_MnemonicOp_Immediate &&
S86_MnemonicOpIsRegister(result.dest)) {
result.clocks = 4;
}
}
size_t buffer_end_index = buffer_it->index;
result.byte_size = S86_CAST(uint8_t)(buffer_end_index - buffer_start_index);
S86_ASSERT(result.immediate < S86_CAST(uint16_t)-1);
@ -850,6 +886,7 @@ typedef struct S86_MnemonicOpToRegisterFileMap {
char const CLI_ARG_EXEC[] = "--exec";
char const CLI_ARG_LOG_INSTRUCTION_PTR[] = "--log-instruction-ptr";
char const CLI_ARG_LOG_CYCLE_COUNTS[] = "--log-cycle-counts";
char const CLI_ARG_DUMP[] = "--dump";
#define PRINT_USAGE \
S86_PrintLnFmt("USAGE: sim8086.exe [%.*s] [%.*s] <binary asm file>", \
@ -868,12 +905,14 @@ int main(int argc, char **argv)
return -1;
}
S86_Str8 CLI_ARG_EXEC_STR8 = (S86_Str8){(char *)CLI_ARG_EXEC, S86_ARRAY_UCOUNT(CLI_ARG_EXEC) - 1};
S86_Str8 CLI_ARG_EXEC_STR8 = (S86_Str8){(char *)CLI_ARG_EXEC, S86_ARRAY_UCOUNT(CLI_ARG_EXEC) - 1};
S86_Str8 CLI_ARG_LOG_INSTRUCTION_PTR_STR8 = (S86_Str8){(char *)CLI_ARG_LOG_INSTRUCTION_PTR, S86_ARRAY_UCOUNT(CLI_ARG_LOG_INSTRUCTION_PTR) - 1};
S86_Str8 CLI_ARG_DUMP_STR8 = (S86_Str8){(char *)CLI_ARG_DUMP, S86_ARRAY_UCOUNT(CLI_ARG_DUMP) - 1};
S86_Str8 CLI_ARG_LOG_CYCLE_COUNTS_STR8 = (S86_Str8){(char *)CLI_ARG_LOG_CYCLE_COUNTS, S86_ARRAY_UCOUNT(CLI_ARG_LOG_CYCLE_COUNTS) - 1};
S86_Str8 CLI_ARG_DUMP_STR8 = (S86_Str8){(char *)CLI_ARG_DUMP, S86_ARRAY_UCOUNT(CLI_ARG_DUMP) - 1};
bool exec_mode = false;
bool log_instruction_ptr = false;
bool log_cycle_counts = false;
bool dump = false;
S86_Str8 file_path = {0};
for (int arg_index = 1; arg_index < argc; arg_index++) {
@ -883,6 +922,8 @@ int main(int argc, char **argv)
exec_mode = true;
} else if (S86_Str8_Equals(arg_str8, CLI_ARG_LOG_INSTRUCTION_PTR_STR8)) {
log_instruction_ptr = true;
} else if (S86_Str8_Equals(arg_str8, CLI_ARG_LOG_CYCLE_COUNTS_STR8)) {
log_cycle_counts = true;
} else if (S86_Str8_Equals(arg_str8, CLI_ARG_DUMP_STR8)) {
dump = true;
} else {
@ -1203,10 +1244,21 @@ int main(int argc, char **argv)
// NOTE: Decode assembly
// =========================================================================
if (exec_mode)
if (exec_mode) {
if (log_cycle_counts) { // NOTE: Print disclaimer + header
S86_PrintLn(S86_STR8("**************"));
S86_PrintLn(S86_STR8("**** 8086 ****"));
S86_PrintLn(S86_STR8("**************"));
S86_PrintLn(S86_STR8(""));
S86_PrintLn(S86_STR8("WARNING: Clocks reported by this utility are strictly from the 8086 manual."));
S86_PrintLn(S86_STR8("They will be inaccurate, both because the manual clocks are estimates, and because"));
S86_PrintLn(S86_STR8("some of the entries in the manual look highly suspicious and are probably typos."));
S86_PrintLn(S86_STR8(""));
}
S86_PrintLnFmt("--- test\\%s execution ---", file_name);
else
} else {
S86_PrintLn(S86_STR8("bits 16"));
}
uint32_t const S86_MEMORY_SIZE = 1024 * 1024;
S86_RegisterFile register_file = {0};
@ -1252,6 +1304,7 @@ int main(int argc, char **argv)
instruction_buffer.data = (char *)&memory[register_file.instruction_ptr];
instruction_buffer.size = buffer.size;
S86_BufferIterator instruction_it = S86_BufferIteratorInit(instruction_buffer);
uint32_t clocks_counter = 0;
bool lock_prefix = false;
S86_MnemonicOp seg_reg = S86_CAST(S86_MnemonicOp)0;
@ -1597,10 +1650,16 @@ int main(int argc, char **argv)
register_file.instruction_ptr += S86_CAST(int16_t)opcode.displacement;
} break;
}
clocks_counter += opcode.clocks;
// NOTE: Printing ==========================================================================
S86_PrintFmt(" ; ");
// NOTE: Clocks
if (log_cycle_counts) {
S86_PrintFmt("Clocks: +%u = %u |", opcode.clocks, clocks_counter);
}
// NOTE: Registers
for (size_t index = 0; index < S86_RegisterFileRegArray_Count; index++) {
if (register_file.reg.array[index].word != prev_register_file.reg.array[index].word) {

View File

@ -307,6 +307,7 @@ typedef enum S86_MnemonicOp {
S86_MnemonicOp_DirectInterSegment,
S86_MnemonicOp_Jump,
S86_MnemonicOp_Count,
} S86_MnemonicOp;
typedef enum S86_EffectiveAddress {
@ -336,6 +337,7 @@ typedef struct S86_Opcode {
int32_t immediate; ///< Immediate value when src/dest op is an immediate
bool immediate_is_8bit; ///< Immediate was 8bit and sign extended
S86_MnemonicOp seg_reg_prefix; ///< Segment register that should prefix the upcoming instruction
uint32_t clocks; ///< Number of cycles required to complete this operation
} S86_Opcode;
typedef enum S86_RegisterByte {
@ -407,6 +409,8 @@ S86_Str8 S86_MnemonicStr8 (S86_Mnemonic type);
S86_MnemonicOp S86_MnemonicOpFromWReg (bool w, uint8_t reg);
S86_MnemonicOp S86_MnemonicOpFromSR (uint8_t sr);
S86_Str8 S86_MnemonicOpStr8 (S86_MnemonicOp type);
bool S86_MnemonicOpIsAccumulator (S86_MnemonicOp type);
bool S86_MnemonicOpIsRegister (S86_MnemonicOp type);
S86_Str8 S86_RegisterFileRegArrayStr8(S86_RegisterFileRegArray type);
void S86_PrintOpcodeMnemonicOp (S86_Opcode opcode, bool src);
void S86_PrintOpcode (S86_Opcode opcode);

Binary file not shown.