What is the minimum set of steps required to use LODSB to load a relative address to a string in my code?
I have the following test program that I'm using PXE to boot. I boot it two ways: via pxelinux.0 and directly. If I boot it directly, my program prints both strings. If I boot via pxelinux.0, it only prints the first string.
Why?
Working technique (for both):
Clear the direction flag with cld, so that lodsb increments si  
Set ds to cs 
Put the address (from start) of string in si
Add the starting offset to si
Non-working technique (fails only when booted via pxelinux):
Calculate a new segment address based on (((cs << 4) + offset) >> 4)
Set ds to that. (either A000 or 07C0)
text here to fix bug in markdown  
// Note: If you try this code, don't forget to set 
//       the "#if 0" below appropriately!
    .text
    .code16
    .globl  start, _start

// Image layout, as byte offsets from the first byte (_start1).
    .equ    BOOT_SIG_OFFSET, 0x1fe  // last two bytes of the first sector
    .equ    SECTOR_SIZE, 0x200

start:
_start:
_start1:
    // Entry point at image offset 0.
    jmp     real_start

    // Signature bytes 0x55, 0xAA closing the first sector
    // (little-endian word 0xAA55).
    .org    _start1 + BOOT_SIG_OFFSET
    .word   0xAA55

    // Second sector: a second entry point at image offset 0x200.
    .org    _start1 + SECTOR_SIZE
    jmp     real_start

// NUL-terminated test messages; real_start addresses them by their
// offset from _start1.
test1_str:
    .asciz  "\r\nTest: 9020:fe00"
test2_str:
    .asciz  "\r\nTest: a000:0000"
// real_start: print both test strings, each through a different DS:SI
// spelling of the same physical address, then spin forever.
// The "#if" selects the load address: 1 = booted via pxelinux,
// 0 = booted directly.  Clobbers BX, SI, DS.
real_start:
    cld         // Clear DF so lodsb in print_message increments %si.
#if 0
    // Loaded by pxelinux.
    //
    // Variant 1 (known to work): DS = 0x9020, SI = image offset + 0xfe00.
    // The string offsets are just above 0x200, so this 16-bit add wraps
    // and SI ends up as (image offset - 0x200).  DS:SI is therefore
    // physical 0x90200 + offset - 0x200 = 0x90000 + offset.
    movw    $0x9020, %bx
    movw    %bx, %ds
    movw    $(test1_str - _start1), %si
    addw    $0xfe00, %si
    call    print_message
    // Variant 2: the same physical address with no bias in SI.
    // From the arithmetic above the image base is 9000:0000.  It is NOT
    // a000:0000: that ignores the 16-bit wrap and points 0x10000 bytes
    // too high, which is why nothing was printed with DS = 0xA000.
    // (The text of test2_str still reads "a000:0000"; it is only a label.)
    movw    $0x9000, %bx
    movw    %bx, %ds
    movw    $(test2_str - _start1), %si
    call    print_message
#else
    // If we are loaded directly without pxelinux, we're here:
    // 0000:7c00 ==> 07c0:0000
    // Variant 1: DS = 0, SI = image offset + 0x7c00 (no wrap here).
    movw    $0x0000, %bx
    movw    %bx, %ds
    movw    $(test1_str - _start1), %si
    addw    $0x7c00, %si
    call    print_message
    // Variant 2: same physical address, DS = 0x07c0 and SI = image offset.
    movw    $0x07c0, %bx
    movw    %bx, %ds
    movw    $(test2_str - _start1), %si
    call    print_message
#endif
    // Hang the computer (interrupts left enabled).
    sti
1:
    jmp 1b
// print_message: write the NUL-terminated string at DS:SI to the screen
// with BIOS teletype output (int 0x10, AH = 0x0e).
//   In:     DS:SI -> string.  DF must be clear (caller did cld) so that
//           lodsb walks forward through the string.
//   Out:    SI points just past the terminating NUL.
//   Clobb:  BX, SI, flags.  AX is saved and restored.
print_message:
    pushw   %ax
.Lprint_next:
    lodsb                       /* AL = next byte, SI++ */
    testb   %al, %al            /* end of string? */
    jz      .Lprint_done
    // The teletype call reads the display page from BH and the colour
    // from BL.  Callers arrive with a segment value in BX, so set both
    // explicitly rather than passing that through.
    movw    $0x0007, %bx        /* BH = page 0, BL = attribute 7 */
    movb    $0x0e, %ah          /* print char in AL */
    int     $0x10               /* via TTY mode */
    jmp     .Lprint_next
.Lprint_done:
    popw    %ax
    ret
// Pad the image out to the next 0x200-byte (sector-sized) boundary.
.balign 0x200
Here's the compilation:
/usr/bin/ccache gcc -Os -fno-stack-protector -fno-builtin -nostdinc  -DSUPPORT_SERIAL=1 -DSUPPORT_HERCULES=1 -DSUPPORT_GRAPHICS=1 -DHAVE_CONFIG_H -I. -Wall -ggdb3 -Wmissing-prototypes -Wunused -Wshadow -Wpointer-arith -falign-jumps=1 -falign-loops=1 -falign-functions=1 -Wundef -g -c -o ds_teststart_exec-ds_teststart.o ds_test.S
/usr/bin/ccache gcc  -g   -o ds_teststart.exec -nostdlib -Wl,-N -Wl,-Ttext -Wl,8000 ds_teststart_exec-ds_teststart.o  
objcopy -O binary ds_teststart.exec ds_teststart