//
//
// NAME:
//   uplrtime.asm
// TITLE:
//   UPL Runtime Library.  (Version 2.0a4)
// FUNCTION:
//   The runtime library provides basic I/O, stack management,
//   logical and arithmetic (8-bit and 16-bit, signed and unsigned)
//   services to programs compiled with the Queztalcoatl compiler
//   for the 6502 family of processors.
//
// TUNABLE DEFINES:
//   target		Selects runtime library target.
//			eg. -Dtarget=1 for Unexpanded VIC-20.
//
// PORTING:
//   Porting this runtime library to other 6502 platforms should be
//   fairly straight forward.  The compiler assumes the runtime
//   library has CL, CH and BP pseudo-registers in a certain
//   location, and that <datastack> is allocated an entire page
//   and that the current stack frame is indexed with BP.
//   Obviously the called routines must do exactly what they
//   say they'll do.  Apart from these conditions, the compiler
//   doesn't really care about the runtime library's internals.
//
//   To port the library to another platform.
//     0.  Assign yourself a target number.  Check the target enumerations
//         below.  If your platform is already there, use that number.
//         If not, assign yourself a new target number.  When you mofify
//         this runtime library you can include code just for your target
//         by using the if (target == target_gizmo) conditional,
//         where gizmo is the target you are adding.
//     1.  Change org to reflect the start of the application memory area.
//     2.  Move whatever registers and variables you can into zero-page.
//         (This generic implementation was written assuming only a few
//          zero-page locations were available.)
//     3.  Change the kernel routines for console I/O.  If your target
//         platform doesn't have corresponding routines that do this
//         (eg. correct handling for newlines), you'll need to write
//         filters for them.  On some targets this may not be possible;
//	   eg. small handhelds or game consoles without keyboards.
//     4.  That's it!  Recompile the runtime library, saving the
//         output under name name uplrXXXX.obj, where XXXX is the
//         name of the target platform. eg. uplrlynx.obj for an Atari Lyynx.
//         eg. quetzal -Dtarget=5 -cm uplrlynx.obj uplrtime.asm
//         If you look at the target enumerations below, you'll
//         see that 5 is the target number I've assigned to the lynx.
//         If you're porting to another platform you'll need another
//         target number.
//     5.  When compiling programs to use your new target include the
//         option -r lynx on the linker command line.  It'll link in
//         your runtime library in rather than the default one.
//     6.  Send me a copy and I'll place it online at
//	   http://www.kdef.com/geek/vic for other Quetzalcoatlites.
//         Please do this within a reasonable timeframe;  The runtime
//         library is updated from time to time, and if your version
//         is six months out of date by the time I get it it could
//         make merging it with the current version difficult.
//
// NOTE: The "and" opcode.
//   The Quetzalcoatl assembler is a free-format assembler.
//   That is, an instruction can spread over several lines.
//   It also evaluates complex conditional expressions, such
//   as those containing *, /, +, -, "not", "or" and "and".
//   The latter produces some problems, because it just happens
//   to be a 6502 opcode.  This means "cmp #fred and wilma"
//   (even when spread over two lines) is interpreted as *one*
//   instruction.  To break it up you need to place a semicolon ";"
//   after the cmp line.  Thus "cmp #fred; and wilma".  This
//   problem only happend with the "and" opcode.  While it is
//   easy to fix, it's still annoying an easy to overlook.
//   In a future version I'll find a better way to fix this.
//   (Perhaps restricting instructions to a single line)
//   [bj 21sep1998]
//
// PREREQUISITES:
//   None.
//
// AUTHOR:
//   Brendan Jones, Kestrel Defence.
// RIGHTS:
//   (c) Copyright Kestrel Defence, 1998.  All Rights Reserved.
//
//   You're encouraged to modify this library to optimise it,
//   port it to other platforms or to produce customised versions
//   for a particular application (for example, omitting functions
//   that are not used by that application).  Modifications should
//   be clearly labelled and the file should be labelled such that
//   is not confused with the original version.  Apart from an
//   entry under the MODIFICATIONS change log below, the text of
//   this header and the accompanying legal notice must be
//   distributed with it verbatim.
//
//   Use of the Quetzalcoatl compiler is subject to a separate
//   licence agreement, contained in the file legal.txt.
//   You must read and agree to the terms of the licence before
//   using this software.
//
// WEB:
//   http://www.kdef.com/geek/vic
//
// SECURITY:
//   Unclassified.  For public distribution.
// CREATION DATE:
//   July 20, 1998.
//
// MODIFICATIONS:
//   NAME  MOD  DATE       DESCRIPTION
//   bj    1    21sep1998  Alpha Release 2.0a1.
//   bj    2    22sep1998  Fixed bugs; shaved off a whopping 33 bytes! :/
//			   Added conditionals so one runtime library source
//   bj    3    24sep1998  Renamed mul_ww umul_ww (which it really is).
//			   Added but not optimised a new mul_ww.
//			   Added vicg target.
//   bj    4    25sep1998  Added vich target.
//   bj    5    30sep1998  Added c64  target.
//   bj    6     7oct1998  Fixed bug: neg_w.
//
//


		// GROUP Target.
		//

		// ENUMERATION target_*.
		//
		// Each constant specifies a different target platform.
		// These may be selectd with eg. -Dtarget=2 for vicx.
		//
		target_generic	= 0
		target_vicu	= 1	// VIC-20 with 3.5Kb RAM
		target_apple2	= 2	// Apple ][ with 48Kb RAM.
		target_vicx	= 3	// VIC-20 with >= 11.5Kb RAM
		target_c64	= 4	// Commodore 64
		target_lynx	= 5	// Not implemented.
		target_nes 	= 6     // Not implemented.

		// Graphical VIC-20 with >= 11.5Kb RAM.
		//
		// This is a variation of vicx, but with a memory
		// configuration better suited to bitmapped graphics.
		//
		// The character set begins at           0x1000.
		// The video frame buffer begins at      0x1800.
		// The runtime library proper begins at  0x1A00.
		// When we start running BASIC begins at 0x1200.
		//
		// If you place the BASIC bootstrap at 0x1200
		// you'll lose 16 bytes (two characters); 64 and 65.
		//
		target_vicg	= 7	// Graphical Expanded VIC.


		// Full-screen graphical VIC-20 with >= 11.5Kb RAM.
		//
		// This is a variation of vicx, but with a memory
		// configuration better suited to full-screen bitmapped
		// graphics.
		//
		// The video frame buffer stays at       0x1000.
		// A 512 byte unused block of memory     0x1200
		// The character set begins at           0x1400.
		// The runtime library proper begins at  0x2400.
		// When we start running BASIC begins at 0x1200.
		//
		target_vich	= 8	// Graphical Expanded VIC.



		// If no target is defined default to the generic one.
		//
		if (not defined(target))
		  target = target_generic;
		endif

		// Identify the target so there's no confusion.
		//
		if (target == target_generic)
		  message 1 "Target is Generic Commodore."
		elif (target == target_vicu)
		  message 1 "Target is Unexpanded VIC-20 with 3.5Kb RAM."
		elif (target == target_vicx)
		  message 1 "Target is Expanded VIC-20 with >= 11.5Kb RAM."
		elif (target == target_c64)
		  message 1 "Target is Commodore 64."
		elif (target == target_apple2)
		  error "Target is Apple ][ with >= 48Kb RAM. (NOT IMPLEMENTED)"
		elif (target == target_lynx)
		  error "Target is Atari Lynx. (NOT IMPLEMENTED)"
		elif (target == target_nes)
		  error "Target is Nintendo NES. (NOT IMPLEMENTED)"
		elif (target == target_vicg)
		  message 1 "Target is Bitmap-optimised Expanded VIC-20 with >= 11.5Kb RAM."
		elif (target == target_vich)
		  message 1 "Target is a Full-screen Bitmap-optimised Expanded VIC-20 with >= 11.5Kb RAM."
		else
                  error "Unspecified target: Define me!"
		endif


		// State Target.
		//
		// Record the target enumeration in the object file.
		// Although not currently used, in future versions
		// it could be used to automatically choose the best
		// runtime library for an application, and to prevent
		// the wrong runtime library accidentally being linked.
		//
		declare_target(target)
		declare_flags(0)		// Not yet used.



		// Starting Address.
		//
		// NOTE:
		//   Under Quetzalcoatl version 2.0a1 the assembler
		//   produces absolute rather than relocatable code.
		//   The linker places this absolute code at the
		//   very start of the executable.  Thus you should
		//   set the origin to the start of application memory.
		//   Later versions of the assembler will produce
		//   relocatable code.  Until then... [bj 21sep1998]
		//
		// IMPORTANT:
		//   The runtime library must start on a page boundary.
		//   That is, the least significant byte of the
		//   org starting address must be 0x00.
		//
		if (target == target_vicu or
		    target == target_vicg or
		    target == target_generic)
		  org   0x1000
		elif (target == target_vicx or
		      target == target_vich)
		  org   0x1200
		elif (target == target_c64)
		  org	0x0800
		else
		  error "Undefined target: Define me!"
		endif




		// GROUP Implementation Specific Kernel Routines.
		//

		// Print the character in A on standard output.
		if (target == target_vicx 	or
		    target == target_vicg 	or
		    target == target_vich 	or
		    target == target_vicu 	or
		    target == target_generic 	or
		    target == target_c64)
		// This routine is only called by the runtime
		// library from the routine print_ch_a, which
		// performs whatever corrections are required
		// (eg. newline translation that makes sure
		// '\n' 0x0a always moves the cursor to the
		// start of the next line).
		//
		call_print_ch_a 	= 0xffd2

		// Get Raw Character.
		//
		// Get a character from standard input and return it in A.
		// If no character is ready set A to zero and return
		// immediately.
		//
		call_get_ch_raw_a	= 0xffe4

		// Get Cooked Character.
		//
		// Get a character from standard input; the next character
		// from a line which the user has had a chance to edit
		// before pressing Enter.
		//
		call_get_ch_cooked_a	= 0xffcf
		else
		  error "Undefined target: Define me!"
		endif




		// GROUP Charmode Constants.
		//
		// These are enumerated constants we use to store
		// the current I/O control mode of reading standard input.
		// (eg. raw, cooked).
		//
		// The compiler uses the same values, so don't change them.
		//
		charmode_cooked		= 0x00	// DON'T CHANGE!
		charmode_raw_wait	= 0x01	// DON'T CHANGE!
		charmode_raw_nowait	= 0x80  // DON'T CHANGE!



		// GROUP Reserved.
		//

		if (target == target_vicg)
		//
		// Bitmap-optimised Expanded VIC-20.
		//
		// Allocate 2.5Kb.
		//
		// The first 2Kb of this (starting at 0x1000) will become
		// the new character set.  The existing video frame buffer
		// at 0x1000 will need to be moved to 0x1800 to make way.
		// The next 512 bytes begins at is this new video frame
		// buffer.
		//
		// The init routine will change the VICs registers to
		// conform with this new configuration.
		//
		//
		  db 0(2048)	// Room for an 8*8 Character set.  (2Kb)
		  db 32(512)	// New video frame buffer.
				// Initialise with spaces.

		elif (target == target_vich)
		//
		// Full-screen Bitmap-optimised Expanded VIC-20.
		//
		// Allocate 4.5Kb.
		//
		// The first 512 bytes is wasted space (at 0x1200),
		// since the character can't start between a 1Kb boundary.
		// We salvage what we can by using 256 bytes of that as
		// the datastack.  The remainder is unused.
		//
		// The 4Kb character set (at 0x1400) follows.
		//
		// The init routine will change the VICs registers to
		// conform with this new configuration.
		//
		//
datastack:	  db 0(256)	// Salvage wasted space.
		  db 0(256)	// Unused.
		  db 0(4096)	// Room for an 8*16 Character set.  (4Kb)
		endif


		// GROUP Data.
		//

		// SEGMENT Data.
		//
		// This section contains variables.  The only variables
		// that need to be here are the arrays such as
		// <datastack> and <buffer>.  To improve speed and
		// reduce the size of the runtime library you can
		// move other variables to free locations in the
		// zero page (if you have any).
		//


		// Datastack.
		//
		// Quetzacoatl uses a <datastack> to perform some
		// expression evaluation, automatic variables,
		// parameters and for temporary storage.
		//
		// In practice the compiler tries to avoid using
		// the stack whenever possible.
		//
		// IMPORTANT:
		// <datastack> *must* begin on a page boundary.
		//
		//
		if (target != target_vich)
datastack:	  db 0(256)
		endif


		if (target == target_vicx or
		    target == target_vicg or
		    target == target_vich or
		    target == target_vicu)
		//
		// IMPLEMENTATION: VIC-20.
		//
		// Source: "VIC Revealed".  Nick Hampshire.  1981.  No ISBN.
		// (Caution: This book has contains errors/inconstencies.)
		//
		// The VIC's ROM has very few spare locations in page zero.
		// We use locations used by the ROM's RS232C routines;
		// (Obviously this means this configuration of the runtime
		// library cannot be run if you're doing an RS232C transfer.)
		// An alternative would be the BASIC numerical working area
		// between 87-96;  However this could break any programs
		// that make calls to BASIC routines, such as the Meteor
		// sample programs calls to the random number generator.
		//
		// The VIC also has some spare memory between 673-776.
		// We use 24 bytes between 680-704 for the print buffer.
		// Why not start at 673?  Well... this location is faulty
		// on my VIC.  :-)
		//
		// [bj 22sep1998]
		//
		//
		datastacks	= 167
		el		= 168
		eh		= 169
		fl		= 170
		fh		= 171
		charmode	= 180
		is_negative 	= 181
		bitcount	= 182
		buffer_count	= 183
		buffer		= 680	// 24 bytes long

		elif (target == target_c64)
		//
		// IMPLEMENTATION: Commodore 64.
		//
		// Source funet.fi/pub/cbm/MemoryMaps.
		//
		// The C64 has fewer zero-page locations even than the VIC-20.
		// We use free locations higher in the memory map.  Note that
		// location 255 which we assume is the BP (Base pointer),
		// is actually used by the C64 for temporary BASIC data area.
		// This'd make calling a program using this runtime library
		// from BASIC (or BASIC routines) a tenuous proposition.
		//
		datastacks	= 1020
		el		= 1021
		eh		= 1022
		fl		= 679
		fh		= 680
		charmode	= 681
		is_negative 	= 682
		bitcount	= 683
		buffer_count	= 684
		buffer		= 685	// 24 bytes long

		else
		//
		// IMPLEMENTATION: Generic.
		//

		// Datastacks.
		//
		// An index to <datastack>, pointing to the top of the stack.
		//
datastacks:	db 0

el:		db 0		// Low  byte - Pseudo register E.
eh:		db 0    	// Low  byte - Pseudo register E.

fl:		db 0		// Low  byte - Pseudo register F.
fh:		db 0    	// Low  byte - Pseudo register F.

charmode:	db 0            // Current ioctl mode.

is_negative:	db 0            // Temporary.
bitcount:	db 0		// Temporary.

buffer_count:	db 0		// Temporary.  For printing numbers.
buffer:		db 0(24)        // Temporary.  For printing numbers.
		endif


		// SUBGROUP Fixed Pseudo Registers.
		//
		// The compiler assumes these psuedo registers are
		// at the stated locations.  Do not move them!
		//
		// <C> must be immeidately followed by <D>.
		//
		//
		cl = 251    // Low  byte - Pseudo register C. DON'T CHANGE!
		ch = 252    // High byte - Pseudo register C. DON'T CHANGE!
		dl = 253    // Low  byte - Pseudo register D. DON'T CHANGE!
		dh = 254    // High byte - Pseudo register D. DON'T CHANGE!
		bp = 255    // Base Pointer.                  DON'T CHANGE!



		// SEGMENT Static Data.
		//

		// Runtime Error String.
		//
rerror_string:
		db "RERROR "
		db  0




		// SEGMENT Code.
		//

		// PROCEDURE init.
		//
		// Initialise the runtime library.
		//
init:
		cld			// Don't use BCD mode.
		lda #0x00
		sta datastacks		// Reset the data stack pointer.
		sta bp			// Reset the base pointer.
		lda #charmode_raw_wait
		sta charmode		// Begin in raw wait ioctl mode.

		if (target == target_vicg)
		//
		// vicg mode.
		//
		// Move character generator to       0x1000 (4096).
		// Move framebuffer to         	     0x1800 (6144).
		// Runtime proper must then start at 0x1a00 (6656).
		//
		lda #(224+12)
		sta [36869]		// VIC Register #6
		lda [36866]             // VIC Register #3
		and #0x7f
		sta [36866]             // VIC Register #3
		lda #0x18		// MSB of new video buffer address.
		sta [648]		// Tell the kernel it's moved.
		lda #147		// The clear character
		jmp print_ch_a		// Print clear character and rts.

		elif (target == target_vich)
		//
		// vich mode.
		//
		// Leave framebuffer at        	     0x1000 (4096).
		// Move character generator to       0x1400 (5120).
		// Runtime proper must then start at 0x2400 (9216).
		//
		lda #(192+13)
		sta [36869]		// VIC Register #6

		lda [36866]             // VIC Register #3
		and #0x7f
		sta [36866]             // VIC Register #3

		lda [36867]             // VIC Register #4
		ora #0x01		// Double-height characters.
		sta [36867]             // VIC Register #4

		lda #0x10		// MSB of new video buffer address.
		sta [648]		// Tell the kernel it's moved.
		lda #147		// The clear character
		jmp print_ch_a		// Print clear character and rts.
		else
		rts
		endif



		// GROUP Push/Pop.
		//

		// PROCEDURE push_b.
		//
		// NOTE: push_b is stored in the Operator Group
		// 	 to allow relative branches to it


		// PROCEDURE cast_w2b.
		//
		// Change the word on the top of the stack into a byte.
		// (Simply discard the high byte; equivalent to pop_b).
		//
cast_w2b:
		/*FALLTHROUGH*/


		// PROCEDURE pop_b.
		//
		// Pop a byte from the stack into <A>.
		//
pop_b:
		ldy datastacks
		lda datastack,y
		dey
		sty datastacks
		rts


		// PROCEDURE push_w.
		//
		// Push the word <A,X> on the stack.
		//
		// By convention when we store a word in the registers
		// <A=LSB, X=MSB>.
		//
		// The low byte is pushed first, the high byte second.
		// Thus we get: [under] = <A>, [top] = <X>.
		//
push_w:
		ldy datastacks
		iny
		sta datastack,y
		iny
		txa
		sta datastack,y
		sty datastacks
		rts


		// PROCEDURE pop_w.
		//
		// Pop a word from the stack into <A,X>.
		//
pop_w:
		ldy datastacks
		lda datastack,y
		tax
		dey
		lda datastack,y
		dey
		sty datastacks
		rts


		// PROCEDURE push_w_c.
		//
		// Push the word <C> on the stack.
		//
push_w_c:
		ldx ch
		lda cl
		jmp push_w	// rts.


		// PROCEDURE pop_w_c.
		//
		// Pop a word from the stack into <C>.
		//
pop_w_c:
		jsr pop_w
		stx ch
		sta cl
		rts


		// PROCEDURE push_b_c.
		//
		// Push the byte <CL> on the stack.
		//
push_b_c:
		lda cl
		jmp push_b	// rts.


		// PROCEDURE pop_w_c.
		//
		// Pop a byte from the stack into <CL>.
		//
pop_b_c:	jsr pop_b
		sta cl
		rts


		// PROCEDURE pop_b_cr.
		//
		// Pop two bytes from the stack into <A> and <CL>.
		//
		// <A> = [under]. <CL> = [top].
		//
pop_b_cr:
		jsr pop_b_c
		jmp pop_b	// rts


		// PROCEDURE pop_b_cr.
		//
		// Pop two bytes from the stack into <A, X> and <C>.
		//
		// <A, X> = [under]. <C> = [top].
		//
pop_w_cr:
		jsr pop_w
		sta cl
		stx ch
		jmp pop_w	// rts


		// PROCEDURE pop_w_dc
		//
		// Pop two words from the stack into <D> and <C>.
		//
		// <C> = [under]. <D> = [top].
		//
pop_w_dc:
		jsr pop_w
		sta dl
		stx dh
		jmp pop_w_c	// rts





		// GROUP Cast.
		//

		// PROCEDURE cast_b2w.
		//
		// NOTE: cast_b2w is stored in the Operator Group
		//	 where it is identical to push_b #0.
		//


		// PROCEDURE cast_c2w.
		//
		// Change the signed byte (eg. char) on the top of the stack
		// into a word.
		//
		// MOD2: [bj 22sep1998] Wasn't looking at stack.
		//
cast_c2w:
		ldy datastacks	// Peek at [top]; copy into <A>.
		lda datastack,y
		jsr extend_ax	// Extend sign to <X>
		txa
		jmp push_b	// Push the extended MSB <X>.




		// GROUP Swap.
		//

		// PROCEDURE swap_bb.
		//
		// Swap bytes [under] and [top].
		//
swap_bb:	jsr pop_b
		pha
		jsr pop_b
		tax
		pla
		jsr push_b
		txa
		jmp push_b	// rts.


		// PROCEDURE swap_ww.
		//
		// Swap words [under] and [top].
		//
swap_ww:
		jsr pop_w_c
		jsr pop_w
		sta dl
		stx dh
		jsr push_w_c
		lda dl
		ldx dh
		jmp push_w	// rts.




		// GROUP Operators.
		//


		// PROCEDURE extend.
		//
		// Extend signed byte a <A> into signed word <A, X>.
		// If <A> is negative <X> is set to 0xffu.
		// If <A> is zero or positive<X> is set to 0x00.
		//
extend_ax:
		cmp #0
		bmi _extend_neg
		ldx #0
		rts
_extend_neg:	ldx #0xff
		rts


		// PROCEDURE and_bb.  (Logical/Bit And with byte operands).
		//
		// [top] = [top] bitand [under].
		//
and_bb:
		jsr pop_b_cr	; Semicolon forces end of statement.
		and cl          ;
		jmp push_b	// rts.


		// PROCEDURE or_bb.  (Logical/Bit Or with byte operands).
		//
		// [top] = [top] bitor [under].
		//
or_bb:
		jsr pop_b_cr
		ora cl
		jmp push_b	// rts.


		// PROCEDURE and_ww.  (Logical/Bit And with word operands).
		//
		// [top] = [top] bitand [under].
		//
and_ww:		jsr pop_w_c     ;
		jsr pop_w	; Semicolon forces end of statement.
		and cl		;
		sta cl          ;
		txa             ;
		and ch          ;
		sta ch          ;
		jmp push_w_c	// rts


		// PROCEDURE or_ww.  (Logical/Bit Or with word operands).
		//
		// [top] = [top] bitor [under].
		//
or_ww:		jsr pop_w_c
		jsr pop_w
		ora cl
		sta cl
		txa
		ora ch
		sta ch
		jmp push_w_c	// rts


		// PROCEDURE pop_b_cr_cmp.
		//
		// Pop two bytes into <C> and <X>, then compare them
		// setting the carry/minus/zero flags as appropriate.
		//
pop_b_cr_cmp:
		jsr pop_b_cr
		cmp cl
		rts



		// PROCEDURE pop_w_cmp_cm.
		//
		// Pop two words into <C> and <X>, then compare them
		// setting the carry/minus flags as appropriate.
		//
_pop_w_cmp_cm:
		jsr pop_w_cr		// al,xh = [under].  c = [top].
		sec
		sbc cl
		txa
		sbc ch			// [under] - [top]
		rts


		// PROCEDURE pop_w_cmp_eq.
		//
		// Pop two words into <C> and <X>, then compare them
		// setting the zero flag as appropriate.
		//
_pop_w_cmp_eq:
		jsr pop_w_cr		// al,xh = [under].  c = [top].
		cmp cl
		bne _leave20
		cpx ch
_leave20:
		rts


		// PROCEDURE pop_w_cmp_all.
		//
		// Pop two words into <C> and <X>, then compare them
		// setting the carry/minus/zero flags as appropriate.
		//
_pop_w_cmp_all:
		jsr pop_w_cr		// al,xh = [under].  c = [top].

		cmp cl
		bne _set_nonzero
		cpx ch
		sec			// For upcoming subtraction
		beq _set_zero
_set_nonzero:
		sbc cl
		txa
		sbc ch			// [under] - [top]
		php
		pla		;
		and #253	;	// Clear the Z flag
		jmp _cont22     ;

_set_zero:
		sbc cl
		txa
		sbc ch			// [under] - [top]
		php
		pla
		ora #2			// Set Z flag.

_cont22:
		pha
		plp
		rts




		// PROCEDURE eq_ww.  [top] = [under] == [top] (words)
		//
eq_ww:
		jsr 	_pop_w_cmp_eq
		jmp	_res_eq

		// PROCEDURE ne_ww.  [top] = [under] != [top] (words)
		//
ne_ww:
		jsr 	_pop_w_cmp_eq
		jmp	_res_ne



		// PROCEDURE lt_ww.  [top] = [under] <  [top] (signed words)
		//
lt_ww:
		jsr 	_pop_w_cmp_cm	// [underneath] - [top]
		jmp	_res_lt


		// PROCEDURE le_ww.  [top] = [under] <= [top] (signed words)
		//
le_ww:
		jsr 	_pop_w_cmp_all
		jmp	_res_le


		// PROCEDURE gt_ww.  [top] = [under] >  [top] (signed words)
		//
gt_ww:
		jsr 	_pop_w_cmp_all
		jmp	_res_gt


		// PROCEDURE ge_ww.  [top] = [under] >= [top] (signed words)
		//
ge_ww:
		jsr 	_pop_w_cmp_cm
		jmp	_res_ge


		// PROCEDURE ult_ww. [top] = [under] <  [top] (unsigned words)
		//
ult_ww:
		jsr 	_pop_w_cmp_cm
		jmp	_res_ult


		// PROCEDURE ule_ww. [top] = [under] <= [top] (unsigned words)
		//
ule_ww:
		jsr 	_pop_w_cmp_all
		jmp	_res_ule


		// PROCEDURE ugt_ww. [top] = [under] >  [top] (unsigned words)
		//
ugt_ww:
		jsr 	_pop_w_cmp_all
		jmp	_res_ugt


		// PROCEDURE uge_ww. [top] = [under] >= [top] (unsigned words)
		//
uge_ww:
		jsr 	_pop_w_cmp_cm
		jmp	_res_uge




		// PROCEDURE push_b_1.
		//
		// Push true (-1).
		//
		// NOTE:
		//   We use -1 to represent true rather than 1,
		//   since -1 allows logical and bitwise
		//   boolean operations to be done by the
		//   same algorithm.  This is also faster.
		//   [bj 21sep1998].
		//
		//
push_b_1:
		lda #0xff
		/*FALLTHROUGH*/



		// PROCEDURE push_b.
		//
		// Push the byte <A> on the stack.
		//
push_b:
		ldy datastacks
		iny
		sta datastack,y
		sty datastacks
		rts



		// PROCEDURE cast_b2w.
		//
		// Change the unsigned byte on the top of the stack
		// into a word.  (Set the high byte to zero).
		//
		// This is identical to push_b_0.
		//
cast_b2w:
		/*FALLTHROUGH*/


		// PROCEDURE push_b_0.
		//
		// Push false (0).
		//
push_b_0:
		lda #0
		beq push_b	// rts.
		//NOTREACHED




		// PROCEDURE eq_bb.  [top] = [under] == [top] (bytes)
		//
eq_bb:
		jsr pop_b_cr_cmp
_res_eq:
		beq push_b_1
		bne push_b_0		// rts
		//NOTREACHED


		// PROCEDURE ne_bb.  [top] = [under] != [top] (bytes)
		//
ne_bb:
		jsr pop_b_cr_cmp
_res_ne:
		bne push_b_1
		beq push_b_0		// rts
		//NOTREACHED


// Unsigned Greater than: true iff [under] > [top]
//
ugt_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_ugt:
		beq push_b_0		// =  fails.
		bcs push_b_1		// >= suceeds.
		bcc push_b_0		// <  fails.
		//NOTREACHED


// Unsigned Greater than or equal: true iff [under] >= [top]
//
uge_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_uge:
		bcs push_b_1		// >= suceeds
		bcc push_b_0		// *  fails.
		//NOTREACHED


// Unsigned Less than: true iff [under] < [top]
//
ult_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_ult:
		bcc push_b_1		// < suceeds.
		bcs push_b_0		// * fails.
		//NOTREACHED


// Unsigned Less than or equal: true iff [under] <= [top]
//
ule_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_ule:
		bcc push_b_1		// < succeeds.
		beq push_b_1		// = succeeds.
		bne push_b_0		// * fails.
		//NOTREACHED


// Signed Greater than: true iff [under] > [top]
//				 [under] - [top] > 0
//
gt_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_gt:
		beq push_b_0		// =  fails.
		bpl push_b_1		// >= suceeds.
		bmi push_b_0		// <  fails.
		//NOTREACHED


// Signed Greater than or equal: true iff [under] >= [top]
//					  [under] -  [top] >= 0
//
ge_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_ge:
		bpl push_b_1		// >= suceeds
		bmi push_b_0		// <  fails.
		//NOTREACHED


// Signed Less than: true iff [under] < [top]
//			      [under] - [top] < 0
//
lt_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_lt:
		bmi push_b_1		// <  suceeds.
		bpl push_b_0		// *  fails.
		//NOTREACHED


// Signed Less than or equal: true iff [under] <= [top]
//				    [under] - [top] <= 0
//
le_bb:
		jsr pop_b_cr_cmp	// lda [under], cmp [top]
_res_le:
		bmi push_b_1		// <  suceeds
		beq push_b_1		// =  suceeds.
		bne push_b_0		// *  fails.
		//NOTREACHED




		// PROCEDURE add_bb.  [top] = [under] + [top] (bytes)
		//
add_bb:
		jsr pop_b_cr		// a = [under].  c = [top].
		clc
		adc cl
		jmp push_b		// rts.


		// PROCEDURE sub_bb.  [top] = [under] - [top] (bytes)
		//
sub_bb:
		jsr pop_b_cr		// a = [under].  c = [top].
		sec
		sbc cl
		jmp push_b		// rts.


		// PROCEDURE add_ww.  [top] = [under] + [top] (words)
		//
add_ww:
		jsr pop_w_cr		// al,xh = [under].  c = [top].
		clc
		adc cl
		sta cl
		txa
		adc ch
		sta ch
		jmp push_w_c		// rts.


		// PROCEDURE sub_ww.  [top] = [under] - [top] (words)
		//
sub_ww:
		jsr pop_w_cr		// al,xh = [under].  c = [top].
		sec
		sbc cl
		sta cl
		txa
		sbc ch
		sta ch
		jmp push_w_c		// rts.




		// PROCEDURE not_b.   [top] = ~[under] 	(bytes)
		//
not_b:		jsr pop_b
		eor #0xff
		jmp push_b		// rts.


		// PROCEDURE not_w.   [top] = ~[under] 	(words)
		//
not_w:		jsr pop_w		// al, xh = [top].
		eor #0xff
		tay
		txa
		eor #0xff
		tax
		tya
		jmp push_w		// rts.


		// PROCEDURE neg_b.   [top] = -[under] 	(bytes)
		//
neg_b:		jsr pop_b
		eor #0xff
		clc
		adc #1
		jmp push_b		// rts.


		// PROCEDURE neg_w.   [top] = -[under] 	(words)
		//
neg_w:
		jsr pop_w_c
		ldx #0			// Select <C>
		jsr _neg_cx_flag        // Negate it
                jmp push_w_c            // MOD6 [bj 7oct1998]




		// PROCEDURE rot_w.  [top] = [top] << <Y>.
		//
rot_w:
		sty bitcount
		jsr pop_w_c
_loop10:
		cpy #0
		beq _leave10
		asl cl
		rol ch
		iny
		jmp _loop10
_leave10:
		jmp push_w_c





		// PROCEDURE mul_ww. [top] = [under] / [top] (signed word)
		//
mul_ww:
		jsr _pop_w_cd_abs
		jsr umul_ww_cd
		lda is_negative
		beq _skip20
		jmp neg_w
_skip20:
		rts


		// PROCEDURE mul_ww.  [top] = [under] * [top] (unsigned word)
		//
umul_ww:
		jsr pop_w_cd		// Order doesn't matter...

		// ALTERNATE ENTRY mul_ww_cd.  [top] = <C> * <D>.
		//
umul_ww_cd:
		lda #0
		sta el
		sta eh

		ldx #16

_loop0:
		asl el
		rol eh

		bit ch
		bpl _skip0

		clc
		lda el
		adc dl
		sta el
		lda eh
		adc dh
		sta eh

_skip0:
		asl cl
		rol ch

		dex
		bne _loop0

		lda el
		ldx eh
		jmp push_w		// rts.





		// PROCEDURE pop_w_cd.
		//
		// Pop two words from the stack into <C>, <D>.
		//
		// <D> = [top]; <C> = [underneath]
pop_w_cd:
		jsr pop_w
		sta dl
		stx dh
		jmp pop_w_c		// rts.




		// LOCAL PROCEDURE _abs_cx_flag.
		//
		// if <X> == 0 <C> = <C> >= 0 ? <C> : -<C>.
		// if <X> == 2 <D> = <D> >= 0 ? <D> : -<D>.
		//
		// Set <C/D> to the absolute value of itself.
		// If <C/D> was negative, then increment the
		// <is_negative> counter.
		//
		// NOTE:
		//   The <is_negative> counter can be used to count the
		//   total number of negative operands to an operator.
		//   If this number is odd, we know we need to negate
		//   the result of the operator.
		//
_abs_cx_flag:
		lda ch,x
		bpl _skip6

		// ALTERNATE ENTRY _neg_cx_flag.
		//
		// if <X> == 0 <C> = -<C>.
		// if <X> == 2 <D> = -<D>.
		//
		//
_neg_cx_flag:
		lda cl,x
		eor #255
		clc
		adc #1
		sta cl,x
		lda ch,x
		eor #255
		adc #0
		sta ch,x
		inc is_negative
_skip6:
		rts



		// PROCEDURE pop_w_cd_abs.
		//
		// Pop two words from the stack into <C>, <D>.
		//
		// <D> = [top]; <C> = [underneath];
		// <is_negative> = number of negative operands.
		//
_pop_w_cd_abs:
		jsr pop_w_c
		ldx #0
		stx is_negative
		jsr _abs_cx_flag
		lda cl
		sta dl
		lda ch
		sta dh
		jsr pop_w_c
		ldx #2
		jmp _abs_cx_flag

_push_w_abs:
		lda is_negative         ;
		and #1			;
		beq _skip7              ;
		sta cl
		stx ch
		ldx #0
		jsr _neg_cx_flag
		lda cl
		ldx ch
_skip7:
		jmp push_w		; rts







		// PROCEDURE mod_ww. [top] = [under] % [top] (signed word)
		//
mod_ww:
		jsr _pop_w_cd_abs
		jsr div_mod_w
		lda el
		ldx eh
		jmp _push_w_abs		; rts,


		// PROCEDURE div_ww. [top] = [under] / [top] (signed word)
		//
div_ww:
		jsr _pop_w_cd_abs
		jsr div_mod_w
		lda fl
		ldx fh
		jmp _push_w_abs		; rts,




		// PROCEDURE umod_ww. [top] = [under] % [top] (unsigned word)
		//
umod_ww:
		jsr pop_w_dc
		jsr div_mod_w
		lda el
		ldx eh
		jmp push_w		// rts


		// PROCEDURE udiv_ww. [top] = [under] / [top] (unsigned word)
		//
udiv_ww:
		jsr pop_w_dc
		jsr div_mod_w
		lda fl
		ldx fh
		jmp push_w		// rts


		// PROCEDURE div_mod_w.
		//
		// Divides <C> by <D>.
		//
		// IN:
		//   C		number
		//   D		divisor
		// OUT:
		//   F          Quotient.
		//   E		Remainder.
		//   C		Destroyed.
		//
		//
div_mod_w:
		lda #0
		sta el
		sta eh
		sta fl
		sta fh

		ldx #16
_loop1:
		asl fl
		rol fh
		asl cl
		rol ch
		rol el
		rol eh
_loop1a:
		lda eh
		cmp dh
		bcc _leave1a
		lda el
		cmp dl
		bcc _leave1a

		sec
		lda el
		sbc dl
		sta el
		lda eh
		sbc dh
		sta eh

		inc fl
		bne _loop1a
		inc fh
		jmp _loop1a

_leave1a:
		dex
		bne _loop1

		rts




		// GROUP Console I/O.
		//


		// PROCEDURE ioctl.
		//
		// Set the ioctl standard input mode from [top].
		//
ioctl:
		jsr pop_b
		sta charmode
		rts


		// PROCEDURE print_ch_b.
		//
		// Print [top] as a character.
		//
print_ch_b:
		jsr pop_b

		// ALTERNATE ENTRY print_ch_a.
		//
		// Print <A> as a character.
		//
print_ch_a:
		cmp #10			// convert '\n' to '\r'
		bne _skip4
		lda #13
_skip4:
		jmp call_print_ch_a	// rts

print_nl:
		lda #13		// convert '\n' to '\r'
		bne _skip4	// small cut saves us a byte.
		//NOTREACHED


		// PROCEDURE print_string_ax.
		//
		// Print the null-terminated string pointed at by <A, X>.
		//
		// Two strings walk into a bar.
		// The first says, "I'll have a beer."
		// The second says, "I'll have a beer too1r380*)@$(C@&unrd3c"
		// The first says, "You'll have to excuse my friend;
		// He isn't null terminated."  :)
		//
		//
print_string_ax:
		sta cl
		stx ch
		jmp print_string_c

		// ALTERNATE ENTRY print_string_w.
		//
		// Print the null-terminated string pointed at by [top].
		//
print_string_w:
		jsr pop_w_c

		// ALTERNATE ENTRY print_string_w.
		//
		// Print the null-terminated string pointed at by <C>.
		//
print_string_c:
_loop5:		ldy #0
		lda (cl),y
		beq _leave5
		jsr print_ch_a
		inc cl
		bne _loop5
		inc ch
		jmp _loop5
_leave5:
		rts


		// PROCEDURE print_integer_ax_signed.
		//
		// Print the signed word <A, X> as an integer.
		//
print_integer_ax_signed:
		sta cl
		stx ch
		ldy #0			// Address <C>
		sty is_negative		// Clear <is_negative>.
		jsr _abs_cx_flag	// Make absolute; update <is_negative>
		ldy is_negative		// Test <is_negative>.
		beq print_integer_c	// Not set, so Positive; print as is.
		lda #45			// Set, so Negative; print '-'.
		jsr print_ch_a
		jmp print_integer_c	// Now print it.

		// PROCEDURE print_integer_ax.
		//
		// Print the unsigned word <A, X> as an integer.
		//
print_integer_ax:
		sta cl
		stx ch
		jmp print_integer_c

		// ALTERNATE ENTRY print_integer_w.
		//
		// Print the unsigned word [top] as an integer.
		//
print_integer_w:
		jsr pop_w_c

		// ALTERNATE ENTRY print_integer_c.
		//
		// Print the unsigned word <C> as an integer.
		//
print_integer_c:
		lda #10
		sta dl
		lda #0
		sta dh
		sta buffer_count
_loop2:
		jsr div_mod_w
		lda el
		adc #48			// '0'
		ldy buffer_count
		sta buffer, y
		inc buffer_count

		lda fl
		sta cl
		lda fh
		sta ch

		cmp #0
		bne _loop2
		lda cl
		cmp #0
		bne _loop2


		dec buffer_count
_loop3:
		ldy buffer_count
		lda buffer, y
		jsr call_print_ch_a
		dec buffer_count
		bpl _loop3

__rts1:
		rts

		// PROCEDURE get_ch_a.
		//
		// Get a character from standard input.
		// Return the character in 'A'.
		//
		// This applies the ioctl standard input mode
		// before calling the appropriate kernel routines.
		//
get_ch_a:	lda charmode
		beq _get_cooked
		bmi _get_nowait

_get_wait:	jsr call_get_ch_raw_a
		cmp #0
		beq _get_wait
		bne __rts1

_get_nowait:	jmp call_get_ch_raw_a

_get_cooked:	jmp call_get_ch_cooked_a


get_ch_b:	jsr get_ch_a
		jmp push_b




		// GROUP Stack Management.
		//

		// PROCEDURE push_n.
		//
		// Push <A> bytes on the stack.
		// Their contents are undefined.
		//
push_n:
		clc
		adc datastacks
		sta datastacks
		rts


		// PROCEDURE pop_n.
		//
		// Pop <A> bytes off the stack and discard them.
		//
pop_n:
		sta el
		sec
		lda datastacks
		sbc el
		sta datastacks
		rts


		// PROCEDURE prolog.
		//
		// Start a new stack frame.  A stack frame is
		// needed when a subroutine is called that has
		// either parameters or local "automatic" variables.
		// When we return from the subroutine the stack
		// frame is discarded.
		//
		// Subroutines with none of these do not need a stack frame.
		//
		// A stack frame lives on the datastack.
		// It consists of local storage;
		// Parameters followed by local "automatic" variables.
		// The base pointer <BP> points to the first byte
		// underneath the the current stack frame.
		//
		//   datastack[BP+1] points to the first  byte of the frame.
		//   datastack[BP+2] points to the second byte of the frame.
		//   datastack[BP+3] points to the third  byte of the frame.
		//   etc.
		//
		// The prolog subroutine is called from the application
		// before calling the subroutine.  It should be called
		// immediately before the first parameter is pushed on
		// the stack.
		//
		// On entering the subroutine the bytes needed for
		// storage of local "automatic" variables needs to be
		// pushed on the stack (by calling push_n).  This is the
		// size of the stack frame, minus the size of the parameters.
		//
prolog:
		lda bp
		jsr push_b	// Saved old <BP> on stack.
		lda datastacks
		sta bp		// Current stack pointer becomes new <BP>.
		rts



		// PROCEDURE epilog.
		//
		// This subroutine is called before returning from
		// a stack-framed subroutine.  <A> is set to the size
		// of the local stack frame.
		//
		// Subroutines that return a value do so in <A, X>;
		// <A> (and <Y>) are destroyed by this subroutine,
		// and so <A> should be saved before calling it.
		//
epilog:
		jsr pop_n	// Discard local stack frame.
		jsr pop_b       // Retrieve the previous <BP>.
		sta bp          // Restore  the previous <BP>.
		rts


		// GROUP Memory Management.
		//

		// PROCEDURE peek_w.
		//
		// [top] = memory[[top]]
		//
		// Peek at the address in [top], returning the byte
		// of the value at that address on the stack.
		//
peek_w_b:
		jsr pop_w_c
_peek_w_b_c:
		ldy #0
		lda (cl),y
		jmp push_b



		// PROCEDURE peek_w.
		//
		// [top] = memory[[top]]
		//
		// Peek at the address in [top], returning the word
		// of the value at that address on the stack.
		//
peek_w_w:
		jsr peek_w_b
		inc cl
		bne _peek_w_b_c
		inc ch
		jmp _peek_w_b_c


		// PROCEDURE poke_w_b.
		//
		// memory[[under]] = [top]
		//
		// Set the address at [under] to the byte [top].
		//
poke_w_b:
		jsr pop_b
		pha
		jsr pop_w_c
		pla
_poke_w_b_c:
		ldy #0
		sta (cl),y
		rts


		// PROCEDURE poke_w_w.
		//
		// memory[[under]] = [top]
		//
		// Set the address at [under] to the word [top].
		//
poke_w_w:
		jsr pop_b
		pha
		jsr poke_w_b
		pla
		inc cl
		bne _poke_w_b_c
		inc ch
		jmp _poke_w_b_c






		// GROUP Runtime Error.
		//


		// PROCEDURE rerror (report runtime error).
		//
		// Terminate reporting runtime error <A>.
		//
rerror:
		pha
                lda #(rerror_string % 256)
                ldx #(rerror_string / 256)
		jsr print_string_ax		// Print "RERROR "
		pla
		ldx #0
		jsr print_integer_ax		// Print the error #.
		jsr call_get_ch_cooked_a	// Wait for an Enter.
		brk				// Soft break.

		end