7f28c94eda
The interpreter more or less passes all the tests in the z-machine test suite. It should build for every target except Archos (for which it is disabled). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24267 a1c6a512-1295-4272-9138-f99709370657
1109 lines
21 KiB
C
1109 lines
21 KiB
C
/* text.c - Text manipulation functions
|
|
* Copyright (c) 1995-1997 Stefan Jokisch
|
|
*
|
|
* This file is part of Frotz.
|
|
*
|
|
* Frotz is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Frotz is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
|
*/
|
|
|
|
#include "frotz.h"
|
|
|
|
enum string_type {
|
|
LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY
|
|
};
|
|
|
|
extern zword object_name (zword);
|
|
|
|
static zchar decoded[10];
|
|
static zword encoded[3];
|
|
|
|
/*
|
|
* According to Matteo De Luigi <matteo.de.luigi@libero.it>,
|
|
* 0xab and 0xbb were in each other's proper positions.
|
|
* Sat Apr 21, 2001
|
|
*/
|
|
static zchar zscii_to_latin1[] = {
|
|
0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xbb,
|
|
0xab, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9,
|
|
0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3,
|
|
0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0,
|
|
0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4,
|
|
0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5,
|
|
0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5,
|
|
0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0,
|
|
0xa3, 0x00, 0x00, 0xa1, 0xbf
|
|
};
|
|
|
|
/*
|
|
* translate_from_zscii
|
|
*
|
|
* Map a ZSCII character onto the ISO Latin-1 alphabet.
|
|
*
|
|
*/
|
|
|
|
zchar translate_from_zscii (zbyte c)
|
|
{
|
|
|
|
if (c == 0xfc)
|
|
return ZC_MENU_CLICK;
|
|
if (c == 0xfd)
|
|
return ZC_DOUBLE_CLICK;
|
|
if (c == 0xfe)
|
|
return ZC_SINGLE_CLICK;
|
|
|
|
if (c >= 0x9b && story_id != BEYOND_ZORK) {
|
|
|
|
if (hx_unicode_table != 0) { /* game has its own Unicode table */
|
|
|
|
zbyte N;
|
|
|
|
LOW_BYTE (hx_unicode_table, N)
|
|
|
|
if (c - 0x9b < N) {
|
|
|
|
zword addr = hx_unicode_table + 1 + 2 * (c - 0x9b);
|
|
zword unicode;
|
|
|
|
LOW_WORD (addr, unicode)
|
|
|
|
return (unicode < 0x100) ? (zchar) unicode : '?';
|
|
|
|
} else return '?';
|
|
|
|
} else /* game uses standard set */
|
|
|
|
if (c <= 0xdf) {
|
|
|
|
if (c == 0xdc || c == 0xdd) /* Oe and oe ligatures */
|
|
return '?'; /* are not ISO-Latin 1 */
|
|
|
|
return zscii_to_latin1[c - 0x9b];
|
|
|
|
} else return '?';
|
|
}
|
|
|
|
return c;
|
|
|
|
}/* translate_from_zscii */
|
|
|
|
/*
|
|
* translate_to_zscii
|
|
*
|
|
* Map an ISO Latin-1 character onto the ZSCII alphabet.
|
|
*
|
|
*/
|
|
|
|
zbyte translate_to_zscii (zchar c)
|
|
{
|
|
int i;
|
|
|
|
if (c == ZC_SINGLE_CLICK)
|
|
return 0xfe;
|
|
if (c == ZC_DOUBLE_CLICK)
|
|
return 0xfd;
|
|
if (c == ZC_MENU_CLICK)
|
|
return 0xfc;
|
|
|
|
if (c >= ZC_LATIN1_MIN) {
|
|
|
|
if (hx_unicode_table != 0) { /* game has its own Unicode table */
|
|
|
|
zbyte N;
|
|
int i;
|
|
|
|
LOW_BYTE (hx_unicode_table, N)
|
|
|
|
for (i = 0x9b; i < 0x9b + N; i++) {
|
|
|
|
zword addr = hx_unicode_table + 1 + 2 * (i - 0x9b);
|
|
zword unicode;
|
|
|
|
LOW_WORD (addr, unicode)
|
|
|
|
if (c == unicode)
|
|
return (zbyte) i;
|
|
|
|
}
|
|
|
|
return '?';
|
|
|
|
} else { /* game uses standard set */
|
|
|
|
for (i = 0x9b; i <= 0xdf; i++)
|
|
if (c == zscii_to_latin1[i - 0x9b])
|
|
return (zbyte) i;
|
|
|
|
return '?';
|
|
|
|
}
|
|
}
|
|
|
|
if (c == 0) /* Safety thing from David Kinder */
|
|
c = '?'; /* regarding his Unicode patches */
|
|
/* Sept 15, 2002 */
|
|
|
|
return c;
|
|
|
|
}/* translate_to_zscii */
|
|
|
|
/*
|
|
* alphabet
|
|
*
|
|
* Return a character from one of the three character sets.
|
|
*
|
|
*/
|
|
|
|
static zchar alphabet (int set, int index)
|
|
{
|
|
|
|
if (h_alphabet != 0) { /* game uses its own alphabet */
|
|
|
|
zbyte c;
|
|
|
|
zword addr = h_alphabet + 26 * set + index;
|
|
LOW_BYTE (addr, c)
|
|
|
|
return translate_from_zscii (c);
|
|
|
|
} else /* game uses default alphabet */
|
|
|
|
if (set == 0)
|
|
return 'a' + index;
|
|
else if (set == 1)
|
|
return 'A' + index;
|
|
else if (h_version == V1)
|
|
return " 0123456789.,!?_#'\"/\\<-:()"[index];
|
|
else
|
|
return " ^0123456789.,!?_#'\"/\\-:()"[index];
|
|
|
|
}/* alphabet */
|
|
|
|
/*
|
|
* load_string
|
|
*
|
|
* Copy a ZSCII string from the memory to the global "decoded" string.
|
|
*
|
|
*/
|
|
|
|
static void load_string (zword addr, zword length)
|
|
{
|
|
int resolution = (h_version <= V3) ? 2 : 3;
|
|
int i = 0;
|
|
|
|
while (i < 3 * resolution)
|
|
|
|
if (i < length) {
|
|
|
|
zbyte c;
|
|
|
|
LOW_BYTE (addr, c)
|
|
addr++;
|
|
|
|
decoded[i++] = translate_from_zscii (c);
|
|
|
|
} else decoded[i++] = 0;
|
|
|
|
}/* load_string */
|
|
|
|
/*
|
|
* encode_text
|
|
*
|
|
* Encode the Unicode text in the global "decoded" string then write
|
|
* the result to the global "encoded" array. (This is used to look up
|
|
* words in the dictionary.) Up to V3 the vocabulary resolution is
|
|
* two, since V4 it is three words. Because each word contains three
|
|
* Z-characters, that makes six or nine Z-characters respectively.
|
|
* Longer words are chopped to the proper size, shorter words are are
|
|
* padded out with 5's. For word completion we pad with 0s and 31s,
|
|
* the minimum and maximum Z-characters.
|
|
*
|
|
*/
|
|
|
|
static void encode_text (int padding)
|
|
{
|
|
static zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 };
|
|
static zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 };
|
|
static zchar wait[] = { 'w', 'a', 'i', 't', 0 };
|
|
|
|
zbyte zchars[12];
|
|
const zchar *ptr = decoded;
|
|
zchar c;
|
|
int resolution = (h_version <= V3) ? 2 : 3;
|
|
int i = 0;
|
|
|
|
/* Expand abbreviations that some old Infocom games lack */
|
|
|
|
if (f_setup.expand_abbreviations)
|
|
|
|
if (padding == 0x05 && decoded[1] == 0)
|
|
|
|
switch (decoded[0]) {
|
|
case 'g': ptr = again; break;
|
|
case 'x': ptr = examine; break;
|
|
case 'z': ptr = wait; break;
|
|
}
|
|
|
|
/* Translate string to a sequence of Z-characters */
|
|
|
|
while (i < 3 * resolution)
|
|
|
|
if ((c = *ptr++) != 0) {
|
|
|
|
int index, set;
|
|
zbyte c2;
|
|
|
|
/* Search character in the alphabet */
|
|
|
|
for (set = 0; set < 3; set++)
|
|
for (index = 0; index < 26; index++)
|
|
if (c == alphabet (set, index))
|
|
goto letter_found;
|
|
|
|
/* Character not found, store its ZSCII value */
|
|
|
|
c2 = translate_to_zscii (c);
|
|
|
|
zchars[i++] = 5;
|
|
zchars[i++] = 6;
|
|
zchars[i++] = c2 >> 5;
|
|
zchars[i++] = c2 & 0x1f;
|
|
|
|
continue;
|
|
|
|
letter_found:
|
|
|
|
/* Character found, store its index */
|
|
|
|
if (set != 0)
|
|
zchars[i++] = ((h_version <= V2) ? 1 : 3) + set;
|
|
|
|
zchars[i++] = index + 6;
|
|
|
|
} else zchars[i++] = padding;
|
|
|
|
/* Three Z-characters make a 16bit word */
|
|
|
|
for (i = 0; i < resolution; i++)
|
|
|
|
encoded[i] =
|
|
(zchars[3 * i + 0] << 10) |
|
|
(zchars[3 * i + 1] << 5) |
|
|
(zchars[3 * i + 2]);
|
|
|
|
encoded[resolution - 1] |= 0x8000;
|
|
|
|
}/* encode_text */
|
|
|
|
/*
|
|
* z_check_unicode, test if a unicode character can be read and printed.
|
|
*
|
|
* zargs[0] = Unicode
|
|
*
|
|
*/
|
|
|
|
void z_check_unicode (void)
|
|
{
|
|
zword c = zargs[0];
|
|
|
|
if (c >= 0x20 && c <= 0x7e)
|
|
store (3);
|
|
else if (c == 0xa0)
|
|
store (1);
|
|
else if (c >= 0xa1 && c <= 0xff)
|
|
store (3);
|
|
else
|
|
store (0);
|
|
|
|
}/* z_check_unicode */
|
|
|
|
/*
|
|
* z_encode_text, encode a ZSCII string for use in a dictionary.
|
|
*
|
|
* zargs[0] = address of text buffer
|
|
* zargs[1] = length of ASCII string
|
|
* zargs[2] = offset of ASCII string within the text buffer
|
|
* zargs[3] = address to store encoded text in
|
|
*
|
|
* This is a V5+ opcode and therefore the dictionary resolution must be
|
|
* three 16bit words.
|
|
*
|
|
*/
|
|
|
|
void z_encode_text (void)
|
|
{
|
|
int i;
|
|
|
|
load_string ((zword) (zargs[0] + zargs[2]), zargs[1]);
|
|
|
|
encode_text (0x05);
|
|
|
|
for (i = 0; i < 3; i++)
|
|
storew ((zword) (zargs[3] + 2 * i), encoded[i]);
|
|
|
|
}/* z_encode_text */
|
|
|
|
/*
|
|
* decode_text
|
|
*
|
|
* Convert encoded text to Unicode. The encoded text consists of 16bit
|
|
* words. Every word holds 3 Z-characters (5 bits each) plus a spare
|
|
* bit to mark the last word. The Z-characters translate to ZSCII by
|
|
* looking at the current current character set. Some select another
|
|
* character set, others refer to abbreviations.
|
|
*
|
|
* There are several different string types:
|
|
*
|
|
* LOW_STRING - from the lower 64KB (byte address)
|
|
* ABBREVIATION - from the abbreviations table (word address)
|
|
* HIGH_STRING - from the end of the memory map (packed address)
|
|
* EMBEDDED_STRING - from the instruction stream (at PC)
|
|
* VOCABULARY - from the dictionary (byte address)
|
|
*
|
|
* The last type is only used for word completion.
|
|
*
|
|
*/
|
|
|
|
#define outchar(c) if (st==VOCABULARY) *ptr++=c; else print_char(c)
|
|
|
|
static void decode_text (enum string_type st, zword addr)
|
|
{
|
|
zchar *ptr;
|
|
long byte_addr;
|
|
zchar c2;
|
|
zword code;
|
|
zbyte c, prev_c = 0;
|
|
int shift_state = 0;
|
|
int shift_lock = 0;
|
|
int status = 0;
|
|
|
|
ptr = NULL; /* makes compilers shut up */
|
|
byte_addr = 0;
|
|
|
|
/* Calculate the byte address if necessary */
|
|
|
|
if (st == ABBREVIATION)
|
|
|
|
byte_addr = (long) addr << 1;
|
|
|
|
else if (st == HIGH_STRING) {
|
|
|
|
if (h_version <= V3)
|
|
byte_addr = (long) addr << 1;
|
|
else if (h_version <= V5)
|
|
byte_addr = (long) addr << 2;
|
|
else if (h_version <= V7)
|
|
byte_addr = ((long) addr << 2) + ((long) h_strings_offset << 3);
|
|
else /* h_version == V8 */
|
|
byte_addr = (long) addr << 3;
|
|
|
|
if (byte_addr >= story_size)
|
|
runtime_error (ERR_ILL_PRINT_ADDR);
|
|
|
|
}
|
|
|
|
/* Loop until a 16bit word has the highest bit set */
|
|
|
|
if (st == VOCABULARY)
|
|
ptr = decoded;
|
|
|
|
do {
|
|
|
|
int i;
|
|
|
|
/* Fetch the next 16bit word */
|
|
|
|
if (st == LOW_STRING || st == VOCABULARY) {
|
|
LOW_WORD (addr, code)
|
|
addr += 2;
|
|
} else if (st == HIGH_STRING || st == ABBREVIATION) {
|
|
HIGH_WORD (byte_addr, code)
|
|
byte_addr += 2;
|
|
} else
|
|
CODE_WORD (code)
|
|
|
|
/* Read its three Z-characters */
|
|
|
|
for (i = 10; i >= 0; i -= 5) {
|
|
|
|
zword abbr_addr;
|
|
zword ptr_addr;
|
|
|
|
c = (code >> i) & 0x1f;
|
|
|
|
switch (status) {
|
|
|
|
case 0: /* normal operation */
|
|
|
|
if (shift_state == 2 && c == 6)
|
|
status = 2;
|
|
|
|
else if (h_version == V1 && c == 1)
|
|
new_line ();
|
|
|
|
else if (h_version >= V2 && shift_state == 2 && c == 7)
|
|
new_line ();
|
|
|
|
else if (c >= 6)
|
|
outchar (alphabet (shift_state, c - 6));
|
|
|
|
else if (c == 0)
|
|
outchar (' ');
|
|
|
|
else if (h_version >= V2 && c == 1)
|
|
status = 1;
|
|
|
|
else if (h_version >= V3 && c <= 3)
|
|
status = 1;
|
|
|
|
else {
|
|
|
|
shift_state = (shift_lock + (c & 1) + 1) % 3;
|
|
|
|
if (h_version <= V2 && c >= 4)
|
|
shift_lock = shift_state;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
shift_state = shift_lock;
|
|
|
|
break;
|
|
|
|
case 1: /* abbreviation */
|
|
|
|
ptr_addr = h_abbreviations + 64 * (prev_c - 1) + 2 * c;
|
|
|
|
LOW_WORD (ptr_addr, abbr_addr)
|
|
decode_text (ABBREVIATION, abbr_addr);
|
|
|
|
status = 0;
|
|
break;
|
|
|
|
case 2: /* ZSCII character - first part */
|
|
|
|
status = 3;
|
|
break;
|
|
|
|
case 3: /* ZSCII character - second part */
|
|
|
|
c2 = translate_from_zscii ((prev_c << 5) | c);
|
|
outchar (c2);
|
|
|
|
status = 0;
|
|
break;
|
|
|
|
}
|
|
|
|
prev_c = c;
|
|
|
|
}
|
|
|
|
} while (!(code & 0x8000));
|
|
|
|
if (st == VOCABULARY)
|
|
*ptr = 0;
|
|
|
|
}/* decode_text */
|
|
|
|
#undef outchar
|
|
|
|
/*
|
|
* z_new_line, print a new line.
|
|
*
|
|
* no zargs used
|
|
*
|
|
*/
|
|
|
|
void z_new_line (void)
|
|
{
|
|
|
|
new_line ();
|
|
|
|
}/* z_new_line */
|
|
|
|
/*
|
|
* z_print, print a string embedded in the instruction stream.
|
|
*
|
|
* no zargs used
|
|
*
|
|
*/
|
|
|
|
void z_print (void)
|
|
{
|
|
|
|
decode_text (EMBEDDED_STRING, 0);
|
|
|
|
}/* z_print */
|
|
|
|
/*
|
|
* z_print_addr, print a string from the lower 64KB.
|
|
*
|
|
* zargs[0] = address of string to print
|
|
*
|
|
*/
|
|
|
|
void z_print_addr (void)
|
|
{
|
|
|
|
decode_text (LOW_STRING, zargs[0]);
|
|
|
|
}/* z_print_addr */
|
|
|
|
/*
|
|
* z_print_char print a single ZSCII character.
|
|
*
|
|
* zargs[0] = ZSCII character to be printed
|
|
*
|
|
*/
|
|
|
|
void z_print_char (void)
|
|
{
|
|
|
|
print_char (translate_from_zscii (zargs[0]));
|
|
|
|
}/* z_print_char */
|
|
|
|
/*
|
|
* z_print_form, print a formatted table.
|
|
*
|
|
* zargs[0] = address of formatted table to be printed
|
|
*
|
|
*/
|
|
|
|
void z_print_form (void)
|
|
{
|
|
zword count;
|
|
zword addr = zargs[0];
|
|
|
|
bool first = TRUE;
|
|
|
|
for (;;) {
|
|
|
|
LOW_WORD (addr, count)
|
|
addr += 2;
|
|
|
|
if (count == 0)
|
|
break;
|
|
|
|
if (!first)
|
|
new_line ();
|
|
|
|
while (count--) {
|
|
|
|
zbyte c;
|
|
|
|
LOW_BYTE (addr, c)
|
|
addr++;
|
|
|
|
print_char (translate_from_zscii (c));
|
|
|
|
}
|
|
|
|
first = FALSE;
|
|
|
|
}
|
|
|
|
}/* z_print_form */
|
|
|
|
/*
|
|
* print_num
|
|
*
|
|
* Print a signed 16bit number.
|
|
*
|
|
*/
|
|
|
|
void print_num (zword value)
|
|
{
|
|
int i;
|
|
|
|
/* Print sign */
|
|
|
|
if ((short) value < 0) {
|
|
print_char ('-');
|
|
value = - (short) value;
|
|
}
|
|
|
|
/* Print absolute value */
|
|
|
|
for (i = 10000; i != 0; i /= 10)
|
|
if (value >= i || i == 1)
|
|
print_char ('0' + (value / i) % 10);
|
|
|
|
}/* print_num */
|
|
|
|
/*
|
|
* z_print_num, print a signed number.
|
|
*
|
|
* zargs[0] = number to print
|
|
*
|
|
*/
|
|
|
|
void z_print_num (void)
|
|
{
|
|
|
|
print_num (zargs[0]);
|
|
|
|
}/* z_print_num */
|
|
|
|
/*
|
|
* print_object
|
|
*
|
|
* Print an object description.
|
|
*
|
|
*/
|
|
|
|
void print_object (zword object)
|
|
{
|
|
zword addr = object_name (object);
|
|
zword code = 0x94a5;
|
|
zbyte length;
|
|
|
|
LOW_BYTE (addr, length)
|
|
addr++;
|
|
|
|
if (length != 0)
|
|
LOW_WORD (addr, code)
|
|
|
|
if (code == 0x94a5) { /* encoded text 0x94a5 == empty string */
|
|
|
|
print_string ("object#"); /* supply a generic name */
|
|
print_num (object); /* for anonymous objects */
|
|
|
|
} else decode_text (LOW_STRING, addr);
|
|
|
|
}/* print_object */
|
|
|
|
/*
|
|
* z_print_obj, print an object description.
|
|
*
|
|
* zargs[0] = number of object to be printed
|
|
*
|
|
*/
|
|
|
|
void z_print_obj (void)
|
|
{
|
|
|
|
print_object (zargs[0]);
|
|
|
|
}/* z_print_obj */
|
|
|
|
/*
|
|
* z_print_paddr, print the string at the given packed address.
|
|
*
|
|
* zargs[0] = packed address of string to be printed
|
|
*
|
|
*/
|
|
|
|
void z_print_paddr (void)
|
|
{
|
|
|
|
decode_text (HIGH_STRING, zargs[0]);
|
|
|
|
}/* z_print_paddr */
|
|
|
|
/*
|
|
* z_print_ret, print the string at PC, print newline then return true.
|
|
*
|
|
* no zargs used
|
|
*
|
|
*/
|
|
|
|
void z_print_ret (void)
|
|
{
|
|
|
|
decode_text (EMBEDDED_STRING, 0);
|
|
new_line ();
|
|
ret (1);
|
|
|
|
}/* z_print_ret */
|
|
|
|
/*
|
|
* print_string
|
|
*
|
|
* Print a string of ASCII characters.
|
|
*
|
|
*/
|
|
|
|
void print_string (const char *s)
|
|
{
|
|
char c;
|
|
|
|
while ((c = *s++) != 0)
|
|
|
|
if (c == '\n')
|
|
new_line ();
|
|
else
|
|
print_char (c);
|
|
|
|
}/* print_string */
|
|
|
|
/*
|
|
* z_print_unicode
|
|
*
|
|
* zargs[0] = Unicode
|
|
*
|
|
*/
|
|
|
|
void z_print_unicode (void)
|
|
{
|
|
|
|
print_char ((zargs[0] <= 0xff) ? zargs[0] : '?');
|
|
|
|
}/* z_print_unicode */
|
|
|
|
/*
|
|
* lookup_text
|
|
*
|
|
* Scan a dictionary searching for the given word. The first argument
|
|
* can be
|
|
*
|
|
* 0x00 - find the first word which is >= the given one
|
|
* 0x05 - find the word which exactly matches the given one
|
|
* 0x1f - find the last word which is <= the given one
|
|
*
|
|
* The return value is 0 if the search fails.
|
|
*
|
|
*/
|
|
|
|
static zword lookup_text (int padding, zword dct)
|
|
{
|
|
zword entry_addr;
|
|
zword entry_count;
|
|
zword entry;
|
|
zword addr;
|
|
zbyte entry_len;
|
|
zbyte sep_count;
|
|
int resolution = (h_version <= V3) ? 2 : 3;
|
|
int entry_number;
|
|
int lower, upper;
|
|
int i;
|
|
bool sorted;
|
|
|
|
encode_text (padding);
|
|
|
|
LOW_BYTE (dct, sep_count) /* skip word separators */
|
|
dct += 1 + sep_count;
|
|
LOW_BYTE (dct, entry_len) /* get length of entries */
|
|
dct += 1;
|
|
LOW_WORD (dct, entry_count) /* get number of entries */
|
|
dct += 2;
|
|
|
|
if ((short) entry_count < 0) { /* bad luck, entries aren't sorted */
|
|
|
|
entry_count = - (short) entry_count;
|
|
sorted = FALSE;
|
|
|
|
} else sorted = TRUE; /* entries are sorted */
|
|
|
|
lower = 0;
|
|
upper = entry_count - 1;
|
|
|
|
while (lower <= upper) {
|
|
|
|
if (sorted) /* binary search */
|
|
entry_number = (lower + upper) / 2;
|
|
else /* linear search */
|
|
entry_number = lower;
|
|
|
|
entry_addr = dct + entry_number * entry_len;
|
|
|
|
/* Compare word to dictionary entry */
|
|
|
|
addr = entry_addr;
|
|
|
|
for (i = 0; i < resolution; i++) {
|
|
LOW_WORD (addr, entry)
|
|
if (encoded[i] != entry)
|
|
goto continuing;
|
|
addr += 2;
|
|
}
|
|
|
|
return entry_addr; /* exact match found, return now */
|
|
|
|
continuing:
|
|
|
|
if (sorted) /* binary search */
|
|
|
|
if (encoded[i] > entry)
|
|
lower = entry_number + 1;
|
|
else
|
|
upper = entry_number - 1;
|
|
|
|
else lower++; /* linear search */
|
|
|
|
}
|
|
|
|
/* No exact match has been found */
|
|
|
|
if (padding == 0x05)
|
|
return 0;
|
|
|
|
entry_number = (padding == 0x00) ? lower : upper;
|
|
|
|
if (entry_number == -1 || entry_number == entry_count)
|
|
return 0;
|
|
|
|
return dct + entry_number * entry_len;
|
|
|
|
}/* lookup_text */
|
|
|
|
/*
|
|
* tokenise_text
|
|
*
|
|
* Translate a single word to a token and append it to the token
|
|
* buffer. Every token consists of the address of the dictionary
|
|
* entry, the length of the word and the offset of the word from
|
|
* the start of the text buffer. Unknown words cause empty slots
|
|
* if the flag is set (such that the text can be scanned several
|
|
* times with different dictionaries); otherwise they are zero.
|
|
*
|
|
*/
|
|
|
|
static void tokenise_text (zword text, zword length, zword from, zword parse, zword dct, bool flag)
|
|
{
|
|
zword addr;
|
|
zbyte token_max, token_count;
|
|
|
|
LOW_BYTE (parse, token_max)
|
|
parse++;
|
|
LOW_BYTE (parse, token_count)
|
|
|
|
if (token_count < token_max) { /* sufficient space left for token? */
|
|
|
|
storeb (parse++, token_count + 1);
|
|
|
|
load_string ((zword) (text + from), length);
|
|
|
|
addr = lookup_text (0x05, dct);
|
|
|
|
if (addr != 0 || !flag) {
|
|
|
|
parse += 4 * token_count;
|
|
|
|
storew ((zword) (parse + 0), addr);
|
|
storeb ((zword) (parse + 2), length);
|
|
storeb ((zword) (parse + 3), from);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}/* tokenise_text */
|
|
|
|
/*
|
|
* tokenise_line
|
|
*
|
|
* Split an input line into words and translate the words to tokens.
|
|
*
|
|
*/
|
|
|
|
void tokenise_line (zword text, zword token, zword dct, bool flag)
|
|
{
|
|
zword addr1;
|
|
zword addr2;
|
|
zbyte length;
|
|
zbyte c;
|
|
|
|
length = 0; /* makes compilers shut up */
|
|
|
|
/* Use standard dictionary if the given dictionary is zero */
|
|
|
|
if (dct == 0)
|
|
dct = h_dictionary;
|
|
|
|
/* Remove all tokens before inserting new ones */
|
|
|
|
storeb ((zword) (token + 1), 0);
|
|
|
|
/* Move the first pointer across the text buffer searching for the
|
|
beginning of a word. If this succeeds, store the position in a
|
|
second pointer. Move the first pointer searching for the end of
|
|
the word. When it is found, "tokenise" the word. Continue until
|
|
the end of the buffer is reached. */
|
|
|
|
addr1 = text;
|
|
addr2 = 0;
|
|
|
|
if (h_version >= V5) {
|
|
addr1++;
|
|
LOW_BYTE (addr1, length)
|
|
}
|
|
|
|
do {
|
|
|
|
zword sep_addr;
|
|
zbyte sep_count;
|
|
zbyte separator;
|
|
|
|
/* Fetch next ZSCII character */
|
|
|
|
addr1++;
|
|
|
|
if (h_version >= V5 && addr1 == text + 2 + length)
|
|
c = 0;
|
|
else
|
|
LOW_BYTE (addr1, c)
|
|
|
|
/* Check for separator */
|
|
|
|
sep_addr = dct;
|
|
|
|
LOW_BYTE (sep_addr, sep_count)
|
|
sep_addr++;
|
|
|
|
do {
|
|
|
|
LOW_BYTE (sep_addr, separator)
|
|
sep_addr++;
|
|
|
|
} while (c != separator && --sep_count != 0);
|
|
|
|
/* This could be the start or the end of a word */
|
|
|
|
if (sep_count == 0 && c != ' ' && c != 0) {
|
|
|
|
if (addr2 == 0)
|
|
addr2 = addr1;
|
|
|
|
} else if (addr2 != 0) {
|
|
|
|
tokenise_text (
|
|
text,
|
|
(zword) (addr1 - addr2),
|
|
(zword) (addr2 - text),
|
|
token, dct, flag );
|
|
|
|
addr2 = 0;
|
|
|
|
}
|
|
|
|
/* Translate separator (which is a word in its own right) */
|
|
|
|
if (sep_count != 0)
|
|
|
|
tokenise_text (
|
|
text,
|
|
(zword) (1),
|
|
(zword) (addr1 - text),
|
|
token, dct, flag );
|
|
|
|
} while (c != 0);
|
|
|
|
}/* tokenise_line */
|
|
|
|
/*
|
|
* z_tokenise, make a lexical analysis of a ZSCII string.
|
|
*
|
|
* zargs[0] = address of string to analyze
|
|
* zargs[1] = address of token buffer
|
|
* zargs[2] = address of dictionary (optional)
|
|
* zargs[3] = set when unknown words cause empty slots (optional)
|
|
*
|
|
*/
|
|
|
|
void z_tokenise (void)
|
|
{
|
|
|
|
/* Supply default arguments */
|
|
|
|
if (zargc < 3)
|
|
zargs[2] = 0;
|
|
if (zargc < 4)
|
|
zargs[3] = 0;
|
|
|
|
/* Call tokenise_line to do the real work */
|
|
|
|
tokenise_line (zargs[0], zargs[1], zargs[2], zargs[3] != 0);
|
|
|
|
}/* z_tokenise */
|
|
|
|
/*
|
|
* completion
|
|
*
|
|
* Scan the vocabulary to complete the last word on the input line
|
|
* (similar to "tcsh" under Unix). The return value is
|
|
*
|
|
* 2 ==> completion is impossible
|
|
* 1 ==> completion is ambiguous
|
|
* 0 ==> completion is successful
|
|
*
|
|
* The function also returns a string in its second argument. In case
|
|
* of 2, the string is empty; in case of 1, the string is the longest
|
|
* extension of the last word on the input line that is common to all
|
|
* possible completions (for instance, if the last word on the input
|
|
* is "fo" and its only possible completions are "follow" and "folly"
|
|
* then the string is "ll"); in case of 0, the string is an extension
|
|
* to the last word that results in the only possible completion.
|
|
*
|
|
*/
|
|
|
|
int completion (const zchar *buffer, zchar *result)
|
|
{
|
|
zword minaddr;
|
|
zword maxaddr;
|
|
zchar *ptr;
|
|
zchar c;
|
|
int len;
|
|
int i;
|
|
|
|
*result = 0;
|
|
|
|
/* Copy last word to "decoded" string */
|
|
|
|
len = 0;
|
|
|
|
while ((c = *buffer++) != 0)
|
|
|
|
if (c != ' ') {
|
|
|
|
if (len < 9)
|
|
decoded[len++] = c;
|
|
|
|
} else len = 0;
|
|
|
|
decoded[len] = 0;
|
|
|
|
/* Search the dictionary for first and last possible extensions */
|
|
|
|
minaddr = lookup_text (0x00, h_dictionary);
|
|
maxaddr = lookup_text (0x1f, h_dictionary);
|
|
|
|
if (minaddr == 0 || maxaddr == 0 || minaddr > maxaddr)
|
|
return 2;
|
|
|
|
/* Copy first extension to "result" string */
|
|
|
|
decode_text (VOCABULARY, minaddr);
|
|
|
|
ptr = result;
|
|
|
|
for (i = len; (c = decoded[i]) != 0; i++)
|
|
*ptr++ = c;
|
|
*ptr = 0;
|
|
|
|
/* Merge second extension with "result" string */
|
|
|
|
decode_text (VOCABULARY, maxaddr);
|
|
|
|
for (i = len, ptr = result; (c = decoded[i]) != 0; i++, ptr++)
|
|
if (*ptr != c) break;
|
|
*ptr = 0;
|
|
|
|
/* Search was ambiguous or successful */
|
|
|
|
return (minaddr == maxaddr) ? 0 : 1;
|
|
|
|
}/* completion */
|