dslinux/user/bitchx/dll/europa/cse476 Makefile README gnode.cpp gnode.h grammar.cpp grammar.h grammar.txt lexicon.cpp lexicon.h lexicon.txt list.cpp list.h p1 p1-junk.cpp p1.cpp parse.cpp parse.h parse.txt parse2.txt t1.cpp t2.cpp t3.cpp t4.cpp
stsp
stsp at user.in-berlin.de
Sun Jul 2 15:18:35 CEST 2006
Update of /cvsroot/dslinux/dslinux/user/bitchx/dll/europa/cse476
In directory antilope:/tmp/cvs-serv9280/dll/europa/cse476
Added Files:
Makefile README gnode.cpp gnode.h grammar.cpp grammar.h
grammar.txt lexicon.cpp lexicon.h lexicon.txt list.cpp list.h
p1 p1-junk.cpp p1.cpp parse.cpp parse.h parse.txt parse2.txt
t1.cpp t2.cpp t3.cpp t4.cpp
Log Message:
Adding pristine copy of BitchX so I can branch from it.
--- NEW FILE: list.cpp ---
// Copyright (c) 1998-99, Ed Schlunder
#include <string.h>
#include "list.h"
// ---------------------------------------------------------------------------
// Class Implementation: List
// ---------------------------------------------------------------------------
List::List() {
rootPtr = NULL;
currPtr = NULL;
}
List::~List() {
listNode *tmpPtr = rootPtr, *nextPtr;
// go through the list and delete each node
while(tmpPtr) {
nextPtr = tmpPtr->nextPtr;
if(tmpPtr->item)
delete tmpPtr->item;
if(tmpPtr->itemList)
delete tmpPtr->itemList;
if(tmpPtr->itemNode)
delete tmpPtr->itemNode;
delete tmpPtr;
tmpPtr = nextPtr;
}
};
void List::InsertBefore(char *item) {
// this version does char strings
if(rootPtr == NULL) {
// creating the first node in a previously empty list.
rootPtr = new listNode;
rootPtr->item = new char[strlen(item) + 1];
strcpy(rootPtr->item, item);
rootPtr->itemList = NULL;
rootPtr->itemNode = NULL;
rootPtr->nextPtr = NULL;
rootPtr->prevPtr = NULL;
currPtr = rootPtr;
}
else {
// create a new node and insert it before currPtr's node
listNode *newPtr = new listNode;
newPtr->nextPtr = currPtr;
newPtr->prevPtr = currPtr->prevPtr;
newPtr->item = new char[strlen(item) + 1];
strcpy(newPtr->item, item);
newPtr->itemList = NULL;
newPtr->itemNode = NULL;
currPtr->prevPtr = newPtr;
if(newPtr->prevPtr)
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
void List::InsertBefore(List *item) {
// this version does Lists
if(rootPtr == NULL) {
rootPtr = new listNode;
rootPtr->item = NULL;
rootPtr->itemList = item;
rootPtr->itemNode = NULL;
rootPtr->nextPtr = NULL;
rootPtr->prevPtr = NULL;
currPtr = rootPtr;
}
else {
listNode *newPtr = new listNode;
newPtr->item = NULL;
newPtr->itemList = item;
newPtr->itemNode = NULL;
newPtr->nextPtr = currPtr;
newPtr->prevPtr = currPtr->prevPtr;
currPtr->prevPtr = newPtr;
if(newPtr->prevPtr)
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
void List::InsertBefore(genericNode *item) {
if(rootPtr == NULL) {
rootPtr = new listNode;
rootPtr->item = NULL;
rootPtr->itemList = NULL;
rootPtr->itemNode = item;
rootPtr->nextPtr = NULL;
rootPtr->prevPtr = NULL;
currPtr = rootPtr;
}
else {
listNode *newPtr = new listNode;
newPtr->item = NULL;
newPtr->itemList = NULL;
newPtr->itemNode = item;
newPtr->nextPtr = currPtr;
newPtr->prevPtr = currPtr->prevPtr;
currPtr->prevPtr = newPtr;
if(newPtr->prevPtr)
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
void List::InsertAfter(char *item) {
if(rootPtr == NULL) {
rootPtr = new listNode;
rootPtr->prevPtr = NULL;
rootPtr->nextPtr = NULL;
rootPtr->itemList = NULL;
rootPtr->itemNode = NULL;
rootPtr->item = new char[strlen(item) + 1];
strcpy(rootPtr->item, item);
currPtr = rootPtr;
}
else {
listNode *newPtr = new listNode;
newPtr->nextPtr = currPtr->nextPtr;
newPtr->prevPtr = currPtr;
newPtr->itemList = NULL;
newPtr->itemNode = NULL;
newPtr->item = new char[strlen(item) + 1];
strcpy(newPtr->item, item);
if(newPtr->nextPtr)
newPtr->nextPtr->prevPtr = newPtr;
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
void List::InsertAfter(List *item) {
if(rootPtr == NULL) {
rootPtr = new listNode;
rootPtr->prevPtr = NULL;
rootPtr->nextPtr = NULL;
rootPtr->item = NULL;
rootPtr->itemList = item;
rootPtr->itemNode = NULL;
currPtr = rootPtr;
}
else {
listNode *newPtr = new listNode;
newPtr->nextPtr = currPtr->nextPtr;
newPtr->prevPtr = currPtr;
newPtr->item = NULL;
newPtr->itemList = item;
newPtr->itemNode = NULL;
if(newPtr->nextPtr)
newPtr->nextPtr->prevPtr = newPtr;
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
void List::InsertAfter(genericNode *item) {
if(rootPtr == NULL) {
rootPtr = new listNode;
rootPtr->prevPtr = NULL;
rootPtr->nextPtr = NULL;
rootPtr->item = NULL;
rootPtr->itemList = NULL;
rootPtr->itemNode = item;
currPtr = rootPtr;
}
else {
listNode *newPtr = new listNode;
newPtr->nextPtr = currPtr->nextPtr;
newPtr->prevPtr = currPtr;
newPtr->item = NULL;
newPtr->itemList = NULL;
newPtr->itemNode = item;
if(newPtr->nextPtr)
newPtr->nextPtr->prevPtr = newPtr;
newPtr->prevPtr->nextPtr = newPtr;
currPtr = newPtr;
}
}
bool List::GoTop(void) {
currPtr = rootPtr;
if(currPtr == NULL)
return false;
else
return true;
}
bool List::GoNext(void) {
if(!currPtr)
return false;
currPtr = currPtr->nextPtr;
if(currPtr == NULL)
return false;
else
return true;
}
bool List::GoPrev(void) {
if(!currPtr)
return false;
currPtr = currPtr->prevPtr;
return true;
}
bool List::GoItem(int n) {
if(!rootPtr) // if empty list, abort
return false;
currPtr = rootPtr;
while(n--) { // visit each node from the root up to n nodes deep
if(!currPtr->nextPtr)
break;
currPtr = currPtr->nextPtr;
}
if(currPtr == NULL)
return false;
else
return true;
}
char *List::currItem(void) {
if(currPtr)
return currPtr->item;
else
return NULL;
}
List *List::currItemList(void) {
if(currPtr)
return currPtr->itemList;
else
return NULL;
}
genericNode *List::currItemNode(void) {
if(currPtr)
return currPtr->itemNode;
else
return NULL;
}
void List::Print(void) {
listNode *tmpPtr = rootPtr;
cout << "(";
while(tmpPtr) {
if(tmpPtr->item)
cout << tmpPtr->item << " ";
if(tmpPtr->itemList)
tmpPtr->itemList->Print();
if(tmpPtr->itemNode)
cout << tmpPtr->itemNode;
tmpPtr = tmpPtr->nextPtr;
}
cout << "\b) ";
}
List *MakeList(char *text) {
int length, i, j;
char *tmp = text;
List *l = new List;
length = strlen(text);
for(i = 0; i < length; i++) {
if(text[i] == '{' || text[i] == '(') {
// sub-list found
for(j = i + 1; j < length; j++)
if(text[j] == '}' || text[j] == ')')
break;
tmp = new char[j - i];
strncpy(tmp, text + i + 1, j - i - 1);
tmp[j - i - 1] = 0;
l->InsertAfter(MakeList(tmp));
delete tmp;
i = j + 1;
}
else if(text[i] != ' ' && text[i] != '\t' && text[i] != '.') {
// item found
for(j = i + 1; j < length; j++)
if(text[j] == ' ' || text[j] == '\t' || text[j] == '.')
break;
tmp = new char[j - i + 1];
strncpy(tmp, text + i, j - i);
tmp[j-i] = 0;
l->InsertAfter(tmp);
if(text[j] == '.')
l->InsertAfter(".");
delete tmp;
i = j;
}
}
return l;
}
List *MakeFeatList(char *text) {
int length, i, j;
char *tmp;
List *l = new List;
length = strlen(text);
for(i = 0; i < length; i++) {
for(j = i; j < length; j++)
if(text[j] == ' ')
break;
for(j++; j < length; j++)
if(text[j] == ' ' || text[j] == '}')
break;
if(text[j+1] == '{')
for(j++; j < length; j++)
if(text[j] == '}') {
j++;
break;
}
tmp = new char[j - i + 1];
strncpy(tmp, text + i, j - i);
tmp[j - i] = 0;
// cout << "Mklist: [" << tmp << "]" << endl;
l->InsertAfter(MakeList(tmp));
delete tmp;
i = j;
}
return l;
}
// ---------------------------------------------------------------------------
// Creates a list of features from a string. Example string:
// ((CAT NP) (AGR ?a))
List *makeFeatureList(char *text) {
int length, i, j, level, beg, end;
char *tmp, *thisList;
List *l = new List;
length = strlen(text);
// locate opening parenthesis
for(i = 0; i < length; i++)
if(text[i] == '(') break;
// no list given if no opening parenthesis found -- abort
if(i == length) return NULL;
// get -past- the (
i++;
// find end of this list
for(level = 1, j = i+1; j < length; j++) {
if(text[j] == ')') level--;
if(text[j] == '(') level++;
}
j--;
// make a new string containing a copy of the list text we want to work
// with only...
thisList = new char[j - i + 1];
strncpy(thisList, text + i, j - i);
thisList[j - i] = 0;
trim(thisList);
// cout << "thisList: [" << thisList << "]" << endl;
// step through the list text and take out each item (*tmp)
length = strlen(thisList);
end = -1;
while(end < length) {
beg = end + 1;
// find the end of this item
for(level = 0, end = beg; end < length; end++) {
if((thisList[end] == ' ') && (level == 0))
break;
// make sure that items encased within ( ) are kept as one piece
if(thisList[end] == '(') level++;
if(thisList[end] == ')') level--;
}
// make a copy of the text just for this item
tmp = new char[end - beg + 1];
strncpy(tmp, thisList + beg, end - beg);
tmp[end - beg] = 0;
// is this item a sub list?
if(tmp[0] == '(')
l->InsertAfter(makeFeatureList(tmp));
else
l->InsertAfter(tmp);
delete tmp;
}
// free up memory for temporary list string
delete thisList;
return l;
}
ostream &operator<<(ostream &out_file, const List &l) {
if(&l == NULL) {
out_file << "nil";
return out_file;
}
listNode *tmpPtr = l.rootPtr;
// is this an empty list?
if(tmpPtr == NULL)
out_file << "()";
else {
out_file << "(";
// print out first item without a separating space for it
if(tmpPtr->item) out_file << tmpPtr->item;
if(tmpPtr->itemList) out_file << *tmpPtr->itemList;
if(tmpPtr->itemNode) out_file << *tmpPtr->itemNode;
// step through the linked list of List items and print each one...
while((tmpPtr = tmpPtr->nextPtr)) {
if(tmpPtr->item) out_file << ' ' << tmpPtr->item;
if(tmpPtr->itemList) out_file << ' ' << *tmpPtr->itemList;
if(tmpPtr->itemNode) out_file << ' ' << *tmpPtr->itemNode;
}
out_file << ')';
}
return out_file;
}
bool List::empty(void) {
if(rootPtr == NULL)
return true;
else
return false;
}
int List::length(void) {
listNode *tmp;
int num;
// count each node in the list and return the count
tmp = rootPtr;
num = 0;
while(tmp) {
num++;
tmp = tmp->nextPtr;
}
return num;
}
List *findSubst(char *var, List *assign) {
if(assign->GoTop() == false)
return NULL;
do {
List *varSubst = assign->currItemList();
varSubst->GoTop();
// is this the variable that we are trying to find?
if(strcmp(varSubst->currItem(), var) == 0) {
varSubst->GoNext();
return varSubst->currItemList();
}
} while(assign->GoNext());
// couldn't find var in the variable substitution list
return NULL;
}
List *substitute(List *old, List *assign) {
List *subst = new List;
listNode *tmp;
tmp = old->rootPtr;
while(tmp) {
if(tmp->item) {
// cout << "recreating item: " << tmp->item << endl;
if(tmp->item[0] == '?') {
// variable may need to be substituted
List *newItem = findSubst(tmp->item, assign);
if(newItem == NULL)
subst->InsertAfter(tmp->item);
else {
newItem->GoTop();
do {
if(newItem->currItem())
subst->InsertAfter(newItem->currItem());
if(newItem->currItemList())
subst->InsertAfter(newItem->currItemList());
if(newItem->currItemNode())
subst->InsertAfter(newItem->currItemNode());
} while(newItem->GoNext());
}
}
else
// constants just stay
subst->InsertAfter(tmp->item);
}
// just call ourselves recursively for sublists
if(tmp->itemList) {
// cout << "recreating list: " << *tmp->itemList << endl;
subst->InsertAfter(substitute(tmp->itemList, assign));
}
if(tmp->itemNode) {
// cout << "recreating genericNode: " << *tmp->itemNode << endl;
subst->InsertAfter(new genericNode(NULL, substitute(tmp->itemNode->features(), assign)));
}
tmp = tmp->nextPtr;
}
return subst;
}
--- NEW FILE: p1.cpp ---
#include <iostream.h>
#include <fstream.h>
#include <string.h>
#include "parse.h"
#include "grammar.h"
#include "lexicon.h"
#include "list.h"
#include "gnode.h"
Grammar grammar("grammar.txt");
Lexicon lexicon("lexicon.txt");
int main(int argc, char *argv[]) {
char *sentence = "Jack was happy to see a dog.";
// cout << "[nlp]$ " << sentence << endl;
// parse(sentence);
sentence = new char[1000];
cout << "[nlp]$ " << flush;
cin.getline(sentence, 999);
// while(cin) {
if(strlen(sentence))
parse(sentence);
// cout << "[nlp]$ " << flush;
// cin.getline(sentence, 999);
// }
cout << "Bye bye..." << endl;
}
--- NEW FILE: gnode.cpp ---
// Copyright (c) 1998-99, Ed Schlunder
#include <string.h>
#include "gnode.h"
#include "list.h"
// takes a string and hacks off leading/trailing spaces and *'s.
void trim(char *text) {
int i, j, len;
len = strlen(text);
// empty strings don't need trimming...
if(len == 0) return;
// find beginning of actual text
for(i = 0; i < len; i++)
if(text[i] != ' ') break;
// was there any preceeding white space to clear out?
if(i > 0)
for(j = 0; text[i] != 0; j++, i++)
text[j] = text[i];
// step from the end backwards to see find where we need to chop
for(j = strlen(text); j > 1; j--)
if(text[j-1] != ' ') {
text[j] = 0;
break;
}
}
// ---------------------------------------------------------------------------
// Class Implementation: genericNode
// ---------------------------------------------------------------------------
genericNode::genericNode(char *word, List *features) {
if(word) {
name = new char[strlen(word)];
strcpy(name, word);
}
else
word = NULL;
featureList = features;
}
// Example Phrasal: ((CAT N) (AGR ?a))
// Example Lexical: saw ((CAT N) (ROOT SAW1) (AGR 3s))
genericNode::genericNode(char *obj) {
unsigned int i, j;
// first try to read in the name field (in lexical objects)
for(i = 0; i < strlen(obj); i++)
if(obj[i] != ' ') break;
// maybe this is a phrasal object -- doesn't have a "name" field
if(obj[i] == '(') {
name = NULL;
j = i;
}
else {
// this is a lexical object, pick up the name:
for(j = i; j < strlen(obj); j++)
if(obj[j] == ' ') break;
name = new char[j - i + 1];
strncpy(name, obj, j - i);
name[j - i] = 0;
j++;
}
// pick up the object's feature list
char *tmp = obj + j;
// cout << "[" << tmp << "]" << endl;
featureList = makeFeatureList(tmp);
}
ostream& operator<<(ostream &out_file, genericNode const &n) {
if(n.name == NULL)
out_file << "(nil";
else
out_file << '(' << n.name;
if(n.featureList)
out_file << ' ' << *n.featureList;
out_file << ')';
return out_file;
}
List *unify(genericNode &s, genericNode &g) {
List &sFeatures = *s.featureList, &gFeatures = *g.featureList, *currFeature;
List *assignList = new List;
char *featureName;
List *featureValueA, *featureValueB;
// if a node has no feature list, then we automatically know it can unify
if(!sFeatures.GoTop()) return new List;
if(!gFeatures.GoTop()) return new List;
do {
// get the current feature
currFeature = sFeatures.currItemList();
currFeature->GoTop();
// pull out the feature name and value
featureName = currFeature->currItem();
currFeature->GoNext();
featureValueA = currFeature->currItemList();
featureValueB = g.lookupFeature(featureName);
/*
cout << "unify: [" << featureName << "] = " << *featureValueA << "";
cout << " with " << *featureValueB << " --> ";
*/
/*
if(featureValueB == NULL)
// if feature lookup failed, no corresponding feature exists --
// automatically agrees...
// cout << "yes" << endl;
else {
*/
// if featureB is nothing, no corresponding featuture exists -- agrees
if(featureValueB != NULL) {
List *assignment = cmpFeatures(*featureValueA, *featureValueB);
if(assignment == NULL) {
// cout << "no" << endl;
delete assignList;
return NULL;
}
// cout << "yes: " << *assignment << endl;
if(!assignment->empty())
assignList->InsertAfter(assignment);
}
} while(sFeatures.GoNext());
// cout << "assignment list: " << *assignList << endl;
return assignList;
}
List *cmpFeatures(List &a, List &b) {
char *valueA, *valueB;
a.GoTop();
do {
valueA = a.currItem();
b.GoTop();
do {
valueB = b.currItem();
// do we have a match?
if(strcmp(valueA, valueB) == 0)
return new List;
// if one of the values is a variable, then it can match anything
if(valueA[0] == '?') {
List *assignList = new List;
assignList->InsertAfter(valueA);
assignList->InsertAfter(&b);
return assignList;
}
if(valueB[0] == '?') {
List *assignList = new List;
assignList->InsertAfter(valueB);
assignList->InsertAfter(&a);
return assignList;
}
} while(b.GoNext());
} while(a.GoNext());
return NULL;
}
List *genericNode::lookupFeature(const char *name) const {
char *tmp;
List *featureSeek;
// if this node has no features, abort search
if(featureList == NULL) return NULL;
featureList->GoTop();
do {
// pick up current feature
featureSeek = featureList->currItemList();
featureSeek->GoTop();
// pick up current feature's name
tmp = featureSeek->currItem();
// is this the feature we were trying to find?
if(strcmp(tmp, name) == 0) {
// found our feature, pick up the value
featureSeek->GoNext();
return featureSeek->currItemList();
}
} while(featureList->GoNext());
// exhausted the feature list, this feature doesn't exist in this genericNode
return NULL;
}
char *genericNode::word(void) {
return name;
}
genericNode::~genericNode() {
// -----------------------
// THIS NEEDS IMPLEMENTING
// -----------------------
delete name;
delete featureList;
}
List *genericNode::features(void) {
return featureList;
}
// ---------------------------------------------------------------------------
genericNode *substitute(genericNode *old, List *assign) {
genericNode *subst;
subst = new genericNode(old->name, substitute(old->featureList, assign));
return subst;
}
--- NEW FILE: lexicon.cpp ---
// Copyright (c) 1998-99, Ed Schlunder
#include <iostream.h>
#include <fstream.h>
#include "lexicon.h"
// ---------------------------------------------------------------------------
// Class Implementation: Lexicon
// ---------------------------------------------------------------------------
Lexicon::Lexicon(char *fileName) {
char buffer[1000];
ifstream inFile;
// specify that this is an empty lexicon currently
rootPtr = currPtr = NULL;
// try to open the lexicon file
inFile.open(fileName);
if(!inFile) {
cout << "Lexicon file can not be opened." << endl;
return;
}
// read in each lexicon entry and insert it into our BST
while(inFile) {
inFile.getline(buffer, 999);
// ignore blank lines
if(strlen(buffer) < 2) continue;
insert(new genericNode(buffer));
}
}
void Lexicon::insert(genericNode *newWord) {
#ifdef DEBUG
cout << "insertLexicon: [" << *newWord << "]" << endl;
#endif
// if this is an empty lexicon, insert at the top of the BST
if(rootPtr == NULL) {
rootPtr = new lexicalNode;
rootPtr->node = newWord;
rootPtr->leftPtr = NULL;
rootPtr->rightPtr = NULL;
}
else
insert(newWord, rootPtr);
}
void Lexicon::insert(genericNode *newWord, lexicalNode *root) {
lexicalNode *tmp;
// does newWord belong to the right of this node?
if(strcmp(root->node->word(), newWord->word()) > 0)
// ASSERT: newWord belongs to the right of this word
if(root->rightPtr) {
// nodes already exist to the right, follow link down and try again
insert(newWord, root->rightPtr);
return;
}
else
// make a new node to the right
tmp = root->rightPtr = new lexicalNode;
else
if(root->leftPtr) {
insert(newWord, root->leftPtr);
return;
}
else
tmp = root->leftPtr = new lexicalNode;
tmp->node = newWord;
tmp->leftPtr = NULL;
tmp->rightPtr = NULL;
}
Lexicon::~Lexicon() {
// -----------------------
// THIS NEEDS IMPLEMENTING
// -----------------------
}
genericNode *Lexicon::currNode(void) {
if(currPtr)
return currPtr->node;
return NULL;
}
// ---------------------------------------------------------------------------
bool Lexicon::lookupNext(void) {
if(currPtr->leftPtr == NULL)
return false;
else
return lookupWord(currPtr->leftPtr, currPtr->node->word());
}
bool Lexicon::lookupWord(char *word) {
return lookupWord(rootPtr, word);
}
bool Lexicon::lookupWord(lexicalNode *root, char *word) {
int cmp;
// if we've hit a dead end, we know this word doesn't exist in the tree
if(root == NULL) return false;
// is this word the one we were looking for?
cmp = strcmp(root->node->word(), word);
if(cmp == 0) {
currPtr = root;
return true;
}
if(cmp > 0)
return lookupWord(root->rightPtr, word);
else
return lookupWord(root->leftPtr, word);
}
--- NEW FILE: parse2.txt ---
Jack was happy to see a dog.
1 2 3 4 5 6 7 8
Agenda
-------------------------------------------------------------------------------------------------
- 1 2 Jack NAME ((AGR 3s))
- 1 2 NP ((AGR 3s))
- 2 3 was V ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
- 3 4 happy ADJ ((SUBCAT _vp:inf))
- 3 4 ADJP
- 4 5 to TO
- 5 6 see V ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
- 6 7 a ART ((ROOT A1)(AGR 3s))
- 7 8 dog N ((CAT N) (ROOT DOG1) (AGR 3s))
- 6 8 NP ((AGR 3s))
- 5 8 VP ((AGR 3s) (VFORM base))
- 4 8 VP ((SUBCAT inf) (AGR 3s) (VFORM inf))
- 3 8 ADJP
- 2 3 VP ((AGR (1s 3s)) (VFORM past))
* 1 8 S ((INV -) (VFORM past) (AGR 3s))
Chart
-------------------------------------------------------------------------------------------------
1 2 NAME ((AGR 3s))
1 2 NP ((AGR 3s))
2 3 V ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
3 4 ADJ ((SUBCAT _vp:inf))
3 4 ADJP
4 5 TO
5 6 V ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
6 7 ART ((ROOT A1)(AGR 3s))
7 8 N ((ROOT DOG1) (AGR 3s))
6 8 NP ((AGR 3s))
5 8 VP ((AGR 3s) (VFORM base))
4 8 VP ((SUBCAT inf) (AGR 3s) (VFORM inf))
3 8 ADJP
2 8 VP ((AGR (1s 3s)) (VFORM past))
1 8 S ((INV -) (VFORM past) (AGR 3s))
2 7 VP ((AGR (1s 3s)) (VFORM past))
1 7 S ((INV -) (VFORM past) (AGR 3s))
Active Arc List
-------------------------------------------------------------------------------------------------
1 2 S ((INV -) (VFORM ?v (pres past)) (AGR 3s))
-> NP ((AGR 3s)) o VP ((VFORM ?v (pres past)) (AGR 3s))
2 3 VP ((AGR (1s 3s)) (VFORM past))
-> V ((SUBCAT _np) (AGR (1s 3s)) (VFORM past)) o NP
2 3 VP ((AGR (1s 3s)) (VFORM past))
-> V ((SUBCAT _adjp) (AGR (1s 3s)) (VFORM past)) o ADJP
3 4 ADJP -> ADJ ((SUBCAT _vp:inf)) o VP ((VFORM inf))
4 5 VP ((SUBCAT inf) (AGR ?a) (VFORM inf))
-> TO ((AGR ?a) (VFORM inf)) o VP ((VFORM base))
5 6 VP ((AGR ?a) (VFORM base))
-> V ((SUBCAT _np) (AGR ?a) (VFORM base)) o NP
6 7 NP ((AGR 3s)) o N ((AGR 3s))
6 7 S ((INV -) (VFORM ?v (pres past)) (AGR 3s)
-> NP ((AGR 3s)) o VP ((VFORM ?v (pres past)) (AGR 3s))
--- NEW FILE: Makefile ---
CPP= g++ -Wall -c -g
LINK= g++ -g
all: p1
p1: p1.o gnode.o list.o grammar.o lexicon.o parse.o
$(LINK) p1.o parse.o grammar.o lexicon.o list.o gnode.o -o p1
t1: t1.o gnode.o list.o
$(LINK) t1.o gnode.o list.o -o t1
t1.o: t1.cpp
$(CPP) t1.cpp
t2: t2.o gnode.o list.o
$(LINK) t2.o gnode.o list.o -o t2
t2.o: t2.cpp
$(CPP) t2.cpp
t3: t3.o lexicon.o gnode.o list.o
$(LINK) t3.o lexicon.o gnode.o list.o -o t3
t3.o: t3.cpp
$(CPP) t3.cpp
t4: t4.o grammar.o lexicon.o gnode.o list.o
$(LINK) t4.o grammar.o lexicon.o gnode.o list.o -o t4
t4.o: t4.cpp
$(CPP) t4.cpp
p1.o: p1.cpp
$(CPP) p1.cpp
grammar.o: grammar.cpp
$(CPP) grammar.cpp
lexicon.o: lexicon.cpp
$(CPP) lexicon.cpp
gnode.o: gnode.cpp
$(CPP) gnode.cpp
list.o: list.cpp
$(CPP) list.cpp
parse.o: parse.cpp
$(CPP) parse.cpp
clean:
rm -f core p1 t1 t2 t3 t4 *.o *~ *#
--- NEW FILE: grammar.cpp ---
// Copyright (c) 1998-99, Ed Schlunder
#include <fstream.h>
#include <iostream.h>
#include <string.h>
#include "grammar.h"
#include "gnode.h"
Grammar::Grammar(char *fileName) {
char buffer[1000];
ifstream inFile;
// specify that this is currently an empty grammar rule list
rootPtr = currPtr = NULL;
// try to open the grammar rules file
inFile.open(fileName);
if(!inFile) {
cout << "Grammar rules file can not be opened." << endl;
return;
}
while(inFile) {
inFile.getline(buffer, 999);
// ignore blank lines
if(strlen(buffer) < 2) continue;
trim(buffer);
insert(buffer);
}
}
void Grammar::insert(char *text) {
List *ruleLine = new List;
genericNode *obj;
char *tmp;
int length, level, beg, end;
length = strlen(text);
end = -1;
while(end < length) {
beg = end + 1;
// locate opening parenthesis
while(beg < length)
if(text[beg] == '(') break;
else beg++;
for(level = 0, end = beg + 1; end < length; end++) {
if((text[end] == ')') && (level == 0)) break;
// make sure that items encased within () are kept intact
if(text[end] == '(') level++;
if(text[end] == ')') level--;
}
end++;
tmp = new char[end - beg + 1];
strncpy(tmp, text + beg, end - beg);
tmp[end - beg] = 0;
if(strlen(tmp) == 0)
break;
// make a genericNode out of this phrasal object and add it to the ruleLine
// cout << '[' << tmp << ']' << endl;
obj = new genericNode(tmp);
ruleLine->InsertAfter(obj);
}
insert(ruleLine);
}
void Grammar::insert(List *ruleLine) {
grammarLine *tmp;
// cout << "grammarInsert: " << *ruleLine << endl;
if(rootPtr == NULL) {
rootPtr = new grammarLine;
rootPtr->line = ruleLine;
rootPtr->nextPtr = NULL;
}
else {
// find end of list
tmp = rootPtr;
while(tmp->nextPtr != NULL)
tmp = tmp->nextPtr;
// tack a new rule on the end of the list
tmp->nextPtr = new grammarLine;
tmp = tmp->nextPtr;
tmp->line = ruleLine;
tmp->nextPtr = NULL;
}
}
Grammar::~Grammar() {
}
List *Grammar::currLine(void) {
return currPtr->line;
}
void Grammar::goTop(void) {
currPtr = rootPtr;
}
bool Grammar::goNext(void) {
if(currPtr)
if(currPtr->nextPtr) {
currPtr = currPtr->nextPtr;
return true;
}
return false;
}
List *Grammar::findMatch(genericNode *obj) {
searchObj = obj;
currPtr = rootPtr;
return findNext();
}
List *Grammar::findNext(void) {
List *assign, *currLine;
// go through each rule in the grammar and try to match it with the given obj
while(currPtr) {
currLine = currPtr->line;
currLine->GoTop();
currLine->GoNext();
assign = unify(*currLine->currItemNode(), *searchObj);
currPtr = currPtr->nextPtr;
if(assign)
return substitute(currLine, assign);
}
return NULL;
}
--- NEW FILE: t4.cpp ---
#include "grammar.h"
#include <iostream.h>
int main(int argc, char *argv[]) {
Grammar gram("grammar.txt");
cout << "top line:" << endl;
}
--- NEW FILE: p1-junk.cpp ---
// CSE476 Bottom Up Chart Parser
// Ed Schlunder (zilym at asu.edu)
#include <iostream.h>
#include <fstream.h>
#include <stdio.h>
#include <string.h>
#include "grammar.h"
#include "lexicon.h"
Lexicon lex("lexicon.txt");
Grammar gra("grammar.txt");
void procAgenda(genericNode *a) {
List *currLine;
genericNode *pred;
char *category;
category = a->Feature("CAT");
cout << " Unify searching for: " << category << endl;
gra.GoTop();
do {
currLine = gra.currLine();
currLine->GoTop();
currLine->GoNext();
pred = currLine->currItemNode();
if(unify(a, pred)) {
gra.Print();
cout << endl;
}
} while(gra.GoNext());
cout << endl;
}
// Jack was happy to see a dog
void Parse(char *sentence) {
List *s = MakeList(sentence);
cout << "Sentence: ";
s->Print();
cout << endl;
s->GoTop();
do {
if(s->currItem() == NULL)
break;
cout << "Word: " << s->currItem() << endl;
if(lex.Lookup(s->currItem())) {
cout << " CAT: " << lex.Feature("CAT") << endl;
procAgenda(lex.currNode());
}
else
cout << " No lexical entry!" << endl;
} while(s->GoNext());
}
int main(int argc, char *argv[]) {
char sentence[1000];
cout << "[nlp]$ Jack was happy to see a dog." << endl;
Parse("Jack was happy to see a dog.");
cout << "[nlp]$ " << flush;
cin.getline(sentence, 999);
while(cin) {
Parse(sentence);
cout << "[nlp]$ " << flush;
cin.getline(sentence, 999);
}
cout << endl;
/*
char tmp[100];
cin.getline(tmp, 99);
while(cin) {
cout << "Lexical Lookup: \"" << tmp << "\" ";
if(lex.Lookup(tmp)) {
cout << " - cat: " << lex.Feature("CAT");
cout << " subcat: " << lex.Feature("SUBCAT");
while(char *subcat = lex.Feature())
cout << ", " << subcat;
cout << endl;
}
else
cout << "does not exist" << endl;
cin.getline(tmp, 99);
}
*/
}
--- NEW FILE: lexicon.txt ---
a ((CAT (ART)) (ROOT (A1)) (AGR (3s)))
be ((CAT (V)) (ROOT (BE1)) (VFORM (base)) (IRREG-PRES (+)) (IRREG-PAST (+)) (SUBCAT (_adjp _np)))
cry ((CAT (V)) (ROOT (CRY1)) (VFORM (base)) (SUBCAT (_none)))
dog ((CAT (N)) (ROOT (DOG1)) (AGR (3s)))
fish ((CAT (N)) (ROOT (FISH1)) (AGR (3s 3p)) (IRREG-PL (+)))
happy ((CAT (ADJ)) (SUBCAT (_vp:inf)))
he ((CAT (PRO)) (ROOT (HE1)) (AGR (3s)))
is ((CAT (V)) (ROOT (BE1)) (VFORM (pres)) (SUBCAT (_adjp _np)) (AGR (3s)))
Jack ((CAT (NAME)) (AGR (3s)))
Sally ((CAT (NAME)) (AGR (3s)))
man ((CAT (N1)) (ROOT (MAN1)) (AGR (3s)))
men ((CAT (N)) (ROOT (MAN1)) (AGR (3p)))
saw ((CAT (N)) (ROOT (SAW1)) (AGR (3s)))
saw ((CAT (V)) (ROOT (SAW2)) (VFORM (base)) (SUBCAT (_np)))
saw ((CAT (V)) (ROOT (SEE1)) (VFORM (past)) (SUBCAT (_np)))
see ((CAT (V)) (ROOT (SEE1)) (VFORM (base)) (SUBCAT (_np)) (IRREG-PAST (+)) (EN-PASTPRT (+)))
seed ((CAT (N)) (ROOT (SEED1)) (AGR (3s)))
the ((CAT (ART)) (ROOT (THE1)) (AGR (3s 3p)))
to ((CAT (TO)))
want ((CAT (V)) (ROOT (WANT1)) (VFORM (base)) (SUBCAT (_np _vp:inf _np_vp:inf)))
was ((CAT (V)) (ROOT (BE1)) (VFORM (past)) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
were ((CAT (V)) (ROOT (BE)) (VFORM (past)) (AGR (2s 1p 2p 3p)) (SUBCAT (_adjp _np)))
--- NEW FILE: gnode.h ---
#ifndef _GNODE_H_
#define _GNODE_H_
#include <iostream.h>
class List;
void trim(char *text); // GOOD
// genericNode -- holds any lexical or phrase object
class genericNode {
public:
// constructor: pass it a string containing a lexical or phrase
// object in the format used in our lexicon.txt and grammar.txt
genericNode(char *obj); // GOOD
genericNode(char *word, List *features); // GOOD except feature doesn't copy
~genericNode();
friend ostream &operator<<(ostream &out_file, const genericNode &n); // GOOD
// GOOD: sort of.. this routine leaks memory because the lists for
// assignment it makes aren't properly deallocated upon unification failure
friend List *unify(genericNode &s, genericNode &g);
friend List *cmpFeatures(List &a, List &b);
List *lookupFeature(const char *name) const;
friend genericNode *substitute(genericNode *old, List *assign);
char *word(void); // GOOD
List *features(void);
private:
char *name;
List *featureList;
};
#endif
--- NEW FILE: parse.h ---
#include "gnode.h"
void parse(char *text);
void processAgenda(void);
typedef struct activeArc {
int begLoc;
int endLoc;
int numFound;
List *ruleLine;
};
class ActiveArcList {
public:
ActiveArcList();
void add(activeArc *newArc);
List *findMatch(int begLoc, int endLoc, genericNode *obj);
bool goTop(void);
bool goNext(void);
activeArc *currArc(void);
void print(void);
private:
struct arcNode;
typedef struct arcNode {
activeArc *arc;
arcNode *nextPtr;
};
arcNode *rootPtr;
arcNode *currPtr;
};
class Chart {
public:
Chart();
void add(int begLoc, int endLoc, genericNode *obj);
void getKey(int &begLoc, int &endLoc, genericNode *&obj);
// returns true if there are more keys on the agenda that need processing
bool process(void); // GOOD
bool findMatch(genericNode *obj, List *&assign, int begLoc, int &endLoc);
List *findNext(void);
void print(void);
private:
struct chartNode;
typedef struct chartNode {
int begLoc;
int endLoc;
genericNode *obj;
bool processFlag;
chartNode *nextPtr;
};
chartNode *rootPtr;
chartNode *currPtr;
genericNode *searchObj;
};
--- NEW FILE: t1.cpp ---
#include <iostream.h>
#include "gnode.h"
int main(int argc, char *argv[]) {
genericNode p("((CAT (VP)) (VFORM (inf)))");
genericNode l("Jack ((CAT NAME) (AGR 3s))");
cout << "P: " << p << endl;
cout << "L: " << l << endl;
}
--- NEW FILE: t2.cpp ---
#include <iostream.h>
#include "gnode.h"
int main(int argc, char *argv[]) {
genericNode a("((CAT (V)) (SUBCAT (_vp:inf)) (AGR (?a)) (VFORM (?v)))");
genericNode b("want ((CAT (V)) (ROOT (WANT1)) (VFORM (base)) (SUBCAT (_np _vp:inf _np_vp:inf)))");
cout << "A: " << a << endl;
cout << "B: " << b << endl;
cout << "unify(b, a): " << unify(b, a) << endl;
}
--- NEW FILE: p1 ---
(This appears to be a binary file; contents omitted.)
--- NEW FILE: lexicon.h ---
#ifndef _LEXICON_H_
#define _LEXICON_H_
#include "gnode.h"
#include "list.h"
class Lexicon {
public:
Lexicon(char *fileName); // GOOD
~Lexicon();
bool lookupWord(char *word);
bool lookupNext(void);
genericNode *currNode(void); // GOOD
private:
struct lexicalNode;
typedef struct lexicalNode {
genericNode *node;
lexicalNode *leftPtr;
lexicalNode *rightPtr;
};
void insert(genericNode *newWord); // GOOD
void insert(genericNode *newWord, lexicalNode *root); // GOOD
bool lookupWord(lexicalNode *root, char *word);
lexicalNode *rootPtr;
lexicalNode *currPtr;
List *featureSeek;
};
#endif
--- NEW FILE: list.h ---
#ifndef _LIST_H_
#define _LIST_H_
#include "gnode.h"
struct listNode;
typedef struct listNode {
char *item;
List *itemList;
genericNode *itemNode;
listNode *nextPtr; // pointer to next option
listNode *prevPtr;
};
class List {
public:
List();
~List();
void InsertBefore(char *item);
void InsertBefore(List *item);
void InsertBefore(genericNode *item);
void InsertAfter(char *item);
void InsertAfter(List *item);
void InsertAfter(genericNode *item);
// returns true if everything is good, false if operation failed
bool GoTop(void); // GOOD
bool GoNext(void); // GOOD
bool GoPrev(void);
bool GoItem(int n);
bool empty(void); // GOOD
int length(void); // GOOD
char *currItem(void);
List *currItemList(void);
genericNode *currItemNode(void);
void Print(void);
friend List *MakeFeatList(char *text);
friend List *makeFeatureList(char *text); // GOOD
friend ostream &operator<<(ostream &out_file, const List &l); // GOOD
friend List *substitute(List *old, List *assign);
private:
listNode *rootPtr;
listNode *currPtr;
};
List *MakeList(char *text);
#endif
--- NEW FILE: grammar.txt ---
((CAT (S)) (INV (-)) (VFORM (?v (pres past))) (AGR (?a))) -> ((CAT (NP)) (AGR (?a))) ((CAT (VP)) (VFORM (?v (pres past))) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (ART)) (AGR (?a))) ((CAT (N)) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (PRO)) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (NAME)) (AGR (?a)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_none)) (AGR (?a)) (VFORM (?v)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_np)) (AGR (?a)) (VFORM (?v))) ((CAT (NP)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_vp:inf)) (AGR (?a)) (VFORM (?v))) ((CAT (VP)) (VFORM (inf)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_np_vp:inf)) (AGR (?a)) (VFORM (?v))) ((CAT (NP))) ((CAT (VP)) (VFORM (inf)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_adjp)) (AGR (?a)) (VFORM (?v))) ((CAT (ADJP)))
((CAT (VP)) (SUBCAT (inf)) (AGR (?a)) (VFORM (inf))) -> ((CAT (TO))) ((CAT (VP)) (VFORM (base)))
((CAT (ADJP))) -> ((CAT (ADJ)))
((CAT (ADJP))) -> ((CAT (ADJ)) (SUBCAT (_vp:inf))) ((CAT (VP)) (VFORM (inf)))
--- NEW FILE: grammar.h ---
#ifndef _GRAMMAR_H_
#define _GRAMMAR_H_
#include "list.h"
#include "gnode.h"
class Grammar {
public:
Grammar(char *fileName); // GOOD
~Grammar();
List *currLine(void);
List *findMatch(genericNode *obj);
List *findNext(void);
void goTop(void);
bool goNext(void);
private:
struct grammarLine;
typedef struct grammarLine {
List *line;
grammarLine *nextPtr;
};
void insert(char *text); // GOOD
void insert(List *ruleLine); // GOOD
grammarLine *rootPtr;
grammarLine *currPtr;
genericNode *searchObj;
};
#endif
--- NEW FILE: README ---
Copyright (c) 1999, Ed Schlunder <zilym at asu.edu>
This is free software distributable under the terms of the GNU GPL.
See COPYING for details.
This is some old natural language processing code I wrote for a class I
took at ASU (CSE476 appearantly). Its not great (read: buggy and poorly
documented) but I'm keeping it around for reference when I get around to
writing a new NLP engine for Europa in Java.
--- NEW FILE: parse.cpp ---
#include "parse.h"
#include "list.h"
#include "grammar.h"
#include "lexicon.h"
extern Grammar grammar;
extern Lexicon lexicon;
Chart chart;
ActiveArcList arcs;
void parse(char *text) {
List *s = MakeList(text);
int i;
cout << *s << endl;
// go through each word and process
s->GoTop();
i = 1;
do {
if(lexicon.lookupWord(s->currItem()) == false) {
cout << "Can't find [" << s->currItem() << "] in the lexicon..." << endl;
return;
}
do {
chart.add(i, i + 1, lexicon.currNode());
} while(lexicon.lookupNext());
processAgenda();
i++;
} while(s->GoNext());
}
void processAgenda(void) {
int begLoc, endLoc;
genericNode *obj;
while(chart.process()) {
List *newMatch;
chart.getKey(begLoc, endLoc, obj);
cout << "find: " << *obj << endl;
newMatch = grammar.findMatch(obj);
if(newMatch) {
do {
cout << "Grammar search success:\n" << *newMatch << endl;
if(newMatch->length() <= 2) {
newMatch->GoTop();
chart.add(begLoc, endLoc, newMatch->currItemNode());
}
else {
activeArc *newArc;
newArc = new activeArc;
newArc->begLoc = begLoc;
newArc->endLoc = endLoc;
newArc->numFound = 1;
newArc->ruleLine = newMatch;
arcs.add(newArc);
}
} while((newMatch = grammar.findNext()));
}
else
cout << "Grammar search fail" << endl;
arcs.print();
// if there is nothing on the active arc list, we can skip checking arcs
if(!arcs.goTop())
continue;
do {
activeArc *checkArc;
checkArc = arcs.currArc();
if(checkArc->endLoc == begLoc) {
// find the current waiting component of this arc
checkArc->ruleLine->GoTop();
for(int i = 0; i <= checkArc->numFound; i++)
checkArc->ruleLine->GoNext();
cout << "Arc search: " << *checkArc->ruleLine->currItemNode() << ", "
<< *obj;
List *assign = unify(*checkArc->ruleLine->currItemNode(), *obj);
// did this arc unify with the current obj?
if(!assign) {
cout << " = FAIL" << endl;
}
else {
cout << " = " << *assign << endl;
// do the variable substitutions
assign = substitute(checkArc->ruleLine, assign);
if(assign->length() > (checkArc->numFound + 2)) {
// this arc has been extended, but not completed
cout << "Extended arc:\n" << *assign << "\n" << endl;
activeArc *newArc = new activeArc;
newArc->begLoc = checkArc->begLoc;
newArc->endLoc = endLoc;
newArc->numFound = checkArc->numFound + 1;
newArc->ruleLine = assign;
arcs.add(newArc);
}
else {
// this arc has been completed, we can add it to the chart
cout << "Completed arc:\n" << *assign << "\n" << endl;
assign->GoTop();
chart.add(checkArc->begLoc, endLoc, assign->currItemNode());
}
}
}
} while(arcs.goNext());
}
chart.print();
}
Chart::Chart() {
// mark this chart as being empty
rootPtr = currPtr = NULL;
}
void Chart::add(int begLoc, int endLoc, genericNode *obj) {
cout << '[' << begLoc << ',' << endLoc << ',' << *obj << ']' << endl;
if(rootPtr == NULL) {
rootPtr = new chartNode;
rootPtr->begLoc = begLoc;
rootPtr->endLoc = endLoc;
rootPtr->obj = obj;
rootPtr->processFlag = true;
rootPtr->nextPtr = NULL;
}
else {
chartNode *tmp = rootPtr;
while(tmp->nextPtr != NULL)
tmp = tmp->nextPtr;
tmp->nextPtr = new chartNode;
tmp = tmp->nextPtr;
tmp->begLoc = begLoc;
tmp->endLoc = endLoc;
tmp->obj = obj;
tmp->processFlag = true;
tmp->nextPtr = NULL;
}
}
bool Chart::process(void) {
chartNode *tmp = rootPtr;
while(tmp != NULL) {
if(tmp->processFlag)
return true;
tmp = tmp->nextPtr;
}
return false;
}
void Chart::getKey(int &begLoc, int &endLoc, genericNode *&obj) {
chartNode *tmp = rootPtr;
while(tmp != NULL) {
if(tmp->processFlag) {
// this key hasn't been processed yet, return it and mark as processed
begLoc = tmp->begLoc;
endLoc = tmp->endLoc;
obj = tmp->obj;
tmp->processFlag = false;
return;
}
tmp = tmp->nextPtr;
}
return;
}
void Chart::print(void) {
cout << "Chart ";
for(int i = 0; i < 160 - 6; i++)
cout << '-';
cout << endl;
currPtr = rootPtr;
while(currPtr) {
cout << '[' << currPtr->begLoc << ',' << currPtr->endLoc << ','
<< currPtr->processFlag << ',' << *currPtr->obj << ']' << endl;
currPtr = currPtr->nextPtr;
}
cout << '\n' << endl;
}
bool Chart::findMatch(genericNode *obj, List *&assign, int begLoc, int &endLoc) {
searchObj = obj;
currPtr = rootPtr;
// empty chart has nothing to match -- abort
if(currPtr == NULL)
return false;
// go through each rule in the grammar and try to match it with the given obj
while(currPtr) {
if(currPtr->begLoc == begLoc) {
assign = unify(*currPtr->obj, *searchObj);
if(assign) {
endLoc = currPtr->endLoc;
return true;
}
}
currPtr = currPtr->nextPtr;
}
return false;
}
ActiveArcList::ActiveArcList() {
rootPtr = currPtr = NULL;
}
void ActiveArcList::add(activeArc *newArc) {
List *assign;
int endLoc;
if(rootPtr) {
arcNode *tmp = rootPtr;
// skip down to the last node
while(tmp->nextPtr)
tmp = tmp->nextPtr;
tmp->nextPtr = new arcNode;
tmp = tmp->nextPtr;
tmp->arc = newArc;
tmp->nextPtr = NULL;
}
else {
rootPtr = new arcNode;
rootPtr->arc = newArc;
rootPtr->nextPtr = NULL;
}
// whenever a new arc is added, we need to try to unify it with
// any of the completed objects on the chart to see if it can be extended
newArc->ruleLine->GoTop();
for(int i = 0; i <= newArc->numFound; i++)
newArc->ruleLine->GoNext();
if(!chart.findMatch(newArc->ruleLine->currItemNode(), assign, newArc->endLoc, endLoc))
return;
assign = substitute(newArc->ruleLine, assign);
if(assign->length() > (newArc->numFound + 2)) {
cout << "Extended arc: " << newArc->numFound + 1 << "\n" << *assign << "\n" << endl;
activeArc *newArc2 = new activeArc;
newArc2->begLoc = newArc->begLoc;
newArc2->endLoc = endLoc;
newArc2->numFound = newArc->numFound + 1;
arcs.add(newArc2);
}
else {
// this arc has been completed, we can add it to the chart
cout << "Completed arc:\n" << *assign << "\n" << endl;
chart.print();
assign->GoTop();
chart.add(newArc->begLoc, endLoc, assign->currItemNode());
}
}
bool ActiveArcList::goTop(void) {
if(rootPtr) {
currPtr = rootPtr;
return true;
}
else {
currPtr = NULL;
return false;
}
}
bool ActiveArcList::goNext(void) {
if(currPtr && currPtr->nextPtr) {
currPtr = currPtr->nextPtr;
return true;
}
else {
currPtr = NULL;
return false;
}
}
activeArc *ActiveArcList::currArc(void) {
if(currPtr)
return currPtr->arc;
else
return NULL;
}
void ActiveArcList::print(void) {
cout << "Active Arcs ";
for(int i = 0; i < 160-12; i++)
cout << '-';
cout << endl;
currPtr = rootPtr;
while(currPtr) {
cout << '[' << currPtr->arc->begLoc << ',' << currPtr->arc->endLoc << ',';
currPtr->arc->ruleLine->GoTop();
cout << *currPtr->arc->ruleLine->currItemNode();
for(int i = 0; i < currPtr->arc->numFound; i++) {
currPtr->arc->ruleLine->GoNext();
cout << ' ' << *currPtr->arc->ruleLine->currItemNode();
}
cout << " o";
while(currPtr->arc->ruleLine->GoNext())
cout << ' ' << *currPtr->arc->ruleLine->currItemNode();
cout << ']' << endl;
currPtr = currPtr->nextPtr;
}
cout << '\n' << endl;
}
--- NEW FILE: parse.txt ---
Jack was happy to see a dog.
1 2 3 4 5 6 7 8
Key List
-------------------------------------------------------------------------------------------------
- 1 2 Jack NAME ((AGR 3s))
- 1 2 NP ((AGR 3s))
- 2 3 was V ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
- 3 4 happy ADJ ((SUBCAT _vp:inf))
- 3 4 ADJP
- 4 5 to TO
- 5 6 see V ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
- 6 7 a ART ((ROOT A1)(AGR 3s))
- 6 7 NP ((AGR 3s))
- 5 7 VP ((AGR ?a) (VFORM base))
- 4 7 VP ((SUBCAT inf) (AGR ?a) (VFORM inf))
- 3 7 ADJP
- 2 7 VP ((AGR (1s 3s)) (VFORM past))
* 1 7 S ((INV -) (VFORM past) (AGR 3s))
Chart
-------------------------------------------------------------------------------------------------
1 2 NAME ((AGR 3s))
1 2 NP ((AGR 3s))
2 3 V ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
3 4 ADJ ((SUBCAT _vp:inf))
3 4 ADJP
4 5 TO
5 6 V ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
6 7 ART ((ROOT A1)(AGR 3s))
// 6 7 NP ((AGR 3s))
5 7 VP ((AGR ?a) (VFORM base))
4 7 VP ((SUBCAT inf) (AGR ?a) (VFORM inf))
3 7 ADJP
2 7 VP ((AGR (1s 3s)) (VFORM past))
1 7 S ((INV -) (VFORM past) (AGR 3s))
Active Arc List
-------------------------------------------------------------------------------------------------
1 2 S ((INV -) (VFORM ?v (pres past)) (AGR 3s))
-> NP ((AGR 3s)) o VP ((VFORM ?v (pres past)) (AGR 3s))
2 3 VP ((AGR (1s 3s)) (VFORM past))
-> V ((SUBCAT _np) (AGR (1s 3s)) (VFORM past)) o NP
2 3 VP ((AGR (1s 3s)) (VFORM past))
-> V ((SUBCAT _adjp) (AGR (1s 3s)) (VFORM past)) o ADJP
3 4 ADJP -> ADJ ((SUBCAT _vp:inf)) o VP ((VFORM inf))
4 5 VP ((SUBCAT inf) (AGR ?a) (VFORM inf))
-> TO ((AGR ?a) (VFORM inf)) o VP ((VFORM base))
5 6 VP ((AGR ?a) (VFORM base))
-> V ((SUBCAT _np) (AGR ?a) (VFORM base)) o NP
6 7 NP ((AGR 3s)) o N ((AGR 3s))
6 7 S ((INV -) (VFORM ?v (pres past)) (AGR 3s)
-> NP ((AGR 3s)) o VP ((VFORM ?v (pres past)) (AGR 3s))
--- NEW FILE: t3.cpp ---
#include "lexicon.h"
int main(int argc, char *argv[]) {
Lexicon lex("lexicon.txt");
bool res;
res = lex.lookupWord("want");
cout << "lookupWord(\"want\"): " << res << endl;
if(res)
cout << " gnode: " << *lex.currNode() << endl;
}
More information about the dslinux-commit
mailing list