dslinux/user/bitchx/dll/europa/cse476 Makefile README gnode.cpp gnode.h grammar.cpp grammar.h grammar.txt lexicon.cpp lexicon.h lexicon.txt list.cpp list.h p1 p1-junk.cpp p1.cpp parse.cpp parse.h parse.txt parse2.txt t1.cpp t2.cpp t3.cpp t4.cpp

stsp stsp at user.in-berlin.de
Sun Jul 2 15:18:35 CEST 2006


Update of /cvsroot/dslinux/dslinux/user/bitchx/dll/europa/cse476
In directory antilope:/tmp/cvs-serv9280/dll/europa/cse476

Added Files:
	Makefile README gnode.cpp gnode.h grammar.cpp grammar.h 
	grammar.txt lexicon.cpp lexicon.h lexicon.txt list.cpp list.h 
	p1 p1-junk.cpp p1.cpp parse.cpp parse.h parse.txt parse2.txt 
	t1.cpp t2.cpp t3.cpp t4.cpp 
Log Message:
Adding pristine copy of BitchX so I can branch from it.


--- NEW FILE: list.cpp ---
// Copyright (c) 1998-99, Ed Schlunder

#include <string.h>
#include "list.h"

// ---------------------------------------------------------------------------
// Class Implementation: List
// ---------------------------------------------------------------------------
List::List() {
  rootPtr = NULL;
  currPtr = NULL;
}

List::~List() {
  listNode *tmpPtr = rootPtr, *nextPtr;
  
  // go through the list and delete each node
  while(tmpPtr) {
    nextPtr = tmpPtr->nextPtr;
    if(tmpPtr->item)
      delete tmpPtr->item;
    if(tmpPtr->itemList)
      delete tmpPtr->itemList;
    if(tmpPtr->itemNode)
      delete tmpPtr->itemNode;
    
    delete tmpPtr;
    tmpPtr = nextPtr;
  }
};

void List::InsertBefore(char *item) {
  // this version does char strings

  if(rootPtr == NULL) {
    // creating the first node in a previously empty list.
    rootPtr = new listNode;
    
    rootPtr->item = new char[strlen(item) + 1];
    strcpy(rootPtr->item, item);
    rootPtr->itemList = NULL;
    rootPtr->itemNode = NULL;
    
    rootPtr->nextPtr = NULL;
    rootPtr->prevPtr = NULL;
    
    currPtr = rootPtr;
  }
  else {
    // create a new node and insert it before currPtr's node
    listNode *newPtr = new listNode;
    
    newPtr->nextPtr = currPtr;
    newPtr->prevPtr = currPtr->prevPtr;
    newPtr->item = new char[strlen(item) + 1];
    strcpy(newPtr->item, item);
    newPtr->itemList = NULL;
    newPtr->itemNode = NULL;
    
    currPtr->prevPtr = newPtr;
    if(newPtr->prevPtr)
      newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

void List::InsertBefore(List *item) {
  // this version does Lists

  if(rootPtr == NULL) {
    rootPtr = new listNode;
    
    rootPtr->item = NULL;
    rootPtr->itemList = item;
    rootPtr->itemNode = NULL;
    
    rootPtr->nextPtr = NULL;
    rootPtr->prevPtr = NULL;
    
    currPtr = rootPtr;
  }
  else {
    listNode *newPtr = new listNode;
    
    newPtr->item = NULL;
    newPtr->itemList = item;
    newPtr->itemNode = NULL;
    newPtr->nextPtr = currPtr;
    newPtr->prevPtr = currPtr->prevPtr;
    
    currPtr->prevPtr = newPtr;
    if(newPtr->prevPtr)
      newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

void List::InsertBefore(genericNode *item) {
  if(rootPtr == NULL) {
    rootPtr = new listNode;
    
    rootPtr->item = NULL;
    rootPtr->itemList = NULL;
    rootPtr->itemNode = item;
    
    rootPtr->nextPtr = NULL;
    rootPtr->prevPtr = NULL;
    
    currPtr = rootPtr;
  }
  else {
    listNode *newPtr = new listNode;
    
    newPtr->item = NULL;
    newPtr->itemList = NULL;
    newPtr->itemNode = item;
    newPtr->nextPtr = currPtr;
    newPtr->prevPtr = currPtr->prevPtr;
    
    currPtr->prevPtr = newPtr;
    if(newPtr->prevPtr)
      newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

void List::InsertAfter(char *item) {
  if(rootPtr == NULL) {
    rootPtr = new listNode;
    rootPtr->prevPtr = NULL;
    rootPtr->nextPtr = NULL;
    rootPtr->itemList = NULL;
    rootPtr->itemNode = NULL;
    rootPtr->item = new char[strlen(item) + 1];
    strcpy(rootPtr->item, item);
    
    currPtr = rootPtr;
  }
  else {
    listNode *newPtr = new listNode;
    
    newPtr->nextPtr = currPtr->nextPtr;
    newPtr->prevPtr = currPtr;
    newPtr->itemList = NULL;
    newPtr->itemNode = NULL;
    newPtr->item = new char[strlen(item) + 1];
    strcpy(newPtr->item, item);
    
    if(newPtr->nextPtr)
      newPtr->nextPtr->prevPtr = newPtr;
    newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

void List::InsertAfter(List *item) {
  if(rootPtr == NULL) {
    rootPtr = new listNode;
    rootPtr->prevPtr = NULL;
    rootPtr->nextPtr = NULL;
    rootPtr->item = NULL;
    rootPtr->itemList = item;
    rootPtr->itemNode = NULL;
    
    currPtr = rootPtr;
  }
  else {
    listNode *newPtr = new listNode;
    
    newPtr->nextPtr = currPtr->nextPtr;
    newPtr->prevPtr = currPtr;
    newPtr->item = NULL;
    newPtr->itemList = item;
    newPtr->itemNode = NULL;
    
    if(newPtr->nextPtr)
      newPtr->nextPtr->prevPtr = newPtr;
    newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

void List::InsertAfter(genericNode *item) {
  if(rootPtr == NULL) {
    rootPtr = new listNode;
    rootPtr->prevPtr = NULL;
    rootPtr->nextPtr = NULL;
    rootPtr->item = NULL;
    rootPtr->itemList = NULL;
    rootPtr->itemNode = item;
    
    currPtr = rootPtr;
  }
  else {
    listNode *newPtr = new listNode;
    
    newPtr->nextPtr = currPtr->nextPtr;
    newPtr->prevPtr = currPtr;
    newPtr->item = NULL;
    newPtr->itemList = NULL;
    newPtr->itemNode = item;
    
    if(newPtr->nextPtr)
      newPtr->nextPtr->prevPtr = newPtr;
    newPtr->prevPtr->nextPtr = newPtr;
    
    currPtr = newPtr;
  }
}

bool List::GoTop(void) {
    currPtr = rootPtr;

    if(currPtr == NULL)
      return false;
    else
      return true;
}

bool List::GoNext(void) {
  if(!currPtr)
    return false;
  
  currPtr = currPtr->nextPtr;
  if(currPtr == NULL)
    return false;
  else
    return true;
}

bool List::GoPrev(void) {
  if(!currPtr)
    return false;
  
  currPtr = currPtr->prevPtr;
  return true;
}

bool List::GoItem(int n) {
  if(!rootPtr) // if empty list, abort
    return false;
  
  currPtr = rootPtr;
  
  while(n--) { // visit each node from the root up to n nodes deep
    if(!currPtr->nextPtr)
      break;
    
    currPtr = currPtr->nextPtr;
  }
  
  if(currPtr == NULL)
    return false;
  else
    return true;
}

char *List::currItem(void) {
  if(currPtr)
    return currPtr->item;
  else
    return NULL;
}

List *List::currItemList(void) {
  if(currPtr)
    return currPtr->itemList;
  else
    return NULL;
}

genericNode *List::currItemNode(void) {
  if(currPtr)
    return currPtr->itemNode;
  else
    return NULL;
}

void List::Print(void) {
  listNode *tmpPtr = rootPtr;
  
  cout << "(";
    while(tmpPtr) {
      if(tmpPtr->item)
	cout << tmpPtr->item << " ";
      
      if(tmpPtr->itemList)
	tmpPtr->itemList->Print();
      
      if(tmpPtr->itemNode)
	cout << tmpPtr->itemNode;
      
      tmpPtr = tmpPtr->nextPtr;
    }
    cout << "\b) ";
}


List *MakeList(char *text) {
  int length, i, j;
  char *tmp = text;
  List *l = new List;

  length = strlen(text);
  for(i = 0; i < length; i++) {
    if(text[i] == '{' || text[i] == '(') {
      // sub-list found
      for(j = i + 1; j < length; j++)
	if(text[j] == '}' || text[j] == ')')
	  break;

      tmp = new char[j - i];
      strncpy(tmp, text + i + 1, j - i - 1);
      tmp[j - i - 1] = 0;

      l->InsertAfter(MakeList(tmp));
      delete tmp;
      i = j + 1;
    }
    else if(text[i] != ' ' && text[i] != '\t' && text[i] != '.') {
      // item found
      for(j = i + 1; j < length; j++)
	if(text[j] == ' ' || text[j] == '\t' || text[j] == '.')
	  break;
      tmp = new char[j - i + 1];
      strncpy(tmp, text + i, j - i);
      tmp[j-i] = 0;

      l->InsertAfter(tmp);
      if(text[j] == '.')
	l->InsertAfter(".");
      delete tmp;
      i = j;
    }
  }

  return l;
}

List *MakeFeatList(char *text) {
  int length, i, j;
  char *tmp;
  List *l = new List;

  length = strlen(text);
  for(i = 0; i < length; i++) {
    for(j = i; j < length; j++)
      if(text[j] == ' ')
	break;

    for(j++; j < length; j++)
      if(text[j] == ' ' || text[j] == '}')
	break;

    if(text[j+1] == '{')
      for(j++; j < length; j++)
	if(text[j] == '}') {
	  j++;
	  break;
	}

    tmp = new char[j - i + 1];
    strncpy(tmp, text + i, j - i);
    tmp[j - i] = 0;
    //    cout << "Mklist: [" << tmp << "]" << endl;
    l->InsertAfter(MakeList(tmp));
    delete tmp;

    i = j;
  }

  return l;
}
// ---------------------------------------------------------------------------

// Creates a list of features from a string. Example string:
// ((CAT NP) (AGR ?a))
List *makeFeatureList(char *text) {
  int length, i, j, level, beg, end;
  char *tmp, *thisList;
  List *l = new List;

  length = strlen(text);

  // locate opening parenthesis
  for(i = 0; i < length; i++)
    if(text[i] == '(') break;

  // no list given if no opening parenthesis found -- abort
  if(i == length) return NULL;

  // get -past- the (
  i++;

  // find end of this list
  for(level = 1, j = i+1; j < length; j++) {
    if(text[j] == ')') level--;
    if(text[j] == '(') level++;
  }

  j--;

  // make a new string containing a copy of the list text we want to work
  // with only...
  thisList = new char[j - i + 1];
  strncpy(thisList, text + i, j - i);
  thisList[j - i] = 0;
  trim(thisList);

  //  cout << "thisList: [" << thisList << "]" << endl;

  // step through the list text and take out each item (*tmp)
  length = strlen(thisList);
  end = -1;
  while(end < length) {
    beg = end + 1;

    // find the end of this item
    for(level = 0, end = beg; end < length; end++) {
      if((thisList[end] == ' ') && (level == 0))
	break;

      // make sure that items encased within ( ) are kept as one piece
      if(thisList[end] == '(') level++;
      if(thisList[end] == ')') level--;
    }

    // make a copy of the text just for this item
    tmp = new char[end - beg + 1];
    strncpy(tmp, thisList + beg, end - beg);
    tmp[end - beg] = 0;

    // is this item a sub list?
    if(tmp[0] == '(')
      l->InsertAfter(makeFeatureList(tmp));
    else
      l->InsertAfter(tmp);

    delete tmp;
  }
  
  // free up memory for temporary list string
  delete thisList;
  return l;
}

ostream &operator<<(ostream &out_file, const List &l) {
  if(&l == NULL) {
    out_file << "nil";
    return out_file;
  }

  listNode *tmpPtr = l.rootPtr;
  
  // is this an empty list?
  if(tmpPtr == NULL)
    out_file << "()";
  else {
    out_file << "(";

    // print out first item without a separating space for it
    if(tmpPtr->item) out_file << tmpPtr->item;
    if(tmpPtr->itemList) out_file << *tmpPtr->itemList;
    if(tmpPtr->itemNode) out_file << *tmpPtr->itemNode;

    // step through the linked list of List items and print each one...
    while((tmpPtr = tmpPtr->nextPtr)) {
      if(tmpPtr->item) out_file << ' ' << tmpPtr->item;
      if(tmpPtr->itemList) out_file << ' ' << *tmpPtr->itemList;
      if(tmpPtr->itemNode) out_file << ' ' << *tmpPtr->itemNode;
    }
    out_file << ')';
  }

  return out_file;
}

bool List::empty(void) {
  if(rootPtr == NULL)
    return true;
  else
    return false;
}

int List::length(void) {
  listNode *tmp;
  int num;

  // count each node in the list and return the count
  tmp = rootPtr;
  num = 0;
  while(tmp) {
    num++;
    tmp = tmp->nextPtr;
  }

  return num;
}

List *findSubst(char *var, List *assign) {
  if(assign->GoTop() == false)
    return NULL;

  do {
    List *varSubst = assign->currItemList();
    varSubst->GoTop();

    // is this the variable that we are trying to find?
    if(strcmp(varSubst->currItem(), var) == 0) {
      varSubst->GoNext();
      return varSubst->currItemList();
    }
  } while(assign->GoNext());

  // couldn't find var in the variable substitution list
  return NULL;
}

List *substitute(List *old, List *assign) {
  List *subst = new List;
  listNode *tmp;

  tmp = old->rootPtr;
  while(tmp) {
    if(tmp->item) {
      //      cout << "recreating item: " << tmp->item << endl;
      if(tmp->item[0] == '?') {
	// variable may need to be substituted
	List *newItem = findSubst(tmp->item, assign);
	if(newItem == NULL)
	  subst->InsertAfter(tmp->item);
	else {
	  newItem->GoTop();
	  do {
	    if(newItem->currItem())
	      subst->InsertAfter(newItem->currItem());
	    if(newItem->currItemList())
	      subst->InsertAfter(newItem->currItemList());
	    if(newItem->currItemNode())
	      subst->InsertAfter(newItem->currItemNode());
	  } while(newItem->GoNext());
	}
      }
      else
	// constants just stay
	subst->InsertAfter(tmp->item);
    }

    // just call ourselves recursively for sublists
    if(tmp->itemList) {
      //      cout << "recreating list: " << *tmp->itemList << endl;
      subst->InsertAfter(substitute(tmp->itemList, assign));
    }

    if(tmp->itemNode) {
      //      cout << "recreating genericNode: " << *tmp->itemNode << endl;
      subst->InsertAfter(new genericNode(NULL, substitute(tmp->itemNode->features(), assign)));
    }

    tmp = tmp->nextPtr;
  }

  return subst;
}

--- NEW FILE: p1.cpp ---
#include <iostream.h>
#include <fstream.h>
#include <string.h>

#include "parse.h"
#include "grammar.h"
#include "lexicon.h"
#include "list.h"
#include "gnode.h"

Grammar grammar("grammar.txt");
Lexicon lexicon("lexicon.txt");

int main(int argc, char *argv[]) {
  char *sentence = "Jack was happy to see a dog.";

//  cout << "[nlp]$ " << sentence << endl;
//  parse(sentence);

  sentence = new char[1000];
  cout << "[nlp]$ " << flush;
  cin.getline(sentence, 999);
//  while(cin) {
    if(strlen(sentence))
      parse(sentence);

//    cout << "[nlp]$ " << flush;
//    cin.getline(sentence, 999);
//  }

  cout << "Bye bye..." << endl;
}

--- NEW FILE: gnode.cpp ---
// Copyright (c) 1998-99, Ed Schlunder

#include <string.h>
#include "gnode.h"
#include "list.h"

// takes a string and hacks off leading/trailing spaces and *'s.
void trim(char *text) {
  int i, j, len;

  len = strlen(text);

  // empty strings don't need trimming...
  if(len == 0) return;

  // find beginning of actual text
  for(i = 0; i < len; i++)
    if(text[i] != ' ') break;

  // was there any preceeding white space to clear out?
  if(i > 0)
    for(j = 0; text[i] != 0; j++, i++)
      text[j] = text[i];

  // step from the end backwards to see find where we need to chop
  for(j = strlen(text); j > 1; j--)
    if(text[j-1] != ' ') {
      text[j] = 0;
      break;
    }
}


// ---------------------------------------------------------------------------
// Class Implementation: genericNode
// ---------------------------------------------------------------------------

genericNode::genericNode(char *word, List *features) {
  if(word) {
    name = new char[strlen(word)];
    strcpy(name, word);
  }
  else
    word = NULL;

  featureList = features;
}

// Example Phrasal: ((CAT N) (AGR ?a))
// Example Lexical: saw ((CAT N) (ROOT SAW1) (AGR 3s))
genericNode::genericNode(char *obj) {
  unsigned int i, j;

  // first try to read in the name field (in lexical objects)
  for(i = 0; i < strlen(obj); i++)
    if(obj[i] != ' ') break;
  
  // maybe this is a phrasal object -- doesn't have a "name" field
  if(obj[i] == '(') {
    name = NULL;
    j = i;
  }
  else {
    // this is a lexical object, pick up the name:
    for(j = i; j < strlen(obj); j++)
      if(obj[j] == ' ') break;
    
    name = new char[j - i + 1];
    strncpy(name, obj, j - i);
    name[j - i] = 0;
    j++;
  }

  // pick up the object's feature list
  char *tmp = obj + j;
  //  cout << "[" << tmp << "]" << endl;
  featureList = makeFeatureList(tmp);
}

ostream& operator<<(ostream &out_file, genericNode const &n) {
  if(n.name == NULL)
    out_file << "(nil";
  else
    out_file << '(' << n.name;

  if(n.featureList)
    out_file << ' ' << *n.featureList;

  out_file << ')';

  return out_file;
}

List *unify(genericNode &s, genericNode &g) {
  List &sFeatures = *s.featureList, &gFeatures = *g.featureList, *currFeature;
  List *assignList = new List;
  char *featureName;
  List *featureValueA, *featureValueB;

  // if a node has no feature list, then we automatically know it can unify
  if(!sFeatures.GoTop()) return new List;
  if(!gFeatures.GoTop()) return new List;
  
  do {
    // get the current feature
    currFeature = sFeatures.currItemList();
    currFeature->GoTop();

    // pull out the feature name and value
    featureName = currFeature->currItem();
    currFeature->GoNext();
    featureValueA = currFeature->currItemList();
    featureValueB = g.lookupFeature(featureName);

    /*
    cout << "unify: [" << featureName << "] = " << *featureValueA << "";
    cout << " with " << *featureValueB << " --> ";
    */

    /*
    if(featureValueB == NULL)
      // if feature lookup failed, no corresponding feature exists -- 
      // automatically agrees...
      //      cout << "yes" << endl;
    else {
    */
    // if featureB is nothing, no corresponding featuture exists -- agrees
    if(featureValueB != NULL) {
      List *assignment = cmpFeatures(*featureValueA, *featureValueB);
      if(assignment == NULL) {
	//	cout << "no" << endl;
	delete assignList;
	return NULL;
      }
      //      cout << "yes: " << *assignment << endl;
      if(!assignment->empty())
	assignList->InsertAfter(assignment);
    }
  } while(sFeatures.GoNext());
  //  cout << "assignment list: " << *assignList << endl;
  return assignList;
}

List *cmpFeatures(List &a, List &b) {
  char *valueA, *valueB;
  a.GoTop();

  do {
    valueA = a.currItem();

    b.GoTop();
    do {
      valueB = b.currItem();

      // do we have a match?
      if(strcmp(valueA, valueB) == 0)
	return new List;

      // if one of the values is a variable, then it can match anything
      if(valueA[0] == '?') {
	List *assignList = new List;
	assignList->InsertAfter(valueA);
	assignList->InsertAfter(&b);

	return assignList;
      }

      if(valueB[0] == '?') {
	List *assignList = new List;
	assignList->InsertAfter(valueB);
	assignList->InsertAfter(&a);

	return assignList;
      }
    } while(b.GoNext());
  } while(a.GoNext());

  return NULL;
}

List *genericNode::lookupFeature(const char *name) const {
  char *tmp;
  List *featureSeek;
  
  // if this node has no features, abort search
  if(featureList == NULL) return NULL;

  featureList->GoTop();
  do {
    // pick up current feature
    featureSeek = featureList->currItemList();
    featureSeek->GoTop();

    // pick up current feature's name
    tmp = featureSeek->currItem();

    // is this the feature we were trying to find?
    if(strcmp(tmp, name) == 0) {
      // found our feature, pick up the value
      featureSeek->GoNext();
      return featureSeek->currItemList();
    }
  } while(featureList->GoNext());

  // exhausted the feature list, this feature doesn't exist in this genericNode
  return NULL;
}

char *genericNode::word(void) {
  return name;
}

genericNode::~genericNode() {
  // -----------------------
  // THIS NEEDS IMPLEMENTING
  // -----------------------
  delete name;
  delete featureList;
}

List *genericNode::features(void) {
  return featureList;
}
// ---------------------------------------------------------------------------
genericNode *substitute(genericNode *old, List *assign) {
  genericNode *subst;

  subst = new genericNode(old->name, substitute(old->featureList, assign));
  return subst;
}

--- NEW FILE: lexicon.cpp ---
// Copyright (c) 1998-99, Ed Schlunder

#include <iostream.h>
#include <fstream.h>
#include "lexicon.h"

// ---------------------------------------------------------------------------
// Class Implementation: Lexicon
// ---------------------------------------------------------------------------
Lexicon::Lexicon(char *fileName) {
  char buffer[1000];
  ifstream inFile;
  
  // specify that this is an empty lexicon currently
  rootPtr = currPtr = NULL;
  
  // try to open the lexicon file
  inFile.open(fileName);
  if(!inFile) {
    cout << "Lexicon file can not be opened." << endl;
    return;
  }

  // read in each lexicon entry and insert it into our BST
  while(inFile) {
    inFile.getline(buffer, 999);

    // ignore blank lines
    if(strlen(buffer) < 2) continue;

    insert(new genericNode(buffer));
  }
}

void Lexicon::insert(genericNode *newWord) {
#ifdef DEBUG
  cout << "insertLexicon: [" << *newWord << "]" << endl;
#endif

  // if this is an empty lexicon, insert at the top of the BST
  if(rootPtr == NULL) {
    rootPtr = new lexicalNode;
    rootPtr->node = newWord;
    rootPtr->leftPtr = NULL;
    rootPtr->rightPtr = NULL;
  }
  else
    insert(newWord, rootPtr);
}

void Lexicon::insert(genericNode *newWord, lexicalNode *root) {
  lexicalNode *tmp;
  
  // does newWord belong to the right of this node?
  if(strcmp(root->node->word(), newWord->word()) > 0)
    // ASSERT: newWord belongs to the right of this word
    if(root->rightPtr) {
      // nodes already exist to the right, follow link down and try again
      insert(newWord, root->rightPtr);
      return;
    }
    else
      // make a new node to the right
      tmp = root->rightPtr = new lexicalNode;
  else
    if(root->leftPtr) {
      insert(newWord, root->leftPtr);
      return;
    }
    else
      tmp = root->leftPtr = new lexicalNode;
  
  tmp->node = newWord;
  tmp->leftPtr = NULL;
  tmp->rightPtr = NULL;
}

Lexicon::~Lexicon() {
  // -----------------------
  // THIS NEEDS IMPLEMENTING
  // -----------------------
}

genericNode *Lexicon::currNode(void) {
  if(currPtr)
    return currPtr->node;
  
  return NULL;
}

// ---------------------------------------------------------------------------

bool Lexicon::lookupNext(void) {
  if(currPtr->leftPtr == NULL)
    return false;
  else
    return lookupWord(currPtr->leftPtr, currPtr->node->word());
}

bool Lexicon::lookupWord(char *word) {
  return lookupWord(rootPtr, word);
}

bool Lexicon::lookupWord(lexicalNode *root, char *word) {
  int cmp;
  
  // if we've hit a dead end, we know this word doesn't exist in the tree
  if(root == NULL) return false;

  // is this word the one we were looking for?
  cmp = strcmp(root->node->word(), word);
  if(cmp == 0) {
    currPtr = root;
    return true;
  }
  
  if(cmp > 0)
    return lookupWord(root->rightPtr, word);
  else
    return lookupWord(root->leftPtr, word);    
}

--- NEW FILE: parse2.txt ---
				 Jack was happy to see a dog.
				1    2   3     4  5   6 7   8
Agenda
-------------------------------------------------------------------------------------------------
- 1 2 Jack  NAME ((AGR 3s))
- 1 2       NP   ((AGR 3s)) 
- 2 3 was   V    ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np))) 
- 3 4 happy ADJ  ((SUBCAT _vp:inf))
- 3 4       ADJP
- 4 5 to    TO
- 5 6 see   V    ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
- 6 7 a     ART  ((ROOT A1)(AGR 3s))
- 7 8 dog   N    ((CAT N) (ROOT DOG1) (AGR 3s))
- 6 8       NP   ((AGR 3s))
- 5 8       VP   ((AGR 3s) (VFORM base))
- 4 8       VP   ((SUBCAT inf) (AGR 3s) (VFORM inf))       
- 3 8       ADJP 
- 2 3       VP   ((AGR (1s 3s)) (VFORM past))
* 1 8       S    ((INV -) (VFORM past) (AGR 3s)) 

Chart
-------------------------------------------------------------------------------------------------
1 2 NAME ((AGR 3s))
1 2 NP   ((AGR 3s))
2 3 V    ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np))) 
3 4 ADJ  ((SUBCAT _vp:inf))
3 4 ADJP
4 5 TO
5 6 V    ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
6 7 ART  ((ROOT A1)(AGR 3s))
7 8 N    ((ROOT DOG1) (AGR 3s))
6 8 NP   ((AGR 3s))
5 8 VP   ((AGR 3s) (VFORM base))
4 8 VP   ((SUBCAT inf) (AGR 3s) (VFORM inf))       
3 8 ADJP 
2 8 VP   ((AGR (1s 3s)) (VFORM past))
1 8 S    ((INV -) (VFORM past) (AGR 3s)) 


2 7 VP   ((AGR (1s 3s)) (VFORM past))
1 7 S    ((INV -) (VFORM past) (AGR 3s))

Active Arc List
-------------------------------------------------------------------------------------------------
1 2 S    ((INV -) (VFORM ?v (pres past)) (AGR 3s)) 
         -> NP ((AGR 3s))				  o VP ((VFORM ?v (pres past)) (AGR 3s))
2 3 VP   ((AGR (1s 3s)) (VFORM past))
         -> V ((SUBCAT _np) (AGR (1s 3s)) (VFORM past))   o NP
2 3 VP   ((AGR (1s 3s)) (VFORM past))
         -> V ((SUBCAT _adjp) (AGR (1s 3s)) (VFORM past)) o ADJP
3 4 ADJP -> ADJ ((SUBCAT _vp:inf))                        o VP ((VFORM inf))
4 5 VP   ((SUBCAT inf) (AGR ?a) (VFORM inf))       
         -> TO ((AGR ?a) (VFORM inf))                     o VP ((VFORM base))
5 6 VP   ((AGR ?a) (VFORM base))
         -> V ((SUBCAT _np) (AGR ?a) (VFORM base))        o NP
6 7 NP   ((AGR 3s))                                       o N ((AGR 3s))
6 7 S    ((INV -) (VFORM ?v (pres past)) (AGR 3s)
         -> NP ((AGR 3s))                                 o VP ((VFORM ?v (pres past)) (AGR 3s))


--- NEW FILE: Makefile ---
CPP= g++ -Wall -c -g
LINK= g++ -g

all: p1

p1: p1.o gnode.o list.o grammar.o lexicon.o parse.o
	$(LINK) p1.o parse.o grammar.o lexicon.o list.o gnode.o -o p1

t1: t1.o gnode.o list.o
	$(LINK) t1.o gnode.o list.o -o t1
t1.o: t1.cpp
	$(CPP) t1.cpp
t2: t2.o gnode.o list.o
	$(LINK) t2.o gnode.o list.o -o t2
t2.o: t2.cpp
	$(CPP) t2.cpp
t3: t3.o lexicon.o gnode.o list.o
	$(LINK) t3.o lexicon.o gnode.o list.o -o t3
t3.o: t3.cpp
	$(CPP) t3.cpp
t4: t4.o grammar.o lexicon.o gnode.o list.o
	$(LINK) t4.o grammar.o lexicon.o gnode.o list.o -o t4
t4.o: t4.cpp
	$(CPP) t4.cpp
p1.o: p1.cpp
	$(CPP) p1.cpp
grammar.o: grammar.cpp
	$(CPP) grammar.cpp
lexicon.o: lexicon.cpp
	$(CPP) lexicon.cpp
gnode.o: gnode.cpp
	$(CPP) gnode.cpp
list.o: list.cpp
	$(CPP) list.cpp
parse.o: parse.cpp
	$(CPP) parse.cpp
clean:
	rm -f core p1 t1 t2 t3 t4 *.o *~ *#

--- NEW FILE: grammar.cpp ---
// Copyright (c) 1998-99, Ed Schlunder

#include <fstream.h>
#include <iostream.h>
#include <string.h>
#include "grammar.h"
#include "gnode.h"

Grammar::Grammar(char *fileName) {
  char buffer[1000];
  ifstream inFile;

  // specify that this is currently an empty grammar rule list
  rootPtr = currPtr = NULL;

  // try to open the grammar rules file
  inFile.open(fileName);
  if(!inFile) {
    cout << "Grammar rules file can not be opened." << endl;
    return;
  }

  while(inFile) {
    inFile.getline(buffer, 999);

    // ignore blank lines
    if(strlen(buffer) < 2) continue;
    trim(buffer);
    insert(buffer);
  }
}

void Grammar::insert(char *text) {
  List *ruleLine = new List;
  genericNode *obj;
  char *tmp;
  int length, level, beg, end;

  length = strlen(text);
  end = -1;
  while(end < length) {
    beg = end + 1;
    
    // locate opening parenthesis
    while(beg < length)
      if(text[beg] == '(') break;
      else beg++;

    for(level = 0, end = beg + 1; end < length; end++) {
      if((text[end] == ')') && (level == 0)) break;
      
      // make sure that items encased within () are kept intact
      if(text[end] == '(') level++;
      if(text[end] == ')') level--;
    }

    end++;
    tmp = new char[end - beg + 1];
    strncpy(tmp, text + beg, end - beg);
    tmp[end - beg] = 0;

    if(strlen(tmp) == 0)
      break;

    // make a genericNode out of this phrasal object and add it to the ruleLine
    //    cout << '[' << tmp << ']' << endl;
    obj = new genericNode(tmp);
    ruleLine->InsertAfter(obj);
  }

  insert(ruleLine);
}

void Grammar::insert(List *ruleLine) {
  grammarLine *tmp;

  //  cout << "grammarInsert: " << *ruleLine << endl;
  if(rootPtr == NULL) {
    rootPtr = new grammarLine;
    rootPtr->line = ruleLine;
    rootPtr->nextPtr = NULL;
  }
  else {
    // find end of list
    tmp = rootPtr;
    while(tmp->nextPtr != NULL)
      tmp = tmp->nextPtr;

    // tack a new rule on the end of the list
    tmp->nextPtr = new grammarLine;
    tmp = tmp->nextPtr;
    tmp->line = ruleLine;
    tmp->nextPtr = NULL;
  }
}

Grammar::~Grammar() {
  
}

List *Grammar::currLine(void) {
  return currPtr->line;
}

void Grammar::goTop(void) {
  currPtr = rootPtr;
}

bool Grammar::goNext(void) {
  if(currPtr)
    if(currPtr->nextPtr) {
      currPtr = currPtr->nextPtr;
      return true;
    }
  
  return false;
}

List *Grammar::findMatch(genericNode *obj) {
  searchObj = obj;
  currPtr = rootPtr;

  return findNext();
}

List *Grammar::findNext(void) {
  List *assign, *currLine;

  // go through each rule in the grammar and try to match it with the given obj
  while(currPtr) {
    currLine = currPtr->line;
    currLine->GoTop();
    currLine->GoNext();
    assign = unify(*currLine->currItemNode(), *searchObj);

    currPtr = currPtr->nextPtr;

    if(assign)
      return substitute(currLine, assign);
  }

  return NULL;
}

--- NEW FILE: t4.cpp ---
#include "grammar.h"
#include <iostream.h>

int main(int argc, char *argv[]) { 
  Grammar gram("grammar.txt");

  cout << "top line:" << endl;
}

--- NEW FILE: p1-junk.cpp ---
// CSE476 Bottom Up Chart Parser
// Ed Schlunder (zilym at asu.edu)

#include <iostream.h>
#include <fstream.h>
#include <stdio.h>
#include <string.h>

#include "grammar.h"
#include "lexicon.h"

Lexicon lex("lexicon.txt");
Grammar gra("grammar.txt");

void procAgenda(genericNode *a) {
  List *currLine;
  genericNode *pred;
  char *category;

  category = a->Feature("CAT");
  cout << "  Unify searching for: " << category << endl;
  gra.GoTop();
  do {
    currLine = gra.currLine();
    currLine->GoTop();
    currLine->GoNext();
    pred = currLine->currItemNode();
    
    if(unify(a, pred)) {
      gra.Print();
      cout << endl;
    }

  } while(gra.GoNext());
  cout << endl;
}

// Jack was happy to see a dog
void Parse(char *sentence) {
  List *s = MakeList(sentence);

  cout << "Sentence: ";
  s->Print();
  cout << endl;

  s->GoTop();
  do {
    if(s->currItem() == NULL)
      break;

    cout << "Word: " << s->currItem() << endl;
    if(lex.Lookup(s->currItem())) {
      cout << "  CAT: " << lex.Feature("CAT") << endl;
      procAgenda(lex.currNode());
    }
    else
      cout << "  No lexical entry!" << endl;
  } while(s->GoNext());
}

int main(int argc, char *argv[]) {
  char sentence[1000];
  
  cout << "[nlp]$ Jack was happy to see a dog." << endl;
  Parse("Jack was happy to see a dog.");

  cout << "[nlp]$ " << flush;
  cin.getline(sentence, 999);
  while(cin) {
    Parse(sentence);

    cout << "[nlp]$ " << flush;
    cin.getline(sentence, 999);
  }

  cout << endl;

  /*
  char tmp[100];
  cin.getline(tmp, 99);
  while(cin) {
    cout << "Lexical Lookup: \"" << tmp << "\" ";
    if(lex.Lookup(tmp)) {
      cout << " - cat: " << lex.Feature("CAT");
      cout << " subcat: " << lex.Feature("SUBCAT");
      while(char *subcat = lex.Feature())
	cout << ", " << subcat;
      cout << endl;
    }
    else
      cout << "does not exist" << endl;

    cin.getline(tmp, 99);
  }
  */
}


--- NEW FILE: lexicon.txt ---
a ((CAT (ART)) (ROOT (A1)) (AGR (3s)))
be ((CAT (V)) (ROOT (BE1)) (VFORM (base)) (IRREG-PRES (+)) (IRREG-PAST (+)) (SUBCAT (_adjp _np)))
cry ((CAT (V)) (ROOT (CRY1)) (VFORM (base)) (SUBCAT (_none)))
dog ((CAT (N)) (ROOT (DOG1)) (AGR (3s)))
fish ((CAT (N)) (ROOT (FISH1)) (AGR (3s 3p)) (IRREG-PL (+)))
happy ((CAT (ADJ)) (SUBCAT (_vp:inf)))
he ((CAT (PRO)) (ROOT (HE1)) (AGR (3s)))
is ((CAT (V)) (ROOT (BE1)) (VFORM (pres)) (SUBCAT (_adjp _np)) (AGR (3s)))
Jack ((CAT (NAME)) (AGR (3s)))
Sally ((CAT (NAME)) (AGR (3s)))
man ((CAT (N1)) (ROOT (MAN1)) (AGR (3s)))
men ((CAT (N)) (ROOT (MAN1)) (AGR (3p)))
saw ((CAT (N)) (ROOT (SAW1)) (AGR (3s)))
saw ((CAT (V)) (ROOT (SAW2)) (VFORM (base)) (SUBCAT (_np)))
saw ((CAT (V)) (ROOT (SEE1)) (VFORM (past)) (SUBCAT (_np)))
see ((CAT (V)) (ROOT (SEE1)) (VFORM (base)) (SUBCAT (_np)) (IRREG-PAST (+)) (EN-PASTPRT (+)))
seed ((CAT (N)) (ROOT (SEED1)) (AGR (3s)))
the ((CAT (ART)) (ROOT (THE1)) (AGR (3s 3p)))
to ((CAT (TO)))
want ((CAT (V)) (ROOT (WANT1)) (VFORM (base)) (SUBCAT (_np _vp:inf _np_vp:inf)))
was ((CAT (V)) (ROOT (BE1)) (VFORM (past)) (AGR (1s 3s)) (SUBCAT (_adjp _np)))
were ((CAT (V)) (ROOT (BE)) (VFORM (past)) (AGR (2s 1p 2p 3p)) (SUBCAT (_adjp _np)))
--- NEW FILE: gnode.h ---
#ifndef _GNODE_H_
#define _GNODE_H_

#include <iostream.h>

class List;

void trim(char *text); // GOOD

// genericNode -- holds any lexical or phrase object
class genericNode {
 public:
  // constructor: pass it a string containing a lexical or phrase
  // object in the format used in our lexicon.txt and grammar.txt
  genericNode(char *obj); // GOOD
  genericNode(char *word, List *features); // GOOD except feature doesn't copy
  ~genericNode();

  friend ostream &operator<<(ostream &out_file, const genericNode &n); // GOOD

  // GOOD: sort of.. this routine leaks memory because the lists for
  // assignment it makes aren't properly deallocated upon unification failure
  friend List *unify(genericNode &s, genericNode &g);
  friend List *cmpFeatures(List &a, List &b);
  List *lookupFeature(const char *name) const;
  friend genericNode *substitute(genericNode *old, List *assign);

  char *word(void); // GOOD
  List *features(void);

 private:
  char *name;
  List *featureList;
};

#endif

--- NEW FILE: parse.h ---
#include "gnode.h"

void parse(char *text);
void processAgenda(void);

typedef struct activeArc {
  int begLoc;
  int endLoc;
  int numFound;
  List *ruleLine;
};

class ActiveArcList {
 public:
  ActiveArcList();
  void add(activeArc *newArc);
  List *findMatch(int begLoc, int endLoc, genericNode *obj);
  bool goTop(void);
  bool goNext(void);
  activeArc *currArc(void);
  void print(void);


 private:
  struct arcNode;
  typedef struct arcNode {
    activeArc *arc;
    arcNode *nextPtr;
  };

  arcNode *rootPtr;
  arcNode *currPtr;
};

class Chart {
 public:
  Chart();

  void add(int begLoc, int endLoc, genericNode *obj);
  void getKey(int &begLoc, int &endLoc, genericNode *&obj);
  
  // returns true if there are more keys on the agenda that need processing
  bool process(void); // GOOD

  bool findMatch(genericNode *obj, List *&assign, int begLoc, int &endLoc);
  List *findNext(void);

  void print(void);

 private:
  struct chartNode;
  typedef struct chartNode {
    int begLoc;
    int endLoc;
    genericNode *obj;
    bool processFlag;
    
    chartNode *nextPtr;
  };

  chartNode *rootPtr;
  chartNode *currPtr;
  genericNode *searchObj;
};

--- NEW FILE: t1.cpp ---
#include <iostream.h>
#include "gnode.h"

int main(int argc, char *argv[]) {
  genericNode p("((CAT (VP)) (VFORM (inf)))");
  genericNode l("Jack ((CAT NAME) (AGR 3s))");

  cout << "P: " << p << endl;
  cout << "L: " << l << endl;
}

--- NEW FILE: t2.cpp ---
#include <iostream.h>
#include "gnode.h"

int main(int argc, char *argv[]) {
  genericNode a("((CAT (V)) (SUBCAT (_vp:inf)) (AGR (?a)) (VFORM (?v)))");
  genericNode b("want ((CAT (V)) (ROOT (WANT1)) (VFORM (base)) (SUBCAT (_np _vp:inf _np_vp:inf)))");

  cout << "A: " << a << endl;
  cout << "B: " << b << endl;

  cout << "unify(b, a): " << unify(b, a) << endl;
}

--- NEW FILE: p1 ---
(This appears to be a binary file; contents omitted.)

--- NEW FILE: lexicon.h ---
#ifndef _LEXICON_H_
#define _LEXICON_H_

#include "gnode.h"
#include "list.h"

class Lexicon {
 public:
  Lexicon(char *fileName); // GOOD
  ~Lexicon();
  bool lookupWord(char *word);
  bool lookupNext(void);
  genericNode *currNode(void); // GOOD

 private:
  struct lexicalNode;
  typedef struct lexicalNode {
    genericNode *node;

    lexicalNode *leftPtr;
    lexicalNode *rightPtr;
  };

  void insert(genericNode *newWord); // GOOD 
  void insert(genericNode *newWord, lexicalNode *root); // GOOD

  bool lookupWord(lexicalNode *root, char *word);

  lexicalNode *rootPtr;
  lexicalNode *currPtr;
  List *featureSeek;
};

#endif

--- NEW FILE: list.h ---
#ifndef _LIST_H_
#define _LIST_H_

#include "gnode.h"

struct listNode;
typedef struct listNode {
  char *item;
  List *itemList;
  genericNode *itemNode;
  
  listNode *nextPtr;            // pointer to next option
  listNode *prevPtr;
};

class List {
public:
  List();
  ~List();
  void InsertBefore(char *item);
  void InsertBefore(List *item);
  void InsertBefore(genericNode *item);
  void InsertAfter(char *item);
  void InsertAfter(List *item);
  void InsertAfter(genericNode *item);

  // returns true if everything is good, false if operation failed
  bool GoTop(void); // GOOD
  bool GoNext(void); // GOOD
  bool GoPrev(void);
  bool GoItem(int n);
  bool empty(void); // GOOD
  int length(void); // GOOD

  char *currItem(void);
  List *currItemList(void);
  genericNode *currItemNode(void);
  void Print(void);

  friend List *MakeFeatList(char *text);

  friend List *makeFeatureList(char *text); // GOOD
  friend ostream &operator<<(ostream &out_file, const List &l); // GOOD
  friend List *substitute(List *old, List *assign);

 private:
  listNode *rootPtr;
  listNode *currPtr;
};

List *MakeList(char *text);

#endif

--- NEW FILE: grammar.txt ---
((CAT (S)) (INV (-)) (VFORM (?v (pres past))) (AGR (?a))) -> ((CAT (NP)) (AGR (?a))) ((CAT (VP)) (VFORM (?v (pres past))) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (ART)) (AGR (?a))) ((CAT (N)) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (PRO)) (AGR (?a)))
((CAT (NP)) (AGR (?a))) -> ((CAT (NAME)) (AGR (?a)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_none)) (AGR (?a)) (VFORM (?v)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_np)) (AGR (?a)) (VFORM (?v))) ((CAT (NP)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_vp:inf)) (AGR (?a)) (VFORM (?v))) ((CAT (VP)) (VFORM (inf)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_np_vp:inf)) (AGR (?a)) (VFORM (?v))) ((CAT (NP))) ((CAT (VP)) (VFORM (inf)))
((CAT (VP)) (AGR (?a)) (VFORM (?v))) -> ((CAT (V)) (SUBCAT (_adjp)) (AGR (?a)) (VFORM (?v))) ((CAT (ADJP)))
((CAT (VP)) (SUBCAT (inf)) (AGR (?a)) (VFORM (inf))) -> ((CAT (TO))) ((CAT (VP)) (VFORM (base)))
((CAT (ADJP))) -> ((CAT (ADJ)))
((CAT (ADJP))) -> ((CAT (ADJ)) (SUBCAT (_vp:inf))) ((CAT (VP)) (VFORM (inf)))

--- NEW FILE: grammar.h ---
#ifndef _GRAMMAR_H_
#define _GRAMMAR_H_

#include "list.h"
#include "gnode.h"

class Grammar {
 public:
  Grammar(char *fileName); // GOOD
  ~Grammar();
  List *currLine(void);

  List *findMatch(genericNode *obj);
  List *findNext(void);

  void goTop(void);
  bool goNext(void);

 private:
  struct grammarLine;
  typedef struct grammarLine {
    List *line;
    grammarLine *nextPtr;
  };

  void insert(char *text); // GOOD
  void insert(List *ruleLine); // GOOD

  grammarLine *rootPtr;
  grammarLine *currPtr;
  genericNode *searchObj;
};

#endif

--- NEW FILE: README ---
Copyright (c) 1999, Ed Schlunder <zilym at asu.edu>
This is free software distributable under the terms of the GNU GPL.
See COPYING for details.

This is some old natural language processing code I wrote for a class I
took at ASU (CSE476 appearantly). Its not great (read: buggy and poorly
documented) but I'm keeping it around for reference when I get around to
writing a new NLP engine for Europa in Java.



--- NEW FILE: parse.cpp ---
#include "parse.h"
#include "list.h"
#include "grammar.h"
#include "lexicon.h"

extern Grammar grammar;
extern Lexicon lexicon;
Chart chart;
ActiveArcList arcs;

void parse(char *text) {
  List *s = MakeList(text);
  int i;

  cout << *s << endl;

  // go through each word and process
  s->GoTop();
  i = 1;
  do {
    if(lexicon.lookupWord(s->currItem()) == false) {
      cout << "Can't find [" << s->currItem() << "] in the lexicon..." << endl;
      return;
    }

    do {
      chart.add(i, i + 1, lexicon.currNode());
    } while(lexicon.lookupNext());

    processAgenda();

    i++;
  } while(s->GoNext());
}

void processAgenda(void) {
  int begLoc, endLoc;
  genericNode *obj;

  while(chart.process()) {
    List *newMatch;

    chart.getKey(begLoc, endLoc, obj);
    
    cout << "find: " << *obj << endl;
    newMatch = grammar.findMatch(obj);

    if(newMatch) {
      do {
	cout << "Grammar search success:\n" << *newMatch << endl;
	if(newMatch->length() <= 2) {
	  newMatch->GoTop();
	  chart.add(begLoc, endLoc, newMatch->currItemNode());
	}
	else {
	  activeArc *newArc;

	  newArc = new activeArc;
	  newArc->begLoc = begLoc;
	  newArc->endLoc = endLoc;
	  newArc->numFound = 1;
	  newArc->ruleLine = newMatch;
	  arcs.add(newArc);
	}
      } while((newMatch = grammar.findNext()));
    }
    else
      cout << "Grammar search fail" << endl;

    arcs.print();

    // if there is nothing on the active arc list, we can skip checking arcs
    if(!arcs.goTop())
      continue;
    
    do {
      activeArc *checkArc;
      
      checkArc = arcs.currArc();
      if(checkArc->endLoc == begLoc) {
	// find the current waiting component of this arc
	checkArc->ruleLine->GoTop();
	for(int i = 0; i <= checkArc->numFound; i++)
	  checkArc->ruleLine->GoNext();

	cout << "Arc search: " << *checkArc->ruleLine->currItemNode() << ", "
	     << *obj;
	List *assign = unify(*checkArc->ruleLine->currItemNode(), *obj);
       
	// did this arc unify with the current obj?
	if(!assign) {
	  cout << " = FAIL" << endl;
	}
	else {
	  cout << " = " << *assign << endl;
	  // do the variable substitutions
	  assign = substitute(checkArc->ruleLine, assign);
	  
	  if(assign->length() > (checkArc->numFound + 2)) {
	    // this arc has been extended, but not completed
	    cout << "Extended arc:\n" << *assign << "\n" << endl;
	    activeArc *newArc = new activeArc;
	    newArc->begLoc = checkArc->begLoc;
	    newArc->endLoc = endLoc;
	    newArc->numFound = checkArc->numFound + 1;
	    newArc->ruleLine = assign;
	    arcs.add(newArc);
	  }
	  else {
	    // this arc has been completed, we can add it to the chart
	    cout << "Completed arc:\n" << *assign << "\n" << endl;
	    assign->GoTop();
	    chart.add(checkArc->begLoc, endLoc, assign->currItemNode());	  
	  }
	}
      }
    } while(arcs.goNext());
  }

  chart.print();
}

Chart::Chart() {
  // mark this chart as being empty
  rootPtr = currPtr = NULL;
}

void Chart::add(int begLoc, int endLoc, genericNode *obj) {
  cout << '[' << begLoc << ',' << endLoc << ',' << *obj << ']' << endl;

  if(rootPtr == NULL) {
    rootPtr = new chartNode;
    rootPtr->begLoc = begLoc;
    rootPtr->endLoc = endLoc;
    rootPtr->obj = obj;
    rootPtr->processFlag = true;
    rootPtr->nextPtr = NULL;
  }
  else {
    chartNode *tmp = rootPtr;

    while(tmp->nextPtr != NULL)
      tmp = tmp->nextPtr;

    tmp->nextPtr = new chartNode;
    tmp = tmp->nextPtr;
    tmp->begLoc = begLoc;
    tmp->endLoc = endLoc;
    tmp->obj = obj;
    tmp->processFlag = true;
    tmp->nextPtr = NULL;
  }
}

bool Chart::process(void) {
  chartNode *tmp = rootPtr;

  while(tmp != NULL) {
    if(tmp->processFlag)
      return true;

    tmp = tmp->nextPtr;
  }

  return false;
}

void Chart::getKey(int &begLoc, int &endLoc, genericNode *&obj) {
  chartNode *tmp = rootPtr;

  while(tmp != NULL) {
    if(tmp->processFlag) {
      // this key hasn't been processed yet, return it and mark as processed
      begLoc = tmp->begLoc;
      endLoc = tmp->endLoc;
      obj = tmp->obj;
      tmp->processFlag = false;

      return;
    }

    tmp = tmp->nextPtr;
  }

  return;
}

void Chart::print(void) {
  cout << "Chart ";
  for(int i = 0; i < 160 - 6; i++)
    cout << '-';
  cout << endl;

  currPtr = rootPtr;
  while(currPtr) {
    cout << '[' << currPtr->begLoc << ',' << currPtr->endLoc << ','
	 << currPtr->processFlag << ',' << *currPtr->obj << ']' << endl;

    currPtr = currPtr->nextPtr;
  }
  cout << '\n' << endl;
}

bool Chart::findMatch(genericNode *obj, List *&assign, int begLoc, int &endLoc) {
  searchObj = obj;
  currPtr = rootPtr;

  // empty chart has nothing to match -- abort
  if(currPtr == NULL)
    return false;

  // go through each rule in the grammar and try to match it with the given obj
  while(currPtr) {
    if(currPtr->begLoc == begLoc) {
      assign = unify(*currPtr->obj, *searchObj);

      if(assign) {
	endLoc = currPtr->endLoc;
	return true;
      }
    }

    currPtr = currPtr->nextPtr;
  }

  return false;
}

ActiveArcList::ActiveArcList() {
  rootPtr = currPtr = NULL;
}

void ActiveArcList::add(activeArc *newArc) {
  List *assign;
  int endLoc;

  if(rootPtr) {
    arcNode *tmp = rootPtr;

    // skip down to the last node
    while(tmp->nextPtr)
      tmp = tmp->nextPtr;

    tmp->nextPtr = new arcNode;
    tmp = tmp->nextPtr;
    
    tmp->arc = newArc;
    tmp->nextPtr = NULL;
  }
  else {
    rootPtr = new arcNode;
    rootPtr->arc = newArc;
    rootPtr->nextPtr = NULL;
  }

  // whenever a new arc is added, we need to try to unify it with
  // any of the completed objects on the chart to see if it can be extended
  newArc->ruleLine->GoTop();
  for(int i = 0; i <= newArc->numFound; i++)
    newArc->ruleLine->GoNext();

  if(!chart.findMatch(newArc->ruleLine->currItemNode(), assign, newArc->endLoc, endLoc))
    return;

  assign = substitute(newArc->ruleLine, assign);
  if(assign->length() > (newArc->numFound + 2)) {
    cout << "Extended arc: " << newArc->numFound + 1 << "\n" << *assign << "\n" << endl;
    activeArc *newArc2 = new activeArc;
    newArc2->begLoc = newArc->begLoc;
    newArc2->endLoc = endLoc;
    newArc2->numFound = newArc->numFound + 1;
    arcs.add(newArc2);
  }
  else {
    // this arc has been completed, we can add it to the chart
    cout << "Completed arc:\n" << *assign << "\n" << endl;

    chart.print();

    assign->GoTop();
    chart.add(newArc->begLoc, endLoc, assign->currItemNode());
  }
}

bool ActiveArcList::goTop(void) {
  if(rootPtr) {
    currPtr = rootPtr;
    return true;
  }
  else {
    currPtr = NULL;
    return false;
  }
}

bool ActiveArcList::goNext(void) {
  if(currPtr && currPtr->nextPtr) {
    currPtr = currPtr->nextPtr;
    return true;
  }
  else {
    currPtr = NULL;
    return false;
  }
}

activeArc *ActiveArcList::currArc(void) {
  if(currPtr)
    return currPtr->arc;
  else
    return NULL;
}

void ActiveArcList::print(void) {
  cout << "Active Arcs ";
  for(int i = 0; i < 160-12; i++)
    cout << '-';
  cout << endl;

  currPtr = rootPtr;
  while(currPtr) {
    cout << '[' << currPtr->arc->begLoc << ',' << currPtr->arc->endLoc << ',';
    currPtr->arc->ruleLine->GoTop();
    cout << *currPtr->arc->ruleLine->currItemNode();
    for(int i = 0; i < currPtr->arc->numFound; i++) {
      currPtr->arc->ruleLine->GoNext();
      cout << ' ' << *currPtr->arc->ruleLine->currItemNode();
    }
    cout << " o";
    while(currPtr->arc->ruleLine->GoNext())
      cout << ' ' << *currPtr->arc->ruleLine->currItemNode();
    cout << ']' << endl;

    currPtr = currPtr->nextPtr;
  }

  cout << '\n' << endl;
}

--- NEW FILE: parse.txt ---
				 Jack was happy to see a dog.
				1    2   3     4  5   6 7   8
Key List
-------------------------------------------------------------------------------------------------
- 1 2 Jack  NAME ((AGR 3s))
- 1 2       NP   ((AGR 3s)) 
- 2 3 was   V    ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np))) 
- 3 4 happy ADJ  ((SUBCAT _vp:inf))
- 3 4       ADJP
- 4 5 to    TO
- 5 6 see   V    ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
- 6 7 a     ART  ((ROOT A1)(AGR 3s))
- 6 7       NP   ((AGR 3s))
- 5 7       VP   ((AGR ?a) (VFORM base))
- 4 7       VP   ((SUBCAT inf) (AGR ?a) (VFORM inf))
- 3 7       ADJP
- 2 7       VP   ((AGR (1s 3s)) (VFORM past))
* 1 7       S    ((INV -) (VFORM past) (AGR 3s))

Chart
-------------------------------------------------------------------------------------------------
1 2 NAME ((AGR 3s))
1 2 NP   ((AGR 3s))
2 3 V    ((ROOT BE1) (VFORM past) (AGR (1s 3s)) (SUBCAT (_adjp _np))) 
3 4 ADJ  ((SUBCAT _vp:inf))
3 4 ADJP
4 5 TO
5 6 V    ((ROOT SEE1) (VFORM base) (SUBCAT _np) (IRREG-PAST +) (EN-PASTRT +))
6 7 ART  ((ROOT A1)(AGR 3s))
// 6 7 NP   ((AGR 3s))
5 7 VP   ((AGR ?a) (VFORM base))
4 7 VP   ((SUBCAT inf) (AGR ?a) (VFORM inf))
3 7 ADJP
2 7 VP   ((AGR (1s 3s)) (VFORM past))
1 7 S    ((INV -) (VFORM past) (AGR 3s))

Active Arc List
-------------------------------------------------------------------------------------------------
1 2 S    ((INV -) (VFORM ?v (pres past)) (AGR 3s)) 
         -> NP ((AGR 3s))				  o VP ((VFORM ?v (pres past)) (AGR 3s))
2 3 VP   ((AGR (1s 3s)) (VFORM past))
         -> V ((SUBCAT _np) (AGR (1s 3s)) (VFORM past))   o NP
2 3 VP   ((AGR (1s 3s)) (VFORM past))
         -> V ((SUBCAT _adjp) (AGR (1s 3s)) (VFORM past)) o ADJP
3 4 ADJP -> ADJ ((SUBCAT _vp:inf))                        o VP ((VFORM inf))
4 5 VP   ((SUBCAT inf) (AGR ?a) (VFORM inf))       
         -> TO ((AGR ?a) (VFORM inf))                     o VP ((VFORM base))
5 6 VP   ((AGR ?a) (VFORM base))
         -> V ((SUBCAT _np) (AGR ?a) (VFORM base))        o NP
6 7 NP   ((AGR 3s))                                       o N ((AGR 3s))
6 7 S    ((INV -) (VFORM ?v (pres past)) (AGR 3s)
         -> NP ((AGR 3s))                                 o VP ((VFORM ?v (pres past)) (AGR 3s))


--- NEW FILE: t3.cpp ---
#include "lexicon.h"

int main(int argc, char *argv[]) {
  Lexicon lex("lexicon.txt");
  bool res;

  res = lex.lookupWord("want");
  cout << "lookupWord(\"want\"): " << res << endl;
  if(res)
    cout << "  gnode: " << *lex.currNode() << endl;  
}




More information about the dslinux-commit mailing list