`
wangqisen
  • 浏览: 46815 次
文章分类
社区版块
存档分类
最新评论

自然语言处理之判断句子合法性的Chart-Parsing算法

 
阅读更多
package nlp;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.*;
import java.util.Map.Entry;

/**
 * A constituent recorded in the chart: a category label together with the
 * span of sentence positions it covers.
 */
class ChartLine {
	/** Category label of the constituent (a word category or a grammar symbol). */
	public String wordAttr;
	/** Position at which the constituent begins. */
	public int start;
	/** Position at which the constituent ends. */
	public int end;

	/**
	 * Creates a chart entry from an explicit label and span.
	 *
	 * @param wordAttr category label of the constituent
	 * @param start    position at which the constituent begins
	 * @param end      position at which the constituent ends
	 */
	public ChartLine(String wordAttr, int start, int end) {
		this.start = start;
		this.end = end;
		this.wordAttr = wordAttr;
	}

	/**
	 * Creates a chart entry that copies the label and span of an agenda element.
	 *
	 * @param agEl agenda element whose fields are copied
	 */
	public ChartLine(AgendaELement agEl) {
		this(agEl.wordAttr, agEl.start, agEl.end);
	}
}
class ActivityLine{
	public String key;
	public String value;
	public int start;
	public int end;
	public int posStart;
	public int posEnd;
	
	public ActivityLine(String key,String value,int start,int end,int posStart,int posEnd){
		this.key=key;
		this.value=value;
		this.start=start;
		this.end=end;
		this.posStart=posStart;
		this.posEnd=posEnd;
	}
}
/**
 * A constituent waiting on the agenda: a category label plus the span of
 * sentence positions it covers.
 */
class AgendaELement {
	/** Category label of the constituent (a word category or a grammar symbol). */
	public String wordAttr;
	/** Position at which the constituent begins. */
	public int start;
	/** Position at which the constituent ends. */
	public int end;

	/**
	 * Creates an agenda element from a label and a span.
	 *
	 * @param wordAttr category label of the constituent
	 * @param start    position at which the constituent begins
	 * @param end      position at which the constituent ends
	 */
	public AgendaELement(String wordAttr, int start, int end) {
		this.end = end;
		this.start = start;
		this.wordAttr = wordAttr;
	}
}
/**
 * Bottom-up chart parser that decides whether a sentence can be derived from
 * a small hard-coded grammar.
 *
 * <p>Words are numbered from 1; the word at index {@code i} (0-based) covers
 * the span {@code i+1 .. i+2}. A sentence of {@code n} words is legal when the
 * chart ends up containing an {@code S} constituent spanning {@code 1 .. n+1}.
 */
public class Proj3 {

	/** Grammar rules: left-hand symbol mapped to its alternative right-hand sides (comma-separated symbols). */
	public HashMap<String,String[]> grammers=new HashMap<String,String[]>();
	/** Lexicon: word category mapped to the words belonging to it. */
	public HashMap<String,String[]> wordsAttrs=new HashMap<String,String[]>();
	/** Constituents found for the sentence most recently passed to {@link #handle(String)}. */
	public ArrayList<ChartLine> chart=new ArrayList<ChartLine>();
	/** Active edges, i.e. rules whose right-hand side is only partly matched. */
	public ArrayList<ActivityLine> activities=new ArrayList<ActivityLine>();
	/** Constituents waiting to be moved into the chart. */
	public ArrayList<AgendaELement> agenda=new ArrayList<AgendaELement>();

	/** Installs the built-in grammar and lexicon. */
	public Proj3(){
		grammers.put("S",new String[]{"NP,VP"});
		grammers.put("NP",new String[]{"ART,N","ART,ADJ,N"});
		grammers.put("VP",new String[]{"V","V,NP"});
		wordsAttrs.put("ART",new String[]{"The","a"});
		wordsAttrs.put("N",new String[]{"cat","mouse","dog"});
		wordsAttrs.put("V",new String[]{"caught","eat","walk"});
	}

	/**
	 * Parses {@code sentence} and leaves the resulting constituents in
	 * {@link #chart}. Any state left over from a previous call is discarded
	 * first, because positions restart at 1 for every sentence and stale
	 * entries would otherwise be mistaken for constituents of the new one.
	 *
	 * @param sentence whitespace-separated words; {@code null} or blank input
	 *                 yields an empty chart
	 */
	public void handle(String sentence){
		chart.clear();
		activities.clear();
		agenda.clear();

		String[] words=tokenize(sentence);
		for(int index=0;index<words.length;index++){
			String attr=lookupCategory(words[index]);
			agenda.add(new AgendaELement(attr,index+1,index+2));
			while(!agenda.isEmpty()){
				AgendaELement ele=agenda.remove(0);
				// Add to the chart.
				chart.add(new ChartLine(ele));
				startRules(ele);
				extendActiveEdges(ele);
			}
		}
	}

	/**
	 * Splits a sentence on runs of whitespace.
	 *
	 * @return the words, or an empty array for {@code null} or blank input
	 */
	private static String[] tokenize(String sentence){
		if(sentence==null){
			return new String[0];
		}
		String trimmed=sentence.trim();
		if(trimmed.length()==0){
			return new String[0];
		}
		return trimmed.split("\\s+");
	}

	/**
	 * Finds the lexicon category of a word.
	 *
	 * @return the category key, or the empty string when the word is not in
	 *         the lexicon; if several categories list the word, the one
	 *         visited last during map iteration is returned
	 */
	private String lookupCategory(String word){
		String attr="";
		for(Entry<String,String[]> entry:wordsAttrs.entrySet()){
			for(String candidate:entry.getValue()){
				if(candidate.equals(word)){
					attr=entry.getKey();
				}
			}
		}
		return attr;
	}

	/**
	 * Starts every rule whose first right-hand symbol is exactly the category
	 * of {@code ele}. The comparison is on the whole symbol: a prefix test
	 * would let an unknown word (empty category) or a symbol such as
	 * {@code N} trigger rules that begin with a different, longer symbol.
	 */
	private void startRules(AgendaELement ele){
		for(Entry<String,String[]> rule:grammers.entrySet()){
			String lhs=rule.getKey();
			for(String rhs:rule.getValue()){
				String[] symbols=rhs.split(",");
				if(!symbols[0].equals(ele.wordAttr)){
					continue;
				}
				if(symbols.length>1){
					// Add an active edge: first symbol matched, symbol at index 1 is next.
					activities.add(new ActivityLine(lhs,rhs,ele.start,ele.end,1,2));
				}else{
					// Single-symbol rule is complete at once.
					agenda.add(new AgendaELement(lhs,ele.start,ele.end));
				}
			}
		}
	}

	/**
	 * Looks through the active edges for those that end where {@code ele}
	 * begins and expect its category next, then either advances them or, when
	 * the rule is fully matched, queues the completed constituent.
	 */
	private void extendActiveEdges(AgendaELement ele){
		// Indexed loop on purpose: the list grows while it is being scanned.
		for(int i=0;i<activities.size();i++){
			ActivityLine edge=activities.get(i);
			String[] symbols=edge.value.split(",");
			if(ele.start!=edge.end||!ele.wordAttr.equals(symbols[edge.posStart])){
				continue;
			}
			if(edge.posStart!=symbols.length-1){
				activities.add(new ActivityLine(edge.key,edge.value,edge.start,ele.end,edge.posStart+1,edge.posEnd+1));
			}else{
				agenda.add(new AgendaELement(edge.key,edge.start,ele.end));
			}
		}
	}

	/**
	 * Reports whether the chart holds an {@code S} constituent covering a
	 * sentence of {@code wordCount} words.
	 */
	private boolean coversSentence(int wordCount){
		for(ChartLine line:chart){
			if(line.start==1&&line.end==wordCount+1&&"S".equals(line.wordAttr)){
				return true;
			}
		}
		return false;
	}

	/**
	 * Reads sentences from standard input, one per line, and prints whether
	 * each one is legal. Stops at end of input.
	 */
	public static void main(String args[]) throws Exception{
		Proj3 p=new Proj3();
		// One reader for the whole session so buffered input is never dropped.
		BufferedReader buffer=new BufferedReader(new InputStreamReader(System.in));
		String str;
		while((str=buffer.readLine())!=null){
			p.handle(str);
			if(p.coversSentence(tokenize(str).length)){
				System.out.println("这个句子是合法的");
			}else{
				System.out.println("这个句子是不合法的");
			}
		}
	}
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics