|
|||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--org.xml.sax.HandlerBase | +--Linkable.Analysis.XHTMLAnalyzer
Field Summary | |
private AuthorSection |
as
|
private Author[] |
authors
|
private java.util.Vector |
contextTrees
|
private ContextSection |
cs
|
private static boolean |
DEBUG
|
private java.util.Vector |
displayID
|
private boolean |
doContexts
|
private boolean |
doingReferences
|
private javax.xml.parsers.SAXParserFactory |
factory
|
private boolean |
finishing
|
private boolean |
firstReference
|
private boolean |
getDocTitle
|
private boolean |
grabAuthor
|
private boolean |
grabReference
|
private java.util.Vector |
knownCitations
|
private java.lang.String |
localURL
|
private Creation |
me
|
private static java.lang.String |
ME
|
private java.lang.String |
moreName
|
private boolean |
moreTitle
|
private boolean |
notInTable
|
private java.lang.String |
pubDate
|
private ReferenceSection |
rs
|
private SentenceTree |
sentence
|
private java.lang.String |
sourceURN
|
private boolean |
startAuthor
|
private boolean |
starting
|
private java.lang.String |
startName
|
private boolean |
startTitle
|
private java.lang.String |
title
|
Constructor Summary | |
XHTMLAnalyzer()
Default constructor creates a plain XHTMLAnalyzer. |
Method Summary | |
private java.io.BufferedReader |
buffer(java.io.InputStreamReader in,
int k)
|
java.util.Vector |
buildCitationList(java.lang.String docURN)
buildCitationList - Return a vector of Citation objects currently known for this item. This will involve calls on the citeref database, which is indexed by document URN. |
java.lang.String |
buildLocalMetaData(java.lang.String doi,
java.lang.String pubDateIn,
Creation c)
Return an XML file that contains original text fragments of bibliographic information gleaned from this archive item. |
Reference[] |
buildRefList(BibData b)
buildRefList - Return an array of Reference objects gleaned from this archive item. |
void |
characters(char[] buf,
int offset,
int len)
Implements the characters interface of the DocumentHandler |
void |
endDocument()
Implements the endDocument interface of the DocumentHandler |
void |
endElement(java.lang.String name)
|
private int[] |
findLocalLink(java.lang.String doc,
int p,
java.lang.String ref)
|
private int |
fullStop(java.lang.String s,
int n,
char c)
|
java.lang.String |
getDate()
|
java.lang.String |
getLinkedText(Reference[] refList,
java.lang.String url)
getLinkedText emits XML for the linked body of the text. |
java.lang.String |
getLinkedTextFinalize()
getLinkedTextFinalize emits XML for finishing off the Surrogate linked text output. |
java.lang.String |
getLinkedTextInitialize()
getLinkedTextInitialize sets up to generate XML for our Surrogate, but not the incantation. |
protected void |
handleEndTag(java.lang.String tag)
|
protected void |
handleStartTag(java.lang.String name,
org.xml.sax.AttributeList attrs)
|
protected void |
handleText(char[] text,
int offset,
int length)
|
private boolean |
isAnH(java.lang.String h)
|
private boolean |
isSizeChange(org.xml.sax.AttributeList attrs)
|
private int |
nextEOS(java.lang.String s,
int n)
finds the end of the sentence. |
java.io.InputStreamReader |
openConn(java.net.URLConnection conn)
|
private java.net.URLConnection |
openURL(java.lang.String url)
|
private int |
processText(java.lang.String s,
int nextPos)
splits text into sentences. |
private java.lang.String |
readInputStream(java.lang.String u)
|
private java.lang.String |
refHasURL(java.lang.String[] xLinks,
java.lang.String[] tags,
java.lang.String ref)
Returns XLink elements or null for each Reference in the list. Note that XLink elements may contain multiple URLs. They each contain "****" where the anchor (the reference in text) is supposed to go. |
private java.lang.String |
resolveTitles(java.lang.String title,
java.lang.String textString)
|
void |
setURL(java.lang.String url,
java.lang.String remoteURL)
gets the URL of the Item to be analyzed, and proceeds to fill up local structures, partially cooked in some cases, the contents of which can be returned on demand by the Surrogate constructor. |
void |
startDocument()
Implements the startDocument interface of the DocumentHandler |
void |
startElement(java.lang.String name,
org.xml.sax.AttributeList attrs)
Implements the startElement interface of the DocumentHandler |
private java.lang.String |
update(java.lang.String document,
int position,
java.lang.String[] tags,
java.lang.String[] xLinks,
SentenceTree sentence)
|
Methods inherited from class org.xml.sax.HandlerBase |
error, fatalError, ignorableWhitespace, notationDecl, processingInstruction, resolveEntity, setDocumentLocator, unparsedEntityDecl, warning |
Methods inherited from class java.lang.Object |
|
Field Detail |
private static final java.lang.String ME
private static final boolean DEBUG
private java.lang.String title
private java.lang.String pubDate
private java.util.Vector displayID
private Author[] authors
private ReferenceSection rs
private AuthorSection as
private ContextSection cs
private java.util.Vector knownCitations
private Creation me
private java.lang.String localURL
private java.lang.String sourceURN
private javax.xml.parsers.SAXParserFactory factory
private java.util.Vector contextTrees
private SentenceTree sentence
private boolean starting
private boolean getDocTitle
private boolean startTitle
private boolean moreTitle
private boolean startAuthor
private boolean grabAuthor
private boolean doContexts
private boolean doingReferences
private boolean grabReference
private boolean finishing
private boolean firstReference
private java.lang.String startName
private java.lang.String moreName
private boolean notInTable
Constructor Detail |
public XHTMLAnalyzer()
Method Detail |
public void setURL(java.lang.String url, java.lang.String remoteURL) throws SurrogateException
url
- is a string of the xhtml item to be analyzed
remoteURL
- is the location on the net of the original item
SurrogateException
- if the url cannot be opened for analysis.
public void startDocument() throws org.xml.sax.SAXException
startDocument
in class org.xml.sax.HandlerBase
public void endDocument() throws org.xml.sax.SAXException
endDocument
in class org.xml.sax.HandlerBase
public void startElement(java.lang.String name, org.xml.sax.AttributeList attrs) throws org.xml.sax.SAXException
startElement
in class org.xml.sax.HandlerBase
public void endElement(java.lang.String name) throws org.xml.sax.SAXException
endElement
in class org.xml.sax.HandlerBase
public void characters(char[] buf, int offset, int len) throws org.xml.sax.SAXException
characters
in class org.xml.sax.HandlerBase
protected void handleStartTag(java.lang.String name, org.xml.sax.AttributeList attrs)
protected void handleEndTag(java.lang.String tag)
protected void handleText(char[] text, int offset, int length)
private int processText(java.lang.String s, int nextPos)
String
- s is the chunk of text that is being processed.
nextPos
- (0-based) is where to pick
up looking for the end of the current sentence.
private int nextEOS(java.lang.String s, int n)
String
- s is the hunk of text currently being scanned.
integer
- n, position at which to start scanning.
Starting at String s, position n, return where the next sentence
starts, or -1 if you run off the end of the string while looking
for end of sentence, or s.length() if s ends with a sentence.
private int fullStop(java.lang.String s, int n, char c)
public java.lang.String buildLocalMetaData(java.lang.String doi, java.lang.String pubDateIn, Creation c)
buildLocalMetaData
in interface RefLinkAnalyzer
public Reference[] buildRefList(BibData b)
buildRefList
in interface RefLinkAnalyzer
public java.util.Vector buildCitationList(java.lang.String docURN)
buildCitationList
in interface RefLinkAnalyzer
public java.lang.String getLinkedTextInitialize()
getLinkedTextInitialize
in interface RefLinkAnalyzer
public java.lang.String getLinkedText(Reference[] refList, java.lang.String url) throws SurrogateException
getLinkedText
in interface RefLinkAnalyzer
The
- array of Reference objects belonging to this Surrogate.
URL
- of the item being analyzed, for Base URL address
SurrogateException
- if URL to be analyzed cannot be opened.
public java.lang.String getLinkedTextFinalize()
getLinkedTextFinalize
in interface RefLinkAnalyzer
private java.net.URLConnection openURL(java.lang.String url) throws SurrogateException
public java.lang.String getDate()
getDate
in interface RefLinkAnalyzer
public java.io.InputStreamReader openConn(java.net.URLConnection conn) throws SurrogateException
private java.io.BufferedReader buffer(java.io.InputStreamReader in, int k)
private java.lang.String readInputStream(java.lang.String u) throws SurrogateException
private java.lang.String refHasURL(java.lang.String[] xLinks, java.lang.String[] tags, java.lang.String ref)
The
- array of References to be processed
private java.lang.String update(java.lang.String document, int position, java.lang.String[] tags, java.lang.String[] xLinks, SentenceTree sentence)
private int[] findLocalLink(java.lang.String doc, int p, java.lang.String ref)
private boolean isAnH(java.lang.String h)
private boolean isSizeChange(org.xml.sax.AttributeList attrs)
private java.lang.String resolveTitles(java.lang.String title, java.lang.String textString)
|
|||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |