module idlSTARTSdef {

interface STARTSServer {

/** EXCEPTION SPECIFICATIONS **/

/* Declared in the raises clauses of submitQuery, getMetaAttributes,
   getContentSummaryInfo and getCSTermStatData.  Going by its name it
   reports unacceptable client input (not spelled out in this file).
   reason: text explaining why the exception was raised. */
exception EXC_BadInput {
    string reason;
};

/* Declared only in the raises clause of submitQuery.  Presumably reports
   an I/O failure while the query is being serviced (inferred from the
   name).
   reason: text explaining why the exception was raised. */
exception EXC_IO {
    string reason;
};

/* Declared in the raises clauses of submitQuery, getMetaAttributes,
   getContentSummaryInfo, getCSTermStatData and getResourceMetadata.
   Presumably reports a problem with the addressed source (inferred from
   the name).
   reason: text explaining why the exception was raised. */
exception EXC_Source {
    string reason;
};

/* Declared only in the raises clause of submitQuery.  Presumably reports
   that a source failed while evaluating the query (inferred from the
   name).
   reason: text explaining why the exception was raised. */
exception EXC_SourceQuery {
    string reason;
};

/* Declared only in the raises clause of submitQuery.  Presumably reports
   a query the server is unable to process (inferred from the name).
   reason: text explaining why the exception was raised. */
exception EXC_UnsupportedQuery {
    string reason;
};

 

/** CONSTANT SPECIFICATIONS **/

/* Sentinel for sCSTermStatDataReqElement.previousLastTerm meaning "there
   is no previous term": the client wants content summary term statistics
   starting at the first term of the field.  The value is chosen so that
   it should NOT match any term in the source. */
const string noPreviousLastCSTerm = "a@*#$^#*#@%$";

 

/** DATA TYPE SPECIFICATIONS **/

/* An unbounded list of sources, one string per source.  Used by
   sQuery.sources, sQRDocument.sources, sQResult.sources and the
   `sources` parameter of createQuery. */
typedef sequence<string> sourceList;

 

/* An unbounded list of languages, one string per language. */
typedef sequence<string> languageSeq;

 

/* A fully qualified field: the attribute set it belongs to plus the
   field name within that set. */
struct qualifiedField {
    string attrSet;
    string fieldName;
};

 

/* An unbounded list of fully qualified fields. */
typedef sequence<qualifiedField> qualifiedFieldSeq;

 

/* A language-qualified field: a fully qualified field together with a
   list of languages. */
struct langQualifiedField {
    qualifiedField field;
    languageSeq    languages;
};

 

/* An unbounded list of language-qualified fields. */
typedef sequence<langQualifiedField> langQualifiedFieldSeq;

 

/* A fully qualified modifier: the attribute set it belongs to plus the
   modifier string. */
struct qualifiedModifier {
    string attrSet;
    string modifierString;
};

 

/* A language-qualified modifier: a fully qualified modifier together
   with a list of languages. */
struct langQualifiedModifier {
    qualifiedModifier modifier;
    languageSeq       languages;
};

 

/* An unbounded list of language-qualified modifiers. */
typedef sequence<langQualifiedModifier> langQualifiedModifierSeq;

 

/* One field/modifier pairing.  Source metadata uses a list of these
   (sMetaAttributes.fieldModifierCombinations) to state which
   combinations are allowed. */
struct fieldModifierCombination {
    qualifiedModifier modifier;
    qualifiedField    field;
};

 

/* An unbounded list of field/modifier combinations. */
typedef sequence<fieldModifierCombination> fieldModifierCombinationSeq;

 

/* The query parts a source may support: a filter expression and/or a
   ranking expression.  One flag per part; appears in source metadata as
   sMetaAttributes.queryPartsSupported. */
struct queryParts {
    boolean filter;
    boolean ranking;
};

 

/* A tokenizer as reported in the source metadata attributes: its name
   and the list of languages it supports. */
struct tokenizer {
    string      name;
    languageSeq languages;
};

 

/* An unbounded list of tokenizers. */
typedef sequence<tokenizer> tokenizerSeq;

 

/* A stop word list: an unbounded sequence of words, one string each. */
typedef sequence<string> stopWordSeq;

 

/* Direction in which results are sorted: ascending or descending. */
enum sortOrder {
    ascending,
    descending
};

 

/* A result sort specification: the qualified field to sort on plus the
   direction (ascending or descending) for that field.

   The second member is named `sortOrder`, the same identifier as its
   enum type.  Under the OMG IDL scoping rules an unqualified use of a
   type name introduces that name into the current scope, where it may
   not then be redefined, so `sortOrder sortOrder;` can be rejected by
   strict IDL compilers.  Writing the type as a globally qualified name
   (leading `::`) introduces nothing into the struct's scope; the member
   name, its type and the generated bindings are all unchanged. */
struct answerSortSpec {
    qualifiedField fieldSpec;
    ::idlSTARTSdef::STARTSServer::sortOrder sortOrder;
};

 

/* An unbounded list of result sort specifications. */
typedef sequence<answerSortSpec> answerSortSpecSeq;

 

/* A qualified term: a qualified field plus a term string,
   e.g. (basic-1 author) "lagoze". */
struct qualifiedTerm {
    qualifiedField field;
    string         term;
};

 

/* A qualified field paired with the value of that field. */
struct fieldValuePair {
    qualifiedField field;
    string         fieldVal;
};

 

/* An unbounded list of field/value pairs. */
typedef sequence<fieldValuePair> fieldValuePairSeq;

 

/* One mbasic1-meta attribute/value pair: the qualified attribute and
   its value as a string. */
struct metaValue {
    qualifiedField attr;
    string         value;
};

 

/* An unbounded list of meta attribute/value pairs. */
typedef sequence<metaValue> metaValueSeq;

 

/* Per-document term statistics attached to search results. */
struct termStat {
    qualifiedTerm term;        /* the qualified term the numbers refer to */
    long          termFreq;    /* frequency of the term in the document */
    float         termWeight;  /* weight of the term in the document */
    long          docFreq;     /* number of documents in the source in
                                  which the term appears */
};

 

/* An unbounded list of document term statistics. */
typedef sequence<termStat> termStatSeq;

 

/* Content summary term statistic, source-frequency flavour. */
struct CSSourceFreq {
    string term;
    long   frequency;  /* number of times the term appears in the source */
};

 

/* Content summary term statistic, document-frequency flavour. */
struct CSDocumentFreq {
    string term;
    long   frequency;  /* number of documents in the source in which the
                          term appears */
};

 

/* Content summary term statistic carrying both counts for one term. */
struct CSTermDocumentFreq {
    string term;
    long   sourceFrequency;  /* number of times the term appears in the
                                source */
    long   docFrequency;     /* number of documents in which it appears */
};

 

/* Which kind of term statistic a content summary entry carries:
   sourceType - number of times the term appears in the source,
   docType    - number of documents in the source containing the term,
   bothType   - both of the above. */
enum CSTermStatsType { sourceType, docType, bothType };

 

/* Holds exactly one of the three content summary term statistic
   structs; the CSTermStatsType discriminator selects which member is
   present. */
union CSTermStat switch (CSTermStatsType) {
    case sourceType:
        CSSourceFreq termStatistics;
    case docType:
        CSDocumentFreq documentStatistics;
    case bothType:
        CSTermDocumentFreq termDocStatistics;
};

 

/* An unbounded list of content summary term statistics. */
typedef sequence<CSTermStat> CSTermStatSeq;

 

/* Description of one source, as returned by a resource metadata
   request (see sResource.sourceList). */
struct sourceDescription {
    string sourceID;
    string metadataURL;
    string metadataSyntax;
};

 

/* An unbounded list of source descriptions. */
typedef sequence<sourceDescription> sourceDescriptionSeq;

 

 

/* A STARTS query.  This is the argument of submitQuery and the value
   returned by createQuery. */
struct sQuery {
    string            version;
    string            filterExp;            /* filter expression */
    string            rankingExp;           /* ranking expression */
    boolean           stopWordsP;
    string            defaultAttributeSet;
    string            defaultLanguage;
    sourceList        sources;
    qualifiedFieldSeq answerFields;
    answerSortSpecSeq sortFieldList;        /* field + direction pairs */
    float             minScore;
    long              maxDocs;
};

 

 

/* One document of the "hit list" inside a query result set. */
struct sQRDocument {
    string            version;
    float             rawScore;
    sourceList        sources;
    fieldValuePairSeq fieldValuePairs;
    termStatSeq       termStats;       /* per-document term statistics */
    long              docKBytes;
    long              docNumTokens;
};

 

/* An unbounded list of result documents. */
typedef sequence<sQRDocument> sQRDocumentSeq;

 

/* A query result set together with the sequence of documents it
   contains. */
struct sQResult {
    string         version;
    sourceList     sources;
    string         filterExp;
    string         rankingExp;
    long           numDocs;
    sQRDocumentSeq documents;
};

 

/* What a query submission returns (see submitQuery): an unbounded list
   of result sets. */
typedef sequence<sQResult> sQResultSeq;

 

 

/* Source metadata attributes: the value returned by a meta attributes
   request (getMetaAttributes). */
struct sMetaAttributes {
    string                      version;
    string                      source;
    langQualifiedFieldSeq       fieldsSupported;
    langQualifiedModifierSeq    modifiersSupported;
    /* the allowed field/modifier combinations */
    fieldModifierCombinationSeq fieldModifierCombinations;
    /* which of filter / ranking are supported */
    queryParts                  queryPartsSupported;
    /* fixed two-element array; presumably the lower and upper score
       bounds - TODO confirm the element order */
    float                       scoreRange[2];
    string                      rankingAlgorithmID;
    tokenizerSeq                tokenizerIDList;
    string                      sampleResultsURL;
    stopWordSeq                 stopWordList;
    boolean                     turnOffStopWords;
    string                      defaultMetaAttributeSet;
    /* mbasic1-meta attribute/value pairs */
    metaValueSeq                metaAttributes;
};

 

/* The "fixed fields" part of a source's content summary plus the list
   of fields for which term statistics exist.  This is what a
   getContentSummaryInfo request returns.

   NOTE: if fields are not indicated in the terms for a server, the
   termStatFields sequence returned by getContentSummaryInfo should hold
   exactly one element.  That element gives the qualified field (field
   name and attribute set) the client must put in its
   CSTermStatDataReqList when it asks getCSTermStatData for
   non-field-specified term statistics. */
struct sContentSummaryInfo {
    string                version;
    boolean               stemming;
    boolean               stopWords;
    boolean               caseSensitive;
    boolean               fieldsIndicatedInTerms;
    long                  numDocuments;
    langQualifiedFieldSeq termStatFields;
};

 

 

/* Content summary term statistics are broken out per field and per
   language, so each (field, language) combination has to be named
   individually.  This struct names one such combination. */
struct oneLangQualifiedField {
    qualifiedField field;
    string         language;
};

 

/* One entry in the list of content summary fields for which term
   statistics are being requested.

   Getting all the terms of a field may take several calls to the server
   (CORBA chokes on large objects), so previousLastTerm tells the server
   where to resume.  To start at the first term of the field, set
   previousLastTerm to the IDL constant noPreviousLastCSTerm.

   NOTE: if fields are not indicated in the content summary terms for a
   server, CSfield carries the field name and attribute set that the
   server announced through the sole element of
   sContentSummaryInfo.termStatFields. */
struct sCSTermStatDataReqElement {
    oneLangQualifiedField CSfield;
    string                previousLastTerm;
};

 

/* The content summary fields for which term statistics are wanted; each
   element also records the last term for which term statistics were
   already received for that field. */
typedef sequence<sCSTermStatDataReqElement> CSTermStatDataReqList;

 

/* One entry in the list of results for the content summary fields whose
   term statistics were requested.

   Some fields have too many terms for CORBA to handle in one shot.  In
   that case the server returns only some of the terms and leaves
   containsLastTerm false. */
struct sCSTermStatDataElement {
    oneLangQualifiedField CSfield;
    CSTermStatSeq         CSTermStats;
    /* set to true by the server if CSTermStats includes the last term
       stat for this field */
    boolean               containsLastTerm;
};

 

/* The reply to a request for source content summary term statistics
   (see getCSTermStatData): one element per requested field. */
typedef sequence<sCSTermStatDataElement> sCSTermStatData;

 

 

/* The reply to a resource description request (getResourceMetadata).
   Note that the member `sourceList` is a sequence of sourceDescription
   values, not the string-based sourceList typedef. */
struct sResource {
    string               version;
    sourceDescriptionSeq sourceList;
};

 

 

/** METHOD SPECIFICATIONS **/

/* Convenience operation: asks the server to build a query object,
   filling in server-side defaults for every sQuery field that is not
   given as an argument here.

   A client may equally well construct an sQuery from scratch.  The
   danger is that it then picks values the server cannot process;
   hopefully those cases are handled with proper exceptions and polite
   error messages in the code.

   NOTE(review): this operation declares no raises clause, so it cannot
   report a user exception for bad arguments - confirm that is
   intended. */
sQuery createQuery (in string     filterExp,
                    in string     rankingExp,
                    in sourceList sources);

 

/* Submits a query and returns the resulting sequence of result sets. */
sQResultSeq submitQuery (in sQuery query)
    raises (EXC_BadInput,
            EXC_Source,
            EXC_IO,
            EXC_SourceQuery,
            EXC_UnsupportedQuery);

 

/* Requests the meta attributes of the given source. */
sMetaAttributes getMetaAttributes (in string source)
    raises (EXC_BadInput, EXC_Source);

 

/* Requests the content summary "header" of the given source. */
sContentSummaryInfo getContentSummaryInfo (in string source)
    raises (EXC_BadInput, EXC_Source);

 

/* Requests content summary term statistics for the given source.
   requestFields lists the fields wanted; each element carries the
   previousLastTerm that tells the server where to resume for that
   field. */
sCSTermStatData getCSTermStatData (in string                source,
                                   in CSTermStatDataReqList requestFields)
    raises (EXC_BadInput, EXC_Source);

 

/* Requests the resource metadata: the descriptions of the sources. */
sResource getResourceMetadata ()
    raises (EXC_Source);

 

}; /* end interface STARTSServer */

}; /* end module idlSTARTSdef */