Re: General Purpose Dictionary Generator
From: | Arthaey Angosii <arthaey@...> |
Date: | Thursday, October 26, 2006, 8:17 |
On 10/26/06, Alex Fink <a4pq1injbok_0@...> wrote:
> This is a great idea. I'd actually been recently thinking that I should
> convert my conlangs' lexica to a more structured format (they're currently
> in un-marked-up and inconsistently-formatted human-readable text files) so I
> could process them by computer; this would be perfect for that.
It's also similar to my own conversion from Shoebox to XML. I'm
mid-conversion, but I do have an XML Schema. Perhaps it can be used as
a basis for this program, or at least to spur discussion? In either
capacity, it might prove helpful.
A (very small) example file might look like:
<!-- The schema is namespace-qualified, so the instance must declare its target namespace. -->
<lexicon xmlns="http://lexicon.arthaey.com/"
         lexeme-lang="x-foo"
         document-lang="en">
  <person role="author">
    <name>Arthaey Angosii</name>
    <email>arthaey@gmail.com</email>
  </person>
  <entry>
    <lexeme>foo</lexeme>
    <cxs>fu</cxs>
    <word-class>pron</word-class>
    <gloss>whatever</gloss>
    <gloss lang="es">cualquier</gloss>
    <!-- The entry-info group requires a date; this value is only a placeholder. -->
    <date>2006-10-26</date>
  </entry>
</lexicon>
The schema itself supports much more than shown in the example:
multiple pronuncation schemes, definitions in addition to short
glosses, semantic domains, multiple example sentences,
cross-references (such as synonyms), notes, subentries, and senses.
Below my signature (for easy skipping) is the 193-line schema file. (I
would have attached it and not bothered those not interested, but I
assume the listserv kills attachments.)
Please also note that it's my first schema, and as such I may have
done things in less-than-optimal ways just to get it to validate. :P
--
AA
http://conlang.arthaey.com
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema
targetNamespace="http://lexicon.arthaey.com/"
xmlns:lex="http://lexicon.arthaey.com/"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
elementFormDefault="qualified"
attributeFormDefault="unqualified"
>
<!-- definitions of groups -->
<xs:group name="entry-info">
<xs:sequence>
<xs:element name="lexeme" type="lex:textType" />
<!-- pronunciation -->
<xs:sequence maxOccurs="5">
<xs:element name="ad-hoc" type="xs:token" minOccurs="0" />
<xs:element name="cxs" type="xs:token" minOccurs="0" />
<xs:element name="ipa" type="xs:token" minOccurs="0" />
<xs:element name="sampa" type="xs:token" minOccurs="0" />
<xs:element name="x-sampa" type="xs:token" minOccurs="0" />
</xs:sequence>
<xs:element name="word-class" type="xs:token" />
<xs:element name="gloss" type="lex:textType" />
<xs:element name="definition" type="lex:textType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="domain" type="lex:textType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="example" minOccurs="0" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="text" type="lex:textTypeRequired"
maxOccurs="unbounded"/>
<xs:element name="date" type="xs:date" />
<xs:element name="note" type="lex:noteType"
minOccurs="0" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="xref" type="lex:xrefType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="note" type="lex:noteType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="subentry" minOccurs="0" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="lexeme" type="lex:textType" />
<!-- pronunciation -->
<xs:sequence minOccurs="0" maxOccurs="5">
<xs:element name="ad-hoc" type="xs:token" minOccurs="0" />
<xs:element name="cxs" type="xs:token" minOccurs="0" />
<xs:element name="ipa" type="xs:token" minOccurs="0" />
<xs:element name="sampa" type="xs:token" minOccurs="0" />
<xs:element name="x-sampa" type="xs:token" minOccurs="0" />
</xs:sequence>
<xs:element name="word-class" type="xs:token" minOccurs="0" />
<xs:element name="gloss" type="lex:textType" />
<xs:element name="definition" type="lex:textType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="domain" type="lex:textType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="example" minOccurs="0"
maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="text" type="lex:textTypeRequired"
maxOccurs="unbounded"/>
<xs:element name="date" type="xs:date" />
<xs:element name="note" type="lex:noteType"
minOccurs="0" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="xref" type="lex:xrefType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="note" type="lex:noteType"
minOccurs="0" maxOccurs="unbounded" />
<xs:element name="date" type="xs:date" minOccurs="0" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="date" type="xs:date" />
</xs:sequence>
</xs:group>
<!--
  nameGroup: a person's name, given either as one free-form name element or
  as structured parts (personal, middle, family), each allowing up to three
  values.
-->
<xs:group name="nameGroup">
  <xs:choice>
    <!-- single free-form name -->
    <xs:element name="name" type="xs:token" />

    <!-- full name, split into parts -->
    <xs:sequence>
      <xs:element name="personal-name" type="xs:token" maxOccurs="3" />
      <!-- optional: not everyone has a middle name -->
      <xs:element name="middle-name" type="xs:token"
                  minOccurs="0" maxOccurs="3" />
      <xs:element name="family-name" type="xs:token" maxOccurs="3" />
    </xs:sequence>
  </xs:choice>
</xs:group>
<!-- definitions of simple types -->
<!-- xrefEnum: the closed set of relation kinds a cross-reference may carry. -->
<xs:simpleType name="xrefEnum">
  <xs:restriction base="xs:token">
    <xs:enumeration value="antonym"/>
    <xs:enumeration value="etymology"/>
    <xs:enumeration value="see"/>
    <xs:enumeration value="synonym"/>
  </xs:restriction>
</xs:simpleType>
<!-- definitions of complex types -->
<!-- textTypeRequired: token text that must carry a lang attribute. -->
<xs:complexType name="textTypeRequired">
  <xs:simpleContent>
    <xs:extension base="xs:token">
      <xs:attribute name="lang"
                    type="xs:language"
                    use="required"/>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- textType: token text with an optional lang attribute. -->
<xs:complexType name="textType">
  <xs:simpleContent>
    <xs:extension base="xs:token">
      <!-- "optional" is the schema default for use; spelled out for clarity -->
      <xs:attribute name="lang"
                    type="xs:language"
                    use="optional"/>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!--
  noteType: free-form mixed content. Text may be interleaved with any
  elements from any namespace (validated laxly), plus an optional type
  attribute. Written in the shorthand form, which is defined as a
  restriction of xs:anyType.
-->
<xs:complexType name="noteType" mixed="true">
  <xs:sequence>
    <xs:any namespace="##any"
            processContents="lax"
            minOccurs="0"
            maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="type" type="xs:token"/>
</xs:complexType>
<!-- xrefType: textType content plus a required type attribute drawn from xrefEnum. -->
<xs:complexType name="xrefType">
  <xs:simpleContent>
    <xs:extension base="lex:textType">
      <xs:attribute name="type"
                    type="lex:xrefEnum"
                    use="required"/>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- definitions of complex elements -->
<!--
  lexicon: the document root. Holds exactly one person, then any number of
  entries, then any number of notes. The two language attributes are
  required; src is optional.
-->
<xs:element name="lexicon">
  <xs:complexType>
    <xs:sequence>

      <!-- person: a name (either form of nameGroup) followed by optional
           contact details and notes; role is mandatory -->
      <xs:element name="person">
        <xs:complexType>
          <xs:sequence>
            <xs:group ref="lex:nameGroup"/>
            <xs:element name="email" type="xs:token" minOccurs="0" maxOccurs="unbounded"/>
            <xs:element name="url" type="xs:anyURI" minOccurs="0" maxOccurs="unbounded"/>
            <xs:element name="note" type="lex:noteType" minOccurs="0" maxOccurs="unbounded"/>
          </xs:sequence>
          <xs:attribute name="role" type="xs:token" use="required"/>
        </xs:complexType>
      </xs:element>

      <!-- entry: either the entry-info content directly, or two or more
           sense elements each wrapping its own entry-info; notes may follow -->
      <xs:element name="entry" minOccurs="0" maxOccurs="unbounded">
        <xs:complexType>
          <xs:sequence>
            <xs:choice>
              <xs:group ref="lex:entry-info"/>
              <xs:element name="sense" minOccurs="2" maxOccurs="unbounded">
                <xs:complexType>
                  <xs:group ref="lex:entry-info"/>
                </xs:complexType>
              </xs:element>
            </xs:choice>
            <xs:element name="note" type="lex:noteType" minOccurs="0" maxOccurs="unbounded"/>
          </xs:sequence>
          <xs:attribute name="type" type="xs:token"/>
        </xs:complexType>
      </xs:element>

      <!-- document-level notes -->
      <xs:element name="note" type="lex:noteType" minOccurs="0" maxOccurs="unbounded"/>

    </xs:sequence>
    <xs:attribute name="src" type="xs:anyURI"/>
    <xs:attribute name="lexeme-lang" type="xs:language" use="required"/>
    <xs:attribute name="document-lang" type="xs:language" use="required"/>
  </xs:complexType>
</xs:element>
</xs:schema>