# Glossary format converter, v0.93, 28 Feb 2011
# by Nathan Rasmussen, primarily written June 2010

#   Copyright 2011 LTAC Global
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

use strict;
use warnings;
use XML::Rules;
	# maintainer: in rules, use 'return ()' and not 'return undef'
	# (works around a bug: if a first child element returns undef,
	# parent's _content acquires a spurious '' entry)

# arguments: input format & filename, output format & filename
# both i/o arguments examine requested extension for format
# input format can be txt for special processing
# output defaults to STDOUT if no filename before extension
# output will overwrite files if given an existing name

my ($in_file, $out_file) = @ARGV[0,1];
my ($in_format, $quick_input, $out_format);
my $file_ext = qr/(?:gml|olf|tbx|txt|utx)$/i;

if ($in_file =~ /($file_ext)/) {
	$in_format = lc $1;
} else {
	die "input filename $in_file lacks a valid extension\n";
}

if ($in_format eq 'txt') {
	$quick_input = 1;
	$in_format = 'utx';
}

# per-extension import behavior
my %import = (
	utx => \&import_utx,
	gml => \&import_gml,
	tbx => \&import_tbx,
	olf => \&import_olf,
);
ref eq 'CODE' or die "Bad hacker! Coderefs only!" for values %import;

if ($out_file =~ /($file_ext)/) {
	$out_format = lc $1;
} else {
	die "output filename $out_file lacks a valid extension\n";
}

die "invalid output format: select one of .gml .olf .tbx .utf\n"
  if $out_format eq 'txt';

# per-extension export behavior
my %export = (
	utx => \&export_utx,
	gml => \&export_gml,
	tbx => \&export_tbx,
	olf => \&export_olf,
);
ref eq 'CODE' or die "Bad hacker! Coderefs only!" for values %export;

# get it in (passing filename for routines to open)
our $glossary = $import{$in_format}->($in_file)
  or die "could not import glossary from $in_file\n";
	# UTX (and only UTX) also consults global $quick_entry

# $glossary is [$src, $tgt, $subject_field, $global_note, @record]
# items in @record are hashrefs, each a single entry
# these vars get copied a few times going to and from fns
# but that's cleaner than using globals, yo?

# clean it up
$glossary = validate_and_process ($glossary, $quick_input);

# ready for output (open the file and select its filehandle)
my $out_fh;
unless ($out_file =~ /^\.?$file_ext$/) {
	our $!;
	open $out_fh, '>:encoding(utf-8-strict)', $out_file
		or die "cannot open $out_file for writing: $!";
	select $out_fh;
} # if $out_file IS just an extension, STDOUT will do fine.

# spit it out
$export{$out_format}->($glossary)
  or die "could not export glossary as .$out_format\n";




# here are all those functions we called
# they are outdented for space's sake
# but they all use the same names for their comparable vars anyway
# so it's just like reading main-sequence code

sub import_utx {
my $in_file = shift;

open my $in, '<:encoding(utf-8-strict)', $in_file
	or die "cannot open $in_file for reading\n";

my ($src, $tgt, $subject_field, $global_note, @record);

# first header line
$_ = <$in>;
die "not a UTX-S file\n" unless $quick_input or /^#UTX/;
s/\s*$//; # chomp all trailing whitespace: space, CR, LF, whatever.
($src, $tgt) = ($1, $2) if m{([a-zA-Z-]*)/([a-zA-Z-]*)};
$subject_field = $1 if /subject: ?([^;]+)/i; # error later if not
$global_note = $1 if /comment: ?([^;]+)/i;

# second header line
$_ = <$in>;
s/\s*$//;
s/^#//;
my @field_name = split /\t/;
die "no src column\n" unless $field_name[0] eq 'src';
die "no tgt column\n" unless $field_name[1] eq 'tgt';
# a 'validating' UTX parser would also check for src:pos
# but we defer POS issues here

# body lines
while (<$in>) {
	next if /^#/;
	s/\s*$//;
	next if /^$/;
	# turn line to list, then list to hash
	my @field = split /\t/;
	my %record;
	%record = map {$field_name[$_] => $field[$_]} (0..$#field); 
	# clear out blanks, except src and tgt
	for my $field (grep {$_ ne 'src' and $_ ne 'tgt'} keys %record) {
		delete $record{$field} unless $record{$field} =~ /\S/
	}
	push @record, \%record;
}

return [$src, $tgt, $subject_field, $global_note, @record];

} # end import_utx



sub import_gml {
my $in_file = shift;
my ($src, $tgt, $subject_field, $global_note, @record);
my @rules = (

	term => sub {
		my @result;
		push @result, 'term', $_[1]->{_content};
		# grab data from attributes, give them correct names
		my %transl = (
			'pos:partOfSpeech' => 'pos',
			'ex:example' => 'ex',
			's:source' => 'exSource',
		);
		for my $attr (keys %{$_[1]}) {
			push @result, $transl{$attr} => $_[1]->{$attr}
				if exists $transl{$attr};
		}
		return @result; # as key/value pairs into langEntry
	},

	definition => sub {
		my @result;
		push @result, 'def' => $_[1]->{_content};
		push @result, 'defSource', $_[1]->{source}
			if defined $_[1]->{source};
		return @result;
	},

	langEntry => sub {
		my $lang = delete $_[1]->{'xml:lang'};
		'%lang' => [$lang, $_[1]];
	},

	comment => 'content',

	glossentry => sub {
		my $langs = $_[1]->{lang};
		# check for correctness: 2 langEntries, one in src lang
		my $summary = join ', ', 
			map {"$_ $langs->{$_}{term}"} keys %$langs;
		# example $summary: "uk-UA суп, en-US soup"
		unless (keys %$langs == 2) {
			warn "need exactly two langEntries: $summary\n"; 
			return (); # skip this glossentry
		}
		my $srclang = $_[3]->[-1]{srclang};
		unless (exists $langs->{$srclang}) {
			warn "no $srclang langEntry among $summary\n";
			return (); # skip
		}
		# build an entry, with an extra field for tgtlang
		my %entry; 
		my ($tgtlang) = grep {$_ ne $srclang} keys %$langs;
		$entry{tgtlang} = $tgtlang;
		$entry{'src:note'} = $_[1]->{comment}
			if exists $_[1]->{comment};
		$srclang = $langs->{$srclang}; # from key string to hashref
		$tgtlang = $langs->{$tgtlang};
		$entry{src} = delete $srclang->{term};
		$entry{tgt} = delete $tgtlang->{term};
		foreach my $datum (keys %$srclang) {
			$entry{"src:$datum"} = $srclang->{$datum};
		}
		foreach my $datum (keys %$tgtlang) {
			$entry{"tgt:$datum"} = $tgtlang->{$datum};
		}
		return \%entry;
	},

	glossary => sub {
		# glossary-wide data categories ...
		$src = $_[1]->{srclang};
		$subject_field = $_[1]->{'sf:subjectField'};
		$global_note = $_[1]->{comment}
			if exists $_[1]->{comment};
		# ... one of which needs a correctness check
		my %tgt;
		$tgt{$_}++ foreach map { $_->{tgtlang} } 
				@{$_[1]->{_content}};
		warn "no valid entries in file\n" if keys %tgt == 0;
		unless (keys %tgt < 2) {
			warn 'need exactly one target language: ', 
				join (', ', keys %tgt),
				"\n";
			}
		$tgt = (%tgt)[0]; # arbitrary if multiple!
		# per-entry data categories
		@record = @{$_[1]->{_content}};
		delete $_->{tgtlang} foreach @record;
	},
);

my $glossML_parser = XML::Rules->new (
	rules => \@rules,
	namespaces => {
		'http://www.maxprograms.com/gml' => '',
		'http://www.isocat.org/rest/dc/149' => 'ex',
		'http://www.isocat.org/rest/dc/396' => 'pos',
		'http://www.isocat.org/rest/dc/471' => 's',
		'http://www.isocat.org/rest/dc/489' => 'sf',
		'*' => 'keep',
	},
 	stripspaces => 3|8,
);

$glossML_parser->parsefile($in_file);

return [$src, $tgt, $subject_field, $global_note, @record];

} # end import_gml



sub import_tbx {
my $in_file = shift;
my ($src, $tgt, $subject_field, $global_note, @record);

my @rules = (

	term => 'content',

	termNote => sub {
		return (pos => $_[1]->{_content})
			if $_[1]->{type} eq 'partOfSpeech';
		return (); # otherwise
	},

	admin => sub {
		return (source => $_[1]->{_content})
			if $_[1]->{type} eq 'source';
		return (); # otherwise
	},

	descrip => sub {
		return (ex => $_[1]->{_content})
			if $_[1]->{type} eq 'context';
		return (def => $_[1]->{_content})
			if $_[1]->{type} eq 'definition';
		return (subj => $_[1]->{_content})
			if $_[1]->{type} eq 'subjectField';
		return (); # otherwise		
	},

	descripGrp => sub {
		# if there's a source, figure out what kind
		# and replace it with the correct kind
		if (my $source = delete $_[1]->{source}) {
			$_[1]->{defSource} = $source
				if exists $_[1]->{def};
			$_[1]->{exSource} = $source
				if exists $_[1]->{ex};
		}
		# then pass the contents through
		return %{$_[1]};
	},

	'termGrp, ntig, tig' => 'pass',

	langSet => sub {
		my $lang = delete $_[1]->{'xml:lang'};
		'%lang' => [$lang, $_[1]];
	},

	note => [ # actions depend on tag path
		titleStmt => sub {
			local $_ = $_[1]->{_content};
			# strip out language declarations
			$src = $1 if s/\s*src:\s+([a-zA-Z-]+)\s*//;
			$tgt = $1 if s/\s*tgt:\s+([a-zA-Z-]+)\s*//;
			# use anything that's left
			$global_note = $_ if $_;
		},
		termEntry => 'content',
		sub {return ();}, # elsewhere, not convertible
	],

	termEntry => sub {
		my $langs = $_[1]->{lang};
		# check for correctness: 2 langEntries, one in src lang
		my $summary = join ', ', 
			map {"$_ $langs->{$_}{term}"} keys %$langs;
		# example $summary: "uk-UA суп, en-US soup"
		unless (keys %$langs == 2) {
			warn "need exactly two langSets: $summary\n"; 
			return (); # skip this termEntry
		}
		# insert standard gripe about 'return undef' misbehaving
		unless (exists $langs->{$src}) {
			warn "no $src langSet among $summary\n";
			return (); # skip
		}
		unless (exists $langs->{$tgt}) {
			warn "no $tgt langSet among $summary\n";
			return (); # skip
		}
		# build an entry, with extra field for subj
		my %entry; 
		unless ($entry{subj} = $_[1]->{subj}) {
			warn "skipped termEntry, no subjectField\n";
			return (); # can't use it!
		}
		$entry{'src:note'} = $_[1]->{note}
			if exists $_[1]->{note};
		my $srclang = $langs->{$src}; # from key to hashref
		my $tgtlang = $langs->{$tgt};
		$entry{src} = delete $srclang->{term};
		$entry{tgt} = delete $tgtlang->{term};
		foreach my $datum (keys %$srclang) {
			$entry{"src:$datum"} = $srclang->{$datum};
		}
		foreach my $datum (keys %$tgtlang) {
			$entry{"tgt:$datum"} = $tgtlang->{$datum};
		}
		return \%entry;
	},

	'body' => sub {
		# glossary-wide data categories, 
		# two of which need a correctness check
		# check that all entries have same subject field
		my %subj;
		$subj{$_}++ foreach map { delete $_->{subj} } 
				@{$_[1]->{_content}};
		warn "no valid entries in file\n" if keys %subj == 0;
		unless (keys %subj < 2) {
			warn 'need exactly one subject field: ', 
				join (', ', keys %subj),
				"\n";
			}
		$subject_field = (%subj)[0]; # arbitrary if multiple!
		# per-entry data categories
		@record = @{$_[1]->{_content}};
		return ();
	},
);

my $tbx_parser = XML::Rules->new (
	rules => \@rules,
	stripspaces => 3|8,
);

$tbx_parser->parsefile($in_file);

return [$src, $tgt, $subject_field, $global_note, @record];
}

sub import_olf {
my $in_file = shift;
my ($src, $tgt, $subject_field, $global_note, @record);
my %de_olify = (
	'x-properNoun' => 'properNoun',
	adj => 'adjective',
	adv => 'adverb',
);

my @rules = (

	canForm => sub {
		return (
			term => $_[1]->{_content},
			lang => $_[1]->{'xml:lang'}
		);
	},

	ptOfSpeech => sub {
		my $pos = $_[1]->{_content};
		$pos = $de_olify{$pos} if defined $de_olify{$pos};
		return (pos => $pos);
	},

	subjField => sub {
		my $subj = $_[1]->{_content};
		$subj =~ s/^x-//;
		return (subj => $subj);
	},

	note => sub {(note => $_[1]->{_content})},

	'tbx:descrip, tbx:admin' => 'content by type',

	example => sub {
		my @return;
		push @return, ex => $_[1]->{context}
			if exists $_[1]->{context};
		push @return, exSource => $_[1]->{source}
			if exists $_[1]->{source};
		return @return;
	},

	definition => sub {
		my @return;
		push @return, def => $_[1]->{definition}
			if exists $_[1]->{definition};
		push @return, defSource => $_[1]->{source}
			if exists $_[1]->{source};
		return @return;
	},

	'keyDC, generalDC, monoDC, monoSem, transfer, mono' => 'pass',

	entry => sub {
		my @return;
		# push source entries into a list, to be shifted off later
		push @return, ('@src', $_[1]) if exists $_[1]->{TrTarget};
		# push target entries into a hash by ID
		push @return, 
			('%tgt', [delete $_[1]->{MonoUserId} => $_[1]])
			if exists $_[1]->{MonoUserId};
		@return;
	},

	body => sub {
		# unload the src-entries list, and pair each w/ target
		# along the way, tally languages and subject fields used
		my (%src, %tgt, %subj);
		while (my $s = shift @{$_[1]->{src}}) {
			my $t = delete $s->{TrTarget}; # ID string
			unless ($t = $_[1]->{tgt}{$t}) { # and now hashref
				warn "no such MonoUserId as $t\n";
				next;
			}
			# make tally marks, remove extraneous info
			$src{delete $s->{lang}}++;
			$tgt{delete $t->{lang}}++;
			$subj{delete $s->{subj}}++;
			$subj{delete $t->{subj}}++;
			my %entry;
			$entry{src} = delete $s->{term};
			$entry{tgt} = delete $t->{term};
			$entry{'src:note'} = delete $s->{note};
			foreach my $datum (keys %$s) {
				$entry{"src:$datum"} = $s->{$datum};
			}
			foreach my $datum (keys %$t) {
				$entry{"tgt:$datum"} = $t->{$datum};
			}
			push @record, \%entry;
		}
		# dispose of the targets now we've got what we need
		undef $_[1]->{tgt};
		# verify that there's only one src, tgt, subj
		unless (keys %src == 1) {
			warn 'need exactly one source language: ', 
				join (', ', keys %src),
				"\n";
			}
		unless (keys %tgt == 1) {
			warn 'need exactly one target language: ', 
				join (', ', keys %tgt),
				"\n";
			}
		unless (keys %subj == 1) {
			warn 'need exactly one subject field: ', 
				join (', ', keys %tgt),
				"\n";
			}
		# and set the glossary-wide vars accordingly
		# if there *are* more than one, pick arbitrarily!
		($src, $tgt, $subject_field) = 
			( (%src)[0], (%tgt)[0], (%subj)[0] );
		return ();
	},

	header => sub { # get the global note out
		$global_note = $_[1]->{note};
	},
);

my $olif_parser = XML::Rules->new (
	rules => \@rules,
	namespaces => {
		'http://www.olif.net/' => '',
		'http://www.lisa.org/TBX-Specification.33.0.html' => 'tbx',
		'*' => 'keep',
	},
	stripspaces => 3|8, # useful in production, obfuscating in test
);

$olif_parser->parsefile($in_file);

return [$src, $tgt, $subject_field, $global_note, @record];
}


sub validate_and_process {

my ($glossary, $quick_input) = @_;
my ($src, $tgt, $subject_field, $global_note, @record) = @$glossary;

# mandatory glossary-wide data
$src = '*SRC*', warn "no source language indicated\n" unless $src;
$tgt = '*TGT*', warn "no target language indicated\n" unless $tgt;
$subject_field = '*SUBJECT_FIELD*', warn "no subject field indicated\n"
	unless $subject_field;

# mandatory per-entry data
# terms can be blank strings, so this all just concerns Part of Speech
my %pos = map {$_, 1} qw (adjective adverb noun properNoun verb);
$pos{''} = 1; # for now
foreach my $record (@record) {
	# src:pos default if using quick input
	$record->{'src:pos'} ||= 'noun' if $quick_input;
	# tgt:pos from magic note if using quick input
	if (
		$quick_input &&
		$record->{'src:note'} &&
		$record->{'src:note'} =~ s/\s+tgt:?pos: ?(\w+)//i
	) {
		$record->{'tgt:pos'} ||= $1
	}
	# picklist validation
	my $spos = $record->{'src:pos'} || ''; # these vars for brevity
	my $tpos = $record->{'tgt:pos'} || '';
	warn "$spos not a convertible part of speech\n"
		unless $pos{$spos}; 
	warn "$tpos not a convertible part of speech\n"
		unless $pos{$tpos}; 
	# if only one pos exists, set other from it
	$record->{'tgt:pos'} = $spos if $spos and not $tpos;
	$record->{'src:pos'} = $tpos if $tpos and not $spos;
	# emit warning if neither pos exists
	warn "entry lacks part of speech\n" if not ($spos or $tpos);
}

return [$src, $tgt, $subject_field, $global_note, @record];

} # end validate_and_process


sub export_utx {
my $glossary = shift;
my ($src, $tgt, $subject_field, $global_note, @record) = @$glossary;

# first header line
my ($sec, $min, $hour, $mday, $mon, $year, undef, undef, undef) = gmtime;
my $timestamp = sprintf "%4d-%02d-%02dT%02d:%02d:%02dZ", 
	$year + 1900, $mon + 1, $mday, $hour, $min, $sec;
print "#UTX-S 1.00; $src/$tgt; $timestamp; subject: $subject_field";
print"; comment: $global_note" if $global_note;
print "\cM\cJ";

# second header line
my @label = qw(src:def src:defSource tgt:def tgt:defSource src:ex src:exSource tgt:ex tgt:exSource src:note);
my %used;
# see what gets used
for my $record (@record) { $used{$_}++ for keys %$record }
# drop the unused and grab the mandatories
@label = (qw(src tgt src:pos tgt:pos), grep { $used{$_} } @label);
print "#", join ("\t", @label), "\cM\cJ";
# @label is employed below

# body
for my $record (@record) {
	print join ("\t", map {$record->{$_} or ''} @label), "\cM\cJ";
}

1;

} # end export_utx

sub export_gml {
my $glossary = shift;
my ($src, $tgt, $subject_field, $global_note, @record) = @$glossary;

# escape ampersand and less-than for XML well-formedness
($src, $tgt, $subject_field, $global_note) = 
	xml_escape($src, $tgt, $subject_field, $global_note);
$subject_field =~ s/"/&quot;/g; # it's an attribute value

print <<EOTag;
<?xml version="1.0" encoding="UTF-8"?>
<glossary version="1.0" srclang="$src"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"	
  xsi:schemaLocation="http://www.maxprograms.com/glossml/GlossML.xsd"
  xmlns:ex="http://www.isocat.org/rest/dc/149"
  xmlns:pos="http://www.isocat.org/rest/dc/396"
  xmlns:s="http://www.isocat.org/rest/dc/471"
  xmlns:sf="http://www.isocat.org/rest/dc/489"
  sf:subjectField="$subject_field"
  xmlns="http://www.maxprograms.com/gml"
>
EOTag

print "  <comment>$global_note</comment>\n" if defined $global_note;

for my $n (@record) {
	# pre-process items for output
	# escape ampersand and less-than
	($_) = xml_escape($_) for values %$n; # list of one, still a list
	# values that will go in attributes need quotes escaped too
	$n->{$_} =~ s/"/&quot;/g for grep {exists $n->{$_}} qw (src:ex src:exSource tgt:ex tgt:exSource src:defSource tgt:defSource);
	# form up pieces for printing
	my $comment = '';
	$comment = "\n    <comment>$n->{'src:note'}</comment>"
		if defined $n->{'src:note'};
	my $src_ex = '';
	if (defined $n->{'src:ex'}) {
		$src_ex .= "\n        ex:example=\"$n->{'src:ex'}\""
	}
	if (defined $n->{'src:exSource'}) {
		$src_ex .= "\n        s:source=\"$n->{'src:exSource'}\""
	}
	my $src_def = '';
	if (defined $n->{'src:def'} or defined $n->{'src:defSource'}) {
		$src_def .= "\n      <definition";
		if (defined $n->{'src:defSource'}) {
			$src_def .= "\n        source=\"$n->{'src:defSource'}\"\n      ";
		}
		if (defined $n->{'src:def'}) {
			$src_def .= ">$n->{'src:def'}</definition>";
		} else {
			$src_def .= "/>";
		}
	}
	my $tgt_ex = '';
	if (defined $n->{'tgt:ex'}) {
		$tgt_ex .= "\n        ex:example=\"$n->{'tgt:ex'}\""
	}
	if (defined $n->{'tgt:exSource'}) {
		$tgt_ex .= "\n        s:source=\"$n->{'tgt:exSource'}\""
	}
	my $tgt_def = '';
	if (defined $n->{'tgt:def'} or defined $n->{'tgt:defSource'}) {
		$tgt_def .= "\n      <definition";
		if (defined $n->{'tgt:defSource'}) {
			$tgt_def .= "\n        source=\"$n->{'tgt:defSource'}\"\n      ";
		}
		if (defined $n->{'tgt:def'}) {
			$tgt_def .= ">$n->{'tgt:def'}</definition>";
		} else {
			$tgt_def .= " />";
		}
	}

	# print up the glossentry
	print <<EOEntry;

  <glossentry>$comment
    <langEntry xml:lang="$src">
      <term
        pos:partOfSpeech="$n->{'src:pos'}"$src_ex
      >$n->{src}</term>$src_def
    </langEntry>
    <langEntry xml:lang="$tgt">
      <term
        pos:partOfSpeech="$n->{'tgt:pos'}"$tgt_ex
      >$n->{tgt}</term>$tgt_def
    </langEntry>
  </glossentry>
EOEntry
}

# close tag
print "</glossary>\n";

1;

} # end export_gml

sub export_tbx {
my $glossary = shift;
my ($src, $tgt, $subject_field, $global_note, @record) = @$glossary;

# escape ampersand and less-than for XML well-formedness
($src, $tgt, $subject_field, $global_note) = 
	xml_escape($src, $tgt, $subject_field, $global_note);
$subject_field =~ s/"/&quot;/g; # it's an attribute value

my $title = <<Open;
      <titleStmt>
        <title />
        <note>src: $src tgt: $tgt</note>
Open
$title .= "        <note>$global_note</note>\n" if defined $global_note;
$title .= "      </titleStmt>\n";

print <<EOHeader;
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE martif SYSTEM "TBXcoreStructV02.dtd">
<martif type="TBX" xml:lang="$src">
  <martifHeader>
    <fileDesc>
$title      <sourceDesc>
        <p>TBX-Glossary export</p>
      </sourceDesc>
    </fileDesc>
    <encodingDesc>
      <p type="DCSName">Glossary.xcs</p>
    </encodingDesc>
  </martifHeader>
  <text>
    <body>
EOHeader

for my $n (@record) {
	# pre-process items for output
	($_) = xml_escape($_) for values %$n; # list of one, still a list
	my $note = defined $n->{'src:note'} ? <<EONote : '';
        <note>$n->{'src:note'}</note>
EONote
	my $src_def = '';
	if (defined $n->{'src:def'} or defined $n->{'src:defSource'}) {
		$src_def .= "          <descripGrp>\n";
		$src_def .= "            <descrip type=\"definition\">$n->{'src:def'}</descrip>\n" if defined $n->{'src:def'};
		$src_def .= "            <admin type=\"source\">$n->{'src:defSource'}</admin>\n" if defined $n->{'src:defSource'};
		$src_def .= "          </descripGrp>\n";
	}
	my $src_ex = '';
	if (defined $n->{'src:ex'} or defined $n->{'src:exSource'}) {
		$src_ex .= "            <descripGrp>\n";
		$src_ex .= "              <descrip type=\"context\">$n->{'src:ex'}</descrip>\n" if defined $n->{'src:ex'};
		$src_ex .= "              <admin type=\"source\">$n->{'src:exSource'}</admin>\n" if defined $n->{'src:exSource'};
		$src_ex .= "            </descripGrp>\n";
	}
	my $tgt_def = '';
	if (defined $n->{'tgt:def'} or defined $n->{'tgt:defSource'}) {
		$tgt_def .= "          <descripGrp>\n";
		$tgt_def .= "            <descrip type=\"definition\">$n->{'tgt:def'}</descrip>\n" if defined $n->{'tgt:def'};
		$tgt_def .= "            <admin type=\"source\">$n->{'tgt:defSource'}</admin>\n" if defined $n->{'tgt:defSource'};
		$tgt_def .= "          </descripGrp>\n";
	}
	my $tgt_ex = '';
	if (defined $n->{'tgt:ex'} or defined $n->{'tgt:exSource'}) {
		$tgt_ex .= "            <descripGrp>\n";
		$tgt_ex .= "              <descrip type=\"context\">$n->{'tgt:ex'}</descrip>\n" if defined $n->{'tgt:ex'};
		$tgt_ex .= "              <admin type=\"source\">$n->{'tgt:exSource'}</admin>\n" if defined $n->{'tgt:exSource'};
		$tgt_ex .= "            </descripGrp>\n";
	}
	print <<EOEntry;

      <termEntry>
        <descrip type="subjectField">$subject_field</descrip>
$note        <langSet xml:lang="$src">
$src_def          <tig>
            <term>$n->{src}</term>
            <termNote type="partOfSpeech">$n->{'src:pos'}</termNote>
$src_ex          </tig>
        </langSet>
        <langSet xml:lang="$tgt">
$tgt_def          <tig>
            <term>$n->{tgt}</term>
            <termNote type="partOfSpeech">$n->{'tgt:pos'}</termNote>
$tgt_ex          </tig>
        </langSet>
      </termEntry>
EOEntry
}

print <<EOFooter;
    </body>
  </text>
</martif>
EOFooter

1;

} # end export_tbx

sub export_olf {
my $glossary = shift;
my ($src, $tgt, $subject_field, $global_note, @record) = @$glossary;

# canonical OLIF subject fields
my %canonical = map {$_ => 1} qw (
	agriculture	audiovisual	aviation	botany/zoology
	budget	chemistry	construction	customs	defense
	development	economics	education	electrotechnics
	employment	energy	environment	eurospeak	finance
	fisheries	general	geology	industry	informatics
	insurance	law	mechanics	medicine	mining
	nuclear	social	statistics	steel	taxation
	technology	telecom	trade	transport
	MA	Buddh	comp	geom	gram	math	
);

# OLIF values for ptOfSpeech
my %olify = (
	adjective => 'adj',
	adverb => 'adv',
	properNoun => 'x-properNoun',
);

# escape ampersand and less-than for XML well-formedness
($src, $tgt, $subject_field, $global_note) = 
	xml_escape($src, $tgt, $subject_field, $global_note);

# glossary-wide preprocessing
my ($sec, $min, $hour, $mday, $mon, $year, undef, undef, undef) = gmtime;
my $timestamp = sprintf "%4d%02d%02dT%02d%02d%02dZ", 
	$year + 1900, $mon + 1, $mday, $hour, $min, $sec;
$global_note = "    <note>$global_note</note>\n" if defined $global_note;
$subject_field = "x-$subject_field"
	unless exists $canonical{$subject_field};

print <<EOHeader;
<?xml version="1.0" encoding="UTF-8"?>
<olif 
  OlifVersion="3.0, February 2008"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.olif.net ./OLIF.xsd" 
  xmlns:tbx="http://www.lisa.org/TBX-Specification.33.0.html"
  xmlns="http://www.olif.net/"
>
  <header
    CreaDate="$timestamp"
    CreaId="user"
    CreaTool="Glossary Format Converter"
    CreaToolVersion="1.0"
    OrigFormat="UTX-S, GlossML, or TBX-Glossary"
  >
    <contentInfo>
      <quotMarkInfo/>
      <langIdUse>region_standard</langIdUse>
    </contentInfo>
$global_note  </header>
  <body>
EOHeader

my $i = 0; # used to create MonoUserIDs
for my $n (@record) {
	($_) = xml_escape($_) for values %$n; # list of one, still a list
	# prep parts of speech
	my $src_pos = $n->{'src:pos'};
	$src_pos = $olify{$src_pos} if defined $olify{$src_pos};
	my $tgt_pos = $n->{'tgt:pos'};
	$tgt_pos = $olify{$tgt_pos} if defined $olify{$tgt_pos};
	# prep definitions
	my $src_def = '';
	if (defined $n->{'src:def'}) {
		$src_def .= "              <tbx:descrip type=\"definition\">$n->{'src:def'}</tbx:descrip>\n"
	}
	if (defined $n->{'src:defSource'}) {
		$src_def .= "              <tbx:admin type=\"source\">$n->{'src:defSource'}</tbx:admin>\n"
	}
	if ($src_def) {
	$src_def = <<EODef;
        <monoDC>
          <monoSem>
            <definition>
$src_def            </definition>
          </monoSem>
        </monoDC>
EODef
	}
	# prep examples and note
	my $src_ex = '';
	if (defined $n->{'src:ex'}) {
		$src_ex .= "            <tbx:descrip type=\"context\">$n->{'src:ex'}</tbx:descrip>\n"
	}
	if (defined $n->{'src:exSource'}) {
		$src_ex .= "            <tbx:admin type=\"source\">$n->{'src:exSource'}</tbx:admin>\n"
	}
	if ($src_ex) {
	$src_ex = <<EOEx;
          <example>
$src_ex          </example>
EOEx
	}
	my $note = exists $n->{'src:note'} ?
		"          <note>$n->{'src:note'}</note>\n" :
		'';
	if ($src_ex or $note) {
		$src_ex = <<EOGeneralDC;
        <generalDC>
$src_ex$note        </generalDC>
EOGeneralDC
	}
	# similar for tgt side
	# prep definitions
	my $tgt_def = '';
	if (defined $n->{'tgt:def'}) {
		$tgt_def .= "              <tbx:descrip type=\"definition\">$n->{'tgt:def'}</tbx:descrip>\n"
	}
	if (defined $n->{'tgt:defSource'}) {
		$tgt_def .= "              <tbx:admin type=\"source\">$n->{'tgt:defSource'}</tbx:admin>\n"
	}
	if ($tgt_def) {
	$tgt_def = <<EODef;
        <monoDC>
          <monoSem>
            <definition>
$tgt_def            </definition>
          </monoSem>
        </monoDC>
EODef
	}
	# prep examples and note
	my $tgt_ex = '';
	if (defined $n->{'tgt:ex'}) {
		$tgt_ex .= "            <tbx:descrip type=\"context\">$n->{'tgt:ex'}</tbx:descrip>\n"
	}
	if (defined $n->{'tgt:exSource'}) {
		$tgt_ex .= "            <tbx:admin type=\"source\">$n->{'tgt:exSource'}</tbx:admin>\n"
	}
	if ($tgt_ex) {
	$tgt_ex = <<EOEx;
        <generalDC>
          <example>
$tgt_ex          </example>
        </generalDC>
EOEx
	}
	# prep ID to link source to target
	my $id = "${timestamp}_$i"; $i++;
	# finally, print the two entries
	print <<EOPair;
    <entry>
      <mono>
        <keyDC>
          <canForm xml:lang="$src">$n->{src}</canForm>
          <ptOfSpeech>$src_pos</ptOfSpeech>
          <subjField>$subject_field</subjField>
        </keyDC>
$src_def$src_ex      </mono>
      <transfer TrTarget="$id"/>
    </entry>
    <entry>
      <mono MonoUserId="$id">
        <keyDC>
          <canForm xml:lang="$tgt">$n->{tgt}</canForm>
          <ptOfSpeech>$tgt_pos</ptOfSpeech>
          <subjField>$subject_field</subjField>
        </keyDC>
$tgt_def$tgt_ex      </mono>
    </entry>

EOPair
}

print <<EOFooter;
  </body>
</olif>
EOFooter

1;

} # end export_olf


sub xml_escape { # utility fn used by GlossML, TBX, OLIF exporters
	no warnings 'uninitialized';
	my @return = @_;
	foreach (@return) {
		s/&/&amp;/sg;
		s/</&lt;/sg;
	}
	@return;
}

