## -*- Mode: CPerl -*-
##
## File: DTA::CAB::Analyzer::EqLemma::CDB.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description: DB dictionary-based equivalence-class expander, rewrite variant

package DTA::CAB::Analyzer::EqLemma::CDB;
use DTA::CAB::Analyzer ':child';
use DTA::CAB::Analyzer::Dict::CDB;
use strict;

##==============================================================================
## Globals
##==============================================================================

our @ISA = qw(DTA::CAB::Analyzer::Dict::CDB);

our $ANALYZE_GET_MOOT = '($_->{moot} ? $_->{moot}{lemma} : qw())';
our $ANALYZE_GET_MORPH = '($_->{morph} ? (map {$_->{hi}} @{$_->{morph}}) : qw())';
our $ANALYZE_GET_RW_MORPH = '($_->{rw} ? (map {$_->{morph} ? (map {$_->{lemma}} @{$_->{morph}}) : qw()} @{$_->{rw}}) : qw())';

our $ANALYZE_GET_ALL = '('.join(",\n",$ANALYZE_GET_MOOT, $ANALYZE_GET_MORPH, $ANALYZE_GET_RW_MORPH).')';

our $ANALYZE_GET_DEFAULT = $ANALYZE_GET_MOOT;

##==============================================================================
## Constructors etc.
##==============================================================================

## $obj = CLASS_OR_OBJ->new(%args)
##  + object structure: see DTA::CAB::Analyzer::Dict::CDB
sub new {
  my $that = shift;
  return $that->SUPER::new(
			   ##-- options
			   label       => 'eqlemma',
			   ##
			   analyzeGet  => $ANALYZE_GET_DEFAULT,
			   ##analyzeSet  => $DICT_SET_FST, ##-- NOT USED
			   ##allowRegex  => '(?:^[[:alpha:]\-\]*[[:alpha:]]+$)|(?:^[[:alpha:]]+[[:alpha:]\-\]+(?:\.?)$)',

			   ##-- user args
			   @_
			  );
}


##========================================================================
## analysis overrides

## $bool = $anl->doAnalyze(\%opts, $name)
sub doAnalyze {
  my $anl = shift;
  return 0 if ($_[1] eq 'Types');
  return $anl->SUPER::doAnalyze(@_);
}

## $doc = $anl->analyzeTypes($doc,\%types,\%opts)
##  + perform type-wise analysis of all (text) types in $doc->{types}
##  + override does nothing
sub analyzeTypes { return $_[1]; }

## $doc = $anl->analyzeSentences($doc,\%opts)
##  + perform sentence-wise analysis of all sentences $doc->{body}[$si]
##  + expand lemma equivalence
sub analyzeSentences {
  my ($anl,$doc,$opts) = @_;

  ##-- common vars
  my $lab = $anl->{label};
  my $lab_key = $lab.'_key';

  ##-- dict structures
  my $dhash    = $anl->dictHash;
  my $allow_re = defined($anl->{allowRegex}) ? qr($anl->{allowRegex}) : undef;

  ##-- accessors
  my $aget  = $anl->analyzeCode($anl->{analyzeGet});

  ##-- get lemma types
  my $ltypes = {};
  my ($tok,@lw,%lw,$l,$ltyp,$lw);
  foreach $tok (map {@{$_->{tokens}}} @{$doc->{body}}) {
    next if (defined($allow_re) && $tok->{text} !~ $allow_re);
    @lw = qw();
    foreach $l ($aget->($tok)) {
      if (!defined($ltyp=$ltypes->{$l})) {
	$ltyp = $ltypes->{$l} = {lemma=>$l};
	$lw   = $dhash->{$l};
	$ltyp->{$lab} = [({lo=>$l,hi=>$l,w=>0}),
			 (map {$_->{lo}=$l; $_}
			  map {DTA::CAB::Analyzer::parseFstString($_)}
			  grep {defined($_)}
			  split(/\t/,$lw))
			]
	  if (defined($lw) && $lw ne '');
      }
      push(@lw, @{$ltyp->{$lab}}) if ($ltyp->{$lab});
    }

    %lw=qw();
    foreach (@lw) {
      $lw{$_->{hi}} = $_ if (!exists($lw{$_->{hi}}) || $lw{$_->{hi}}{w} > $_->{w});
    }
    delete($tok->{$lab});
    $tok->{$lab} = [sort {($a->{w}||0) <=> ($b->{w}||0) || ($a->{hi}||"") cmp ($b->{hi}||"")} values %lw];
  }

  ##-- return
  return $doc;
}


1; ##-- be happy

__END__
##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl, edited

##========================================================================
## NAME
=pod

=head1 NAME

DTA::CAB::Analyzer::EqLemma::CDB - CDB dictionary-based lemma-equivalence expander

=cut

##========================================================================
## SYNOPSIS
=pod

=head1 SYNOPSIS

 use DTA::CAB::Analyzer::EqLemma::CDB;
 
 ##========================================================================
 ## Constructors etc.
 
 $eqlemma = DTA::CAB::Analyzer::EqLemma::CDB->new(%args);
 
 ##========================================================================
 ## analysis overrides
 
 $bool = $anl->doAnalyze(\%opts, $name);
 $doc = $anl->analyzeTypes($doc,\%types,\%opts);
 $doc = $anl->analyzeSentences($doc,\%opts);
 

=cut

##========================================================================
## DESCRIPTION
=pod

=head1 DESCRIPTION

DB dictionary-based lemma equivalence-class expander.

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::Dict: Globals
=pod

=head2 Globals

=over 4

=item Variable: @ISA

DTA::CAB::Analyzer::EqLemma::CDB inherits from
L<DTA::CAB::Analyzer::Dict::CDB>.

=item Variable: $ANALYZE_GET_MOOT

(undocumented)

=item Variable: $ANALYZE_GET_MORPH

(undocumented)

=item Variable: $ANALYZE_GET_RW_MORPH

(undocumented)

=item Variable: $ANALYZE_GET_ALL

(undocumented)

=item Variable: $ANALYZE_GET_DEFAULT

(undocumented)

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::EqLemma::CDB: Constructors etc.
=pod

=head2 Constructors etc.

=over 4

=item new

 $eqc = CLASS_OR_OBJ->new(%args);

Constructor.  Sets some default options.

=back

=cut


##----------------------------------------------------------------
## DESCRIPTION: DTA::CAB::Analyzer::EqLemma::CDB: Analysis
=pod

=head2 Methods: Analysis

=over 4

=item doAnalyze

 $bool = $anl->doAnalyze(\%opts, $name);

(undocumented)

=item analyzeTypes

 $doc = $anl->analyzeTypes($doc,\%types,\%opts);

Perform type-wise analysis of all (text) types in $doc-E<gt>{types};
Override does nothing.

=item analyzeSentences

 $doc = $anl->analyzeSentences($doc,\%opts);

Perform sentence-wise analysis of all sentences $doc-E<gt>{body}[$si].
Override expands lemma equivalence at sentence level
(i.e. only after canonical disambiguation and PoS-tagging).

=cut


##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl

##======================================================================
## Footer
##======================================================================

=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010-2019 by Bryan Jurish

This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.24.1 or,
at your option, any later version of Perl 5 you may have available.

=cut
