# Kristian S. Gleditsch
# Interface for distance data
#
# Revision history 
# Version 0.9  26.11.99 KSG
# Version 0.91 27.11.99 KSG
# Version 0.92 29.11.99 MDW (added documentation)
# Version 0.93 29.11.99 KSG (added documentation)
# Version 0.94 30.11.99 KSG
#	1) Added substitutions in user input
#	   in threshold and start/endyears
#	2) Added checks for legal values in user input
#	   in threshold and start/endyears
#	3) Took out same state dyads
# Version 0.95 02.03.2000 KSG
#	Revisions to the data
# Version 0.96 21 July 2004 KSG
#	Revisions to the data
# Version 0.97 14 December 2004 KSG
#	Revisions to the data


# Release number shown in the first welcome screen.
$version = 0.97;

# I. Introductory
# Two information screens; each one waits for the user to press return
# before the script moves on.
print "\n\n", <<"WELCOME_SCREEN";
************************************************************************
*                                                                      *
* Minimum distance data version ${version}                                   *
*                                                                      *
* Copyright Kristian S. Gleditsch and Michael D. Ward, 1999            *
*                                                                      *
* This program extracts minimum distance data for independent states   *
* in time spans within the interval 1875-2002. The data use the list   *
* of states in the international system outlined in Kristian S.        *
* Gleditsch and Michael D. Ward. 1999. "A Revised list of the          *
* Independent States 1816," International Interactions 25(4):393-413.  *
*                                                                      *
*          .....Hit return to continue.....                            *
*                                                                      *
************************************************************************
WELCOME_SCREEN

# The reply itself is irrelevant; reading a line just pauses the script.
$anykey = readline(*STDIN);

print "\n" x 6, <<"ABOUT_SCREEN";
************************************************************************
*                                                                      *
* The data contain the minimum distance in the interval between zero   *
* and 950 kilometers, as measured with MS Virtual Globe. You can       *
* choose between the actual minimum distance or binary data with       *
* entries of 1 if distance is less than a specified threshold and 0    *
* if distances exceed the threshold.                                   *
*                                                                      *
* The data are offered on an "as is" basis. Please report any errors   *
* or suggestions to: kgleditsch\@ucsd.edu                               *
*                                                                      *
* These data should be cited as:                                       *
* Kristian S. Gleditsch and Michael D. Ward. 1999.                     *
* Minimum Distance Data, 1875-1999.                                    *
*                                                                      *
* Thanks to Richard Tucker for comments and the Research Council of    *
* Norway for financial support.                                        *
*                                                                      *
************************************************************************
ABOUT_SCREEN

print "\n\t\tHit return to continue\n";
$anykey = readline(*STDIN);

# Make sure the "matrixes" input directory can be opened from the current
# working directory before asking the user any questions.
{
    my $probe;
    opendir($probe, 'matrixes')
        or die "The \"matrixes\" directory must be in the same directory as this program\n";
    closedir($probe);
}

# II. Get user preferences
#
# Sets the globals read by the rest of the script:
#   $format      - 'dyads' or 'matrixes'
#   $startyear   - first year to extract, 1875 or later
#   $endyear     - last year to extract, between $startyear and 2002
#   $threshold   - 'continuous', or a whole number of kilometres (max 950)
#   $missingdata - text that replaces NA cells in continuous output

# Read one reply from STDIN and return it without its line ending.
# Returns undef once STDIN is exhausted.
sub read_reply {
    my $reply = <STDIN>;
    chomp($reply) if defined $reply;    # chomp, not chop: never eats real input
    return $reply;
}

# Print $prompt and return the non-negative whole number the user types.
# Every non-digit character is discarded ("18 75" and "1,875" both give
# 1875), so signs and decimal points are dropped as well.
# A reply without any digit yields $default when one is supplied; without
# a default the question is repeated until a number is entered.
# Dies when STDIN runs out while a number is still required, so that a
# closed input stream cannot cause an endless loop.
sub ask_number {
    my ($prompt, $default) = @_;
    while (1) {
        print $prompt;
        my $reply  = read_reply();
        my $digits = defined $reply ? $reply : '';
        $digits =~ s/\D+//g;                    # /g: strip *every* non-digit run
        return $digits + 0 if length $digits;   # + 0 turns "01900" into 1900
        return $default if defined $default;
        die "\n\tNo more input available, giving up\n" unless defined $reply;
    }
}

print "\n\tDo you want matrixes (default) or dyadic data?\n";
$format = read_reply();
$format = (defined $format && $format =~ /dyad/) ? 'dyads' : 'matrixes';

$startyear = ask_number("\n\tIndicate starting year (1875 default)\n", 1875);
$endyear   = ask_number("\n\tIndicate final year (2002 default)\n", 2002);

# Years are compared numerically; string comparison would rank "999"
# above "1875".
if ($startyear < 1875 || $startyear > $endyear || $endyear > 2002) {
    print "\n\tInconsistent time domain given";

    # Correcting one bound can invalidate another (e.g. a new end year that
    # lies before the start year), so repeat until all three rules hold.
    while ($startyear < 1875 || $startyear > $endyear || $endyear > 2002) {
        while ($startyear < 1875) {
            print "\n\n\tStartyear must be 1875 or later, try again\n";
            $startyear = ask_number("\n\tIndicate starting year\n");
        }
        while ($startyear > $endyear) {
            print "\n\n\tStartyear must be lower than endyear, try again\n";
            $startyear = ask_number("\n\tIndicate starting year (min 1875)\n");
            $endyear   = ask_number("\n\tIndicate final year (max 2002)\n");
        }
        while ($endyear > 2002) {
            print "\n\n\tEndyear must be 2002 or earlier, try again\n";
            $endyear = ask_number("\n\tIndicate final year\n");
        }
    }
}

$threshold = ask_number(
    "\n\tEnter a distance threshold (in kms)\n\t (leave blank for continuous)\n",
    'continuous'
);
if ($threshold eq 'continuous') {
    print "Assume you prefer continuous\n";
}
else {
    # A negative threshold cannot occur because "-" is stripped with the
    # other non-digits, so only the upper limit needs checking.
    while ($threshold > 950) {
        print "\n\tThe maximum distance threshold is 950km, try again";
        $threshold = ask_number("\n\tEnter a distance threshold (in kms)\n\t\n");
    }
}

print "\n\tDo you want to specify a missing data code? (Default is NA)\n";
my $wants_code = read_reply();
$missingdata = 'NA';
if (defined $wants_code && $wants_code =~ /y/) {
    print "\n\tPlease indicate missing data code\n";
    my $code = read_reply();
    # An empty code would leave holes in the space-separated output, so
    # keep the NA default in that case.
    $missingdata = $code if defined $code && length $code;
}

print "\n\tYou have indicated that you want ".$format." with a ".$threshold." threshold\n";
print "\tfrom ".$startyear." to ".$endyear.". ";
print "Your missing data character is ".$missingdata."\n";

# III./IV. Write the requested output.
#
# Reads matrixes/mat<year>.asc for every year from $startyear to $endyear.
# Each file is whitespace-separated: a header line whose first field
# matches "row.names" lists the column labels, and every other line holds
# a row label followed by one cell per column.
#   dyads    -> one "row col year value" line per pair in distdyad.asc,
#               leaving out pairs whose row and column labels are equal
#   matrixes -> one mat<year>.dat per year in the output directory

# Translate one input cell into the value to write.
#   $cell    - raw cell text, a distance or something containing "NA"
#   $cutoff  - 'continuous' or a numeric threshold
#   $na_code - replacement for NA cells in continuous output
# Continuous output passes distances through unchanged; thresholded output
# is 1 when the distance is below the cutoff and 0 otherwise (NA gives 0).
sub recode_cell {
    my ($cell, $cutoff, $na_code) = @_;
    my $is_missing = $cell =~ /NA/;
    if ($cutoff =~ /continuous/) {
        return $is_missing ? $na_code : $cell;
    }
    return 0 if $is_missing;
    return $cell < $cutoff ? 1 : 0;
}

# III. Dyads
if ($format =~ /dyad/) {
    print "\n\n\tCreating dyads, this could take a while.\n";
    print "\tThe output will be in the file \"distdyad.asc\".\n";
    open(my $out, '>', 'distdyad.asc')
        or die "Cannot create the target file distdyad.asc: $!";

    for (my $year = $startyear; $year <= $endyear; $year++) {
        my $inputfile = 'matrixes/mat' . $year . '.asc';
        open(my $in, '<', $inputfile)
            or die "Cannot open input file $inputfile: $!";

        my @col;    # column labels of this year's file, indexed like the cells
        while (my $line = <$in>) {
            my @info = split(/\s+/, $line);
            if ($info[0] =~ /row.names/) {
                @col = @info;
                next;
            }
            my $row = $info[0];
            for my $i (1 .. $#info) {
                next if $row eq $col[$i];    # leave out same state dyads
                my $data = recode_cell($info[$i], $threshold, $missingdata);
                print {$out} $row . " " . $col[$i] . " " . $year . " " . $data . "\n";
            }
        }
        close($in);
    }
    # Buffered write errors (e.g. a full disk) only show up at close.
    close($out) or die "Cannot finish writing distdyad.asc: $!";
}

# IV. Matrixes
else {
    # The prompt names the directory that is really used: "mddout" below
    # the current directory.
    print "\n\tDo you want to specify an output directory? (Default is mddout/)\n";
    my $answer = <STDIN>;
    my $outdir = 'mddout';
    if (defined $answer && $answer =~ /y/) {
        print "\n\tPlease indicate output directory\n";
        my $wanted = <STDIN>;
        if (defined $wanted) {
            chomp($wanted);
            # A blank reply keeps the default instead of producing the
            # path "/mat<year>.dat".
            $outdir = $wanted if length $wanted;
        }
    }
    unless (-d $outdir) {
        mkdir($outdir, 0777)
            or die "Cannot create output directory $outdir: $!\n";
    }

    print "\n\n\tCreating matrixes, please be patient.\n";

    for (my $year = $startyear; $year <= $endyear; $year++) {
        # Forward slash, as in the dyad branch: a backslash only works as a
        # path separator on Windows.
        my $inputfile = 'matrixes/mat' . $year . '.asc';
        open(my $in, '<', $inputfile)
            or die "Cannot open input file $inputfile: $!";

        # Three-argument open keeps odd characters in a user-supplied
        # directory name from being read as an open mode.
        my $outputfile = $outdir . '/mat' . $year . '.dat';
        open(my $out, '>', $outputfile)
            or die "Cannot create the target file $outputfile: $!";

        while (my $line = <$in>) {
            my @info = split(/\s+/, $line);
            if ($info[0] =~ /row.names/) {
                # Header line: copy the column labels through unchanged.
                print {$out} join(' ', 'row.names', @info[1 .. $#info]), "\n";
                next;
            }
            my @cells = map { recode_cell($_, $threshold, $missingdata) }
                        @info[1 .. $#info];
            print {$out} join(' ', $info[0], @cells), "\n";
        }
        close($in);
        close($out) or die "Cannot finish writing $outputfile: $!";
    }
} # End of matrixes

# Farewell screen; the script ends once the user presses return.
print "\n" x 6, <<"FAREWELL_SCREEN";
************************************************************************
*                                                                      *
*      Enjoy the minimum distance data!                                *
*                                                                      *
*                                                                      *
*     We are interested in hearing about your use of the data          *
*     and encourage you to contact kgleditsch\@ucsd.edu                 *
*                                                                      *
*                                                                      *
*              .........Hit return to continue...........              *
*                                                                      *
*                                                                      *
************************************************************************
FAREWELL_SCREEN

$anykey = readline(*STDIN);
 
