<?php 
 
// This script was used to collect Canadian government organization names and their acronyms,
// and then, using those acronyms, contact information from the Government Electronic Directory Services (GEDS).
 
// English acronym export: download the organization index, pull each
// organization's acronym and display name out of its links, and write one
// "ACRONYM<sep>Name" line per organization to eng/GOC/acronyms.txt.
$OrganizationListPage = file_get_contents("http://direct.srv.gc.ca/cgi-bin/direct500/");
if ($OrganizationListPage === false) {
    // Treat a failed download as an empty page so the helpers below simply find nothing.
    $OrganizationListPage = "";
}
$OrganizationArray = findOrgsInURLs($OrganizationListPage);
$OrganizationNameArray = findOrgNamesInURLs($OrganizationListPage);

// Must start as a string: appending to an undefined variable raises a warning,
// and an empty organization list would otherwise hand null to fwrite().
$ToWrite = "";
foreach ($OrganizationArray as $index => $acro) {
    if (preg_match("/([^\-]*)-/", $acro, $matches)) {
        // The English acronym is the part in front of the first hyphen.
        $match = $matches[1];
    } else {
        // No hyphen in the value: fall back to the whole value rather than
        // reading an undefined match. NOTE(review): assumes an unhyphenated
        // value is a single shared acronym - confirm against the live data.
        $match = $acro;
    }
    // The two helper patterns differ slightly, so guard against a name missing at this index.
    $name = isset($OrganizationNameArray[$index]) ? $OrganizationNameArray[$index] : "";
    // Separator reproduced exactly as it appears in this file (four spaces).
    $ToWrite .= $match . "    " . $name . "\r\n";
}
$file = "eng/GOC/acronyms.txt";
$fp = fopen($file, 'w');
if ($fp !== false) {
    // fopen() has already emitted its own warning when it fails; only write on success.
    fwrite($fp, $ToWrite);
    fclose($fp);
}
 
// French acronym export: same procedure as the English one, but against the
// French index page, taking the acronym part after the hyphen and writing
// the result to fra/GDC/acronyms.txt.
$FraOrganizationListPage = file_get_contents("http://direct.srv.gc.ca/cgi-bin/direct500/XFo%3dGC%2cc%3dCA");
if ($FraOrganizationListPage === false) {
    // Treat a failed download as an empty page so the helpers below simply find nothing.
    $FraOrganizationListPage = "";
}
$FraOrganizationArray = FrafindOrgsInURLs($FraOrganizationListPage);
$FraOrganizationNameArray = FrafindOrgNamesInURLs($FraOrganizationListPage);

// Must start as a string: appending to an undefined variable raises a warning,
// and an empty organization list would otherwise hand null to fwrite().
$FraToWrite = "";
foreach ($FraOrganizationArray as $Fraindex => $Fraacro) {
    if (preg_match("/-([^\-]*)/", $Fraacro, $Framatches)) {
        // The French acronym is the part right after the first hyphen.
        $Framatch = $Framatches[1];
    } else {
        // No hyphen in the value: fall back to the whole value rather than
        // reading an undefined match. NOTE(review): assumes an unhyphenated
        // value is a single shared acronym - confirm against the live data.
        $Framatch = $Fraacro;
    }
    // The two helper patterns differ slightly, so guard against a name missing at this index.
    $Franame = isset($FraOrganizationNameArray[$Fraindex]) ? $FraOrganizationNameArray[$Fraindex] : "";
    // Separator reproduced exactly as it appears in this file (four spaces).
    $FraToWrite .= $Framatch . "    " . $Franame . "\r\n";
}
$Frafile = "fra/GDC/acronyms.txt";
$Frafp = fopen($Frafile, 'w');
if ($Frafp !== false) {
    // fopen() has already emitted its own warning when it fails; only write on success.
    fwrite($Frafp, $FraToWrite);
    fclose($Frafp);
}
 
// NOTE(review): this exit ends the script right after the acronym files are
// written, so none of the contact-information code below runs as the file
// stands - presumably switched off on purpose; confirm before removing it.
exit(0);

// Contact information, step 1: run one directory search per organization and
// gather every link that findURLs() extracts from the result pages.
$URLArray = array();
foreach ($OrganizationArray as $OrgAcro) {
    $searchUrl = "http://direct.srv.gc.ca/cgi-bin/direct500/SEou%3d" . $OrgAcro
        . "%2co%3dGC%2cc%3dCA?SV=web&SF=Title&ST=contains&x=31&y=20";
    $PageWithPeople = file_get_contents($searchUrl);
    $linksOnPage = findURLs($PageWithPeople);
    $URLArray = array_merge($URLArray, $linksOnPage);
}
 
// Field extraction table. Each key is a PCRE pattern that locates one field in
// a downloaded page, and each value is the replacement template (a capture
// group reference) that reduces the matched text to just the field value.
// FindStuff() walks this array in order, so the order of the entries here is
// the order of the columns in every output record.
$rxpArray = array(
    // Name: group 3, the text after "<h2>" up to the following "<a".
    '/(<h2>)([\s]{0,10})([^\r\n]*)([\s]{0,10})(<a)/is' => '$3',
    // Title: group 5, the text inside the <div class="text"> that follows the
    // "Display detailed information" marker, up to the "<br><br>" in front of
    // the "title of person" marker.
    '/(<!-- Display detailed information -->)([\s]{0,10})(<div class="*text"*>)([\s]{0,10})([^<]*)(<br>)(<br>)([\s]{0,15})(<!-- title of person -->)/is' => '$5',
    // Organization 1: group 3, the text between the "title of person" and
    // "top level OU" markers.
    '/(<!-- title of person -->)([\s]{0,10})([\w ,\-\(\)é]*)([\s]{0,10})(<br>)([\s]{0,10})(<!-- top level OU -->)/is' => '${3}',
    // Organization 2: group 3, the text between the "top level OU" and
    // "immediate OU" markers.
    // NOTE(review): the "*" after the second whitespace group repeats a group
    // that is already quantified; the Organization 1 pattern has no such "*".
    // Looks like a typo - confirm before changing.
    '/(<!-- top level OU -->)([\s]{0,10})([\w ,\-\(\)é]*)([\s]{0,10})*(<br>)([\s]{0,10})(<!-- immediate OU -->)/is' => '$3',
    // Address: group 4, the text between the first two <br> tags after the
    // address marker ("Contry" is spelled that way in the marker being matched).
    '/(<!-- Address - PO Box - Mail stop - City - Province - Contry - Postal code -->)([\s]{0,10})(<br>)([^<]*)(<br>)/is' => '$4',
    // City, Province: group 4, the text between the <br> tags that follow a
    // comment ending in "ITEM=[]$".
    '/(ITEM=\[\]\$-->)([\s]{0,10})(<br>)([\w ,\-\(\)é]*)([\s]{0,10})(<br>)/is' => '$4',
    // Country: group 2, a run of word characters (no spaces) between two <br>
    // tags, followed by one more text chunk and then the telephone marker.
    '/(<br>)([\w]*)(<br>)([\s]{0,10})([\w \-\(\)é]*)([\s]{0,10})(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)/is' => '$2',
    // Postal Code: group 3, the words and spaces between a <br> and the
    // telephone marker.
    '/(<br>)([\s]{0,10})([\w ]*)([\s]{0,10})(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)/is' => '$3',
    // Telephone 1: group 6, the <dd> value of the "Telephone:" entry that
    // opens the <dl> right after the telephone marker.
    '/(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)([\s]{0,10})(<dl>)([\s]{0,10})(<dt>Telephone:<\/dt><dd>)([^<]*)(<\/dd>)/is' => '$6',
    // Telephone 2: group 2, the <dd> value immediately in front of the "Fax:"
    // entry that closes the <dl> before the X400 marker.
    '/(<dd>)([\w\(\)\- ]*)(<\/dd>)([\s]{0,10})(<dt>Fax:<\/dt><dd>)([^<]*)(<\/dd>)([\s]{0,10})(<\/dl>)([\s]{0,10})(<!-- X400 address -->)/is' => '$2',
    // Fax: group 2, the <dd> value of the "Fax:" entry that closes the <dl>
    // before the X400 marker.
    '/(<dt>Fax:<\/dt><dd>)([^<]*)(<\/dd>)([\s]{0,10})(<\/dl>)([\s]{0,10})(<!-- X400 address -->)/is' => '$2',
    );
     
// Header row of the output: one column title per extraction pattern, in the
// same order, joined with the field separator and terminated with CRLF.
// The separator is reproduced exactly as it appears in this file (four
// spaces); the original note says it was meant to be a tab, for a
// tab-separated spreadsheet.
$record = implode("    ", array(
    "Name",
    "Title",
    "Organization 1",
    "Organization 2",
    "Address",
    "City, Province",
    "Country",
    "Postal Code",
    "Telephone 1",
    "Telephone 2",
    "Fax",
)) . "\r\n";
 
// Contact information, step 2: download every collected link, turn each page
// into one record line, and write all non-blank records to GEDS-record.txt.

// A record whose fields all came back empty is just one separator per pattern
// followed by CRLF. Build that sentinel from the pattern count so it stays in
// step with $rxpArray instead of hard-coding a run of spaces.
$blankRecord = str_repeat("    ", count($rxpArray)) . "\r\n";

foreach ($URLArray as $personUrl) {
    $fileContents = file_get_contents($personUrl);
    if ($fileContents === false) {
        // Download failed: there is nothing to parse, move on to the next link.
        continue;
    }
    $WhatToAdd = FindStuff($fileContents, $rxpArray);
    // Strict comparison: both sides are strings, no type juggling wanted.
    if ($WhatToAdd !== $blankRecord) {
        $record .= $WhatToAdd;
    }
}

WriteFile("GEDS-record.txt", $record);
 
/**
 * Writes a string to a file, replacing any previous content.
 *
 * The target must be writable by the user running the script.
 *
 * @param string $strTargetx Path of the file to (over)write.
 * @param string $tpx        Content to write.
 * @return bool True when the content was written, false when the file could
 *              not be opened or the write failed.
 */
function WriteFile($strTargetx, $tpx) {
    $handle = fopen($strTargetx, 'w');
    if ($handle === false) {
        // fopen() has already emitted a warning; do not pass false on to fwrite().
        return false;
    }
    $written = fwrite($handle, $tpx);
    fclose($handle);
    return $written !== false;
}
 
/**
 * Builds one output record from a page by applying every extraction pattern.
 *
 * For each pattern (in array order) the first match in the page is reduced to
 * the field value via ReplaceStuff(); a pattern that does not match yields an
 * empty field. Every field is followed by the field separator and the record
 * ends with CRLF, so a page where nothing matches produces one separator per
 * pattern plus CRLF.
 *
 * @param string $strToFindOn Page content to search.
 * @param array  $rxpArray    Map of PCRE pattern => replacement template.
 * @return string The record line, including its trailing CRLF.
 */
function FindStuff($strToFindOn, $rxpArray) {
    $newRecordToAppend = "";
    foreach ($rxpArray as $rxp => $replacement) {
        $field = "";
        // Only read $matches[0] when the pattern really matched; it is not set otherwise.
        if (preg_match($rxp, $strToFindOn, $matches) === 1) {
            $field = ReplaceStuff($matches[0], $rxp, $replacement);
        }
        // Separator reproduced exactly as it appears in this file (four
        // spaces); the original note says it was meant to be a tab.
        $newRecordToAppend .= $field . "    ";
    }
    $newRecordToAppend .= "\r\n";
    return $newRecordToAppend;
}
 
/**
 * Collects the href value of every `<li><a href="...">` link in a page.
 *
 * @param string $strToFindOn Page content to search.
 * @return array The href values, in document order (empty when none found).
 */
function FindURLs($strToFindOn) {
    $linkPattern = "/<li><a\shref=\"([^\"]*)\"/is";
    // Pattern order is the default: $found[1] holds every capture of group 1.
    preg_match_all($linkPattern, $strToFindOn, $found);
    return $found[1];
}
 
/**
 * Extracts the organization identifiers from the English index links.
 *
 * Matches list-item links whose href is the directory address with an
 * "XEou%3d<identifier>%2co%3dGC%2cc%3dCA" path and returns the identifiers.
 *
 * @param string $strToFindOn Page content to search.
 * @return array The identifiers, in document order (empty when none found).
 */
function FindOrgsInURLs($strToFindOn) {
    $orgLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XEou%3d([^\"]*)%2co%3dGC%2cc%3dCA\"/is";
    // Pattern order is the default: $found[1] holds every capture of group 1.
    preg_match_all($orgLinkPattern, $strToFindOn, $found);
    return $found[1];
}
 
/**
 * Extracts the organization identifiers from the French index links.
 *
 * Matches list-item links whose href is the directory address with an
 * "XFou%3d<identifier>%2co%3dGC%2cc%3dCA" path and returns the identifiers.
 *
 * @param string $strToFindOn Page content to search.
 * @return array The identifiers, in document order (empty when none found).
 */
function FraFindOrgsInURLs($strToFindOn) {
    $orgLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XFou%3d([^\"]*)%2co%3dGC%2cc%3dCA\"/is";
    // Pattern order is the default: $found[1] holds every capture of group 1.
    preg_match_all($orgLinkPattern, $strToFindOn, $found);
    return $found[1];
}
 
/**
 * Extracts the organization display names from the English index links.
 *
 * Matches the same "XEou%3d...%2co%3dGC%2cc%3dCA" list-item links as
 * FindOrgsInURLs(), but requires the tag to close right after the href and
 * returns the link text that follows, up to the next "<".
 *
 * @param string $strToFindOn Page content to search.
 * @return array The link texts, in document order (empty when none found).
 */
function FindOrgNamesInURLs($strToFindOn) {
    $orgNamePattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XEou%3d([^\"]*)%2co%3dGC%2cc%3dCA\">([^<]*)/is";
    // Pattern order is the default: $found[2] holds every capture of group 2 (the link text).
    preg_match_all($orgNamePattern, $strToFindOn, $found);
    return $found[2];
}
 
/**
 * Extracts the organization display names from the French index links.
 *
 * Matches the same "XFou%3d...%2co%3dGC%2cc%3dCA" list-item links as
 * FraFindOrgsInURLs(), but requires the tag to close right after the href and
 * returns the link text that follows, up to the next "<".
 *
 * @param string $strToFindOn Page content to search.
 * @return array The link texts, in document order (empty when none found).
 */
function FraFindOrgNamesInURLs($strToFindOn) {
    $orgNamePattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XFou%3d([^\"]*)%2co%3dGC%2cc%3dCA\">([^<]*)/is";
    // Pattern order is the default: $found[2] holds every capture of group 2 (the link text).
    preg_match_all($orgNamePattern, $strToFindOn, $found);
    return $found[2];
}
 
/**
 * Applies a regular-expression replacement to a string.
 *
 * Thin wrapper around preg_replace() with the subject as the first argument.
 *
 * @param string $strToReplaceOn Subject string.
 * @param string $rxp            PCRE pattern.
 * @param string $replacement    Replacement template (may reference capture groups).
 * @return string|null Whatever preg_replace() returns for these arguments.
 */
function ReplaceStuff($strToReplaceOn, $rxp, $replacement) {
    return preg_replace($rxp, $replacement, $strToReplaceOn);
}
 
?>
 
 |