<?php
/*
Geo Class alternative implementation.
Usage: add "Geo_Mongo::load();" to init.php, then use the old Geo functions.
Details:
1. Use mongo as an efficient in-memory KVDB:
a. fast key => value lookups
b. cache storage
2. Use sphinx as a fast & efficient query service
Class Provides
additional functions for efficient multi-lookups (zips => locations)
See also:
Geo_MongoAdmin
- loading geo data from mysql => mongo
- xml exporting geo data for sphinx
Data Structure changes from mysql:
geo.city - parent_id only if parent exists
- p=1 if main city
- new fields - zip, loc ([lat,lon])
- cwi - word index
2012-6-19 - All countries and regions are added to regions:
* regions for all countries (All country Case)
* regions within a country
To get the country from a region use:
$country_id = ($region >> 8) +1
Typical sql to extract a specific country should look like:
$country_mask = ($country_id-1) << 8;
select * from table where region & 0xFF00 = $country_mask;
Examples:
Get nearby cities:
Geo::nearbyCities(Geo::rc("Boston, MA"), 5);
*/
use MongoDB\Database;
class Geo {
// Cache of city names that have already been read.
// It cannot grow very large because there are fewer than 100K cities in total.
protected static $_CITY_NAMES = [];
// rm => latitude,longitude; filled by parseLocs() from Geo_Go::citiesLoc()
protected static $_CITIES_LOC = []; //rm =>latitude,longitude
const UNKNOWN_REGION = 0xFF00; // 255*256
const UNKNOWN_NON_US_REGION = 0xFE00; // 254*256 - the country is unknown, but it is known not to be the US
const STATE_BITMASK_NON_US = 1 << 62; // used by ::stateBitMask function
// Bit flags for the $enrich argument of parseLoc() / parseLocs().
const LOC_ENRICH_NONE = 0x00;
const LOC_ENRICH_ALL = 0xFF;
const LOC_ENRICH_ZIP = 0x01;
const LOC_ENRICH_STREET = 0x02;
const LOC_ENRICH_GEO = 0x04;
const LOC_ENRICH_RM = 0x08;
const LOC_ENRICH_COUNTY = 0x10;
const LOC_ENRICH_GOOGLE = 0x100; // use PAID google geocoding - street string ONLY !!
//
// IMPORTANT - this is a ** PAID API ** - $0.50 per 1K queries - use it wisely
// IMPORTANT - LOC_ENRICH_ALL (0xFF) does not include google API enrichment (0x100).
// IMPORTANT - LOC_ENRICH_GOOGLE is exclusive: parseLoc() returns the google result and skips all other enrichments
// results include region-city-city_main, zip+4, geo, geo_bounds, google_place_id, nn, str
// City name kinds
const CITY_OFFICIAL_NAME = 1; // NEWTON CENTRE
const CITY_NONOFFICIAL_NAME = 2; // NEWTON CNTR
const CITY_JUNK_NAME = 3; // 19,942 - BOSTON FINANCIAL DATA SERVIC
/**
*
* @param mixed $loc
* @param int $enrich - bitfield that
* @param int $debug
* @return array
* @Example:
* Geo::parseLoc("11 Rustic St, Newton, MA", Geo::LOC_ENRICH_ALL)
* Geo::parseLoc("11 Rustic St, Newton MA", Geo::LOC_ENRICH_GOOGLE) // use google geocoding instead of ours
* Geo::parseLoc("11 Rustic St, Newton, MA")
* Geo::parseLoc(["region" => 19, "city"=>627, "street_id" => 360033], Geo::LOC_ENRICH_ALL)
*
* Default $enrich bitmask is decided to be Geo::LOC_ENRICH_NONE. So only data retrieved from passed value will be returned.
* To enrich data pass appropriate $enrich.
* Maximum information to be returned:
*
* > Geo::parseLoc("11 Rustic St, Newton, MA", Geo::LOC_ENRICH_ALL)
* Array(
* region => 19
* state => MA
* region_name => Massachusssets
* city_main => 627
* city => 627
* city_name => Newton
* str => RUSTIC ST
* nn => 11
* street => 11 RUSTIC ST
* street_id => 360033
* street2 => RUSTIC ST
* geo =>
* 0 => 42.3664126
* 1 => -71.1986421
* zip => 02458
* )
*/
static function parseLoc($loc, $enrich = Geo::LOC_ENRICH_NONE, $debug=0) {
    // Turn on street-parser tracing when debug output is requested.
    if ($debug) {
        Geo_Street::$DEBUG = 1;
    }

    // Google geocoding short-circuits everything else.
    // Throws Exceptions when limit reached or other non-recoverable error.
    if ($enrich & Geo::LOC_ENRICH_GOOGLE) {
        return Geo_Geocoding::googleStreet($loc);
    }

    // Single-location convenience wrapper around the batch parser.
    $parsed = self::parseLocs([$loc], $enrich, $debug);

    return $parsed ? first($parsed) : [];
}
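/**
* @param string $loc
* @return array{string, int, int, int, string} [$country_ISO_2_letters, $rm, $street_id, $nn, $branch_tag]
*         $branch_tag is a short marker ("A", "b", "c", "d1", "d2", "e", "f") naming the code path that produced the result
* Geo::parseInternationalLoc("11 Rustic St, Newton, MA, India")
*/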
static function parseInternationalLoc(string $loc):array {
    // Returns [country ISO, rm, street_id, nn, branch tag]; the last element
    // names the code path that produced the result.
    \Profiler::info(__METHOD__);

    // Explicit US suffix: strip it and parse the rest as a US location.
    if (\hb\Str::endsWith($loc, ", United States") || \hb\Str::endsWith($loc, ', USA')) {
        $loc = \hb\Str::beforeLast($loc, ",");
        $r = self::parseLoc($loc);
        return ['US', $r['rm'] ?? 0, $r['street_id'] ?? 0, $r['nn'] ?? 0, "A"];
    }

    if ($p = strrpos($loc, ",")) {
        // Text after the last comma may be a country.
        $cn_loc = trim(substr($loc, $p+1));
        if (strlen($cn_loc) == 2) {
            // A 2-letter tail is treated as a country only when there are at
            // least two commas ("Weiden, BY, DE"); "City, ST" stays a US state.
            if (substr_count($loc, ",") > 1) {
                if ($cn = self::country_iso($cn_loc)) {
                    return [$cn, 0, 0, 0, "b"];
                }
            }
        } elseif ($cn = self::country_iso($cn_loc)) {
            return [$cn, 0, 0, 0, "c"];
        }
    } elseif ($cn = self::country_iso($loc)) {
        // The whole string is a country name; it may also parse as a region.
        $r = Geo::parseLoc($loc);
        // Only trust the parse when it produced a region: with no region,
        // id_country() yields nothing and country_iso() would not return an ISO code.
        if ($r['region'] ?? 0) {
            $cn = Geo::country_iso( Geo::id_country($r['region']) );
            return [$cn, $r['rm'] ?? 0, 0, 0, "d1"];
        }
        return [$cn, 0, 0, 0, "d2"];
    }

    // Fallback: regular parse, country derived from the parsed region.
    $r = self::parseLoc($loc);
    if (! $r || ! ($r['region']??0))
        return ["", 0, 0, 0, 'e'];
    $cn = Geo::country_iso( Geo::id_country($r['region']) );
    return [$cn, $r["rm"] ?? 0, $r['street_id'] ?? 0, $r['nn'] ?? 0, "f"];
}
/**
* @param $locs ['13 Rustic st, Newton Ma',' 2111 Firewood Ct, San Bernardino, CA 92404','3494 Darren Pl, Highland, CA 92346']
* @param int $enrich
* @param int $debug
* @return array
* @throws Exception
* @Example:
* /rd/bin/pe "v(Geo::parseLocs(['13 Rustic st Newton Ma',' 2111 Firewood Ct San Bernardino CA',
* '3494 Darren Pl, Highland CA 92346','761 10 1/2 Ave W Unit 3 West Fargo ND',
* ['street'=>'14548 High Street','city'=>'Thornton','state'=>'CO','zip'=>80602],
* 'Rustic Newton MA',
* 'Newton MO',
* 'Virginia',
* ['street' => '9022 bobbie cir', 'zip'=>92646, 'zip4'=>7816, 'region'=>0, 'city'=>0, 'geo'=>[ 33.6654, -117.968]]],255))"
* /rd/bin/pe "v(Geo::parseLocs(['street' => '415 Sylvan Ave','rc' => 328935, 'rm' => 328934, 'city' => 1255, 'city_main' => 1254, 'region' => 5, 'street_id' => 4213746, 'str' => 'Sylvan Ave', 'nn' => 415, 'state' => 'CA']))"
*/
static function parseLocs($locs, $enrich = Geo::LOC_ENRICH_NONE, $debug=0){
    // Batch version of parseLoc(): parses every entry via Geo_Street::Locs()
    // and then enriches the results according to the $enrich bit flags.
    if ($debug) {
        Geo_Street::$DEBUG = 1;
    }
    $_GEO = [];      // loc key => ['q' => street_loc query, 'res' => lookup result]
    $_CITIES = [];   // rc => [region, city] for locs that still need a city_name
    $_STREETS = [];  // street_id => 1 for locs that carry a street_id
    $_COUNTIES = []; // rm => county fields already present on the loc
    try {
        $_locs = Geo_Street::Locs($locs, ($enrich & Geo::LOC_ENRICH_STREET), $debug);
    } catch (Exception $ex) {
        // Deliberate best effort: a parser failure yields an empty result.
        $_locs = [];
    }

    // Pass 1: resolve rm and collect what has to be looked up in bulk.
    $_RMS = [];
    foreach ($_locs as $k => &$L) {
        try {
            if (!($L["rm"]??0) && ($L["rc"]??0) && ($enrich & Geo::LOC_ENRICH_RM)) {
                $L["rm"] = Geo::rm($L["rc"]);
            }
            if (!($L["rm"]??0) && ($L["region"]??0) && ($L["city"]??0) && ($enrich & Geo::LOC_ENRICH_RM)) {
                $L["rm"] = Geo::rm($L["region"], $L["city"]);
            }
        } catch (Exception $ex) {
            Console::err($ex->getMessage(). " ". x2s($L),'red');
        }
        // City location not cached yet - fetch it below in a single call.
        if (($L["rm"]??0) && ! (self::$_CITIES_LOC[$L["rm"]]??0)) {
            $_RMS[] = $L["rm"];
        }
        // A street-level lookup is possible only with both rm and street_id.
        if ((!($L["geo"]??0) && ($enrich & Geo::LOC_ENRICH_GEO)) || (!($L["zip"]??0) && ($enrich & Geo::LOC_ENRICH_ZIP))) {
            if (($L["rm"]??0) && ($L["street_id"]??0)) {
                $_GEO[$k]['q'] = ["rm" => $L["rm"], "street_id" => $L["street_id"], "nn" => $L["nn"]??""];
            }
        }
        if ($enrich & self::LOC_ENRICH_RM) {
            if (!empty($L["region"])) {
                if (empty($L["state"])) {
                    $L["state"] = Geo::region_name($L["region"], true);
                }
                if (empty($L["region_name"])) {
                    $L["region_name"] = Geo::region_name($L["region"], false);
                }
                if (!empty($L["city"]) && empty($L["city_name"])) {
                    $_CITIES[Geo::rc($L["region"], $L["city"])] = [$L["region"], $L["city"]];
                }
            }
            if (!empty($L["street_id"])) {
                $_STREETS[$L["street_id"]] = 1;
            }
        }
        if ($enrich & self::LOC_ENRICH_COUNTY) {
            if ($L["rm"]??0) {
                $_COUNTIES[$L["rm"]] = ['county_id' => $L["county_id"]??null, 'county' => $L["county"]??null, 'county_name' => $L["county_name"]??null];
            }
        }
    }
    unset($L); // drop the dangling reference into $_locs

    if ($_RMS) {
        // Re-index after de-duplication so request and response positions line up.
        $_RMS = array_values(array_unique($_RMS));
        $rs = Geo_Go::citiesLoc($_RMS);
        foreach ($_RMS as $k => $rm) {
            if (ctype_digit((string)$rm))
                self::$_CITIES_LOC[$rm] = $rs[$k] ?? [0, 0, 0];
        }
    }

    if ($_GEO && (($enrich & Geo::LOC_ENRICH_GEO) || ($enrich & Geo::LOC_ENRICH_ZIP))) {
        if (count($_GEO) > 1) {
            // Several lookups: queue them all, execute once, then call lookup() again for the results.
            try {
                $DB_Parallel = DBP();
                foreach ($_GEO as $v) {
                    $rm = $v['q']['rm'];
                    if (isset(self::$_CITIES_LOC[$rm])) {
                        $v['q']['geo'] = self::$_CITIES_LOC[$rm];
                        KRDB::i("street_loc")->lookup($v['q'], false, $DB_Parallel);
                    }
                }
                $DB_Parallel->exec();
                foreach ($_GEO as $k => $v) {
                    $rm = $v['q']['rm'];
                    if (isset(self::$_CITIES_LOC[$rm])) {
                        $v['q']['geo'] = self::$_CITIES_LOC[$rm];
                        $_GEO[$k]['res'] = KRDB::i("street_loc")->lookup($v['q'], false, $DB_Parallel);
                    }
                }
                $DB_Parallel->reset();
            } catch (Exception $ex) {
                Console::err($ex->getMessage(). " ". x2s($_GEO),'red');
            }
        } else {
            $v = current($_GEO);
            $k = key($_GEO);
            $rm = $v['q']['rm'];
            if (isset(self::$_CITIES_LOC[$rm])) {
                $v['q']['geo'] = self::$_CITIES_LOC[$rm];
            }
            $_GEO[$k]['res'] = KRDB::i("street_loc")->lookup($v['q'], false);
        }
        foreach ($_GEO as $k => $v) {
            // 'res' is missing when the city location was unknown or the
            // parallel lookup threw - treat that as "nothing found".
            $SL = $v['res'] ?? [];
            if (!empty($SL["latlong"])) {
                $_locs[$k]["enriched"] = ($_locs[$k]["enriched"] ?? 0 ) | self::LOC_ENRICH_GEO;
                $_locs[$k]["geo"] = $SL["latlong"];
            }
            if (empty($_locs[$k]["zip"]) && ($enrich & Geo::LOC_ENRICH_ZIP) && !empty($SL["zip"])) {
                $_locs[$k]["enriched"] = ($_locs[$k]["enriched"] ?? 0) | self::LOC_ENRICH_ZIP;
                $_locs[$k]["zip"] = sprintf("%05d", $SL["zip"]);
            }
            if (empty($_locs[$k]["zip4"]) && ($enrich & Geo::LOC_ENRICH_ZIP) && !empty($SL["zip4"])) {
                $_locs[$k]["enriched"] = ($_locs[$k]["enriched"] ?? 0) | self::LOC_ENRICH_ZIP;
                $_locs[$k]["zip4"] = sprintf("%04d", $SL["zip4"]);
            }
        }
    }

    $cities_names = [];
    if ($_CITIES && ($enrich & (Geo::LOC_ENRICH_GEO | Geo::LOC_ENRICH_RM))) {
        $cities_names = Geo::cities(array_keys($_CITIES));
    }
    $street_names = [];
    // Parenthesised on purpose: "&" binds tighter than "|", so the unparenthesised
    // form was always truthy and ignored $enrich.
    if ($_STREETS && ($enrich & (Geo::LOC_ENRICH_GEO | Geo::LOC_ENRICH_STREET))) {
        $street_names = Geo::streetNames(array_keys($_STREETS));
    }
    $county_ids = [];
    if ($_COUNTIES) {
        try {
            $county_ids = Geo::citiesToCounties(array_keys($_COUNTIES));
        } catch (Exception $ex) {
            Console::err($ex->getMessage(). " ". x2s($_COUNTIES),'red');
        }
    }

    // Pass 2: merge the bulk lookup results back into each loc.
    foreach ($_locs as $k => &$L) {
        if (($L['region']??0) && ($L['city']??0) && isset($cities_names[$L['region']][$L['city']][0])) {
            $L['city_name'] = $cities_names[$L['region']][$L['city']][0];
        }
        if (($L['street_id']??0) && isset($street_names[$L['street_id']])) {
            $L['str'] = mb_convert_case($street_names[$L['street_id']], MB_CASE_TITLE);
            if (!isset($L['street'])) {
                $L['street'] = $L['str'];
            }
        }
        if (($L['rm']??0) && isset($county_ids[$L['rm']])) {
            if (empty($L["county_id"])) {
                $L["enriched"] = ($L["enriched"] ?? 0) | self::LOC_ENRICH_COUNTY;
                $L["county_id"] = $county_ids[$L['rm']];
            }
            if (empty($L["county"]) && !empty($L["county_id"])) {
                $L["enriched"] = ($L["enriched"] ?? 0) | self::LOC_ENRICH_COUNTY;
                $L["county"] = Geo::county_name($L["county_id"], 0, true);
            }
            if (empty($L["county_name"]) && !empty($L["county_id"])) {
                $L["enriched"] = ($L["enriched"] ?? 0) | self::LOC_ENRICH_COUNTY;
                $L["county_name"] = Geo::county_name($L["county_id"], 0, false);
            }
        }
        // Still no coordinates after the street lookup - fall back to resolveLoc().
        if (empty($L["geo"]) && (($enrich & Geo::LOC_ENRICH_GEO) || ($enrich & Geo::LOC_ENRICH_ZIP))) {
            self::resolveLoc($L);
        }
    }
    unset($L); // do not return an array with a live reference in it
    return $_locs;
}
// parse free text geo
// extract region or region+city from it
// opts -
// us-only - as is - SO FAR - this is a only implemented option
// autocomplete -- when city is unknown - complete city using parsed state or geoip state
public static function parse(/* free text */ $geo, array $opts=[]) { # cn, region, city, main
    $geo = trim($geo);
    $none = [0, 0, 0, 0];

    // Trailing ZIP / ZIP+4 (optionally followed by a number): resolve by ZIP when it is known.
    if (preg_match('!\b(\d{5})-?(\d{4})?([\s]+[\d]+)?$!', $geo, $m)) {
        if (!empty($m[1])) {
            $byZip = Geo::_zip($m[1]);
            if ($byZip) {
                return [1, $byZip[0], $byZip[1], $byZip[4]];
            }
        }
        // Unknown ZIP - cut the matched tail off and keep parsing the rest.
        $geo = trim(substr($geo, 0, -strlen($m[0])));
    }

    // Purely numeric input is a ZIP without leading zeroes, ex: 2458
    if (is_numeric($geo)) {
        if (strlen($geo) > 5) {
            return $none;
        }
        $byZip = Geo::_zip($geo);
        return $byZip ? [1, $byZip[0], $byZip[1], $byZip[4]] : $none;
    }

    if (! $geo) {
        return $none;
    }

    // Two characters: short state code.
    if (strlen($geo) == 2) {
        $region = Geo::regions(5, $geo);
        return $region ? [1, $region, 0, 0] : $none;
    }

    // Full state name.
    $region = Geo::regions(7, $geo);
    if ($region) {
        return [1, $region, 0, 0];
    }

    // "City State": try Geo_Go first, then Geo::city() for the corner case
    // where Geo::city("No-Such-City, MA") should still return MA.
    $c = Geo_Go::city2id($geo);
    if (!$c || !$c[1]) {
        $c = Geo::city($geo);
    }
    if (!$c) {
        return [1, 0, 0, 0];
    }
    if (!$c[1]) {
        return [1, $c[0], 0, 0];
    }

    $info = Geo::_city($c[0], $c[1]);
    return [1, $c[0], $c[1], NVL($info[3], $c[1])];
}
// Convert To/From uint32 form of Region-City
//
// TO UINT32
// Geo::rc($r, $c) => (uint32) $rc
// Geo::rc("City State") => (uint32) $rc
// Geo::rc(['city' => 'Boston MA']) => (uint32) $rc
//
// FROM UINT32 => [region, $city] (UNWRAP)
// Geo::rc((uint32) $rc) => [$r, $c]
//
// ARRAY TO UINT32
// Geo::rc([$r, $c]) => (uint32) $rc
// Geo::rc(["region" => $r, "city" => $c]) => (uint32) $rc
// Geo::rc(["region" => $r, "city_main" => $c]) (uint32) $rc
//
// ARRAY OF ARRAYS => ARRAY OF UINT32
// Geo::rc([[$r, $c], [$r2, $c2], $rc3]) => [(uint32) $rc1, $rc2, (uint32) $rc3]
// Geo::rc(PublicProfile $pp) == $pp->RC == [(uint32) $rc1, $rc2, ..] << ALL PLACES LIVED
//
// Check examples in tests/Geo.stest
static function rc($region, $city=-1) { # UINT packed Geo | [$region, $city]
// Polymorphic packer/unpacker for the region-city value; the branch taken
// depends on the argument count and on the type of the first argument.
// Two arguments: pack (region, city) into one integer, region in the high 16 bits.
if (func_num_args()==2) {
if ($region > 65535)
throw new UnexpectedValueException("region is unsigned word. got: $region");
if ($city > 65535)
throw new UnexpectedValueException("city is unsigned word. got: $city");
return ((int)$region) << 16 | (int)$city;
}
$rc = $region;
// Numeric: unpack into [region, city]
if (is_numeric($rc)) # >> [region, city]
return [$rc >> 16, $rc & 0xFFFF];
if (is_array($rc)) { # uint32
// Empty array packs to 0
if (! $rc)
return 0;
// List whose first element is an array: convert element by element,
// numeric elements are passed through unchanged.
if (isset($rc[0]) && is_array($rc[0])) {
$r = [];
foreach ($rc as $t) {
if (is_numeric($t))
$r[] = $t;
else
$r[] = Geo::rc($t);
}
return $r;
}
if (isset($rc[0])) # [0=>region, 1=>city]
return Geo::rc($rc[0], $rc[1]);
if (isset($rc['city'])) { # [region => , city =>]
if (is_numeric($rc['city']) && !empty($rc['region'])) {
return Geo::rc($rc['region'], $rc['city']);
}
// Otherwise 'city' is handed to Geo::parse() as free text
[$cn, $r, $c, $m] = Geo::parse($rc['city']);
return Geo::rc($r, $c);
}
if (isset($rc['city_main']) && !empty($rc['region'])) {
return Geo::rc($rc['region'], $rc['city_main']);
} # [region => , city_main =>]
if (isset($rc['region'])) {# [region => ]
return $rc['region'] ? Geo::rc($rc['region'], 0) : 0;
}
// Last resort: resolve through the zip code
if ($rc['zip']??0) {
$_r = Geo::_zip($rc['zip']);
if($_r[0]??0) {
return Geo::rc($_r[0], $_r[1]);
}
}
throw new UnexpectedValueException("can't parse rc: ".x2s($rc));
}
if (is_object($rc)) # PublicProfile => uint32
return $rc->RC;
if (is_string($rc)) { # uint32
// try Geo_Go
$r = Geo_Go::cities2id([$rc]);
[$r, $c, $m] = reset($r);
if ($c)
return Geo::rc($r, $c);
// Geo_Go found no city - fall back to Geo::parse()
[$cn, $region, $city] = Geo::parse($rc);
return Geo::rc($region, $city);
}
// Any other type (null, bool, ...) falls through and yields null
}
// region / city_main from
// A. [region: x, city_main: x, ...] hash => uint32
// Geo::rm(['region' => 19, 'city_main' => 499])
// A.2 [state: 'ST', city_main: x, ...] hash => uint32
// [state: 'State', city_main: x, ...] hash => uint32
// Geo::rm(['region' => 19, 'city_main' => 499])
// B. [region, city, city_main] array => uint32
// Geo::rm([19, 0, 499])
// C. [region, city_main] array => uint32
// D. $rc => uint32
// E. "City State" => uint32
// Geo::rm('Waban MA')
// Geo::rm(['city' => 'Waban MA'])
//
// F. "RM" in array [..., "rm"] array => uint32
// "RC" in array [..., "rc"] array => uint32
// G. array of Above => array of uint32
// Geo::rm([['region' => 19, 'city' => 642, 'city_main' => 627], [19, 642, 627], [19, 627], "Newton MA", "Waban MA", 1245811])
// Check examples in tests/Geo.stest
static function rm(/* array */ $rc, $city=-1) { # UINT packed Geo
// Builds the packed region / main-city value from any supported representation.
// Two arguments: (region, city) is forwarded as the [region, city] array form.
if (func_num_args()==2) {
return self::rm([$rc, $city]); //
}
if (! is_array($rc)) {
// Numeric rc: unpacked via Geo::rc() and re-entered through the [region, city] array branch below
if (is_numeric($rc)) {
return Geo::rm(Geo::rc($rc));
}
if (is_string($rc)) { // "String" => uint
// try Geo_Go
$r = Geo_Go::cities2id([$rc]);
[$r, $c, $m] = reset($r);
// No main city from Geo_Go - fall back to Geo::parse()
if ($m == 0) {
[$cn, $region, $city, $main] = Geo::parse($rc);
return Geo::rc($region, $main);
}
return Geo::rc($r, $m);
}
throw new UnexpectedValueException("can't parse rc".x2s($rc));
}
// List whose first element is an array: convert element by element
if (isset($rc[0]) && is_array($rc[0])) {
$r = [];
foreach ($rc as $rm)
$r[] = Geo::rm($rm);
return $r;
}
// A ready-made 'rm' is returned as is
if (isset($rc['rm']))
return $rc['rm'];
if (isset($rc['rc'])) {
return Geo::rm(Geo::rc($rc["rc"]));
}
//A.2 case cast to A.
if (isset($rc['state']) && empty($rc['region'])) {
$rc['region'] = Geo::_id_region(1, $rc['state']);
}
if (!empty($rc['city_main']) && ($rc['region']??0))
return Geo::rc($rc['region'] ?? 0, $rc['city_main']);
if (!empty($rc['city'])) {
// RESOLVE CITY => CITY_MAIN
if (is_numeric($rc['city']) && ($rc['region']??0)) {
$m = Geo::main_city($rc['region'], $rc['city']);
return Geo::rc($rc['region'], $m);
}
// Otherwise "city state" is handed to Geo::parse() as free text
[$cn, $r, $c, $m] = Geo::parse($rc['city'] . " " . ($rc['state']??""));
return Geo::rc($r, $m);
}
if (!empty($rc['zip'])) {
$_zip_resolve= Geo::_zip($rc['zip']);
if(!$_zip_resolve)
return 0;
return Geo::rc($_zip_resolve[0], $_zip_resolve[4]);
}
if (isset($rc['region'])) {
if (! $rc['region']) // NULL, 0
return 0;
return Geo::rc($rc['region'], 0);
}
// Only positional forms are left: [region, city] or [region, city, city_main]
if (! isset($rc[0]))
throw new UnexpectedValueException("can't parse rc:".x2s($rc));
if (! isset($rc[2]) && isset($rc[1])) { // [Region, City] support
// RESOLVE CITY => CITY_MAIN
$m = Geo::main_city($rc[0], $rc[1]);
return Geo::rc($rc[0], $m);
}
return Geo::rc($rc[0], ($rc[2] ?? 0));
}
// rc as string
// rc - any Region-City presentation
static function rcs($rc) { # string
    // Nothing to name.
    if (! $rc) {
        return "";
    }

    // A list of rc values (arrays or packed integers) is converted element by element.
    $isList = !is_string($rc) && isset($rc[0]) && (is_array($rc[0]) || $rc[0] > 65535);
    if ($isList) {
        $names = [];
        foreach ($rc as $item) {
            $names[] = Geo::rcs($item);
        }
        return $names;
    }

    // Anything that is not already a packed number goes through Geo::rc() first.
    if (! is_numeric($rc)) {
        $rc = self::rc($rc);
        if (! is_numeric($rc)) {
            throw new UnexpectedValueException("Can't make a Geo:rc ".x2s($rc));
        }
    }

    // Unpack and render "City, ST".
    [$region, $city] = self::rc($rc);
    return Geo::name($region, $city);
}
// so far idea is:
// rcm = [rc(region,city), rm := rc(region, city_main)]
// For display - we always use [region,city]
// For city match comparizon we always use [region,city_main]
static function rcm($region, $city, $main=0) { # [RC(city), RC(city_main)]
    // Look the main city up only when the caller did not supply it.
    $mainCity = $main ? $main : Geo::main_city($region, $city);

    $rc = Geo::rc($region, $city);
    $rm = Geo::rc($region, $mainCity);

    return [$rc, $rm];
}
// rcm is [rc, rm], compare using rm
static function rcmEqual($rcm1, $rcm2) { # (bool)
    // Two rcm pairs denote the same place when their rm (index 1) values match.
    $rmLeft = $rcm1[1];
    $rmRight = $rcm2[1];

    return $rmLeft == $rmRight;
}
// Generate US State Bitmask from region list
// ex: $mask = (int) Geo::stateBitMask((array) $regions);
// Specific code: self::STATE_BITMASK_NON_US used for non US regions
static function stateBitMask(array $regions) { # (bigint) $state_bitmask | false (no regions)
    // Folds a list of region ids into one integer: bit (region-1) for each
    // region in 1..51, plus the STATE_BITMASK_NON_US bit for anything above.
    if (! $regions)
        return false;
    $mask = 0;
    foreach ($regions as $r) {
        if ($r > 0 && $r < 52) {
            // Lower bound checked explicitly: a negative shift count throws ArithmeticError.
            $mask |= 1 << ($r - 1);
        } elseif ($r >= 52) {
            // Everything from 52 up is non-US, matching the "region < 52" US test
            // used elsewhere in this class; 52 itself used to set no bit at all.
            $mask |= self::STATE_BITMASK_NON_US;
        }
        // 0 / null / negative regions contribute nothing.
    }
    return $mask;
}
// --------------------------------------------------------------------------------
// GEO-MONGO SPECIFIC functions
// > j Geo::zips_names( [2458, 2481] )
// { "2458":"Newton, MA 02458",
// "2481":"Wellesley Hills, MA 02481"
// }
static function zips_names(array $zips) { // zip => location( "Boston, MA 02458" )
    $names = [];
    $records = M("geo.zip1")->find_in($zips, "mcity state");
    foreach ($records as $zip => $rec) {
        // "Boston, MA 02458" - the zip is left-padded to 5 digits, no comma before it.
        $padded = str_pad($zip, 5, "0", STR_PAD_LEFT);
        $names[$zip] = $rec["mcity"].", ".$rec["state"]." ".$padded;
    }
    return $names;
}
static function zips_names_a($zip) { // geo.zip1 record | null
    // Canadian postal codes ("K1A 0B1") are recognised but not backed by any
    // data yet, so they resolve to an empty record.
    // TODO: Create proper database with canadian zip codes
    // https://en.wikipedia.org/wiki/Postal_codes_in_Canada
    $looksCanadian = is_string($zip) && fm("!([a-z][0-9][a-z]\s{0,1}[0-9][a-z][0-9])!i", $zip);
    if ($looksCanadian) {
        return [];
    }

    // US zip: geo.zip1 is keyed by the integer zip. A record looks like:
    //   _id => 2458, city => Newton, city_id => 627, city_main => 627,
    //   cwi => 1428067277, mcity => Newton, mwi => 1428067277,
    //   region => 19, state => MA, loc => [-71.1872, 42.3527]
    return M("geo.zip1")->findOne((int)$zip);
}
// nearby zip code for given location
static function nearby_zip($lon, $lat, $max=5, $city_id=false) { // zip
    // $near query on the "loc" field, optionally restricted to one city.
    $query = ['loc' => ['$near' => [$lon, $lat], '$maxDistance' => $max]];
    if ($city_id) {
        $query["city_id"] = (int) $city_id;
    }
    $query[":limit"] = 1;

    $found = M("geo.zip1")->f($query, ["_id"=>1]);

    // First (and only) hit, or 0 when nothing is in range.
    return $found ? reset($found) : 0;
}
// location by zip
static function zip_location($zip) { // [lat,long]
    $record = self::_zip($zip);
    if (empty($record[5])) {
        return [];
    }

    // Element 5 is returned with its two entries swapped.
    $loc = $record[5];
    return [$loc[1], $loc[0]];
}
// loc: [lat, lon] or ZIP (a ZIP is resolved through Geo::zip_location())
// Examples:
// Geo::distance([42.3527,-71.1872], [42.3044,-71.2849])
// Geo::distance(2458, 2481)
// Geo::distance(2458, 10001) = newton <=> new york
// Use Geo::isNear() instead of this function when possible
static function distance($loc1, $loc2) { # distance in miles
    // Anything that is not already a coordinate pair is treated as a ZIP.
    $from = is_array($loc1) ? $loc1 : self::zip_location($loc1);
    $to = is_array($loc2) ? $loc2 : self::zip_location($loc2);

    return self::distanceFast($from, $to);
}
static function distanceFast($loc1, $loc2) { # dist miles
    // Great-circle distance between two [lat, lon] points given in degrees
    // (spherical law of cosines).
    [$lat1, $lon1] = $loc1;
    [$lat2, $lon2] = $loc2;

    $phi1 = deg2rad($lat1);
    $phi2 = deg2rad($lat2);
    $cosine = sin($phi1) * sin($phi2) + cos($phi1) * cos($phi2) * cos(deg2rad($lon1 - $lon2));

    // Rounding can push the value a hair outside [-1, 1] for identical or
    // antipodal points, and acos() then returns NAN - clamp first.
    $cosine = max(-1.0, min(1.0, $cosine));

    // Arc in degrees -> nautical miles (x60) -> statute miles (x1.1515).
    return rad2deg(acos($cosine)) * 60 * 1.1515;
}
// loc - location [lat, lon] (the layout Geo::zip_location() returns and Geo::distanceFast() expects)
// locs - a single location [lat, lon] or an array of such locations
// NOTE: picking the location out of each entry by a "key" is not implemented
// $distance_miles - max distance for which 1 is returned, default - 32 miles
// @future: this function could be sped up further with a cheap "longitude" difference pre-check
// @return: 0 - not near, 1 - within given distance
// @example: Geo::isNear(Geo::zip_location(2459), Geo::zip_location(2481))
static function isNear(array $loc, array $locs, $distance_miles = 32) { # 0 | 1
    // Returns 1 when $loc is within $distance_miles of at least one of $locs.
    // All locations are [lat, lon] pairs, the layout distanceFast() expects.

    // Accept one bare pair as well as a list of pairs; look at the first
    // element instead of key 0 so lists with non-zero-based keys also work.
    $first = reset($locs);
    if (! is_array($first))
        $locs = [$locs];

    $dlat_max = 0.014474 * $distance_miles; // lat diff of 0.014474 is one mile
    foreach ($locs as $loc2) {
        // Skip broken entries: empty, missing a coordinate, or nested arrays.
        if (! $loc2 || ! isset($loc2[0], $loc2[1]) || is_array($loc2[1])) {
            continue;
        }
        // Cheap pre-filter on latitude, which is index 0 in a [lat, lon] pair.
        // Index 1 is longitude, whose degrees are shorter than a latitude
        // degree away from the equator, so filtering on it dropped genuine
        // east-west neighbours.
        $dlat = abs($loc2[0] - $loc[0]);
        if ($dlat > $dlat_max) // too far away
            continue;
        $distance = Geo::distanceFast($loc, $loc2);
        if ($distance <= $distance_miles)
            return 1;
    }
    return 0;
}
static function __city($mongo_id) { # hash
    // Raw geo.city record addressed by its integer mongo id.
    $id = (int) $mongo_id;
    return M("geo.city", $id)->_;
}
static function __zip($zip) { # hash
    // Raw geo.zip1 record addressed by the integer zip.
    $id = (int) $zip;
    return M("geo.zip1", $id)->_;
}
// $c - ["region" => region, "city" => city] || mongo_city_id || "City, State"
static function city_zip($c) { # center zip
    // Arrays and numeric ids are already valid geo.city selectors.
    if (is_array($c) || is_numeric($c)) {
        return M("geo.city")->one($c, "zip");
    }

    // Otherwise resolve "City, State" to [region, city] first.
    $resolved = self::city($c);
    if (! $resolved) {
        return 0;
    }

    $selector = ["region" => $resolved[0], "city" => $resolved[1]];
    return M("geo.city")->one($selector, "zip");
}
/**
* Fetches nearby cities RCs ordered by distance asc. Uses mongo collection geo.near_city, see Geo_MongoAdmin::generateNearbyCities()
* @param int $rc - rc of desired city
* @param mixed $limit - optional (false - no limits) limit of number of returned nearby cities.
* @return array
*/
static function nearbyCities($rc, $limit=false) {
    $stored = M("geo.near_city")->one((int)$rc, "rms");
    if (! $stored) {
        return [];
    }

    // false means "no limit"; any other value caps the number of entries.
    $rms = (false !== $limit) ? array_slice($stored, 0, $limit, true) : $stored;

    // The rm values are the keys of the stored map.
    return array_keys($rms);
}
// fix incorrect locs types
// Ex: v Geo::fixLocsTypes( pfl2("tim green:LinkedIn/1")->facts )
static function fixLocsTypes(array $locs) { # $fixed_locs
    foreach (array_keys($locs) as $key) {
        // Ids must be integers.
        foreach (['city', 'city_main', 'region'] as $field) {
            if (!empty($locs[$key][$field])) {
                $locs[$key][$field] = (int) $locs[$key][$field];
            }
        }

        $geo = $locs[$key]['geo'] ?? [];
        if (! $geo) {
            continue;
        }

        // Some stored pairs have the two values in the wrong order; a negative
        // first value with a positive second one is flipped.
        $swapped = $geo[1] > 0 && $geo[0] < 0;
        $locs[$key]['geo'] = $swapped
            ? [(float) $geo[1], (float) $geo[0]]
            : [(float) $geo[0], (float) $geo[1]];
    }
    return $locs;
}
// add missing geo to locs, fix types in loc
// Geo_Go is used
// Ex: Geo::addMissingGeoToLocs([['city' => 499, 'region' => 19], ['city' => 499, 'region' => 29]])
static function addMissingGeoToLocs(array $locs) { # $enriched_locs
    // Normalises types, then fills 'geo' from the city location (Geo_Go) for
    // every loc that has a city but no coordinates yet.
    $locs = self::fixLocsTypes($locs);
    $geo_query = [];   // rc values to look up, in request order
    $geo_query_k = []; // request position => key in $locs
    foreach ($locs as $k => $loc) {
        // Only *missing* geo is added: coordinates already on the loc are kept
        // instead of being overwritten with the city location.
        if (!empty($loc['geo']))
            continue;
        $city = NVL($loc['city']??0, $loc['city_main']??0);
        if (! $city)
            continue;
        $geo_query[] = Geo::rc($loc['region']??0, $city);
        $geo_query_k[] = $k;
    }
    if ($geo_query) {
        $r = Geo_Go::citiesLoc($geo_query);
        foreach ($r as $k => $g) {
            // Ignore any response entry that has no matching request.
            if (! isset($geo_query_k[$k]))
                continue;
            $locs[$geo_query_k[$k]]['geo'] = [$g[0], $g[1]];
        }
    }
    return $locs;
}
// extract "Locs" (array of loc) from PFL2 Item
// Ex:
// Geo::extractLocsFromItem( pfl2("Tim Brown:LinkedIn/1") )
// Geo::extractLocsFromItem(pfl2("tim groen:LinkedIn/4")->d())
static function extractLocsFromItem(/*array*/ $item, $us_only=true, $resolve_missing_geo=false) { # [loc,loc,...] aka Locs
    // Collects the locations of a PFL2 item (object or plain array form).
    if (is_object($item)) {
        if ($item->_deleted)
            return [];
        $item = $item->d();
    }
    // First non-empty of facts / locs / loc.
    $locs = NVL($item['facts']??[], $item['locs']??[], $item['loc']??[]);
    if ($locs) {
        if (isset($locs['region'])) // ONE LOCATION
            $locs = [$locs];
        if (is_assoc($locs)) { # at this point locs should be AH (array-of-hashes)
            \Log::warn("Bad locs: ".($item['uk'] ?? '')."\n Locs: ".x2s($locs));
            return [];
        }
    }
    # facts -- only those that have a region
    $locs = $locs ? AH::items($locs, ['field_exists' => 'region']) : [];
    // US Only Items, no negative regions
    if ($us_only)
        $locs = array_filter($locs, function($d) { return $d['region']>0 && $d['region'] < 52; });
    // `address` field - to be inserted as a first location
    if ($a = ($item['address'] ?? [])) {
        if (is_array($a) && ($a['region']??0)>0 && ($a['region']??0) < 52) {
            // Do we already have the address? Compare against the actual first
            // element: array_filter() keeps keys, so key 0 may be gone even
            // though other locations remain.
            $first = $locs ? reset($locs) : [];
            if ((($first['region']??0) != ($a['region']??0)) || ($first['city_main']??0) != ($a['city_main']??0))
                array_unshift($locs, hash_subset($a, "region city_main"));
        }
    }
    if ($resolve_missing_geo)
        $locs = self::addMissingGeoToLocs($locs);
    else
        $locs = self::fixLocsTypes($locs);
    return $locs;
}
// ================================================================================
// Original Geo functions
/*
FORM1: name($region, $city=0, $cn=1, $city_cut)
FORM2: name($hash) // required elems ["region"], ["city"] // optional ["cn"], [city_small], [zip]
PARAMETERS:
region - ID | state | string | RC (see geo::rc)
city - ID | string
cn - ID | ISO | string
city_cut - if present cut city name to use "city_cut+3" characters
HASH FORM ONLY
city_small - if passed used as city - ID | city_name
zip - if passed and no small_city used for name generation
Examples:
>>> Geo::name(0,0,"US")
United States
>>> Geo::name(0,0,1)
United States
>>> Geo::name("MA")
MA
>>> Geo::name("MA",499)
Boston, MA
>>> Geo::name("MA","Boston")
Boston, MA
>>> Geo::name(19,"Boston")
Boston, MA
>>> Geo::name(19,"xBoston")
>>> Geo::name(19,499)
Boston, MA
>>> Geo::name( ["cn"=>"Russia"] )
Russia
*/
/* COPY */ static function name($region, $city=0, $cn=1, $city_cut=0) { # str
// Builds a display name for a region / city / country.
// FORM2: unpack the hash into the positional arguments first.
if (is_array($region) ) {
$city_small = isset($region["city_small"]) ? $region["city_small"] : 0;
[$region, $city, $cn, $zip]=
array( $region["region"]??0, $region["city"]??0,
isset($region["cn"])? $region["cn"] : "",
isset($region["zip"]) ? $region["zip"] : "");
// No country given: derive it from the region, or give up when there is no region either
if (! $cn) {
if (! $region) return "";
$cn=self::id_country($region);
}
// city_small wins over city; otherwise a zip, when present, decides the name
if ($city_small) $city=$city_small;
else
if ($zip) return self::zip_name($zip);
}
if ($region > 65535) // Region is RC
return Geo::rcs($region);
// Convert string arguments to ids, bailing out early where nothing can be resolved
if (! is_numeric($cn)) $cn=self::id_country($cn);
if (! $region) return self::country_name($cn);
if (! is_numeric($region)) $region=self::_id_region($cn,$region);
if (! $region) return "";
if (! is_numeric($city)) $city=self::_id_city($region, $city);
[$country, $reg, $city]=self::name_path($region, $city);
if (! $reg || ! $country) {// bad id case
// special case for new regions
$id = self::id_country($region);
if ($id)
return (self::region_list($id)[$region]??'').', '.self::country_iso($id);
return "";
}
if ($city_cut)
$city=cut($city, $city_cut);
// US and Canada: "City, ST"; everywhere else: "City Region, Country"
if ($country=='USA' || $country=='United States' || $country=='Canada')
return cs("%s, ",$city).$reg;
return cs("%s ",$city).$reg.", $country";
}
/* COPY */ static function name_path($region, $city=0) { # list<country_str, region_str, city_str>
    // Resolves ids to display strings: [country, region (short name preferred), city].
    $name = $rname = $short = $cn = null;
    if (! $city) {
        // Region only: the city part is an empty string.
        $name = "";
        [$rname, $short, $cn] = self::_region($region);
    } else {
        $rc_data = self::_city($region, $city);
        if (!empty($rc_data)) {
            // The 4th element (parent city) is not needed here.
            [$name, $rname, $short, , $cn] = $rc_data;
        }
    }
    // Country 1 is always rendered as "USA".
    $country = $cn==1 ? "USA" : self::country_name($cn);
    return [$country, NVL($short, $rname), $name];
}
// STRING to ID!!!
// REGION AND CITY are STRINGS
// cn - ID | string
// region_str - region_name | state
// Geo::id_path(1,"MA", "Newtonville")
// Geo::id_path("US","MA", "Newtonville")
/* COPY */ static function id_path($cn, $region_str, $city_str='') { // [cn, region, city, main_city]
    // Country: an id is used as is, anything else is resolved by name / ISO.
    if (! is_numeric($cn)) {
        $cn = self::id_country($cn);
    }
    if (!$cn) {
        \Log::error("BAD CN");
    }

    // Region: numeric values are already ids, non-empty strings are looked up.
    if (is_numeric($region_str)) {
        $region = $region_str;
    } elseif ($region_str) {
        $region = self::_id_region($cn, $region_str);
    } else {
        $region = 0;
    }

    // City is looked up only when a name was given.
    $city = $city_str ? self::_id_city($region, $city_str) : 0;

    return [$cn, $region, $city, self::main_city($region, $city)];
}
/* COPY */ static function geoip_path($ip) { # [cn, region, city, main_city]
    // GeoIP supplies country and state; id_path() turns them into ids.
    $state = GeoIP::state($ip, true);
    [$country, $region] = $state;

    return self::id_path($country, $region);
}
/**
* Returns Loc structure according to given IP
* @param string $ip
* @return array SH_Loc
* @example
* php> Geo::geoIpLoc("66.249.79.199")
Array(
region => 5
city_main => 1075
zip => 94043
rc => 328755
city => 1075
rm => 328755
geo =>
0 => 37.4183
1 => -122.071
state => CA
region_name => California
city_name => Mountain View
)
*/
static function geoIpLoc($ip="") {
    // Builds a Loc structure for an IP address; non-US or unknown addresses yield [].
    if (!$ip) {
        // No IP supplied - use the one ip() reports.
        $ip = ip();
    }
    // Pass the resolved IP on: it used to be computed and then ignored, so an
    // explicitly requested address never reached the lookup.
    // NOTE(review): assumes GeoIP::city() takes the IP as its first argument,
    // like GeoIP::state() - confirm.
    $info = GeoIP::city($ip);
    if (($info["country"] ?? "") != "US") {
        return [];
    }
    // Let the regular parser resolve and enrich state / city / zip.
    return Geo::parseLoc(
        ["state" => $info["state"] ?? "", "city_name" => $info["city"] ?? "", "zip" => $info["zip"] ?? 0],
        Geo::LOC_ENRICH_ALL
    );
}
// id_path ( zip )
/* COPY */ static function zip_path($zip) { // [cn, region, city, main_city]
    // _zip() yields [region, city, city_name, state, city_main (city.id)]
    $rec = self::_zip($zip);
    if ($rec[0]) {
        return [self::id_country($rec[0]), $rec[0], $rec[1], $rec[4]];
    }

    // Unknown zip: log it and return nothing.
    \Log::warning("bad zip code : $zip");
    return;
}
// SEE Geo_Completions class
// ipstate: short_name | int | state
public static function street_completion($letters, $ipstate) { # zip => name
    // Thin facade: the work is done by Geo_Completion.
    $suggestions = Geo_Completion::street_completion($letters, $ipstate);
    return $suggestions;
}
// ipstate: short_name | int | state
public static function city_completion($letters, $ipstate) { # zip => name
    // Thin facade: the work is done by Geo_Completion.
    $suggestions = Geo_Completion::city_completion($letters, $ipstate);
    return $suggestions;
}
// City Name Completions ( provides city and state suggestion )
// static function city_list_letters_country($cn=0, $city_letters, $limit=20, $main_id=true) { // hash<REGION-CITY: "city, region">
/* COPY */ static function completions($name, $cn=1) { # list<id,names>
    // Input is "city" or "city, state".
    [$city, $state] = \HB::explode(",", $name, 2);
    $city = trim($city);

    // The state is passed on only when the input actually contained one.
    return ($state === NULL)
        ? self::city_list_letters_country($cn, $city)
        : self::city_list_letters_country($cn, $city, trim($state));
}
// --------------------------------------------------------------------------------
// COUNTRY RELATED
// geo.countries caching wrapper
// 1: iso => name
// 2: id => name
// 3: id => iso
// 4: iso => id
// 6: name => id
/* NEW */ static function countries($how, $key=false) { # see doc
    static $cache;    // $how => map as loaded through cache_shm
    static $uc_cache; // $how => same map with upper-cased keys

    // Load each mapping once.
    if (!isset($cache[$how])) {
        $cache[$how] = \cache_shm(__CLASS__)->_countries($how);
    }

    // No key: the caller wants the whole map.
    if (! $key) {
        return $cache[$how];
    }

    // Key lookups are case-insensitive: build the upper-cased copy on first use.
    if (! ($uc_cache[$how]??0)) {
        $upper = [];
        foreach ($cache[$how] as $name => $value) {
            $upper[strtoupper($name)] = $value;
        }
        $uc_cache[$how] = $upper;
    }

    $needle = strtoupper(trim($key));
    return $uc_cache[$how][$needle] ?? null;
}
/**
* @internal
* @example \cache_shm("Geo")->_countries($how);
* @param $how
* @return mixed
*/
/* NEW */ static function _countries($how) { # see doc
    // Uncached read of the requested geo.countries view.
    $view = M('geo.countries', $how);
    return $view->d;
}
// Full country list
static function country_list() { // {id => name}
    // View 2 of the countries map is id => name.
    $idToName = self::countries(2);
    return $idToName;
}
// mix: ISO | NAME | REGION
// test: Geo::id_country("RU")
// test: Geo::id_country("Russia")
// test: Geo::id_country( region.id )
static function id_country($mix) { # return country.id
    // Fast path: the two US spellings map to id 1 without any lookup.
    if ($mix === 'US' || $mix === 'USA') {
        return 1;
    }
    // Numeric input is a region id; its country is read from the "cn" field of geo.region.
    if (is_numeric($mix)) {
        return $mix == 0 ? null : M("geo.region")->one($mix, "cn");
    }
    $len = strlen($mix);
    if ($len == 2) { // ISO code (view 4: iso => id)
        return self::countries(4, strtoupper($mix));
    }
    if ($len > 2) { // country name (view 6: name => id)
        return self::countries(6, $mix);
    }
    // Empty or one-character input cannot be resolved.
    return 0;
}
// mix: ID | NAME
// test: Geo::country_iso(1)
// test: Geo::country_iso("Russia")
static function country_iso($mix) { # return ISO
    // Resolves a country id or a country name to its ISO code.
    // Returns "" when the input is empty or cannot be resolved.
    if (!$mix) {
        \Log::error("bad params");
        // Stop here: handing a falsy key to countries(3, $mix) would return the
        // whole id => iso map instead of a single code.
        return "";
    }
    // Names are resolved to an id first; ids are used as given.
    $id = is_numeric($mix) ? $mix : self::id_country($mix);
    if (!$id) {
        return "";
    }
    // View 3 is id => iso; unknown ids yield "" like the name branch always did.
    return self::countries(3, $id) ?? "";
}
// mix: ID or ISO
// test: Geo::country_name(1)
// test: Geo::country_name("RU")
static function country_name($mix) { # return name
    if (!$mix) {
        return "";
    }
    // View 2 is keyed by id, view 1 by ISO code.
    $how = is_numeric($mix) ? 2 : 1;
    return self::countries($how, $mix);
}
// mix: ID or NAME
// test: country2region(1)
// test: country2region("Russia")
static function country2region($mix) { # geo.region._id
    // Non-numeric input is resolved to a country id first; ids are used as given.
    $countryId = is_numeric($mix) ? $mix : self::id_country($mix);
    // Country N owns the region ids starting at (N-1)*256.
    return ($countryId - 1) * 256;
}
static function region2countryIso(int $region):string {
    // Maps a region id to the ISO code of its country; "" when it cannot be resolved.
    $cid = self::id_country($region);
    $iso = self::country_iso($cid);
    // country_iso() is untyped and may hand back a non-string for an unresolved id,
    // which would violate the declared string return type.
    return is_string($iso) ? $iso : "";
}
// --------------------------------------------------------------------------------
//** REGION
// GEO.REGIONS caching wrapper
//
// data generated in Geo_MongoAdmin::denormalize_regions
//
// 1 - ID => name
// 2 - ID => short
// 3 - short => name
// 4 - short => cn
// 5 - short => ID
// 6 - ID => cn
// 7 - name => id
// 8 - ID => name (US cn==1 only)
// 9 - ID => name (CA cn==2 only)
// 10 - ID => short (US cn==1 only)
// 11 - ID => short (CA cn==2 only)
// 12 - SHORT => name (US/sorted)
// 14 - SHORT => name (CA/sorted)
/* NEW */
static function regions($how, $key=false) { # see doc
    static $cache;
    // Normalise the key to the case used by the requested view.
    if ($how == 3 || $how == 4 || $how == 5) {
        $key = strtoupper($key);            // views keyed by short name
    } elseif ($how == 7) {
        $key = ucwords(strtolower($key));   // view keyed by full name
    }
    if (!isset($cache[$how])) {
        $shmKey = "geo-regions-$how";
        $fromShm = cache_shm()[$shmKey];
        if ($fromShm) {
            $cache[$how] = $fromShm;
        } else {
            // Nothing usable in cache_shm(): read the view and store it there for later lookups.
            $cache[$how] = M("geo.regions", $how)->d;
            cache_shm()[$shmKey] = $cache[$how];
        }
    }
    // A falsy key means "return the whole map".
    if (!$key) {
        return $cache[$how];
    }
    return $cache[$how][$key] ?? null;
}
// CN - ID | ISO
static function region_list($cn) { // hash<reg_id:name>
    // Country ids 1 and 2 (US / CA per the regions() view table) come from pre-built views.
    if ($cn == 1) {
        return self::regions(8);
    }
    if ($cn == 2) {
        return self::regions(9);
    }
    // Any other country: query geo.region directly, resolving non-numeric input to an id first.
    $countryId = is_numeric($cn) ? $cn : self::id_country($cn);
    $rz = M("geo.region")->f(['cn' => intval($countryId)], "name");
    // Collapse every record to its name, keeping the original keys.
    foreach ($rz as $id => $row) {
        $rz[$id] = $row['name'];
    }
    return $rz;
}
static function region_list_short($cn) { // hash<reg_id:name>
    // Country id => regions() view holding "ID => short" for that country.
    foreach ([1 => 10, 2 => 11] as $country => $view) {
        if ($cn == $country) {
            return self::regions($view);
        }
    }
    // Any other country is only reported; nothing is returned.
    \Log::error("not supported");
}
static function region_list_short_full($cn) { // hash<short:full>
    // Country id => regions() view holding "SHORT => name" for that country.
    foreach ([1 => 12, 2 => 14] as $country => $view) {
        if ($cn == $country) {
            return self::regions($view);
        }
    }
    // Any other country is only reported; nothing is returned.
    \Log::error("not supported");
}
// NAME|STATE -> ID
static function region_from_name($name) { // region
    // Two characters are treated as a short code (view 5), anything else as a full name (view 7).
    $view = strlen($name) == 2 ? 5 : 7;
    return self::regions($view, $name);
}
/**
* short|full region name
* @param mixed $name
* @param bool $short
* @return string
* @throws Exception
*/
static function region_name($name, $short=false) { // str
    if (!$name) {
        return "";
    }
    // NOTE: a 2-letter shortcut through regions(3, $name) used to live here and was
    // disabled as bad code; everything goes through _region() instead.
    $info = self::_region($name); // [name, short, cn] | false
    if (!is_array($info)) {
        return "";
    }
    [$full, $abbr] = $info;
    // Prefer the requested form and let NVL fall back to the other one.
    if ($short) {
        return NVL($abbr, $full);
    }
    return NVL($full, $abbr);
}
// --------------------------------------------------------------------------------
//** CITY
/**
* @param string $name
* @return array
* @example city("Boston, MA");
* city("Boston MA")
*/
static function city($name) { # list<region,city>
    [$cityStr, $regionStr] = \HB::explode(",", $name, 2);
    if (!$regionStr) {
        // No usable "city, state" form: take the text after the last space as the state.
        $cut = strrpos($name, " ");
        if (!$cut) {
            return [];
        }
        $cityStr = substr($name, 0, $cut);
        $tail = substr($name, $cut + 1);
        // A numeric tail is not accepted as a state.
        $regionStr = is_numeric($tail) ? 0 : $tail;
    }
    $regionId = $regionStr ? self::_id_region(1, trim($regionStr)) : $regionStr;
    if (!$regionId) {
        return false;
    }
    return [$regionId, self::_id_city($regionId, trim($cityStr))];
}
/**
*
* @param int $region
* @param bool $main_id
* @param mixed $pager
* @return array
* @throws Exception
*/
static function city_list($region, $main_id=true, $pager=null) { // list<[id,name]>
    $rows = M("geo.city")->f(["region" => $region, ":sort" => "cwi", ":pager" => $pager], "city parent_id name");
    $list = [];
    foreach ($rows as $rid => $row) {
        $cityId = $row["city"] ?? 0;
        $parentId = $row["parent_id"] ?? 0;
        // $main_id decides which id is preferred (parent_id or the city's own id);
        // NVL supplies the other one as fallback.
        $id = $main_id ? NVL($parentId, $cityId) : NVL($cityId, $parentId);
        $list[$rid] = [$id, $row["name"] ?? ''];
    }
    return $list;
}
static function main_city_list($region, $pager=null) { // list<[id,name]>
    // Main cities are the geo.city records flagged with p=1.
    $where = ['region' => $region, ':sort' => "cwi", "p" => 1, ":pager" => $pager];
    $rows = M("geo.city")->f($where, "city name");
    return AH::arr($rows, "city name");
}
// hash of cities in the REGION starting with $city_letters
static function city_list_letters($region=0, $city_letters="", $limit=20, $main_id=true) { // hash<geo_id: city>
    // NOTE: $limit is accepted but not applied to the query.
    // The letters are turned into a [from, to] range over the "cwi" word index.
    $range = HB::word_index($city_letters, 1);
    $where = [
        "region" => (int) $region,
        "cwi"    => ['$gte' => $range[0], '$lte' => $range[1]],
        ":sort"  => "cwi",
    ];
    if ($main_id) {
        $where["p"] = 1; // main cities only
    }
    return AH::arr(M("geo.city")->f($where, "city name"), "city name");
}
/**
* @param int $region
* @param bool $main_cities
* @return array first letter => number of cities starting with it (the list of available first city letters)
* @throws Exception
*/
static function city_letters($region, $main_cities=true) {
    // Result is cached under "<region>_<0|1>".
    $cache_key = (int)$region."_".(int)$main_cities;
    if (!$letters = Cache::get($cache_key)) {
        $cities = $main_cities ? self::main_city_list($region) : self::city_list($region);
        $letters = [];
        foreach ($cities as $city) {
            $name = (string) ($city[1] ?? '');
            // A record without a name has no first letter; indexing "" would only raise
            // a warning and count the record under an empty key.
            if ($name === '') {
                continue;
            }
            // Take the first character, not the first byte, so multibyte letters stay intact.
            $first = mb_substr($name, 0, 1, "utf8");
            $letters[$first] = ($letters[$first] ?? 0) + 1;
        }
        Cache::put($cache_key, $letters, 3600*24*100);
    }
    return $letters;
}
/**
* City name
* @param $region
* @param $city
* @param bool $main_city
* @return mixed
*/
static function city_name($region, $city, $main_city=true) { # name
    // When the main city is requested, swap in its id if one resolves.
    if ($main_city) {
        $mainId = self::main_city($region, $city);
        if ($mainId) {
            $city = $mainId;
        }
    }
    $info = self::_city($region, $city);
    // Index 0 of the _city() tuple is the city name.
    return $info[0] ?? '';
}
static function city_location($region, $city, $main_city=true) { # loc[lat,long]
    // When the main city is requested, swap in its id if one resolves.
    if ($main_city) {
        $mainId = self::main_city($region, $city);
        if ($mainId) {
            $city = $mainId;
        }
    }
    $rec = M("geo.city")->findOne(["region" => (int) $region, "city" => (int) $city]);
    // Unknown city and city without a stored location both yield [].
    return $rec ? ($rec['loc'] ?? []) : [];
}
/**
* Mass cities locations resolver
* @param array $rcs
* @return array [rc => [lat, long]]
*/
static function city_locations(array $rcs) { # [rc=>loc[lat,long]]
    $result = [];
    if (!$rcs) {
        return $result;
    }
    // One {region, city} clause per requested rc, combined with $or.
    $wh = [];
    foreach ($rcs as $rc) {
        [$region, $city] = Geo::rc($rc);
        $wh[] = ["region" => $region, "city" => $city];
    }
    $rows = M("geo.city")->findA(['$or' => $wh]);
    if (!$rows) {
        return $result;
    }
    foreach ($rows as $row) {
        // NOTE(review): the location is stored wrapped in a one-element array
        // ([[lat, long]]), which does not match the documented [lat, long] - confirm with callers.
        $result[Geo::rc($row["region"], $row["city"])] = [$row["loc"] ?? []];
    }
    return $result;
}
// hash of cites in the COUNTRY starting with $city_letters
// hash key is 'region-city'
static function city_list_letters_country($cn=0, $city_letters="", $state_letters=false, $limit=20, $main_id=true) { // hash<REGION-CITY: "city, region">
    // NOTE: $limit is accepted but not applied to the query.
    $region = false;
    if ($state_letters) {
        $region = self::regions(5, $state_letters); // short => ID
    }
    if ($cn > 2) {
        \Log::alert("not supported");
    }
    $where = [];
    if ($region) {
        $where["region"] = $region;
    }
    if ($city_letters) {
        // The letters become a [from, to] range over the "cwi" word index.
        $range = HB::word_index($city_letters, 1);
        $where["cwi"] = ['$gte' => $range[0], '$lte' => $range[1]];
    }
    $where[":sort"] = "cwi";
    $shortNames = self::regions(2); // id => short
    if ($main_id) {
        $where["p"] = 1; // main cities only
    }
    $out = [];
    foreach (M("geo.city")->f($where, "region city name") as $e) {
        // Key is "region-city"; the value is the city name followed by the region short name.
        $out[$e["region"] . "-" . $e["city"]] = $e["name"] . " " . $shortNames[$e["region"]];
    }
    return $out;
}
//
static function main_city($region, $city) { # city.id
    // Index 3 of the _city() tuple is the parent id; NVL falls back to the given id.
    $info = self::_city($region, $city);
    $parent = $info[3] ?? 0;
    return NVL($parent, $city);
}
// --------------------------------------------------------------------------------
//** ZIP
static function zip_name($zip) { # str: city state
    $z = self::_zip($zip);
    if ($z) {
        // $z[2] is the city name, $z[3] the state.
        return $z[2] . ", " . $z[3];
    }
    \Log::notice("bad zip $zip");
    return;
}
// alternative ZIP name
// Junk function - zip can have multiple names
static function zip_name_alt($zip) { # str: city/city_alt, state
    // Stub: it only logs an alert and returns nothing.
    \Log::alert("not supported");
    return;
}
// --------------------------------------------------------------------------------
/**
* REGION STRING TO ID
* @param int|string $cn
* @param string $region_str name or shortname of region
* @return bool|int|mixed|Database|null
*/
static function _id_region($cn, $region_str) { # region_id | false
    if (!$region_str) {
        return false;
    }
    // Numeric input is already an id.
    if (is_numeric($region_str)) {
        return (int) $region_str;
    }
    // For country ids 1 and 2 a two-character string is a short code (view 5);
    // everything else is looked up as a full name (view 7).
    $isShortCode = ($cn == 1 || $cn == 2) && strlen($region_str) == 2;
    return self::regions($isShortCode ? 5 : 7, $region_str);
}
// CITY STRING TO ID
static function _id_city($region, $city_str) { # city | false
    // Both parts are required.
    if (!$city_str || !$region) {
        return false;
    }
    // Only numeric region ids below 256 are resolved here.
    if (!is_numeric($region) || !($region < 256)) {
        return false;
    }
    $st = Geo::regions(2, $region); // id => short
    if (!$st) {
        return false;
    }
    // Geo_Go resolves "City,ST"; index 1 of its answer is the city id.
    $ct = Geo_Go::city2id("$city_str,$st");
    if (!$ct) {
        return false;
    }
    $city = $ct[1];
    return $city ? $city : false;
}
// shortname => region.id
static function _region_from_state($state) { // region
    // View 5 of the regions map is short => ID.
    $regionId = self::regions(5, $state);
    return $regionId;
}
static function _region($region) { # [name, short, cn] | false
    // Non-numeric input is resolved to a region id first.
    $id = is_numeric($region) ? $region : self::_id_region(1, $region);
    if (!$id) {
        return false;
    }
    return [
        self::regions(1, $id), // ID => name
        self::regions(2, $id), // ID => short
        self::regions(6, $id), // ID => cn
    ];
}
/**
* list($name, $rname, $short, $parent, $cn)=self::city($region, $city);
* uses static property to avoid multiple DB requests
* @param $region
* @param $city
* @return array [name (cityname) 0, region_name 1, short (region short) 2, parent_id (city) 3, cn 4]
*/
static function _city($region, $city) {
    // No city given: answer with region-level information only.
    if (!$city) {
        $r = self::_region($region);
        return $r ? ["", $r[0], $r[1], 0, $r[2]] : [];
    }
    // An array is accepted when its element 1 carries the city id.
    if (is_array($city)) {
        if (!isset($city[1])) {
            return [];
        }
        $city = $city[1];
    }
    if ($region > 255) {
        return [];
    }
    // Served from the static per-request cache when possible.
    $cached = self::$_CITY_NAMES[$region][$city] ?? 0;
    if ($cached) {
        return [$cached["name"], self::regions(1, $region), self::regions(2, $region), $cached["parent_id"] ?? 0, 1];
    }
    $gkey = "$region-$city";
    $info = Geo_Go::rcs2info([$gkey]);
    if (!$info) {
        return [];
    }
    if (!is_array($info[$gkey])) {
        return [];
    }
    $entry = $info[$gkey];
    $st = self::regions(2, $region);
    // Element 1 of the entry is an rc that is split into [region, city]; its city part is kept as parent id.
    $rc = Geo::rc($entry[1] ?? 0);
    // Element 0 is a comma-separated string; the text before the first comma is the city name.
    $nameParts = explode(',', $entry[0] ?? '');
    $parentId = $rc[1] ?? null;
    self::$_CITY_NAMES[$region][$city] = ["name" => $nameParts[0], "parent_id" => $parentId, "region" => $st];
    return [$nameParts[0], self::regions(1, $region), $st, $parentId, 1 /* USA ONLY */];
}
/**
* Batch city names resolver.
* Uses static property to avoid multiple DB requests.
*
* @param [] $ids - array of [$region, $city, $city_main] | array of [$rc]
* @return [$region][$city] => [name (cityname) 0, region_name 1, short (region short) 2, parent_id (city) 3, cn 4
* list($name, $rname, $short, $parent, $cn)=self::city($region, $city);
*/
static function cities($ids) {
    // Plain rc values are expanded to [region, city] pairs first.
    if (!is_array(reset($ids))) {
        foreach ($ids as &$rc1) {
            $rc1 = Geo::rc($rc1);
        }
        // Drop the reference so a later write to $rc1 cannot touch the last element.
        unset($rc1);
    }
    // Collect the pairs that are not cached yet, keyed by "region-city" to de-duplicate.
    $ors = [];
    foreach ($ids as $rc) {
        if (!$rc || (self::$_CITY_NAMES[$rc[0]][$rc[1]] ?? 0)) {
            continue;
        }
        if ($rc[0] < 52) { // Only USA cities
            $ors[$rc[0] . '-' . $rc[1]] = [(int)$rc[0], (int)$rc[1]];
        }
    }
    // Resolve in chunks of 2000; both Geo_Go calls put a status in element 0.
    foreach (array_chunk($ors, 2000) as $chunk) {
        $res = Geo_Go::cities($chunk);
        if (!$res || "OK" != $res[0]) {
            continue;
        }
        unset($res[0]);
        $res = Geo_Go::cities2id($res);
        if (!$res || "OK" != $res[0]) {
            continue;
        }
        unset($res[0]);
        foreach ($res as $city_state => $v) {
            // Pad so a key without a comma yields a null state instead of a warning.
            [$city_name, $state] = array_pad(explode(',', $city_state), 2, null);
            self::$_CITY_NAMES[$v[0]][$v[1]] = [
                "name" => trim($city_name),
                "parent_id" => $v[2],
                // Trimmed like the name, so a "City, ST" key does not store " ST".
                "region" => $state === null ? null : trim($state),
            ];
        }
    }
    // Assemble the answer for every requested pair from the cache; unresolved ones get defaults.
    $res = [];
    foreach ($ids as $rc) {
        if (!$rc) {
            continue;
        }
        $cached = self::$_CITY_NAMES[$rc[0]][$rc[1]] ?? [];
        $res[$rc[0]][$rc[1]] = [
            $cached["name"] ?? "",
            self::regions(1, $rc[0]),
            $cached["region"] ?? 0,
            $cached["parent_id"] ?? 0,
            1
        ];
    }
    return $res;
}
static function _zip($zip, $wh="") { # [region, city, city_name, state, city_main (city.id), loc]
    // Only the first 5 characters of the trimmed input are used.
    $zip5 = substr(trim($zip), 0, 5);
    // Extra conditions are not supported on Mongo; they are only reported.
    if ($wh) {
        \Log::alert("no _zip($wh) support");
    }
    $rec = M("geo.zip1")->findOne(["_id" => (int) $zip5]);
    if (!$rec) {
        return [];
    }
    return [$rec["region"], $rec["city_id"], $rec["city"], $rec["state"], $rec["city_main"], $rec['loc'] ?? []];
}
/**
* Mass zip resolver
* @param array $zips
* @return array [zip => [region, city, city_name, state, city_main (city.id), loc],..
*/
static function _zips(array $zips) {
    // Reduce every input to the integer value of its first 5 characters,
    // the same form _zip() uses for the _id lookup.
    $_z = [];
    foreach ($zips as $zip) {
        $_z[] = (int) substr(trim($zip), 0, 5);
    }
    if (!$_z) {
        return [];
    }
    $z = M("geo.zip1")->f(["_id" => ['$in' => $_z]]);
    $r = [];
    if ($z) {
        foreach ($z as $zip => $v) {
            // "loc" may be missing on a record; default to [] exactly like _zip() does.
            $r[$zip] = [$v["region"], $v["city_id"], $v["city"], $v["state"], $v["city_main"], $v['loc'] ?? []];
        }
    }
    return $r;
}
// resolve geo in fact/loc structure
// priority: zip > city > city_main
// Modify Loc
static function resolveLoc(&$loc) { # null - already, 0 - not resolved, 1 - resolved (loc modified)
    // Fills $loc["geo"] from the zip when possible, otherwise from the city / main city.
    if (isset($loc["geo"])) {
        return;
    }
    if ($t = ($loc["zip"] ?? 0)) {
        $geo = Geo::zip_location($t);
        if ($geo) {
            $loc["geo"] = $geo;
            return 1;
        }
    }
    // The city lookup needs the region; this guard also restores the brace balance
    // (the function used to close before "return 0").
    if (isset($loc["region"])) {
        $t = NVL($loc["city"] ?? 0, $loc["city_main"] ?? 0);
        if ($t) {
            $geo = Geo::city_location($loc["region"], $t);
            if ($geo) {
                $loc["geo"] = $geo;
                return 1;
            }
        }
    }
    return 0;
}
// USE FOR OFFLINE PROCESSING ONLY
// resolve geo in fact/loc structure
// Modify Loc
static function offlineResolveLoc(&$loc) { # null - already, 0 - not resolved, 1 - resolved (loc modified)
    // Same contract as resolveLoc(), but answers from two in-process tables that are
    // loaded in full on first use.
    if (isset($loc["geo"])) {
        return;
    }
    static $zip2geo = []; # zip => geo
    static $rc2geo = []; # "region-city" => geo
    if (!$zip2geo) {
        if (php_sapi_name() == 'cli') {
            Console::err("loading zip2geo and region-city2geo");
        }
        $zip2geo = M("geo.zip1")->hash([], "_id loc");
        $rcl = M("geo.city")->f([], "region city loc");
        foreach ($rcl as $i) {
            if (!isset($i["loc"])) {
                continue;
            }
            $rc2geo[$i["region"] . "-" . $i["city"]] = $i["loc"];
        }
    }
    // Every optional key is read with a default so a sparse $loc raises no warnings.
    if ($t = ($loc["zip"] ?? 0)) {
        if ($g = ($zip2geo[intval($t)] ?? [])) {
            // NOTE(review): the zip pair is swapped while the city pair below is used as
            // stored - presumably geo.zip1 keeps the coordinates in the opposite order; confirm.
            $loc["geo"] = [$g[1], $g[0]];
            return 1;
        }
    }
    if (isset($loc["region"])) {
        $t = NVL($loc["city"] ?? 0, $loc["city_main"] ?? 0);
        if ($t) {
            $rc = $loc["region"] . "-" . $t;
            if ($g = ($rc2geo[$rc] ?? null)) {
                $loc["geo"] = $g;
                return 1;
            }
        }
    }
    return 0;
}
/*
* Store the list of popular cities in the cache
* Unfortunately we have to use MySQL denormalized table hb_geo.city_population
* 30 Aug 2016: decided to use the denormalized table rxdb:geo.`StreetSummary` instead, sorted by the number of known buildings. It is more accurate and better suited for hommetry/rehold
*/
static function most_populated_cities($region, $limit=24) {
    // Returns the [city_id, name] list of one region, or '' when nothing is known for it.
    // The lists of regions 1..51 are built together and cached per $limit.
    static $MPCAR_CACHE;
    // $limit ends up inside the SQL text and the cache key, so force it to an integer.
    $limit = (int) $limit;
    $cache_key = "most_populated_cities_all_regions_{$limit}";
    $cities = ($MPCAR_CACHE[$limit] ?? []);
    if (empty($cities[$region]) || isset($_GET["UPDATE"])) {
        $cities = Cache::get($cache_key);
        if (empty($cities[$region])) {
            $cities = [];
            for ($region1 = 1; $region1 < 52; $region1++) {
                // rm carries the region in the bits above 16, so this range selects one region.
                $rms = DBE('t-hdb2')->hash("SELECT rm, SUM(addresses) as addr_cnt FROM geo.`StreetSummary` WHERE (rm > ".($region1 << 16).") AND (rm < ".(($region1 + 1)<<16).") GROUP BY rm HAVING addr_cnt > 10 ORDER BY addr_cnt DESC LIMIT $limit");
                $cctt = Geo_Go::cities(array_keys($rms));
                $cities[$region1] = [];
                foreach ($cctt as $r_c => $city_state) {
                    // Key is "region-city", value is "City, ST"; only the city id and name are kept.
                    [, $city_id] = HB::explode('-', $r_c, 2);
                    [$city] = HB::explode(', ', $city_state, 2);
                    if ($city_id) {
                        $cities[$region1][] = [$city_id, $city];
                    }
                }
                $cities[$region1] = AH::sort($cities[$region1], 1);
            }
            Cache::put($cache_key, $cities, 3600*24*10);
        }
        $MPCAR_CACHE[$limit] = $cities;
    }
    return $cities[$region] ?? '';
}
/**
* Resolve street(s) names.
* IMPORTANT NOTE! result hash ids are NOT street ids.
* @param array $ids
* @return array [id => "street_name", ... ]
*/
static function streetName(/*array*/ $ids) { # "Street"
    // Delegates to Geo_Go.
    $names = Geo_Go::streetName($ids);
    return $names;
}
/**
* Resolve streets names.
* @param array $ids
* @return array [street_id => "street_name", ... ]
*/
static function streetNames(/*array*/ $ids) { # "Street"
    // Delegates to Geo_Go.
    $names = Geo_Go::streetNames($ids);
    return $names;
}
// Geo::county("Essex, MA") | Geo::county("Essex County, MA")
static function county($name) { # list<region,county>
    [$countyStr, $regionStr] = \HB::explode(",", $name, 2);
    if (!$regionStr) {
        // No usable "county, state" form: take the text after the last space as the state.
        $cut = strrpos($name, " ");
        if (!$cut) {
            return false;
        }
        $countyStr = substr($name, 0, $cut);
        $tail = substr($name, $cut + 1);
        // A numeric tail is kept as an integer region id.
        $regionStr = is_numeric($tail) ? (int) $tail : $tail;
    }
    $regionId = $regionStr ? self::_id_region(1, trim($regionStr)) : $regionStr;
    if (!$regionId) {
        return false;
    }
    return [$regionId, self::_id_county($regionId, trim($countyStr))];
}
// COUNTY STRING TO ID
static function _id_county($region, $county_str) { # county | false
    $counties = self::countyCache();
    // Normalise the name the way countyCache() builds its keys:
    // Sainte/Saint -> Ste/St, strip "-", " ", "." and "'", upper-case.
    $normalized = preg_replace(['`\bSAINTE\b`i', '`\bSAINT\b`i'], ['Ste', 'St'], $county_str);
    $normalized = str_replace(['-', ' ', '.', "'"], '', $normalized);
    $lookup = $region . '_' . mb_strtoupper($normalized, "utf8");
    return $counties[$lookup] ?? false;
}
static function countyCache($force = false){
    // Returns the county lookup table, built from geo.county and kept in Cache_SHM.
    // The table mixes two kinds of keys:
    //   "<region>_<NORMALIZED NAME>" => county id (for both name and full_name)
    //   <county id>                  => the county row
    $COUNTIES = Cache_SHM::get("geo_county");
    if (!$COUNTIES || $force) {
        // Start from a clean array: a cache miss may yield a non-array value, and a
        // forced rebuild must not carry over entries from the previous table.
        $COUNTIES = [];
        $res_counties = DBE('t-hdb2')->select("id, region, name,full_name,parent from geo.county");
        foreach ($res_counties as $v) {
            // A row with a parent resolves to the parent's id.
            if ($v['parent']) {
                $v['id'] = (int)$v['parent'];
            }
            $cnty_name = preg_replace(['`\bSAINTE\b`i', '`\bSAINT\b`i'], ['Ste', 'St'], $v['name']);
            $cnty_name = str_replace(['-', ' ', '.', "'"], '', $cnty_name);
            $full_name = preg_replace(['`\bSAINTE\b`i', '`\bSAINT\b`i'], ['Ste', 'St'], $v["full_name"]);
            $full_name = str_replace(['-', ' ', '.', "'"], '', $full_name);
            $name = mb_strtoupper($cnty_name, "utf8");
            $COUNTIES["{$v['region']}_$name"] = (int)$v['id'];
            $name = mb_strtoupper($full_name, "utf8");
            $COUNTIES["{$v['region']}_$name"] = (int)$v['id'];
            // NOTE(review): for a row with a parent this stores the child row under the
            // parent's id and may replace the parent's own row - confirm this is intended.
            $COUNTIES[$v['id']] = $v;
        }
        Cache_SHM::put("geo_county",$COUNTIES);
    }
    return $COUNTIES;
}
// ID to county full name
/**
* Returns county name ( short of full version according to $short )
* @param int|string $county
* @param int $region
* @param bool $short
* @return array|mixed
* @example Geo::county_name(983)
*/
static function county_name($county,$region=0, $short=false) { # name
    $COUNTIES = self::countyCache();
    // Which column of the county row to return.
    $field = $short ? 'name' : 'full_name';
    // Numeric input is a county id; an unknown id yields [].
    if (is_numeric($county)) {
        return isset($COUNTIES[$county]) ? $COUNTIES[$county][$field] : [];
    }
    // Otherwise resolve "county, region" to an id first; [] when it does not resolve.
    [, $id] = self::county("$county, $region");
    return $id ? $COUNTIES[$id][$field] : [];
}
/**
* Resolves county $rc belongs to
* @param $rc
* @return int
* @example Geo::cityCounty(3801089)
*/
static function cityCounty($rc) {
    [$region, $city_id] = Geo::rc($rc);
    $where = ["region" => $region, "city" => $city_id, "_result" => "one"];
    // The single-value result is cast to int.
    return (int) DBE("t-hdb2")->select("county FROM geo.city2county", $where);
}
/**
* Resolves county $rc belongs to
* @param $rc
* @return int
* @example Geo::citiesToCounties([3801089])
*/
static function citiesToCounties(array $rcs) { # [rc=>county_id]
    // Group the requested cities by region: region => [city => 1].
    $byRegion = [];
    foreach ($rcs as $rc) {
        [$region, $city_id] = Geo::rc($rc);
        // The ids are interpolated into SQL below, so force them to integers.
        $byRegion[(int) $region][(int) $city_id] = 1;
    }
    // Nothing to look up: an empty condition must never reach the query.
    if (!$byRegion) {
        return [];
    }
    $wh = [];
    foreach ($byRegion as $region => $cities) {
        $ids = array_keys($cities);
        $wh[] = count($ids) == 1
            ? "(region = $region and city = {$ids[0]})"
            : "(region = $region and city in(" . join(',', $ids) . "))";
    }
    $counties = DBE("t-hdb2")->select("* FROM geo.city2county", [join(' or ', $wh), "_result" => "all_hash"]);
    $r = [];
    foreach ($counties as $v) {
        $r[Geo::rc($v['region'], $v['city'])] = $v['county'];
    }
    return $r;
}
/**
* Returns list of rc belongs to county.
* @param $county_id
* @param DB_Parallel|null $DB_Parallel
* @return array
* @example Geo::countyCitiesList(983)
*/
static function countyCitiesList($county_id, ?\DB_Parallel &$DB_Parallel = null) {
    // The nullable type is spelled out; relying on "= null" to make a typed
    // parameter nullable is deprecated in recent PHP versions.
    $list = [];
    if ($DB_Parallel) {
        if ($DB_Parallel->isComplete()) {
            // Second pass: collect the result of the query registered below.
            $recs = $DB_Parallel->getQuery("countyCitiesList:$county_id")["result"] ?? [];
        } else {
            // First pass: only register the query and return nothing yet.
            $DB_Parallel->addQuery("countyCitiesList:$county_id", "DB2", "SELECT city, region FROM geo.city2county WHERE `county` = " . (int) $county_id);
            return; // prefetch breakpoint
        }
    } else {
        $recs = DBE("t-hdb2")->select("city, region FROM geo.city2county", ["county" => $county_id]);
    }
    // An empty or missing result simply yields an empty list.
    foreach ($recs ?: [] as $r) {
        $list[] = Geo::rc($r["region"], $r["city"]);
    }
    return $list;
}
/**
* Returns the list of all counties in a region
* @param int $region
* @return array
*/
static function regionCountiesList($region) {
    // All geo.county rows of the region, in the "hash_hash" result form of the DB layer.
    $where = ["region" => (int) $region, "_result" => "hash_hash"];
    return DBE("t-hdb2")->select("* FROM geo.county", $where);
}
/**
* List of all 3-digit zip prefixes with geo information
* @param bool $rebuild
* @return array
* @example \Geo::zipPrefixesList();
*/
static function zipPrefixesList($rebuild = false) {
    // Serve from the cache unless a rebuild is requested or nothing is cached yet.
    $zip_prefixes = $rebuild ? null : i('Cache-DT', 'common')->get("ZIP_PREFIXES_INFORMATION");
    if (!$zip_prefixes) {
        // Always start from a real array: the variable used to be undefined on a forced
        // rebuild with no rows, and a non-array cache miss value otherwise.
        $zip_prefixes = [];
        // Note: also d-hdb2:hb_geo.zip_geo_primary could be used
        $recs = DBE("t-rxdb")->select("SUBSTRING(ZipCode, 1, 3) as prefix, State, City, AreaCode FROM geo.ZIPCodes", ["PrimaryRecord" => "P", "_order" => "Population DESC", "_group" => "prefix"]);
        foreach ($recs as $rec) {
            $loc = \Geo::parseLoc($rec["City"] . ", " . $rec["State"], \Geo::LOC_ENRICH_ALL);
            $zip_prefixes[$rec["prefix"]] = ["loc" => $loc];
        }
        i('Cache-DT', 'common')->put("ZIP_PREFIXES_INFORMATION", $zip_prefixes);
    }
    return $zip_prefixes;
}
/***
* load to static resolved cities names
* USED for KRDB['CacheLocation] sections preresolve city_name
* @cities array [[city_name,parent_id,state_short_name]]
*/
static function preloadResolvedCityName($cities){
    foreach ($cities as $region => $byCity) {
        if (!$byCity) {
            continue;
        }
        foreach ($byCity as $city_id => $val) {
            // Skip empty ids and never overwrite an entry that is already cached.
            if (!$city_id || isset(self::$_CITY_NAMES[$region][$city_id])) {
                continue;
            }
            self::$_CITY_NAMES[$region][$city_id] = $val;
        }
    }
}
static function getCachedCityNames(){
    // Copy of the static city-name cache: [region][city] => cached entry.
    $snapshot = self::$_CITY_NAMES;
    return $snapshot;
}
// Empties the static city-name cache filled by _city(), cities() and preloadResolvedCityName().
static function clearCachedCityNames(){
self::$_CITY_NAMES = [];
}
// -----
/**
* Extract location from text:
* - major cities (2 word)
* - major cities (1 word)
* - small cities (2 word)
* - small cities (1 word)
* - states
* - then countries
* @param string $text
* @return array [$rc, $region, "$country" iso-2-letter]
*/
static function extractLocationFromText(string $text) : array {
    $city2rm_major = self::city2rmMap("major");
    $city2rm_small = self::city2rmMap("small");
    $region2st = \Geo::regions(10);
    $statesRE = join("|", $region2st); # 'MA|CA|...'
    preg_match_all("!\b([a-z\-]+)[ ,]($statesRE)\b!i", $text, $m1, PREG_SET_ORDER);
    preg_match_all("!\b([a-z\-]+) ([a-z\-]+)[ ,]{1,2}($statesRE)\b!i", $text, $m2, PREG_SET_ORDER);
    $ret = fn($rc, $region) => [$rc, $region, self::region2countryIso($region)];
    // Major cities win over small ones; within each map two-word names win over one-word names.
    foreach ([$city2rm_major, $city2rm_small] as $map) {
        # * cities (2 word),
        foreach ($m2 as $m) {
            $city = strtolower($m[1] . " " . $m[2] . "," . $m[3]);
            if ($rm = $map[$city] ?? 0) {
                return $ret($rm, $rm >> 16);
            }
        }
        # * cities (1 word)
        foreach ($m1 as $m) {
            $city = strtolower($m[1] . "," . $m[2]);
            if ($rm = $map[$city] ?? 0) {
                return $ret($rm, $rm >> 16);
            }
        }
    }
    # states (2 word, 1 word)
    $state2region = \Geo::regions(7);
    // The second word sits in a lookahead so consecutive pairs overlap:
    // "in New York" yields "in New" AND "New York", not just the first pair.
    preg_match_all("!\b([a-z]+) (?=([a-z]+)\b)!i", $text, $m2, PREG_SET_ORDER);
    foreach ($m2 as $m) {
        // The name => id view is looked up in ucwords form (see regions(), view 7),
        // exactly as the one-word lookup below does.
        $name = ucwords(strtolower($m[1] . ' ' . $m[2]));
        if ($region = $state2region[$name] ?? 0) {
            return $ret(0, $region);
        }
    }
    preg_match_all("!\b([a-z]+)\b!i", $text, $m1, PREG_SET_ORDER);
    foreach ($m1 as $m) {
        $name = ucwords(strtolower($m[1]));
        if ($region = $state2region[$name] ?? 0) {
            return $ret(0, $region);
        }
    }
    # countries
    $name2id = Geo::countries(6);
    $name2id['Korea'] = 116; # exception
    $name2id['Hong Kong'] = 99; # exception
    static $names2RE = [];
    if (! $names2RE) {
        $alternatives = [];
        foreach (array_keys($name2id) as $c) {
            $c = (string) $c;
            // Keep only the part before a "," or " (" qualifier.
            if (strpos($c, ",")) {
                $c = fm("!^(.+),!", $c);
            }
            if (strpos((string) $c, '(')) {
                $c = fm('!^(.+) \(!', (string) $c);
            }
            $c = (string) $c;
            // An empty alternative would match everywhere and hide every real country.
            if ($c === '') {
                continue;
            }
            // Names are data, not regex: escape metacharacters and the "!" delimiter.
            $alternatives[] = preg_quote($c, '!');
        }
        $names2RE = join("|", $alternatives);
    }
    if (! $names2RE) {
        return [0, 0, ""];
    }
    $id2iso = Geo::countries(3);
    preg_match("!\b($names2RE)\b!i", $text, $m);
    if ($m && ($m[1]??"")) {
        // The regex is case-insensitive, so the id lookup must be as well
        // (ucwords() alone misses names with lower-case words).
        $byUpper = array_change_key_case($name2id, CASE_UPPER);
        $id = $byUpper[strtoupper($m[1])] ?? 0;
        if ($id)
            return [0, 0, $id2iso[$id]??""];
    }
    return [0, 0, ""];
}
/**
* @param string $tp
* @return array ['City,ST' => $rc]
*/
static function city2rmMap(string $tp="major") : array {
    // Static memo on top of the cache() proxy; an empty map is never memoised.
    static $cache = [];
    $hit = $cache[$tp] ?? 0;
    if ($hit) {
        return $hit;
    }
    $cache[$tp] = cache(__CLASS__)->_city2rmMap($tp);
    return $cache[$tp];
}
/**
* SLOW FUNCTION - always use cached version !!!
* @internal
* @param string $tp
* @param bool $UPDATE
* @return array ["City,ST" => $rc]
*/
static function _city2rmMap(string $tp = 'major', bool $UPDATE = false): array {
    // Any type other than "major" is treated as "small".
    ($tp !== "major") && $tp = "small";
    $fn = __DIR__."/city2rmMap.$tp.msgpack.zstd.cache";
    if ($UPDATE) {
        # change ownership of files before updating
        // Rebuild straight from the database. Going through the normal read path here
        // would load the existing file and write the same stale data back.
        file_put_contents($fn, zstd_compress(msgpack_pack(self::_city2rmMapBuild($tp))));
        return ['DONE'];
    }
    // Check first: file_get_contents() raises a warning on a missing file.
    if (is_file($fn) && ($data = file_get_contents($fn))) {
        return msgpack_unpack(zstd_uncompress($data));
    }
    vvv("Alert: Using uncached ".__METHOD__);
    return self::_city2rmMapBuild($tp);
}

// Builds the "city,st" (lower-case) => rm map from geo.city for regions 1..51 that have a location.
private static function _city2rmMapBuild(string $tp): array {
    if ($tp == 'major') {
        $filter = ['population' => ['$gte' => 2500]]; # 11732
    } else {
        # small cities, cities smaller than 1000 considered junk
        $filter = ['population' => ['$gt' => 1000, '$lt' => 2500]]; # 23,800
    }
    $region2st = \Geo::regions(10);
    $r = \M("geo.city")->f(['region' => ['$lte' => 51, '$gt' => 0], 'loc' => ['$exists' => true]] + $filter, "city region name");
    $z = [];
    foreach ($r as $i) {
        $name = strtolower($i['name'].",".$region2st[$i['region']]);
        $z[$name] = \Geo::rm($i['region'], $i['city']);
    }
    return $z;
}
} // class