<?php
namespace removal;
/**
*
* In CLI mode the default memcached max item size of 1M is too small; we need at least 4M. Update the configuration with the "-I 10m" option:
* [root@pa9spider ~]# cat /etc/sysconfig/memcached
* PORT="11211"
* USER="memcached"
* MAXCONN="1024"
* CACHESIZE="256"
* # INCREASE MAX ITEM SIZE
* OPTIONS="-I 10m"
*
*
* Entity:
* see allHashes() method documentation for entity structure
*
* /rd/bin/pe "removal\Hasher::reloadRemovals()"
* TODO: describe entity specific data formats
*
*/
class Hasher_BaseHasher
{
    // Hash type identifiers (written to the `hash_type` column by syncRemovalHashes()).
    // NOTE(review): the low values are laid out as single-bit flags and HASH_TYPE_ALL looks
    // like a mask, but 0x3000, 0x5000, 0x6000 and 0x7000 are NOT single bits (e.g. 0x3000 ==
    // HASH_TYPE_FACET | HASH_TYPE_CITYZOR_PHONE). Values are kept as-is because they are
    // persisted; do not combine or test these types with bitwise operators without
    // confirming how callers use them.
    const HASH_TYPE_NAME = 0x001;
    const HASH_TYPE_ADDRESS = 0x002;
    const HASH_TYPE_PHONE = 0x004;
    const HASH_TYPE_PROFILE = 0x008;
    const HASH_TYPE_OFFENDER = 0x010;
    const HASH_TYPE_ORGANIZATION = 0x020;
    const HASH_TYPE_PROFESSIONAL = 0x040;
    const HASH_TYPE_LICENSE = 0x080;
    const HASH_TYPE_MENTION = 0x100;
    const HASH_TYPE_PEOPLELEGACY = 0x200;
    const HASH_TYPE_CITYZOR_PROFILE = 0x400;
    const HASH_TYPE_TAHOE_ID = 0x800; // PeopleFinders profile ID aka Tahoe ID. Works in pair with HASH_TYPE_PHONE
    const HASH_TYPE_FACET = 0x1000;
    const HASH_TYPE_CITYZOR_PHONE = 0x2000;
    const HASH_TYPE_CITYZOR_ADDRESS = 0x3000;
    const HASH_TYPE_INDEX = 0x4000;
    const HASH_TYPE_NAME_REGIONAL = 0x5000;
    const HASH_TYPE_EMAIL = 0x6000;
    const HASH_TYPE_PERMIT = 0x7000;
    const HASH_TYPE_PHOTO = 0x8000;
    const HASH_TYPE_TRUTHFINDERAPI = 0x10000;
    const HASH_TYPE_ALL = 0xFFFFF;

    /**
     * Entity class names whose removal requests are synced into the hash table.
     * Each listed class is used via $ENTITY_TYPE, dbe(), db_tbl(), unpackData()
     * and buildRequestMetaHashes() (see syncRemovalHashes()).
     * Should be filled in descendant classes.
     */
    public static $INDEXABLE_ENTITIES = [];

    /////////////////////
    //
    // GO Api solution
    //
    ////////////////////

    /**
     * Manual service removed hashes update.
     *
     * The single argument passed to the service's removalsReload() selects the mode:
     *   1  - full hashes reload at service side,
     *   >1 - load removals newer than the given timestamp,
     *   0  - partial load since the last time known at service side.
     *
     * @param int $last_updated Timestamp; only honoured when greater than 1 (1 is the full-reload marker)
     * @param bool $full_reload Force a full reload; takes precedence over $last_updated
     * @return int Whatever the service's removalsReload() returns
     * @example removal\Hasher::reloadRemovals()
     */
    public static function reloadRemovals($last_updated = 0, $full_reload = false)
    {
        if ($full_reload) {
            $mode = 1;
        } elseif ($last_updated > 1) {
            $mode = $last_updated;
        } else {
            $mode = 0;
        }

        return i("go-api.name-server")->removalsReload($mode);
    }

    /**
     * Returns subset of passed hashes that are listed as removed.
     *
     * This base implementation performs no lookup: it only handles the optional
     * refresh trigger and reports nothing as removed. It always returns an array
     * (never null) so callers can rely on the documented type.
     *
     * @param array $hashes
     * @return array [hash_1 => 1, hash_2 => 1, ... ]
     * @example removal\Hasher::findRemovedHashes([1147056740357836278, 165050511284695997, 16505051128469]);
     */
    public static function findRemovedHashes(array $hashes)
    {
        // NOTE(review): any request carrying an "UPDATE" query parameter triggers a DB sync
        // plus a service reload; confirm this entry point is not reachable by untrusted users.
        if (isset($_GET["UPDATE"])) {
            static::syncRemovalHashes();
            static::reloadRemovals();
        }

        return [];
    }

    /**
     * Find which of entities are forbidden.
     *
     * Builds all hashes for every entity via allHashes(), asks findRemovedHashes()
     * which of them are removed, and reports the first removed hash per entity.
     *
     * @param array $entities [uk => entity_data]
     * @return array [uk => hash record (as produced by allHashes()) that matched a removal]
     * @throws \Exception
     */
    public static function findRemoved($entities)
    {
        \Profiler::in(Removal::$MYNAME, "Check data cnt:" . ($entities ? count($entities) : 0));
        try {
            $removed = [];
            if (!$entities) {
                return $removed;
            }

            // Flat list for the lookup, plus per-entity list to map results back.
            $all_hashes = [];
            $entity_hashes = [];
            foreach ($entities as $e_id => $entity) {
                foreach (static::allHashes($entity) as $hash) {
                    $all_hashes[] = $hash['hash'];
                    $entity_hashes[$e_id][] = $hash;
                }
            }
            if (!$all_hashes) {
                return $removed;
            }

            $removed_hashes = static::findRemovedHashes($all_hashes);
            if (!$removed_hashes) {
                return $removed;
            }

            foreach ($entity_hashes as $e_id => $hashes) {
                foreach ($hashes as $hash) {
                    if ($removed_hashes[$hash["hash"]] ?? 0) {
                        // Log the entity that owns this hash (not the last one iterated above).
                        \Profiler::warn(Removal::$MYNAME, "Hash:".$hash["hash"] . ":" . x2s($hash)."; Entity:".x2s($entities[$e_id]));
                        $removed[$e_id] = $hash;
                        break; // one matching hash is enough to mark the entity as removed
                    }
                }
            }
            if ($removed) {
                \Profiler::warn(Removal::$MYNAME, "Removed found: ".count($removed));
            }

            return $removed;
        } finally {
            // Keep the profiler stack balanced even when a hasher or the lookup throws.
            \Profiler::out();
        }
    }

    /////////////////////
    //
    // Backend methods
    //
    ////////////////////

    /**
     * Loads information about removals from all possible entity types.
     * Adds missing hashes to MySQL hash table.
     *
     * For every class in static::$INDEXABLE_ENTITIES, applied removal requests changed
     * at or after the newest `changed` value already stored for that entity type are
     * iterated and their meta hashes are batch-inserted in replace mode.
     *
     * @param bool $full_sync Delete existing hashes before insert ( full hashes updated )
     * @param bool $print_info Echo progress information; forced off unless \Debug::is_admin()
     * @example /rd/bin/pe "removal\Hasher::syncRemovalHashes()"
     */
    public static function syncRemovalHashes($full_sync = false, $print_info = true)
    {
        if (!\Debug::is_admin()) {
            $print_info = false;
        }
        \Profiler::in(Removal::$MYNAME, "Sync MySQL hashes with source removals");
        try {
            if ($full_sync) {
                // Clear existing hashes of every indexable entity type
                $entity_types = [];
                foreach (static::$INDEXABLE_ENTITIES as $entity_class) {
                    $entity_types[] = $entity_class::$ENTITY_TYPE;
                    if ($print_info) {
                        echo "\n[ ADMIN ONLY ] clean $entity_class";
                    }
                }
                // Nothing registered -> nothing to delete (avoids an empty "in" list).
                if ($entity_types) {
                    static::dbe()->delete(static::db_tbl(), ["entity_type in " . array_values_str($entity_types)]);
                }
            }

            foreach (static::$INDEXABLE_ENTITIES as $entity_class) {
                // Newest change already synced for this entity type; 0 when the table has none.
                $last_entity_hash_time = static::dbe()->one("SELECT max(changed) FROM ".static::db_tbl()." WHERE entity_type = ".$entity_class::$ENTITY_TYPE);
                if (!$last_entity_hash_time) {
                    $last_entity_hash_time = 0; //1.09.22
                }
                if ($print_info) {
                    echo "\n[ ADMIN ONLY ] $entity_class ( since $last_entity_hash_time )";
                }

                // Requests are iterated and batch-inserted; this replaced an earlier version
                // that loaded everything through $entity_class::exportMetaHashes(0, $last_entity_hash_time).
                $INSERTER = static::dbe()->batch_insert(static::db_tbl(), "entity_type request_id site_id added changed hash_type hash hash_seed", ["replace" => true]);
                // ">=" re-reads rows sharing the boundary timestamp; replace mode covers the overlap.
                $sql = "changed >= $last_entity_hash_time AND status = " . Removal::REQUEST_STATUS_APPLIED;
                $ITERATOR = $entity_class::dbe()->iterator($entity_class::db_tbl(), ["where" => $sql, "use_index" => "changed"]);

                $cnt = 0; // number of requests processed (reported as "hashes added" below)
                foreach ($ITERATOR as $request) {
                    $cnt++;
                    $request["data"] = $entity_class::unpackData($request["data"]);
                    $request_hashes = $entity_class::buildRequestMetaHashes($request);
                    if ($request_hashes) {
                        foreach ($request_hashes as $h) {
                            $INSERTER->add_array([$h["entity_type"], $h["request_id"], $h["site_id"], $h["added"], $h["changed"], $h["hash_type"], $h["hash"], $h["hash_seed"]]);
                        }
                        if ($print_info && once()) {
                            echo "\n $cnt hashes added";
                        }
                    }
                }

                // Write out the remaining batch before reporting completion.
                $INSERTER->flush();
                if ($print_info) {
                    echo "\n [ DONE ] $cnt hashes added";
                }
            }
        } finally {
            // Keep the profiler stack balanced even when the DB layer throws.
            \Profiler::out();
        }
    }

    /**
     * Builds every hash record for one entity. Must be overridden in descendant classes.
     *
     * findRemoved() reads the 'hash' key of each returned record.
     *
     * @param array $entity
     * @return array list of hash records
     * @throws \LogicException always, in this base class
     */
    public static function allHashes(array $entity): array
    {
        // Throw instead of die(): the failure becomes catchable and carries a stack trace.
        throw new \LogicException("Implement me! " . static::class . "::allHashes() must be overridden");
    }

    /**
     * Pure data to hash.
     *
     * @param string $data_piece
     * @return int|float 60bit unsigned value (hexdec() yields an int when the value fits
     *                   the platform integer, otherwise a float); presumably long_crc()
     *                   returns 15 hex digits here - confirm against its definition
     */
    /*private*/ public static function _hash($data_piece)
    {
        return hexdec(long_crc($data_piece, 15)); //60bit UNSIGNED INT
    }

    /*******************************************************
     * INTERNALS
     ******************************************************/

    /**
     * Database handle used for the hash table ("DB2" connection).
     */
    public static function dbe()
    {
        return dbe("DB2");
    }

    /**
     * Fully qualified name of the removal hashes table.
     */
    public static function db_tbl()
    {
        return "removals.hashes";
    }
}
/*
SELECT invoice_id, count(*) cnt FROM `api_log_v3_2022`
WHERE date > "2022-02-01 00:00:00" AND date < "2022-03-01 00:00:00" AND class="background" AND status="complete"
GROUP BY invoice_id ORDER BY cnt DESC;
SELECT count(*) cnt, count(distinct(invoice_id)) FROM `api_log_v3_2022`
WHERE date > "2022-02-01 00:00:00" AND date < "2022-03-01 00:00:00" AND class="background" AND status="complete";
Total Inv Free Inv
FEB
> 11693 11239 10371 10077
JAN
> 8719 8300 7182 6949
INVOICES (count and distinct users count)
SELECT product, count(*) FROM `invoice`
WHERE created_on > "2022-02-01 00:00:00" AND created_on < "2022-03-01 00:00:00" AND status="complete" AND product in (143, 145, 14, 16)
GROUP BY product;
FEB
inv pid per_pid
FEB 2022 10164 1415 7.18
JAN 2022 7009 1300 5.39
DEC 2021 7899 1283 6.15
NOV
SELECT DATE_FORMAT(created_on, "%Y-%m") month, count(*) i_cnt, count(DISTINCT(pid)) p_cnt, count(*)/count(DISTINCT(pid)) per_pid FROM `invoice`
WHERE created_on > "2019-01-01 00:00:00" AND created_on < "2022-03-01 00:00:00" AND status="complete" AND product = 16
GROUP BY month
ORDER BY month DESC
INVOICES (most used pids)
SELECT pid, count(*) per_pid FROM `invoice`
WHERE created_on > "2022-01-01 00:00:00" AND created_on < "2022-02-01 00:00:00" AND status="complete" AND product = 16 GROUP BY pid ORDER BY per_pid DESC;
Feb:
*/