api/import.php

<?php
declare(strict_types=1);

require_once __DIR__ . '/../includes/core.php';
require_once __DIR__ . '/../includes/archive.php';
require_once __DIR__ . '/../includes/modules/import_matomo_db.php';

loadTranslations();

header('Content-Type: application/json; charset=utf-8');
header('X-Robots-Tag: noindex, nofollow, noarchive, nosnippet');

/*
|--------------------------------------------------------------------------
| Import endpoint
|--------------------------------------------------------------------------
|
| Small generic controller for import preview and execution.
| Provider-specific code lives in includes/modules.
|
*/

/**
 * Sends a JSON response with the given HTTP status and ends the request.
 *
 * Invalid UTF-8 sequences in the payload (for instance inside an exception
 * message echoed back to the client) are substituted instead of making the
 * encoder fail. If encoding still fails, a well-formed JSON error body is
 * sent with status 500 rather than an empty response.
 *
 * @param array $payload Data serialised as the response body.
 * @param int   $status  HTTP status code to send.
 */
function brivacia_import_response(array $payload, int $status = 200): never
{
    $flags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE;
    $body = json_encode($payload, $flags);

    if ($body === false) {
        // json_encode() returns false on failure; echoing that would send an
        // empty body under a JSON content type, which clients cannot parse.
        $status = 500;
        $body = (string)json_encode([
            'ok' => false,
            'error' => json_last_error_msg(),
        ]);
    }

    http_response_code($status);
    echo $body;
    exit;
}

/**
 * Reads the request payload as an associative array.
 *
 * A missing, unreadable or blank raw body falls back to $_POST. A non-blank
 * body must decode to a JSON array/object; otherwise the request is ended
 * with a 400 JSON error response.
 *
 * @return array Decoded JSON body, or $_POST when no raw body was sent.
 */
function brivacia_import_request_json(): array
{
    $body = file_get_contents('php://input');
    $hasBody = is_string($body) && trim($body) !== '';

    if (!$hasBody) {
        return $_POST;
    }

    $decoded = json_decode($body, true);

    if (!is_array($decoded)) {
        // Scalars, null and malformed JSON are all rejected the same way.
        brivacia_import_response(
            [
                'ok' => false,
                'error' => t('import.errors.invalid_json'),
            ],
            400
        );
    }

    return $decoded;
}

/**
 * Reads a boolean option from the request input.
 *
 * The value is parsed with FILTER_VALIDATE_BOOL; a missing key or a value
 * the filter cannot interpret yields $default.
 *
 * @param array  $input   Request input.
 * @param string $key     Option name.
 * @param bool   $default Value used when the key is absent or unparsable.
 */
function brivacia_import_bool(array $input, string $key, bool $default = false): bool
{
    if (array_key_exists($key, $input)) {
        // FILTER_NULL_ON_FAILURE makes "not a boolean" distinguishable from false.
        $parsed = filter_var($input[$key], FILTER_VALIDATE_BOOL, FILTER_NULL_ON_FAILURE);

        return $parsed ?? $default;
    }

    return $default;
}

/**
 * Validates a YYYY-MM-DD date string and returns it unchanged.
 *
 * The value must match the format exactly and must be a real Gregorian
 * calendar date, so values such as 2024-02-30 or 2024-13-01 are rejected
 * instead of being passed on to later date handling.
 *
 * @param string $value Candidate date string.
 * @param string $name  Field name passed to the translated error message.
 *
 * @throws RuntimeException When the value is not a valid calendar date.
 */
function brivacia_import_date(string $value, string $name): string
{
    // \A and \z anchor the whole string; "$" would tolerate a trailing newline.
    $isValid = preg_match('/\A(\d{4})-(\d{2})-(\d{2})\z/', $value, $parts) === 1
        && checkdate((int)$parts[2], (int)$parts[3], (int)$parts[1]);

    if (!$isValid) {
        throw new RuntimeException(t('import.matomo.errors.invalid_date', ['field' => $name]));
    }

    return $value;
}

/**
 * Formats preview sample rows as a single "label (value), ..." line.
 *
 * @param array  $items    Sample rows; each row must be an array.
 * @param string $labelKey Row key holding the label (missing -> empty string).
 * @param string $valueKey Row key holding the count (missing -> 0).
 *
 * @return string The joined line, or "none" when there are no rows.
 */
function brivacia_import_preview_sample_line(array $items, string $labelKey, string $valueKey): string
{
    if ($items === []) {
        return 'none';
    }

    $parts = array_map(
        static fn (array $item): string => sprintf(
            '%s (%d)',
            (string)($item[$labelKey] ?? ''),
            (int)($item[$valueKey] ?? 0)
        ),
        $items
    );

    return implode(', ', $parts);
}

/**
 * Lists every calendar day between two dates, both bounds included.
 *
 * The calculation is pinned to UTC. In the server's default timezone a
 * "+1 day" step can land on a midnight that does not exist (DST starting at
 * 00:00), which shifts the cursor off midnight and would drop the last day
 * of the range.
 *
 * @param string $from First day (YYYY-MM-DD).
 * @param string $to   Last day (YYYY-MM-DD).
 *
 * @return array List of YYYY-MM-DD strings; empty when $from is after $to.
 *
 * @throws Exception When DateTimeImmutable cannot parse one of the bounds.
 */
function brivacia_import_range_days(string $from, string $to): array
{
    $utc = new DateTimeZone('UTC');
    $start = new DateTimeImmutable($from, $utc);
    $end = new DateTimeImmutable($to, $utc);

    $days = [];

    for ($day = $start; $day <= $end; $day = $day->modify('+1 day')) {
        $days[] = $day->format('Y-m-d');
    }

    return $days;
}

/**
 * Returns the rows stored under a given day key.
 *
 * @param array  $data Data keyed by day.
 * @param string $day  Day key to look up.
 *
 * @return array The stored rows, or an empty array when the day is missing
 *               or its value is not an array.
 */
function brivacia_import_rows_for_day(array $data, string $day): array
{
    if (isset($data[$day]) && is_array($data[$day])) {
        return $data[$day];
    }

    return [];
}

/**
 * Reads one non-negative integer metric from the row stored for a day.
 *
 * @param array  $data   Data keyed by day; each value is expected to be a row array.
 * @param string $day    Day key to look up.
 * @param string $metric Metric key inside the row.
 *
 * @return int The metric cast to int and clamped at 0; 0 when the day, the
 *             row or the metric is missing.
 */
function brivacia_import_metric_for_day(array $data, string $day, string $metric): int
{
    $row = $data[$day] ?? null;

    if (!is_array($row)) {
        return 0;
    }

    $value = (int)($row[$metric] ?? 0);

    return $value > 0 ? $value : 0;
}

/**
 * Returns the first non-blank scalar value found under the given keys.
 *
 * @param array $row  Row to inspect.
 * @param array $keys Candidate keys, checked in order.
 *
 * @return string The trimmed value of the first match, or '' when no key
 *                holds a non-blank scalar.
 */
function brivacia_import_first_string(array $row, array $keys): string
{
    foreach ($keys as $candidate) {
        $value = $row[$candidate] ?? null;

        // null, arrays and objects are never usable as a label.
        if (!is_scalar($value)) {
            continue;
        }

        $text = trim((string)$value);

        if ($text !== '') {
            return $text;
        }
    }

    return '';
}

/**
 * Returns the first numeric value found under the given keys as an integer.
 *
 * @param array $row  Row to inspect.
 * @param array $keys Candidate keys, checked in order.
 *
 * @return int The first numeric value cast to int and clamped at 0, or 0
 *             when none of the keys holds a numeric value.
 */
function brivacia_import_first_int(array $row, array $keys): int
{
    foreach ($keys as $candidate) {
        $value = $row[$candidate] ?? null;

        if (is_numeric($value)) {
            $number = (int)$value;

            return $number < 0 ? 0 : $number;
        }
    }

    return 0;
}

/**
 * Flattens nested report rows into a single list.
 *
 * Each array row is emitted as-is (its 'subtable' key is kept), immediately
 * followed by the flattened content of its 'subtable' when that is an array.
 * Non-array entries are skipped.
 *
 * @param array $rows Rows, possibly nested through 'subtable'.
 *
 * @return array Flat list of rows in depth-first, parent-before-children order.
 */
function brivacia_import_flatten_rows(array $rows): array
{
    $collected = [];

    foreach ($rows as $entry) {
        if (is_array($entry)) {
            $collected[] = $entry;
            $children = $entry['subtable'] ?? null;

            if (is_array($children)) {
                array_push($collected, ...brivacia_import_flatten_rows($children));
            }
        }
    }

    return $collected;
}

/**
 * Extracts a two-letter code from the start of a language value.
 *
 * The value is trimmed and lowercased, then its first two characters are
 * returned when both are ASCII letters.
 *
 * @param string $value Raw language value.
 *
 * @return string The two leading letters in lowercase, or '' when the value
 *                does not start with two letters.
 */
function brivacia_import_lang_code(string $value): string
{
    $normalized = strtolower(trim($value));

    return preg_match('/^[a-z]{2}/', $normalized, $match) === 1
        ? $match[0]
        : '';
}

/**
 * Builds the storage key of an imported page.
 *
 * When the URL has no usable path, the trimmed label is used as the key
 * ('/' if the label is blank too). Otherwise the key is the path without a
 * trailing slash, optionally prefixed with "xx:" for the language and
 * suffixed with "?query" when the URL carries a query string.
 *
 * @param string $url   Page URL.
 * @param string $label Page label, used only when the URL has no path.
 * @param string $lang  Language provided by the importer, if any.
 */
function brivacia_import_page_key(string $url, string $label, string $lang = ''): string
{
    $parts = parse_url($url);
    $rawPath = is_array($parts) ? ($parts['path'] ?? null) : null;
    $query = is_array($parts) ? ($parts['query'] ?? null) : null;

    if (!is_string($rawPath) || $rawPath === '') {
        $fallback = trim($label);

        return $fallback === '' ? '/' : $fallback;
    }

    $path = '/' . trim($rawPath, '/');

    /*
    |--------------------------------------------------------------------------
    | Language-aware import keys
    |--------------------------------------------------------------------------
    |
    | The importer stays generic:
    | - an explicit /xx/ prefix in the URL decides the language;
    | - failing that, the language handed over by the importer is used;
    | - failing that, the historical neutral /path key is kept.
    |
    | No default language is made up here. Site-specific defaults belong in
    | custom rules or in the tracked website itself.
    |
    */

    $hasPrefix = preg_match('~^/([a-z]{2})(/|$)~i', $path, $prefix) === 1;
    $code = $hasPrefix
        ? strtolower($prefix[1])
        : brivacia_import_lang_code($lang);

    $key = $code === '' ? $path : $code . ':' . $path;

    if (is_string($query) && $query !== '') {
        $key .= '?' . $query;
    }

    return $key;
}

/**
 * Resolves the two-letter country code of a report row.
 *
 * The code is taken from the first non-blank value among the 'code',
 * 'countryCode' and 'country_code' keys; it is lowercased and stripped of
 * everything except a-z, and accepted only when exactly two letters remain.
 * Otherwise the row label is looked up in a small name-to-code table.
 *
 * @param array $row Report row.
 *
 * @return string Lowercase two-letter code, or '' when none can be determined.
 */
function brivacia_import_country_code(array $row): string
{
    $code = strtolower(brivacia_import_first_string($row, ['code', 'countryCode', 'country_code']));
    $code = preg_replace('/[^a-z]/', '', $code) ?? '';

    if (strlen($code) === 2) {
        return $code;
    }

    // Last-resort fallback for old/custom Matomo responses without a code field.
    // Labels may contain non-ASCII letters ("ALGÉRIE"); byte-wise strtolower()
    // leaves those untouched, so the lookup needs a multibyte-aware lowercase.
    $label = mb_strtolower(brivacia_import_first_string($row, ['label']), 'UTF-8');
    $map = [
        'france' => 'fr',
        'united states' => 'us',
        'united kingdom' => 'gb',
        'canada' => 'ca',
        'belgium' => 'be',
        'switzerland' => 'ch',
        'germany' => 'de',
        'spain' => 'es',
        'italy' => 'it',
        'netherlands' => 'nl',
        'brazil' => 'br',
        'china' => 'cn',
        'japan' => 'jp',
        'australia' => 'au',
        'algérie' => 'dz',
        'algeria' => 'dz',
        'maroc' => 'ma',
        'morocco' => 'ma',
    ];

    return $map[$label] ?? '';
}

/**
 * Validates the raw request input and returns the normalized import options.
 *
 * Every provider gets the common keys: provider, from, to, brivacia_site,
 * import_hits, import_countries, import_referrers, import_pages and replace.
 * 'matomo_api' adds url, site, token and domain_filter; 'matomo_db' adds
 * site, the db_* connection settings and domain_filter.
 *
 * @param array $input Raw request input (decoded JSON body or $_POST).
 *
 * @return array Normalized options.
 *
 * @throws RuntimeException When the provider, the dates, the date range or
 *                          a provider-specific field is invalid.
 */
function brivacia_import_normalized_input(array $input): array
{
    $provider = trim((string)($input['provider'] ?? 'matomo_api'));
    $knownProviders = ['adobe_analytics', 'google_analytics', 'matomo_api', 'matomo_db', 'plausible', 'umami'];

    if (!in_array($provider, $knownProviders, true)) {
        throw new RuntimeException(t('import.errors.invalid_provider'));
    }

    $from = brivacia_import_date(trim((string)($input['from'] ?? '')), 'from');
    $to = brivacia_import_date(trim((string)($input['to'] ?? '')), 'to');

    // Both values are zero-padded YYYY-MM-DD, so string order is date order.
    if ($from > $to) {
        throw new RuntimeException(t('import.errors.invalid_range'));
    }

    $brivaciaSite = trim((string)($input['brivacia_site'] ?? ''));

    if ($brivaciaSite === '') {
        // No target given: use the first configured site, or "import" if none.
        $sites = array_keys(brivacia_sites());
        $brivaciaSite = $sites[0] ?? 'import';
    }

    $normalized = [
        'provider' => $provider,
        'from' => $from,
        'to' => $to,
        'brivacia_site' => mb_substr($brivaciaSite, 0, 120, 'UTF-8'),
        'import_hits' => brivacia_import_bool($input, 'import_hits', true),
        'import_countries' => brivacia_import_bool($input, 'import_countries', true),
        'import_referrers' => brivacia_import_bool($input, 'import_referrers', true),
        'import_pages' => brivacia_import_bool($input, 'import_pages', true),
        'replace' => brivacia_import_bool($input, 'replace', false),
    ];

    if ($provider === 'matomo_api') {
        $url = rtrim(trim((string)($input['url'] ?? '')), '/');
        $site = trim((string)($input['site'] ?? ''));
        $token = trim((string)($input['token'] ?? ''));

        // FILTER_VALIDATE_URL accepts any scheme (file:, ftp:, ...). A remote
        // API endpoint supplied by the client must be reachable over HTTP(S) only.
        $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
        $isHttpUrl = $url !== ''
            && filter_var($url, FILTER_VALIDATE_URL) !== false
            && in_array($scheme, ['http', 'https'], true);

        if (!$isHttpUrl) {
            throw new RuntimeException(t('import.matomo.errors.invalid_url'));
        }

        if ($site === '' || !ctype_digit($site)) {
            throw new RuntimeException(t('import.matomo.errors.invalid_site'));
        }

        if ($token === '') {
            throw new RuntimeException(t('import.matomo.errors.missing_token'));
        }

        return $normalized + [
            'url' => $url,
            'site' => $site,
            'token' => $token,
            'domain_filter' => mb_substr(trim((string)($input['domain_filter'] ?? '')), 0, 250, 'UTF-8'),
        ];
    }

    if ($provider === 'matomo_db') {
        $site = trim((string)($input['site'] ?? ''));
        $port = (int)($input['db_port'] ?? 3306);
        $prefix = trim((string)($input['db_prefix'] ?? 'matomo_'));

        if ($site === '' || !ctype_digit($site)) {
            throw new RuntimeException(t('import.matomo.errors.invalid_site'));
        }

        // Only letters, digits and underscores are accepted (an empty prefix is allowed).
        // NOTE(review): presumably because the prefix ends up in table names — confirm in the module.
        if (!preg_match('/^[a-zA-Z0-9_]*$/', $prefix)) {
            throw new RuntimeException(t('import.matomo.db.errors.invalid_prefix'));
        }

        return $normalized + [
            'site' => $site,
            'db_host' => mb_substr(trim((string)($input['db_host'] ?? 'localhost')), 0, 250, 'UTF-8'),
            'db_port' => $port > 0 ? $port : 3306,
            'db_name' => mb_substr(trim((string)($input['db_name'] ?? '')), 0, 250, 'UTF-8'),
            'db_user' => mb_substr(trim((string)($input['db_user'] ?? '')), 0, 250, 'UTF-8'),
            // The password is kept verbatim: surrounding whitespace may be significant.
            'db_password' => (string)($input['db_password'] ?? ''),
            'db_prefix' => $prefix,
            'domain_filter' => mb_substr(trim((string)($input['domain_filter'] ?? '')), 0, 250, 'UTF-8'),
        ];
    }

    // Other known providers only carry the common options.
    return $normalized;
}

/*
|--------------------------------------------------------------------------
| Request handling
|--------------------------------------------------------------------------
|
| POST only. ?action=test previews an import, ?action=import executes it.
| Every outcome is reported as JSON through brivacia_import_response(),
| which ends the request.
|
*/

// Declared before the try block so the catch handler can always inspect it.
$maintenanceDb = null;

try {
    if (($_SERVER['REQUEST_METHOD'] ?? '') !== 'POST') {
        // A 405 response must tell the client which methods are accepted.
        header('Allow: POST');

        brivacia_import_response([
            'ok' => false,
            'error' => t('import.errors.method'),
        ], 405);
    }

    $action = (string)($_GET['action'] ?? '');
    $input = brivacia_import_normalized_input(brivacia_import_request_json());

    if ($action === 'test') {
        if ($input['provider'] === 'matomo_db') {
            $site = brivacia_matomo_db_site_info($input);
            $stats = brivacia_matomo_db_preview($input);
            $samples = brivacia_matomo_db_preview_samples($input);

            brivaciaLog(
                'import/preview_matomo_db.log',
                'Preview | Provider: matomo_db' .
                ' | Site: ' . ($site['name'] ?? $input['site']) .
                ' | From: ' . $input['from'] .
                ' | To: ' . $input['to'] .
                ' | Domain filter: ' . ($input['domain_filter'] !== '' ? $input['domain_filter'] : 'none') .
                ' | Visits: ' . $stats['visits'] .
                ' | Unique visitors: ' . $stats['unique_visitors'] .
                ' | Pageviews: ' . $stats['pageviews'] .
                ' | Countries: ' . $stats['countries'] .
                ' | Referrers: ' . $stats['referrers'] .
                ' | Pages: ' . $stats['pages'] .
                ' | Country sample: ' . brivacia_import_preview_sample_line($samples['countries'], 'country', 'visits') .
                ' | Referrer sample: ' . brivacia_import_preview_sample_line($samples['referrers'], 'referrer', 'visits') .
                ' | Page sample: ' . brivacia_import_preview_sample_line($samples['pages'], 'page', 'views')
            );

            brivacia_import_response([
                'ok' => true,
                'site' => $site,
                'stats' => $stats,
                'preview' => $samples,
                'message' => t('import.messages.test_success_with_stats', [
                    'site' => (string)($site['name'] ?? $input['site']),
                    'unique_visitors' => (string)$stats['unique_visitors'],
                    'visits' => (string)$stats['visits'],
                    'pageviews' => (string)$stats['pageviews'],
                ]),
            ]);
        }

        // Only matomo_db has a preview implementation in this controller.
        brivacia_import_response([
            'ok' => false,
            'error' => t('import.provider.unavailable'),
        ], 400);
    }

    if ($action === 'import') {
        $stats = match ($input['provider']) {
            'matomo_db' => brivacia_import_matomo_db($input),
            default => throw new RuntimeException(t('import.provider.unavailable')),
        };

        $maintenanceDb = brivaciaDb();

        /*
        |--------------------------------------------------------------------------
        | Post-import maintenance
        |--------------------------------------------------------------------------
        |
        | Imported page rows are stored as unresolved first. This lets Brivacia
        | apply the current page rules before closed years are archived.
        |
        */

        maybeNormalizeStoredReferrers($maintenanceDb);
        maybeNormalizeStoredPages($maintenanceDb);

        refreshPageLabelsForRange(
            $maintenanceDb,
            'site = ? AND day BETWEEN ? AND ?',
            [$input['brivacia_site'], $input['from'], $input['to']],
            100
        );

        refreshPageLabels($maintenanceDb, 25);

        $archivedYears = archiveClosedYears($maintenanceDb);
        brivaciaMarkYearArchiveChecked();

        brivacia_import_response([
            'ok' => true,
            'stats' => $stats,
            'archived_years' => $archivedYears,
            'message' => t('import.messages.import_success', [
                'days' => (string)$stats['days'],
                'unique_visitors' => (string)$stats['unique_visitors'],
                'visits' => (string)$stats['visits'],
                'pageviews' => (string)$stats['pageviews'],
            ]),
        ]);
    }

    brivacia_import_response([
        'ok' => false,
        'error' => t('import.errors.unknown_action'),
    ], 400);
} catch (Throwable $e) {
    // This script never assigns $db itself; the connection it opens is
    // $maintenanceDb. Both are checked so a transaction left open on either
    // one is rolled back ($db is kept in case an included file defines it).
    foreach ([$db ?? null, $maintenanceDb] as $connection) {
        if ($connection instanceof PDO && $connection->inTransaction()) {
            $connection->rollBack();
        }
    }

    brivaciaLog(
        'import/import.log',
        'Import failed | Error: ' . $e->getMessage()
    );

    // NOTE(review): validation errors raised while normalizing the input also
    // end up here and are answered with 500, and the raw exception message is
    // returned to the client — confirm both are intended.
    brivacia_import_response([
        'ok' => false,
        'error' => $e->getMessage(),
    ], 500);
}