import argparse
import json
import math
import os
import re
import subprocess
import time
import unicodedata
import urllib.parse
import urllib.request
from collections import defaultdict
from datetime import datetime

import openpyxl
from lxml import html


CHROME_PATHS = [
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
    r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
]

CHROME_FLAGS = [
    "--headless=new",
    "--disable-gpu",
    "--disable-software-rasterizer",
    "--disable-dev-shm-usage",
    "--no-sandbox",
    "--disable-features=VizDisplayCompositor",
    "--use-gl=swiftshader",
    "--enable-unsafe-swiftshader",
    "--remote-debugging-port=9222",
    "--remote-allow-origins=*",
    "about:blank",
]

TECHNICAL_COLUMNS = {
    "product_id",
    "sku",
    "Produit",
    "Nom produit",
    "Catégorie",
    "Sous-catégorie",
    "Type produit",
    "Clé combinaison",
    "Attributs techniques JSON",
    "Attributs tarifés inclus",
    "base_spec_json",
    "Source",
    "Quantité",
    "Coût achat HT",
    "Prix vente HT",
}

DEFAULT_OPTION_WORDS = {
    "",
    "-----",
    "sans",
    "non",
}

SKIP_SOURCE_PREFIXES = (
    "Base nettoyée pour implémentation",
)

MANUAL_SOURCE_BY_SKU = {
    "PAP-CARTES-DE-VISITE": "https://www.realisaprint.com/carte-visite-impression-r1.html",
    "PAP-SOUS-MAINS": "https://www.realisaprint.com/sous-main-r2474.html",
    "RES-MENUS": "https://www.realisaprint.com/menu-r2462.html",
    "RES-SETS-DE-TABLE-PAPIER": "https://www.realisaprint.com/set-table-r2525.html",
}


def choose_browser():
    for path in CHROME_PATHS:
        if os.path.exists(path):
            return path
    raise RuntimeError("No supported browser executable found.")


def clean_line_label(text):
    text = value_to_string(text)
    return text.split("\n", 1)[0].strip()


def value_to_string(value):
    if value is None:
        return ""
    if isinstance(value, float):
        if math.isnan(value):
            return ""
        if value.is_integer():
            return str(int(value))
    return str(value).strip()


def normalize_text(text):
    text = value_to_string(text)
    text = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in text if not unicodedata.combining(ch))
    text = text.lower()
    text = text.replace("m²", "m2").replace("g/m²", "g/m2").replace("®", "")
    text = text.replace("œ", "oe")
    text = re.sub(r"[’'`]", " ", text)
    text = re.sub(r"[/|]", " ", text)
    text = re.sub(r"[^a-z0-9+]+", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def source_domain(url):
    parsed = urllib.parse.urlparse(url)
    return parsed.netloc.lower()


def parse_price_fragment(text):
    text = value_to_string(text)
    m = re.search(r"(\d+(?:[.,]\d+)?)\s*€", text)
    if not m:
        return None
    return float(m.group(1).replace(",", "."))


def parse_qty_fragment(text):
    text = value_to_string(text)
    m = re.search(r"\b(\d[\d\s]*)\b", text)
    if not m:
        return None
    return int(re.sub(r"\s+", "", m.group(1)))


class ChromeCdp:
    def __init__(self, browser_path):
        self.browser_path = browser_path
        self.proc = None
        self.ws = None
        self._msg_id = 0

    def __enter__(self):
        from websocket import create_connection

        self.proc = subprocess.Popen(
            [self.browser_path, *CHROME_FLAGS],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        for _ in range(40):
            try:
                with urllib.request.urlopen("http://127.0.0.1:9222/json/version", timeout=2):
                    break
            except Exception:
                time.sleep(0.5)
        else:
            raise RuntimeError("Chrome CDP endpoint did not become ready.")

        with urllib.request.urlopen("http://127.0.0.1:9222/json/list", timeout=5) as fh:
            targets = json.load(fh)
        page = next(t for t in targets if t.get("type") == "page")
        self.ws = create_connection(page["webSocketDebuggerUrl"], timeout=60, origin="http://127.0.0.1:9222")
        self.send("Page.enable")
        self.send("Runtime.enable")
        return self

    def __exit__(self, exc_type, exc, tb):
        try:
            if self.ws is not None:
                self.ws.close()
        finally:
            if self.proc is not None:
                try:
                    self.proc.terminate()
                    self.proc.wait(timeout=5)
                except Exception:
                    self.proc.kill()

    def send(self, method, params=None):
        self._msg_id += 1
        msg_id = self._msg_id
        payload = {"id": msg_id, "method": method}
        if params is not None:
            payload["params"] = params
        self.ws.send(json.dumps(payload))
        while True:
            resp = json.loads(self.ws.recv())
            if resp.get("id") == msg_id:
                return resp

    def navigate(self, url, wait_seconds=7):
        self.send("Page.navigate", {"url": url})
        time.sleep(wait_seconds)

    def evaluate(self, expression, await_promise=False):
        resp = self.send(
            "Runtime.evaluate",
            {
                "expression": expression,
                "returnByValue": True,
                "awaitPromise": await_promise,
            },
        )
        result = resp.get("result", {}).get("result", {})
        return result.get("value")


class ObjetRamaScraper:
    def __init__(self):
        self.cache = {}

    def fetch(self, url):
        if url in self.cache:
            return self.cache[url]
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=30) as fh:
            doc = html.fromstring(fh.read())
        price_blocks = doc.xpath("//*[contains(@class, 'tier') or contains(@id, 'tier') or contains(@class, 'price')]")
        price_rows = []
        seen = set()
        for block in price_blocks:
            text = " ".join(" ".join(block.xpath(".//text()")).split())
            if not text or text in seen:
                continue
            seen.add(text)
            for qty, price in re.findall(r"Achetez-en\s+(\d[\d\s]*)\s+à\s+(\d+(?:[.,]\d+)?)\s*€", text, flags=re.I):
                price_rows.append(
                    {
                        "qty": int(re.sub(r"\s+", "", qty)),
                        "unit_price": float(price.replace(",", ".")),
                    }
                )
        price_rows.sort(key=lambda row: row["qty"])
        self.cache[url] = price_rows
        return price_rows

    def price_for_row(self, url):
        rows = self.fetch(url)
        if not rows:
            return None
        chosen = rows[0]
        return {
            "quantity": chosen["qty"],
            "cost": round(chosen["qty"] * chosen["unit_price"], 2),
            "meta": {"source_kind": "objetrama-tier"},
        }


class RealisaprintScraper:
    def __init__(self, browser_path):
        self.browser_path = browser_path

    def process_product_rows(self, url, sku, row_specs, batch_size=40):
        results = {}
        with ChromeCdp(self.browser_path) as chrome:
            chrome.navigate(url)
            for i in range(0, len(row_specs), batch_size):
                batch = row_specs[i : i + batch_size]
                script = self._build_batch_script(sku, batch)
                batch_results = chrome.evaluate(script, await_promise=True) or []
                for item in batch_results:
                    if item and item.get("excel_row") is not None:
                        results[item["excel_row"]] = item
        return results

    def _build_batch_script(self, sku, batch):
        batch_json = json.dumps(batch, ensure_ascii=False)
        sku_json = json.dumps(sku)
        return f"""
(async () => {{
  const sku = {sku_json};
  const batch = {batch_json};
  const sleep = (ms) => new Promise(r => setTimeout(r, ms));
  const firstLine = (s) => String(s || '').split(/\\n+/)[0].trim();
  const norm = (s) => firstLine(String(s || ''))
    .normalize('NFD')
    .replace(/[\\u0300-\\u036f]/g, '')
    .toLowerCase()
    .replace(/g\\/m²/g, 'g m2')
    .replace(/m²/g, 'm2')
    .replace(/[’'`]/g, ' ')
    .replace(/[^a-z0-9+]+/g, ' ')
    .replace(/\\s+/g, ' ')
    .trim();
  const dims = (s) => {{
    const t = String(s || '').toLowerCase();
    const nums = (t.match(/\\d+(?:[.,]\\d+)?/g) || []).map(x => parseFloat(x.replace(',', '.')));
    const factor = t.includes('mm') ? 0.1 : 1.0;
    return nums.map(n => Math.round(n * factor * 100) / 100);
  }};
  const dimsMatch = (a, b) => {{
    const A = dims(a);
    const B = dims(b);
    if (!A.length || A.length !== B.length) return false;
    const pairs = [1, 10, 0.1];
    return pairs.some(f => A.every((x, i) => Math.abs(x - (B[i] * f)) < 0.21));
  }};
  const tokenScore = (wanted, got) => {{
    const a = new Set(norm(wanted).split(' ').filter(Boolean));
    const b = new Set(norm(got).split(' ').filter(Boolean));
    if (!a.size || !b.size) return 0;
    let inter = 0;
    for (const x of a) if (b.has(x)) inter += 1;
    return inter / Math.max(a.size, b.size);
  }};
  const captureControls = () => {{
    return Array.from(document.querySelectorAll('select')).map((el, idx) => {{
      const box = el.closest('.web_ligne_variable_saisie') || el.parentElement;
      const rawLabel = box?.querySelector('.web_ligne_variable_saisie_intitule')?.innerText || '';
      const label = firstLine(rawLabel);
      const options = Array.from(el.options).map((o, optionIndex) => ({{
        index: optionIndex,
        text: o.textContent.trim()
      }}));
      return {{
        key: el.name || el.id || String(idx),
        selector: el.id ? ('#' + CSS.escape(el.id)) : ('select[name="' + String(el.name).replace(/"/g, '\\\\"') + '"]'),
        label,
        options,
        defaultIndex: el.selectedIndex
      }};
    }}).filter(c => c.options.length > 1);
  }};
  let controls = captureControls();
  const selectValue = async (control, optionIndex) => {{
    const el = document.querySelector(control.selector);
    if (!el) return false;
    el.selectedIndex = optionIndex;
    el.value = el.options[optionIndex]?.value || el.value;
    el.dispatchEvent(new Event('input', {{ bubbles: true }}));
    el.dispatchEvent(new Event('change', {{ bubbles: true }}));
    if (window.jQuery) {{
      window.jQuery(el).trigger('change');
      window.jQuery(el).trigger('input');
    }}
    await sleep(60);
    return true;
  }};
  const resetControls = async () => {{
    controls = captureControls();
    for (const control of controls) {{
      await selectValue(control, control.defaultIndex);
    }}
    await sleep(120);
  }};
  const getControlByLabel = (labelText) => {{
    const target = norm(labelText);
    return controls.find(c => norm(c.label).includes(target) || target.includes(norm(c.label)));
  }};
  const optionIsDefault = (text) => {{
    const n = norm(text);
    return n === '' || n === '-----' || n === 'sans' || n === 'non';
  }};
  const firstActiveOption = (control) => {{
    for (const option of control.options) {{
      if (!optionIsDefault(option.text)) return option;
    }}
    return control.options[control.defaultIndex] || control.options[0] || null;
  }};
  const pickMatchingOption = (control, wanted) => {{
    let best = null;
    const wantedNorm = norm(wanted);
    for (const option of control.options) {{
      const optionNorm = norm(option.text);
      let score = 0;
      if (wantedNorm && optionNorm === wantedNorm) score = 1.0;
      else if (wantedNorm && optionNorm.startsWith(wantedNorm + ' ')) score = 0.985;
      else if (wantedNorm && (optionNorm.includes(wantedNorm) || wantedNorm.includes(optionNorm))) score = 0.95;
      else if (dimsMatch(wanted, option.text)) score = 0.93;
      else score = tokenScore(wanted, option.text);
      if (!best || score > best.score) best = {{ option, score }};
    }}
    return best;
  }};
  const applyOverrides = async (row) => {{
    const applied = [];
    for (const override of (row.overrides || [])) {{
      const control = getControlByLabel(override.control_label);
      if (!control) continue;
      let target = null;
      for (const option of control.options) {{
        const optNorm = norm(option.text);
        if (override.option_text && (optNorm === norm(override.option_text) || optNorm.includes(norm(override.option_text)) || norm(override.option_text).includes(optNorm))) {{
          target = option;
          break;
        }}
      }}
      if (!target) {{
        target = firstActiveOption(control);
      }}
      if (target) {{
        await selectValue(control, target.index);
        applied.push({{ control: control.label, option: target.text, source: 'override' }});
      }}
    }}
    return applied;
  }};
  const applyDesiredValues = async (row) => {{
    const applied = [];
    const used = new Set((row.overrides || []).map(x => norm(x.control_label)));
    for (const desired of row.desired || []) {{
      let best = null;
      for (const control of controls) {{
        if (used.has(norm(control.label))) continue;
        let candidate = pickMatchingOption(control, desired.value);
        if (candidate && candidate.score >= 0.76) {{
          const labelBonus = norm(desired.column).includes(norm(control.label)) || norm(control.label).includes(norm(desired.column)) ? 0.08 : 0;
          const finalScore = candidate.score + labelBonus;
          if (!best || finalScore > best.score) {{
            best = {{ control, option: candidate.option, score: finalScore }};
          }}
        }}
      }}
      if (!best) {{
        for (const control of controls) {{
          if (used.has(norm(control.label))) continue;
          const controlScore = tokenScore(desired.value, control.label);
          if (controlScore >= 0.7) {{
            const target = firstActiveOption(control);
            if (target && (!best || controlScore > best.score)) {{
              best = {{ control, option: target, score: controlScore, byLabel: true }};
            }}
          }}
        }}
      }}
      if (best) {{
        await selectValue(best.control, best.option.index);
        used.add(norm(best.control.label));
        applied.push({{
          control: best.control.label,
          option: best.option.text,
          desired: desired.value,
          source: best.byLabel ? 'label-match' : 'option-match'
        }});
      }}
    }}
    return applied;
  }};
  const calculateButton = () => {{
    return Array.from(document.querySelectorAll('.bouton_calculer_du_choix_produit')).find(el => getComputedStyle(el).display !== 'none')
      || document.querySelector('.bouton_calculer_du_choix_produit')
      || Array.from(document.querySelectorAll('div,button,a')).find(el => /obtenir mon tarif/i.test(el.innerText || ''));
  }};
  const clickCalculate = async () => {{
    const btn = calculateButton();
    if (!btn) return false;
    btn.click();
    await sleep(120);
    return true;
  }};
  const waitForPrice = async (beforeSnippet) => {{
    for (let i = 0; i < 45; i += 1) {{
      const rows = document.querySelectorAll('table.tableau-prix tr');
      const busy = /calcul en cours/i.test(document.body.innerText || '');
      const snippet = document.querySelector('#product_snippet')?.innerText || '';
      if (rows.length > 2 && !busy) return true;
      if (!busy && beforeSnippet && snippet && snippet !== beforeSnippet) return true;
      if (!busy && i >= 8) return true;
      await sleep(250);
    }}
    return false;
  }};
  const euroToFloat = (text) => {{
    const m = String(text || '').match(/(\\d+(?:[.,]\\d+)?)\\s*€/);
    return m ? parseFloat(m[1].replace(',', '.')) : null;
  }};
  const intFromText = (text) => {{
    const m = String(text || '').match(/\\b(\\d[\\d\\s]*)\\b/);
    return m ? parseInt(m[1].replace(/\\s+/g, ''), 10) : null;
  }};
  const parseSnippet = () => {{
    const node = document.querySelector('#product_snippet');
    const text = node ? node.innerText : '';
    const qty = intFromText((text.match(/les\\s+([\\d\\s]+)\\s+ex/i) || [])[1] || '');
    const price = euroToFloat(text);
    if (qty && price !== null) {{
      return {{
        quantity: qty,
        standard_price: price,
        express_price: null,
        urgent_price: null,
        source_kind: 'snippet'
      }};
    }}
    if (price !== null && /unite|l unite|piece|pi[eè]ce/i.test(norm(text))) {{
      return {{
        quantity: 1,
        standard_price: price,
        express_price: null,
        urgent_price: null,
        source_kind: 'snippet-unit'
      }};
    }}
    return null;
  }};
  const parseTable = () => {{
    const lineRows = Array.from(document.querySelectorAll('table.tableau-prix tr')).filter(tr => (tr.className || '').includes('ligne_tableau_prix'));
    if (!lineRows.length) return parseSnippet();
    const checked = document.querySelector('input[name="optionsRadios"]:checked');
    let targetRow = checked ? checked.closest('tr') : null;
    if (!targetRow) {{
      targetRow = lineRows.find(tr => (tr.className || '').includes('qte_active')) || lineRows[0];
    }}
    const cells = Array.from(targetRow.querySelectorAll('td')).map(td => td.innerText.replace(/\\s+/g, ' ').trim());
    const quantity = intFromText(cells[0] || '');
    const urgent = euroToFloat(cells[2] || '');
    const express = euroToFloat(cells[5] || '');
    const standard = euroToFloat(cells[8] || '');
    return {{
      quantity,
      standard_price: standard,
      express_price: express,
      urgent_price: urgent,
      source_kind: 'table'
    }};
  }};
  const results = [];
  for (const row of batch) {{
    try {{
      await resetControls();
      const applied = [];
      applied.push(...(await applyOverrides(row)));
      applied.push(...(await applyDesiredValues(row)));
      const beforeSnippet = document.querySelector('#product_snippet')?.innerText || '';
      await clickCalculate();
      await waitForPrice(beforeSnippet);
      const pricing = parseTable() || parseSnippet() || {{}};
      const chosen = pricing.standard_price ?? pricing.express_price ?? pricing.urgent_price ?? null;
      results.push({{
        excel_row: row.excel_row,
        quantity: pricing.quantity || null,
        cost: chosen,
        meta: {{
          source_kind: pricing.source_kind || 'unknown',
          applied
        }}
      }});
    }} catch (err) {{
      results.push({{
        excel_row: row.excel_row,
        quantity: null,
        cost: null,
        meta: {{ error: String(err) }}
      }});
    }}
  }}
  return results;
}})()
"""


def build_desired_values(headers, row_values):
    desired = []
    for header in headers:
        if header in TECHNICAL_COLUMNS:
            continue
        value = value_to_string(row_values.get(header))
        if not value:
            continue
        desired.append({"column": header, "value": value})
    return desired


def build_overrides(sku, row_values):
    overrides = []
    option_value = value_to_string(row_values.get("Options"))
    tickets_value = value_to_string(row_values.get("Nombre de tickets"))
    cover_value = value_to_string(row_values.get("Couverture"))
    if cover_value:
        overrides.append({"control_label": "PAGES DE COUVERTURE", "option_text": "Oui"})
    if sku == "EVT-BILLETTERIE":
        if tickets_value:
            if normalize_text(tickets_value) == "ticket libre":
                overrides.append({"control_label": "MISE EN CARNET", "option_text": "Pas de carnet - ticket libre"})
        if option_value:
            opt = normalize_text(option_value)
            if "couverture" in opt:
                overrides.append({"control_label": "COUVERTURE 350G/M²", "option_text": "Impression quadri recto"})
            elif "code" in opt and "barre" in opt:
                overrides.append({"control_label": "CODE BARRES UNITAIRE", "option_text": "Code 39 recto"})
            elif "numerotation" in opt or "personnalisation" in opt:
                overrides.append({"control_label": "NUMÉROTATION OU PERSO UNITAIRE 1 FACE", "option_text": "Oui"})
    if sku == "EVT-BADGES-NOMINATIFS" and option_value:
        opt = normalize_text(option_value)
        if "code barres" in opt:
            overrides.append({"control_label": "CODE BARRES", "option_text": "Oui"})
        elif "personnalisation" in opt:
            overrides.append({"control_label": "PERSONNALISATION", "option_text": "Oui"})
        elif "attache badge" in opt:
            overrides.append({"control_label": "ATTACHE", "option_text": "Oui"})
        else:
            overrides.append({"control_label": option_value, "option_text": None})
    return overrides


def iter_rows(ws):
    headers = [value_to_string(cell.value) for cell in ws[1]]
    header_index = {name: idx + 1 for idx, name in enumerate(headers) if name}
    for row_idx in range(2, ws.max_row + 1):
        values = {}
        empty = True
        for idx, header in enumerate(headers, start=1):
            if not header:
                continue
            value = ws.cell(row=row_idx, column=idx).value
            values[header] = value
            if value not in (None, ""):
                empty = False
        if not empty:
            yield row_idx, headers, header_index, values


def resolve_source(values):
    source = value_to_string(values.get("Source"))
    if source.startswith("http"):
        return source
    sku = value_to_string(values.get("sku"))
    manual = MANUAL_SOURCE_BY_SKU.get(sku)
    if manual:
        return manual
    return None


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--limit", type=int, default=0)
    parser.add_argument("--skip-skus", default="")
    args = parser.parse_args()

    browser_path = choose_browser()
    wb = openpyxl.load_workbook(args.input)
    skip_skus = {item.strip() for item in args.skip_skus.split(",") if item.strip()}

    grouped = defaultdict(list)
    writable_cells = {}
    processed_rows = 0

    for ws in wb.worksheets:
        for row_idx, headers, header_index, values in iter_rows(ws):
            sku = value_to_string(values.get("sku"))
            source = resolve_source(values)
            if not source:
                continue
            quantity_col = header_index.get("Quantité")
            cost_col = header_index.get("Coût achat HT")
            if not quantity_col or not cost_col:
                continue
            if sku in skip_skus:
                continue
            if ws.cell(row=row_idx, column=quantity_col).value not in (None, "") or ws.cell(row=row_idx, column=cost_col).value not in (None, ""):
                continue
            desired = build_desired_values(headers, values)
            if not desired and source_domain(source) != "www.objetrama.fr":
                continue
            spec = {
                "sheet": ws.title,
                "excel_row": row_idx,
                "sku": sku,
                "source": source,
                "desired": desired,
                "overrides": build_overrides(sku, values),
            }
            grouped[(source_domain(source), source, sku)].append(spec)
            writable_cells[(ws.title, row_idx)] = (quantity_col, cost_col)
            processed_rows += 1
            if args.limit and processed_rows >= args.limit:
                break
        if args.limit and processed_rows >= args.limit:
            break

    objetrama = ObjetRamaScraper()
    realisa = RealisaprintScraper(browser_path)

    os.makedirs(args.output_dir, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(
        args.output_dir,
        f"modele_grilles_tarifaires_avec_sources_et_base_spec_json_filled_{ts}.xlsx",
    )

    filled = 0
    group_items = list(grouped.items())
    for group_index, ((domain, source, sku), specs) in enumerate(group_items, start=1):
        print(f"Processing {sku} | {domain} | rows={len(specs)}")
        results = {}
        try:
            if domain == "www.objetrama.fr":
                for spec in specs:
                    price = objetrama.price_for_row(source)
                    if price:
                        results[spec["excel_row"]] = price
            elif domain == "www.realisaprint.com":
                results = realisa.process_product_rows(source, sku, specs)
            else:
                continue
        except Exception as exc:
            print(f"  failed: {exc}")
            continue

        ws = wb[specs[0]["sheet"]]
        for spec in specs:
            result = results.get(spec["excel_row"])
            if not result:
                continue
            qty_col, cost_col = writable_cells[(spec["sheet"], spec["excel_row"])]
            quantity = result.get("quantity")
            cost = result.get("cost")
            if quantity:
                ws.cell(row=spec["excel_row"], column=qty_col, value=quantity)
            if cost is not None:
                ws.cell(row=spec["excel_row"], column=cost_col, value=round(float(cost), 2))
            if quantity or cost is not None:
                filled += 1

        if group_index == 1 or group_index % 5 == 0 or group_index == len(group_items):
            wb.save(output_path)

    wb.save(output_path)
    print(json.dumps({"filled_rows": filled, "output": output_path}, ensure_ascii=False))


if __name__ == "__main__":
    main()
