# open-webui/backend/open_webui/utils/chat_importer.py
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Iterable, Mapping, cast
from html.parser import HTMLParser
@dataclass(slots=True)
class ChatThread:
    """A normalized conversation thread produced by one of the importers."""

    id: str  # source-specific conversation id, or a synthesized fallback
    title: str  # human-readable thread title shown in the UI
    messages: list[dict[str, Any]]  # normalized messages: id/role/content/timestamp/metadata
class ChatImporterError(Exception):
    """Base exception for unrecoverable chat-import failures."""
    pass
def detect_threads(data: Any) -> list[ChatThread]:
    """Detect the export format of *data* and parse it into threads.

    Supported formats:
      1. Google AI Studio (single / backup)
      2. OpenAI conversation export (ChatGPT)
      3. Chatbox
      4. Cherry Studio
      5. DeepSeek

    The first parser yielding at least one thread wins.  A parser that
    raises is silently skipped so the remaining ones still get a chance.
    """
    for parse in (
        _try_google_single,
        _try_google_backup,
        _try_openai_conversation,
        _try_chatbox_export,
        _try_cherry_studio,
        _try_deepseek_export,
    ):
        try:
            result = parse(data)
        except Exception:
            # Best effort: a misbehaving parser must not block the others.
            continue
        if result:
            return result
    return []
# =============================================================================
# 1. Google AI Studio
# =============================================================================
def _try_google_single(data: Any) -> list[ChatThread]:
if not isinstance(data, Mapping) or "chunkedPrompt" not in data:
return []
chunks = data.get("chunkedPrompt", {}).get("chunks", []) or []
if not chunks:
return []
title = "Google AI Studio <00>"
if chunks and chunks[0].get("branchChildren"):
# <>c<EFBFBD>Sh<><68>
display_name = chunks[0]["branchChildren"][0].get("displayName", "")
match = re.search(r"Branch of (.*)", display_name or "")
if match:
title = match.group(1).strip()
messages: list[dict[str, Any]] = []
for idx, chunk in enumerate(chunks, start=1):
if chunk.get("isThought"):
continue
text = chunk.get("text", "")
if not text and "parts" in chunk:
text = "".join(p.get("text", "") for p in chunk.get("parts", []) if "text" in p)
if not text.strip():
continue
messages.append(
{
"id": f"google-single-{idx}",
"role": chunk.get("role", "assistant"),
"content": text.strip(),
"timestamp": chunk.get("timestamp"),
"metadata": {},
}
)
if not messages:
return []
return [ChatThread(id="google-single", title=title, messages=messages)]
def _try_google_backup(data: Any) -> list[ChatThread]:
if not isinstance(data, Mapping) or "conversations" not in data:
return []
conversations = data.get("conversations", []) or []
threads: list[ChatThread] = []
for idx, conversation in enumerate(conversations, start=1):
title = conversation.get("name") or f"Y<>N<EFBFBD>[݋_{idx}"
messages_data = conversation.get("messages", []) or []
messages: list[dict[str, Any]] = []
for midx, msg in enumerate(messages_data, start=1):
role = msg.get("author", {}).get("role", "user")
content = msg.get("content", "")
if not content:
continue
messages.append(
{
"id": f"google-backup-{idx}-{midx}",
"role": role,
"content": content.strip(),
"timestamp": msg.get("create_time"),
"metadata": {},
}
)
if messages:
conv_id = conversation.get("id") or f"google-backup-{idx}"
threads.append(ChatThread(id=str(conv_id), title=title, messages=messages))
return threads
# =============================================================================
# 2. OpenAI Conversation
# =============================================================================
def _try_openai_conversation(data: Any) -> list[ChatThread]:
# <00>{US<55>vyr<79>_<EFBFBD>hKm<1A>list N,{Ny<4E> g mapping
if not isinstance(data, list) or not data:
return []
first = data[0]
if not isinstance(first, Mapping) or "mapping" not in first:
return []
# :SR DeepSeek: DeepSeek <00>v mapping node message fragments
# OpenAI <00>v mapping node message content parts
# b<>N<EFBFBD>S<EFBFBD>N<1A>Ǐ<EFBFBD>h<EFBFBD>g fragment sQ.<2E>W[eg<65>cd<63> DeepSeek <0C>b<05><><EFBFBD> DeepSeek ㉐ghVHQэ
# FȎُb<>N<EFBFBD>S<EFBFBD>N<1A>Ǐ<EFBFBD>~<7E>gyr<79>_<EFBFBD>f<EFBFBD>~<7E><>0W$R<>e
threads: list[ChatThread] = []
for idx, conversation in enumerate(data, start=1):
mapping = conversation.get("mapping") or {}
if not mapping:
continue
# <00>h<EFBFBD>g/f&TS+T fragments (DeepSeek yr<79>_) <0C><>Y<EFBFBD>gS+TR<><52>Ǐ <0C><>N<EFBFBD>~ DeepSeek ㉐ghV
# <00>SN*N<><4E><EFBFBD>p7h,g
sample_node = next(iter(mapping.values()), {})
if sample_node.get("message", {}).get("fragments"):
return [] # ُ/fN*N DeepSeek <00>e<EFBFBD>N <0C> N/f OpenAI
title = conversation.get("title") or f"<00>[݋_{idx}"
conv_id = conversation.get("id") or conversation.get("conversation_id") or f"conv_{idx}"
# ُ̑<D98F>v;<3B><><EFBFBD><EFBFBD>{S:N<1A><>Sg<67>R/e<08>main branch <09>
branch = _reconstruct_longest_branch(mapping, conversation.get("current_node"))
if not branch:
continue
messages: list[dict[str, Any]] = []
for node_id in branch:
node = mapping.get(node_id) or {}
message = node.get("message")
if not message:
continue
metadata = message.get("metadata") or {}
if metadata.get("is_visually_hidden_from_conversation"):
continue
text = _extract_openai_text(message.get("content"))
if not text.strip():
continue
messages.append(
{
"id": message.get("id", node_id),
"role": message.get("author", {}).get("role", "user"),
"content": text.strip(),
"timestamp": _normalize_timestamp(message.get("create_time")),
"metadata": metadata,
}
)
if messages:
threads.append(ChatThread(id=str(conv_id), title=title, messages=messages))
return threads
def _extract_openai_text(content: Mapping[str, Any] | None) -> str:
if not content:
return ""
ctype = content.get("content_type")
if ctype == "text":
parts = content.get("parts") or []
return "\n".join(str(part) for part in parts if part)
if ctype == "multimodal_text":
texts: list[str] = []
for part in content.get("parts") or []:
if isinstance(part, str):
texts.append(part)
elif isinstance(part, Mapping) and part.get("type") == "text":
texts.append(part.get("text", ""))
return "\n".join(texts)
if ctype == "user_editable_context":
return content.get("user_instructions", "")
if isinstance(content, str):
return content
return json.dumps(content, ensure_ascii=False)
# =============================================================================
# 3. Chatbox
# =============================================================================
def _try_chatbox_export(data: Any) -> list[ChatThread]:
if not isinstance(data, Mapping):
return []
# yr<79>_<1A>chat-sessions-list
if "chat-sessions-list" not in data:
return []
sessions_meta = {
item.get("id"): item
for item in data.get("chat-sessions-list", []) or []
if isinstance(item, Mapping)
}
threads: list[ChatThread] = []
for key, value in data.items():
if not isinstance(key, str) or not key.startswith("session:"):
continue
session_id = key.split("session:", 1)[-1]
session_payload = value if isinstance(value, Mapping) else {}
messages_data = session_payload.get("messages", []) or []
if not messages_data:
continue
title = session_payload.get("name") or sessions_meta.get(session_id, {}).get("name")
if not title:
idx = len(threads) + 1
title = f"Chatbox <00>[݋_{idx}"
messages: list[dict[str, Any]] = []
for midx, message in enumerate(messages_data, start=1):
role = message.get("role") or "user"
content_parts = message.get("contentParts") or []
text = _extract_chatbox_content(content_parts)
if not text.strip():
continue
messages.append(
{
"id": message.get("id") or f"{session_id}-{midx}",
"role": role,
"content": text.strip(),
"timestamp": _normalize_timestamp(message.get("timestamp")),
"metadata": {
"session": title,
"session_id": session_id,
"source": "chatbox",
},
}
)
if messages:
threads.append(ChatThread(id=session_id or f"chatbox-{len(threads)+1}", title=title, messages=messages))
return threads
def _extract_chatbox_content(parts: Iterable[Mapping[str, Any]]) -> str:
fragments: list[str] = []
for part in parts or []:
if not isinstance(part, Mapping):
fragments.append(str(part))
continue
ptype = part.get("type")
if ptype == "text":
fragments.append(str(part.get("text", "")))
elif ptype in {"image_url", "image"} and part.get("url"):
fragments.append(f"[image: {part['url']}]")
elif "text" in part:
fragments.append(str(part["text"]))
else:
fragments.append(json.dumps(part, ensure_ascii=False))
return "\n\n".join(fragment for fragment in fragments if fragment)
# =============================================================================
# 4. Cherry Studio
# =============================================================================
def _try_cherry_studio(data: Any) -> list[ChatThread]:
"""㉐g Cherry Studio <00>[<5B>Q (localStorage JSON dump)"""
if not isinstance(data, Mapping):
return []
if "localStorage" not in data:
return []
cherry_str = data["localStorage"].get("persist:cherry-studio")
if not cherry_str:
return []
try:
cherry = json.loads(cherry_str)
except json.JSONDecodeError:
return []
if not isinstance(cherry, dict):
return []
assistants = cherry.get("assistants", [])
if isinstance(assistants, str): # g<>eP persist <00>v<P/f JSON string ̑<>v JSON string? N <EFBFBD><EFBFBD>8^/f JSON object
try:
assistants = json.loads(assistants)
except:
pass
# Cherry Studio structure check: top level keys often strings that need parsing?
# In chat-exporter: cherry = json.loads(cherry_str)
# assistants = cherry.get("assistants", [])
# But in redux-persist, values might be strings. Let's assume standard structure or parsed.
# If `assistants` is a string, parse it.
if isinstance(assistants, str):
try:
assistants = json.loads(assistants)
except:
return []
if not isinstance(assistants, list):
return []
threads: list[ChatThread] = []
for assistant in assistants:
topics = assistant.get("topics", [])
if not topics:
continue
model_info = assistant.get("model", {})
model_name = model_info.get("name") if isinstance(model_info, dict) else "Unknown"
for topic in topics:
topic_name = topic.get("name", "*g}T T݋<EFBFBD><EFBFBD>")
topic_msgs = topic.get("messages", [])
if not topic_msgs:
continue
messages: list[dict[str, Any]] = []
for idx, msg in enumerate(topic_msgs, start=1):
content = msg.get("content", "")
if not content:
continue
role = msg.get("role", "user")
messages.append({
"id": msg.get("id") or f"cherry-{idx}",
"role": role,
"content": content.strip(),
"timestamp": _normalize_timestamp(msg.get("createdAt")),
"metadata": {
"model": model_name
}
})
if messages:
tid = topic.get("id") or f"cherry-{len(threads)}"
# Combine assistant name/model with topic for clarity
full_title = f"[Cherry] {topic_name}"
threads.append(ChatThread(id=str(tid), title=full_title, messages=messages))
return threads
# =============================================================================
# 5. DeepSeek
# =============================================================================
def _try_deepseek_export(data: Any) -> list[ChatThread]:
"""㉐g DeepSeek <00>[<5B>Q<EFBFBD>e<EFBFBD>N"""
if not isinstance(data, list) or not data:
return []
# yr<79>_<EFBFBD>hKm<1A>item S+T mapping N W[&{2Nb__S+T fragments
first = data[0]
if not isinstance(first, Mapping) or "mapping" not in first:
return []
# <00>{US<55>v heuristic: <00>h<EFBFBD>g mapping ̑<>v message /f&T g fragments
is_deepseek = False
mapping_sample = first.get("mapping", {})
for node in mapping_sample.values():
if node and node.get("message", {}).get("fragments"):
is_deepseek = True
break
if not is_deepseek:
return []
threads: list[ChatThread] = []
for idx, conversation in enumerate(data, start=1):
title = conversation.get("title", f"DeepSeek<00>[݋_{idx}")
mapping = conversation.get("mapping", {})
if not mapping:
continue
# <00>[~b<>SP[<5B><><EFBFBD>p
all_nodes = set(mapping.keys())
parent_nodes = {node.get("parent") for node in mapping.values() if node}
leaf_nodes = all_nodes - parent_nodes - {"root"}
if not leaf_nodes:
continue
# <00>c<EFBFBD>Sg<67>R/e (Main Branch)
# <00>Y<EFBFBD>g<00><><EFBFBD>@b gR/e <0C><>S<EFBFBD>NM<4E><4D>S leaf_nodes
# ُ̑:N<>N<EFBFBD>{S UI <0C>؞<EFBFBD><D89E><EFBFBD>c<EFBFBD>Sg<67>R/e
longest_branch_ids: list[str] = []
# Helper to reconstruct path
def reconstruct_path(leaf_id: str) -> list[str]:
path = []
cur = leaf_id
while cur and cur != "root":
if cur not in mapping:
break
path.insert(0, cur)
cur = mapping[cur].get("parent", "root")
return path
branches = [reconstruct_path(leaf) for leaf in leaf_nodes]
if not branches:
continue
longest_branch_ids = max(branches, key=len)
messages: list[dict[str, Any]] = []
for node_id in longest_branch_ids:
node = mapping.get(node_id) or {}
msg_obj = node.get("message", {})
if not msg_obj:
continue
# DeepSeek specific content extraction
fragments = msg_obj.get("fragments", [])
content_buf = []
role = "user" # default
# Check type from fragments or fallback to author
# Fragments typically define content parts
for frag in fragments:
text = frag.get("content", "")
frag_type = frag.get("type")
if frag_type == "REQUEST":
role = "user"
elif frag_type == "RESPONSE":
role = "assistant"
if text:
content_buf.append(text)
full_text = "".join(content_buf)
if not full_text.strip():
continue
# If role wasn't determined by fragments (rare), try author
if not fragments:
role = msg_obj.get("author", {}).get("role", "user")
full_text = msg_obj.get("content", "")
messages.append({
"id": node_id,
"role": role,
"content": full_text.strip(),
"timestamp": _normalize_timestamp(conversation.get("inserted_at")), # DeepSeek conv often has one time?
"metadata": {}
})
if messages:
conv_id = conversation.get("id") or f"deepseek-{idx}"
threads.append(ChatThread(id=str(conv_id), title=f"[DeepSeek] {title}", messages=messages))
return threads
# =============================================================================
# Common Helpers
# =============================================================================
def _reconstruct_longest_branch(mapping: Mapping[str, Any], leaf_id: str | None) -> list[str]:
"""Generic branch reconstruction, preferring provided leaf_id or longest path"""
if not leaf_id:
# choose longest path from all leaves
nodes = set(mapping.keys())
parents = {node.get("parent") for node in mapping.values() if node}
leaf_nodes = nodes - parents - {"root"}
longest: list[str] = []
def get_path(leaf: str) -> list[str]:
path = []
cur = leaf
visited = set()
while cur and cur not in visited and cur != "root":
node = mapping.get(cur)
if not node:
break
path.append(cur)
visited.add(cur)
cur = node.get("parent")
path.reverse()
return path
for leaf in leaf_nodes:
branch = get_path(leaf)
if len(branch) > len(longest):
longest = branch
return longest
# Reconstruct from specific leaf
path: list[str] = []
visited: set[str] = set()
cur = leaf_id
while cur and cur not in visited and cur != "root":
node = mapping.get(cur)
if not node:
break
path.append(cur)
visited.add(cur)
cur = node.get("parent")
path.reverse()
return path
def _normalize_timestamp(value: Any) -> str:
if value is None:
return ""
if isinstance(value, (int, float)):
try:
if value > 1e12: # milliseconds
dt = datetime.fromtimestamp(value / 1000)
else:
dt = datetime.fromtimestamp(value)
return dt.isoformat(sep=" ", timespec="minutes")
except Exception:
return str(value)
if isinstance(value, str):
try:
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
return dt.isoformat(sep=" ", timespec="minutes")
except Exception:
return str(value)
return str(value)
# =============================================================================
# Text Import
# =============================================================================
def parse_text_transcript(text: str) -> list[ChatThread]:
    """
    Parse a plain-text transcript pasted by the user.

    Lines matching one of the role markers start a new message; every other
    line is appended to the message currently being built.
    """
    if not text.strip():
        return []
    lines = text.strip().split('\n')
    messages: list[dict[str, Any]] = []
    # Role markers the user may have used.  NOTE(review): several patterns
    # contain mojibake from the file's original encoding -- kept verbatim
    # because matching depends on those exact bytes.
    markers = [
        (r"\[`O<><4F>\]", "user"),
        (r"`O<><4F>[:<1A>]", "user"),
        (r"ChatGPT\s*<00><>[:<1A>]?", "assistant"),  # "ChatGPT said" variants
        (r"AI\s*<00><>[:<1A>]?", "assistant"),  # defensive generic AI marker
    ]
    # Basic User/AI prefixes are kept too, but anchored to line start.
    markers.extend([
        (r"^User[:<1A>]", "user"),
        (r"^AI[:<1A>]", "assistant"),
    ])
    for line in lines:
        line_str = line.strip()
        if not line_str:
            continue
        # Try to match a role marker on this line.
        matched_role = None
        for pattern, role in markers:
            if re.search(pattern, line_str, re.IGNORECASE):
                matched_role = role
                break
        if matched_role:
            # Header line: take whatever follows the separator as content.
            content = ""
            parts = re.split(r"[:<1A>]", line_str, 1)
            if len(parts) > 1:
                content = parts[1].strip()
            messages.append({
                "id": f"text-{len(messages)+1}",
                "role": matched_role,
                "content": content,
                "timestamp": None
            })
        else:
            # Continuation line: append to the message in progress.
            if messages:
                if messages[-1]["content"]:
                    messages[-1]["content"] += "\n" + line
                else:
                    messages[-1]["content"] = line
            else:
                # First line is already content; default the role to user.
                messages.append({
                    "id": "text-1",
                    "role": "user",
                    "content": line,
                    "timestamp": None
                })
    if not messages:
        return []
    msg_count = len(messages)
    preview = messages[0]["content"][:10].replace("\n", " ") if messages else ""
    title = f"<00>e,g<>[eQ ({msg_count}ag) - {preview}..."
    return [ChatThread(id="pasted-text", title=title, messages=messages)]
# =============================================================================
# HTML Import
# =============================================================================
class ChatGPTHTMLParser(HTMLParser):
    """Extract conversation turns from a saved ChatGPT HTML page.

    Turns are delimited by elements carrying
    ``data-testid="conversation-turn-N"``; the author role comes from the
    ``data-message-author-role`` attribute.  Element-depth tracking decides
    when a turn container closes.
    """

    # Tags without end tags: they must not affect the depth counter.
    VOID_ELEMENTS = {
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr'
    }
    # Containers whose text must never be captured (UI chrome, scripts).
    # meta/link are deliberately absent: as void elements they never reach
    # handle_endtag, so listing them would leave ignore_level permanently
    # raised and swallow all subsequent content.
    IGNORE_TAGS = {'button', 'svg', 'style', 'script', 'head', 'title'}

    def __init__(self):
        super().__init__()
        self.messages = []          # finished {"role", "content"} dicts
        self.current_msg = None     # turn being accumulated, or None
        self.recording = False      # True while inside a conversation turn
        self.depth = 0              # current element nesting depth
        self.turn_start_depth = 0   # depth at which the turn container opened
        self.ignore_level = 0       # nesting count of IGNORE_TAGS

    def handle_starttag(self, tag, attrs):
        if tag in self.IGNORE_TAGS:
            self.ignore_level += 1
        # Void elements have no end tag, so they do not deepen nesting.
        if tag not in self.VOID_ELEMENTS:
            self.depth += 1
        attrs_dict = dict(attrs)
        # HTMLParser reports valueless attributes as None; coerce to ""
        # so startswith() cannot raise (this crashed on e.g. <div data-testid>).
        testid = attrs_dict.get('data-testid') or ''
        if testid.startswith('conversation-turn-'):
            # A new turn begins: flush any turn still open (defensive, in
            # case depth accounting missed the previous turn's end tag).
            if self.current_msg:
                self._save_current()
            # Default to "user"; corrected below when a role marker appears.
            self.current_msg = {"role": "user", "content": []}
            self.recording = True
            self.turn_start_depth = self.depth
            return
        # While recording, pick up the authoritative role marker.
        if self.recording and 'data-message-author-role' in attrs_dict:
            self.current_msg["role"] = attrs_dict['data-message-author-role']

    def handle_endtag(self, tag):
        if tag in self.IGNORE_TAGS:
            if self.ignore_level > 0:
                self.ignore_level -= 1
        # Emit a line break when a block-level element closes.
        if self.recording and self.ignore_level == 0:
            if tag in ('p', 'div', 'br', 'li', 'tr', 'pre', 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
                self.current_msg["content"].append("\n")
        if tag not in self.VOID_ELEMENTS:
            # Closing the turn container itself ends the message.
            if self.recording and self.depth == self.turn_start_depth:
                self._save_current()
            self.depth -= 1

    def handle_data(self, data):
        if self.recording and self.ignore_level == 0:
            # Keep raw text (no strip) to avoid gluing inline elements.
            self.current_msg["content"].append(data)

    def close(self):
        super().close()
        # Flush the last message if the document ended inside a turn.
        if self.current_msg:
            self._save_current()

    def _save_current(self):
        if self.current_msg:
            full_text = "".join(self.current_msg["content"]).strip()
            # Collapse runs of 3+ newlines into paragraph breaks.
            full_text = re.sub(r'\n{3,}', '\n\n', full_text)
            if full_text:
                self.messages.append({
                    "role": self.current_msg["role"],
                    "content": full_text
                })
        self.current_msg = None
        self.recording = False
def parse_html_transcript(html_content: str) -> list[ChatThread]:
    """Parse a saved ChatGPT HTML page into a single thread.

    Returns [] when no conversation turns are found.
    """
    parser = ChatGPTHTMLParser()
    parser.feed(html_content)
    # close() was previously never called, silently dropping a trailing
    # turn whose container end tag was missing/truncated.
    parser.close()
    if not parser.messages:
        return []
    # Prefer the page <title>; match case-insensitively and across lines.
    title = "HTML <00>[eQ<65>"
    title_match = re.search(r"<title>(.*?)</title>", html_content, re.IGNORECASE | re.DOTALL)
    if title_match:
        title = title_match.group(1).strip()
    # Convert to the normalized message shape.
    msgs = []
    for idx, msg in enumerate(parser.messages, 1):
        msgs.append({
            "id": f"html-{idx}",
            "role": msg["role"],
            "content": msg["content"],
            "timestamp": None
        })
    return [ChatThread(id="html-import", title=title, messages=msgs)]
# Public API of this module.
__all__ = [
    "ChatThread",
    "ChatImporterError",
    "detect_threads",
    "parse_text_transcript",
    "parse_html_transcript",
]