API Reference

This page is generated from source code docstrings via mkdocstrings.

Package

`gimkit`

Core Modules

`gimkit.guides`

`FormMixin`

Source code in src/gimkit/guides.py

class FormMixin:
    """Mixin offering factory methods for common form-field masked tags.

    Every method returns a ``MaskedTag`` carrying a description and a regex
    that constrains what may be filled into the tag.
    """

    def single_word(self, name: str | None = None) -> MaskedTag:
        """A single word without spaces."""
        # The docstring above doubles as the tag description at runtime.
        description = self.single_word.__doc__
        return MaskedTag(name=name, desc=description, regex=r"\S+")

    def select(self, name: str | None = None, choices: list[str] | None = None) -> MaskedTag:
        """Choose one from the given options.

        Args:
            name: Optional name of the tag.
            choices: The allowed values; each one is regex-escaped and the
                results are joined into a single alternation.

        Raises:
            ValueError: If ``choices`` is ``None`` or empty.
        """
        if not choices:
            raise ValueError("choices must be a non-empty list of strings.")
        description = f"Choose one from the following options: {', '.join(choices)}."
        alternatives = [re.escape(option) for option in choices]
        return MaskedTag(name=name, desc=description, regex="|".join(alternatives))

    def datetime(
        self, name: str | None = None, require_date: bool = True, require_time: bool = True
    ) -> MaskedTag:
        """A date and/or time string, e.g., 2023-10-05, 14:30:00, 2023-10-05 14:30:00, etc.

        Args:
            name: Optional name of the tag.
            require_date: Whether the value must contain a ``YYYY-MM-DD`` date.
            require_time: Whether the value must contain an ``HH:MM[:SS]`` time.

        Raises:
            ValueError: If both ``require_date`` and ``require_time`` are false.
        """
        date_regex = r"(?:\d{4}-\d{2}-\d{2})"  # YYYY-MM-DD
        time_regex = r"(?:\d{2}:\d{2}(?::\d{2})?)"  # HH:MM or HH:MM:SS

        # Reject the empty combination up front, then handle each valid case.
        if not (require_date or require_time):
            raise ValueError("At least one of require_date or require_time must be True.")

        if require_date and require_time:
            return MaskedTag(
                name=name,
                desc="A date and time in the format YYYY-MM-DD HH:MM[:SS].",
                regex=rf"{date_regex}[ T]{time_regex}",
            )
        if require_date:
            return MaskedTag(name=name, desc="A date in the format YYYY-MM-DD.", regex=date_regex)
        return MaskedTag(name=name, desc="A time in the format HH:MM[:SS].", regex=time_regex)

`datetime(name=None, require_date=True, require_time=True)`

A date and/or time string, e.g., 2023-10-05, 14:30:00, 2023-10-05 14:30:00, etc.

Source code in src/gimkit/guides.py

def datetime(
    self, name: str | None = None, require_date: bool = True, require_time: bool = True
) -> MaskedTag:
    """A date and/or time string, e.g., 2023-10-05, 14:30:00, 2023-10-05 14:30:00, etc.

    Args:
        name: Optional name of the tag.
        require_date: Whether the value must contain a ``YYYY-MM-DD`` date.
        require_time: Whether the value must contain an ``HH:MM[:SS]`` time.

    Raises:
        ValueError: If both ``require_date`` and ``require_time`` are false.
    """
    date_regex = r"(?:\d{4}-\d{2}-\d{2})"  # YYYY-MM-DD
    time_regex = r"(?:\d{2}:\d{2}(?::\d{2})?)"  # HH:MM or HH:MM:SS

    # Reject the empty combination up front, then handle each valid case.
    if not (require_date or require_time):
        raise ValueError("At least one of require_date or require_time must be True.")

    if require_date and require_time:
        return MaskedTag(
            name=name,
            desc="A date and time in the format YYYY-MM-DD HH:MM[:SS].",
            regex=rf"{date_regex}[ T]{time_regex}",
        )
    if require_date:
        return MaskedTag(name=name, desc="A date in the format YYYY-MM-DD.", regex=date_regex)
    return MaskedTag(name=name, desc="A time in the format HH:MM[:SS].", regex=time_regex)

`select(name=None, choices=None)`

Choose one from the given options.

Source code in src/gimkit/guides.py

def select(self, name: str | None = None, choices: list[str] | None = None) -> MaskedTag:
    """Choose one from the given options.

    Args:
        name: Optional name of the tag.
        choices: The allowed values; each one is regex-escaped and the
            results are joined into a single alternation.

    Raises:
        ValueError: If ``choices`` is ``None`` or empty.
    """
    if not choices:
        raise ValueError("choices must be a non-empty list of strings.")
    description = f"Choose one from the following options: {', '.join(choices)}."
    alternatives = [re.escape(option) for option in choices]
    return MaskedTag(name=name, desc=description, regex="|".join(alternatives))

`single_word(name=None)`

A single word without spaces.

Source code in src/gimkit/guides.py

def single_word(self, name: str | None = None) -> MaskedTag:
    """A single word without spaces."""
    # The docstring above doubles as the tag description at runtime.
    description = self.single_word.__doc__
    return MaskedTag(name=name, desc=description, regex=r"\S+")

`PersonalInfoMixin`

Source code in src/gimkit/guides.py

class PersonalInfoMixin:
    """Mixin offering factory methods for personal-information masked tags.

    Each method uses its own docstring as the description of the returned
    ``MaskedTag``.
    """

    def person_name(self, name: str | None = None) -> MaskedTag:
        """A person's name, e.g., John Doe, Alice, Bob, Charlie Brown, 张三, etc."""
        # No regex is attached here; only the description is set.
        description = self.person_name.__doc__
        return MaskedTag(name=name, desc=description)

    def phone_number(self, name: str | None = None) -> MaskedTag:
        """A phone number, e.g., +1-123-456-7890, (123) 456-7890, 123-456-7890, etc."""
        # Adapted from https://regexr.com/38pvb
        pattern = (
            r"(?:\+?(\d{1,3}))?([-. (]*(\d{3})[-. )]*)?((\d{3})[-. ]*(\d{2,4})(?:[-.x ]*(\d+))?)"
        )
        description = self.phone_number.__doc__
        return MaskedTag(name=name, desc=description, regex=pattern)

    def e_mail(self, name: str | None = None) -> MaskedTag:
        """An email address, e.g., john.doe@example.com, alice@example.com, etc."""
        # Adapted from https://regexr.com/3a2i5
        pattern = r"([\w\.]+)@([\w\.]+)\.(\w+)"
        description = self.e_mail.__doc__
        return MaskedTag(name=name, desc=description, regex=pattern)

`e_mail(name=None)`

An email address, e.g., john.doe@example.com, alice@example.com, etc.

Source code in src/gimkit/guides.py

def e_mail(self, name: str | None = None) -> MaskedTag:
    """An email address, e.g., john.doe@example.com, alice@example.com, etc."""
    # Adapted from https://regexr.com/3a2i5
    pattern = r"([\w\.]+)@([\w\.]+)\.(\w+)"
    # The docstring above doubles as the tag description at runtime.
    description = self.e_mail.__doc__
    return MaskedTag(name=name, desc=description, regex=pattern)

`person_name(name=None)`

A person's name, e.g., John Doe, Alice, Bob, Charlie Brown, 张三, etc.

Source code in src/gimkit/guides.py

def person_name(self, name: str | None = None) -> MaskedTag:
    """A person's name, e.g., John Doe, Alice, Bob, Charlie Brown, 张三, etc."""
    # No regex is attached here; the docstring above is used as the description.
    description = self.person_name.__doc__
    return MaskedTag(name=name, desc=description)

`phone_number(name=None)`

A phone number, e.g., +1-123-456-7890, (123) 456-7890, 123-456-7890, etc.

Source code in src/gimkit/guides.py

def phone_number(self, name: str | None = None) -> MaskedTag:
    """A phone number, e.g., +1-123-456-7890, (123) 456-7890, 123-456-7890, etc."""
    # Adapted from https://regexr.com/38pvb
    pattern = (
        r"(?:\+?(\d{1,3}))?([-. (]*(\d{3})[-. )]*)?((\d{3})[-. ]*(\d{2,4})(?:[-.x ]*(\d+))?)"
    )
    # The docstring above doubles as the tag description at runtime.
    description = self.phone_number.__doc__
    return MaskedTag(name=name, desc=description, regex=pattern)

`gimkit.schemas`

Defines the schema for GIM.

`MaskedTag` `dataclass`

Represents a masked tag in the GIM schema.

A masked tag consists of three main types of components:

1. Tag ID: An integer identifier for the tag, represented as `m_{id}` in the tag attributes.
2. Tag content: The content located between the opening and closing masked tag markers.
3. Tag common attributes: All other tag attributes aside from the ID (e.g., name, desc, regex).

Example of a masked tag

<|MASKED id="m_0" name="xxx" desc="xxx" regex="xxx"|>content here<|/MASKED|>

Source code in src/gimkit/schemas.py

@dataclass
class MaskedTag:
    """Represents a masked tag in the GIM schema.

    A masked tag consists of three main types of components:
    1. **Tag ID**: An integer identifier for the tag, represented as `m_{id}` in the tag attributes.
    2. **Tag content**: The content located between the opening and closing masked tag markers.
    3. **Tag common attributes**: All other tag attributes aside from the ID (e.g., name, desc, regex).

    Example of a masked tag:
        `<|MASKED id="m_0" name="xxx" desc="xxx" regex="xxx"|>content here<|/MASKED|>`

    Raises:
        ValueError: If ``id``, a common attribute, ``content`` or ``regex``
            fails the validation performed in ``__post_init__``.
    """

    # Tag id; a string of digits is converted to ``int`` in ``__post_init__``.
    id: int | str | None = None
    name: str | None = None
    desc: str | None = None
    regex: str | None = None
    # Text located between the opening and closing tag markers.
    content: str | None = None

    # Read-only class variable for additional attribute escapes. These
    # characters may appear in tag attributes such as `desc` or `grammar`.
    # Hexadecimal numeric character references are used for consistency and
    # compatibility with Python's built-in `html.escape` conventions.
    # Ref: https://www.w3.org/MarkUp/html-spec/html-spec_13.html
    _ADDITIONAL_ATTR_ESCAPES: ClassVar[Mapping[str, str]] = MappingProxyType(
        {
            "\t": "&#x09;",  # Tab
            "\n": "&#x0a;",  # Line Feed
            "\r": "&#x0d;",  # Carriage Return
        }
    )

    @classmethod
    def attr_escape(cls, text: str) -> str:
        """Escape ``text`` for use as a quoted tag attribute value.

        Applies ``html.escape`` (quotes included) and then replaces tab, line
        feed and carriage return with numeric character references.
        """
        escaped_text = html.escape(text, quote=True)
        for char, escape_seq in cls._ADDITIONAL_ATTR_ESCAPES.items():
            escaped_text = escaped_text.replace(char, escape_seq)
        return escaped_text

    @classmethod
    def attr_unescape(cls, text: str) -> str:
        """Reverse ``attr_escape`` by resolving HTML character references."""
        return html.unescape(text)

    @staticmethod
    def _ends_with_unescaped_dollar(pattern: str) -> bool:
        """Return True if ``pattern`` ends with a ``$`` that acts as an anchor.

        A trailing ``$`` preceded by an odd number of backslashes is an escaped
        literal dollar sign (for example the ``US\\$`` that ``re.escape("US$")``
        produces) and is therefore not an anchor.
        """
        if not pattern.endswith("$"):
            return False
        head = pattern[:-1]
        trailing_backslashes = len(head) - len(head.rstrip("\\"))
        return trailing_backslashes % 2 == 0

    def __post_init__(self):
        """Validate and normalize the fields right after construction.

        Raises:
            ValueError: If any field has an unsupported type or value.
        """
        # 1. Validate id
        if not (
            self.id is None
            or isinstance(self.id, int)
            or (isinstance(self.id, str) and self.id.isdigit())
        ):
            raise ValueError(f"{type(self.id)=}, {self.id=}, should be int, str of digits, or None")
        if isinstance(self.id, str):
            self.id = int(self.id)

        # 2. Validate common attributes
        # String values are stored unescaped; `to_string` escapes them again.
        for attr in COMMON_ATTRS:
            attr_val = getattr(self, attr)
            if isinstance(attr_val, str):
                setattr(self, attr, MaskedTag.attr_unescape(attr_val))
            elif attr_val is not None:
                raise ValueError(f"{type(attr_val)=}, {attr_val=}, should be str or None")

        # 3. Validate content
        if isinstance(self.content, str):
            # TAG_OPEN_RIGHT is common in text, so we allow it in content.
            # But other magic strings are not allowed.
            special_marks = [s for s in MAGIC_STRINGS if s != TAG_OPEN_RIGHT]
            if any(special_mark in self.content for special_mark in special_marks):
                raise ValueError(
                    "content should not contain special marks like "
                    + " or ".join(f"`{x}`" for x in special_marks)
                )
        elif self.content is not None:
            raise ValueError(f"{type(self.content)=}, {self.content=}, should be str or None")

        # 4. Validate regex if provided
        if isinstance(self.regex, str):
            # Only a real `$` anchor is rejected; an escaped literal `\$` at the
            # end of the pattern is a legitimate character to match.
            if self.regex.startswith("^") or self._ends_with_unescaped_dollar(self.regex):
                raise ValueError(
                    "regex should not start with ^ or end with $, "
                    "as it will be used within a larger regex pattern."
                )
            if self.regex.startswith("/") or self.regex.endswith("/"):
                raise ValueError(
                    "regex should not start or end with /, "
                    "as it will be wrapped with /.../ in CFG grammar."
                )
            if self.regex == "":
                raise ValueError("regex should not be an empty string.")
            try:
                re.compile(self.regex)
            except re.error as e:
                raise ValueError(f"Invalid regex pattern: {self.regex}") from e

    def to_string(
        self,
        fields: list[TagField] | Literal["all"] = "all",
    ) -> str:
        """Serialize the tag, including only the requested fields.

        Args:
            fields: Field names to include, or ``"all"`` for every field in
                ``ALL_FIELDS``. Fields whose value is ``None`` are skipped.

        Returns:
            The opening marker with attributes, the content (if included) and
            the closing marker.
        """
        attr_part = ""
        if fields == "all":
            fields = cast("list[TagField]", list(ALL_FIELDS))
        if "id" in fields and self.id is not None:
            attr_part += f' id="m_{self.id}"'
        for attr in COMMON_ATTRS:
            if attr in fields and getattr(self, attr) is not None:
                escaped_val = self.attr_escape(getattr(self, attr))
                attr_part += f' {attr}="{escaped_val}"'
        content_part = ""
        if "content" in fields and self.content is not None:
            content_part = f"{self.content}"
        return TAG_OPEN_LEFT + attr_part + TAG_OPEN_RIGHT + content_part + TAG_END

    def __str__(self):
        """Return the tag serialized with all fields."""
        return self.to_string()

    def __repr__(self):
        """Return the same serialization as ``__str__``."""
        return self.to_string()

    def __add__(self, other: str) -> str:
        """Concatenate the serialized tag with ``other`` (stringified if needed)."""
        if isinstance(other, str):
            return str(self) + other
        return str(self) + str(other)

    def __radd__(self, other: str) -> str:
        """Concatenate ``other`` (stringified if needed) with the serialized tag."""
        if isinstance(other, str):
            return other + str(self)
        return str(other) + str(self)

`parse_parts(s)`

Parse a string into a list of ContextParts (str or MaskedTag).

Parameters:

Name	Type	Description	Default
`s`	`str`	The string to be parsed. Note it only contains masked tags or plain texts. Tag id may start from any non-negative integer, but must be in order 0, 1, 2, ...	required

Returns:

Type	Description
`list[ContextPart]`	list[ContextPart]: A list of ContextParts (str or MaskedTag).

Source code in src/gimkit/schemas.py

def parse_parts(s: str) -> list[ContextPart]:
    """Split a string into plain-text pieces and ``MaskedTag`` objects.

    Args:
        s (str): The string to be parsed. It should contain only masked tags
            and plain text. The first tag that carries an id may use any
            value; every following tag advances the expected id by one, and a
            tag that carries an id must match the expected value.

    Returns:
        list[ContextPart]: The parts in their original order (str or MaskedTag).

    Raises:
        InvalidFormatError: If opening, closing and complete tag counts differ,
            or if a tag id breaks the expected sequence.
    """
    full_matches = list(TAG_FULL_PATTERN.finditer(s))
    open_count = len(list(TAG_OPEN_PATTERN.finditer(s)))
    end_count = len(list(TAG_END_PATTERN.finditer(s)))
    # Every opening marker must pair with a closing marker in a complete tag.
    if open_count != end_count or end_count != len(full_matches):
        raise InvalidFormatError(f"Mismatched or nested masked tags in {s}")

    parts: list[ContextPart] = []
    expected_id = None  # unknown until the first tag with an explicit id
    cursor = 0
    for tag_match in full_matches:
        tag_start, tag_end = tag_match.span()
        # Keep any plain text between the previous tag and this one.
        if cursor < tag_start:
            parts.append(s[cursor:tag_start])

        attrs = tag_match.groupdict()
        raw_id = attrs.get("id")
        if raw_id is not None:
            found_id = int(raw_id)
            if expected_id is None:
                expected_id = found_id
            elif found_id != expected_id:
                raise InvalidFormatError(
                    f"Tag ids should be in order, got {found_id} at position {expected_id}."
                )
        # Once counting has started, tags without an id still advance it.
        if expected_id is not None:
            expected_id += 1
        parts.append(MaskedTag(**attrs))

        cursor = tag_end

    # Trailing plain text after the last tag.
    if cursor < len(s):
        parts.append(s[cursor:])
    return parts

`parse_tags(s, prefix=None, suffix=None)`

Parse a string into a list of MaskedTags.

Parameters:

Name	Type	Description	Default
`s`	`str`	The string to be parsed. It may be wrapped with a prefix and suffix. Tag id may start from any non-negative integer, but must be in order 0, 1, 2, ...	required
`prefix`	`str \| None`	The prefix tag that the string should start with. Default is None.	`None`
`suffix`	`str \| None`	The suffix tag that the string should end with. Default is None.	`None`

Returns:

Type	Description
`list[MaskedTag]`	list[MaskedTag]: A list of MaskedTags.

Source code in src/gimkit/schemas.py

def parse_tags(s: str, prefix: str | None = None, suffix: str | None = None) -> list[MaskedTag]:
    """Parse a string into a list of MaskedTags.

    Args:
        s (str): The string to be parsed. It may be wrapped with a prefix and suffix.
        prefix (str | None): The prefix tag that the string should start with. Default is None.
            When given, explicit tag ids must equal the tag's position (0, 1, 2, ...).
        suffix (str | None): The suffix tag that the string should end with. Default is None.

    Returns:
        list[MaskedTag]: A list of MaskedTags.

    Raises:
        InvalidFormatError: If the prefix or suffix is missing or appears more
            than once, or if (with a prefix) the tag ids are not 0, 1, 2, ...
    """

    if prefix is not None:
        s = s.lstrip()
        if not s.startswith(prefix):
            raise InvalidFormatError(f"String must start with the {prefix} tag.")

        s = s[len(prefix) :]
        if prefix in s:
            raise InvalidFormatError(f"Nested or duplicate {prefix} tag are not allowed.")

    if suffix is not None:
        s = s.rstrip()
        if not s.endswith(suffix):
            raise InvalidFormatError(f"String must end with the {suffix} tag.")

        s = s[: -len(suffix)]
        if suffix in s:
            raise InvalidFormatError(f"Nested or duplicate {suffix} tag are not allowed.")

    parts = parse_parts(s)
    tags = [part for part in parts if isinstance(part, MaskedTag)]

    if prefix is not None:
        expected_ids = list(range(len(tags)))
        # Only a missing id (None) falls back to the position. An explicit id
        # of 0 must be compared as-is; `tag.id or idx` would treat it as missing
        # and let a misplaced 0 pass the check.
        actual_ids = [tag.id if tag.id is not None else idx for idx, tag in enumerate(tags)]
        if expected_ids != actual_ids:
            raise InvalidFormatError(
                f"Tag ids should be in order 0, 1, 2, ..., got {', '.join(map(str, actual_ids))}."
            )

    return tags

`validate(query, response)`

Validate the GIM query and/or GIM response.

Parameters:

Name	Type	Description	Default
`query`	`str`	Wrapped with query prefix and suffix.	required
`response`	`str`	Wrapped with response prefix and suffix.	required

Raises:

Type	Description
`ValueError`	If both query and response are None.
`InvalidFormatError`	If the format of query or response is invalid, or if the number of masked tags or their ids do not match between query and response.

Source code in src/gimkit/schemas.py

def validate(query: str | None, response: str | None):
    """Validate the GIM query or/and GIM response.

    Args:
        query (str): Wrapped with query prefix and suffix.
        response (str): Wrapped with response prefix and suffix.

    Raises:
        ValueError: If both query and response are None.
        InvalidFormatError: If the format of query or response is invalid,
            or if the number of masked tags or their ids do not match
            between query and response.
    """
    if query is None and response is None:
        raise ValueError("At least one of query or response must be provided.")
    if query is not None:
        query_tags = parse_tags(query, QUERY_PREFIX, QUERY_SUFFIX)
    if response is not None:
        response_tags = parse_tags(response, RESPONSE_PREFIX, RESPONSE_SUFFIX)
    if query is not None and response is not None and len(query_tags) != len(response_tags):
        raise InvalidFormatError("Mismatched number of masked tags between query and response.")

`gimkit.contexts`

`Query`

Bases: Context

Source code in src/gimkit/contexts.py

class Query(Context):
    """A context wrapped with the query prefix and suffix.

    On construction the masked tags are numbered 0, 1, 2, ... in order of
    appearance and checked for duplicate names.
    """

    def __init__(self, *args: ContextInput) -> None:
        """Build the query and normalize its masked tags.

        Raises:
            InvalidFormatError: If a tag carries an id that differs from its
                position, or if two tags share the same name.
        """
        super().__init__(QUERY_PREFIX, QUERY_SUFFIX, *args)

        # Validate and standardize the tags
        seen_names = set()
        masked_tags = (part for part in self._parts if isinstance(part, MaskedTag))
        for position, tag in enumerate(masked_tags):
            if tag.id is not None and tag.id != position:
                raise InvalidFormatError("Tag ids must be sequential starting from 0.")
            tag.id = position

            tag_name = tag.name
            if tag_name is not None:
                if tag_name in seen_names:
                    raise InvalidFormatError(f"Tag name '{tag_name}' already exists.")
                seen_names.add(tag_name)

            # An empty string is stored as "no content".
            if tag.content == "":
                tag.content = None

    def infill(self, response: Response | ContextInput) -> Result:
        """Fills tags in this query (self) with content from the provided response."""
        return infill(self, response)

    def __str__(self) -> str:
        """Serialize with the id, desc and content fields only."""
        visible_fields = ["id", "desc", "content"]
        return self.to_string(fields=visible_fields)

    def to_string_with_grammar(self) -> str:
        """Serialize like ``__str__`` but with the regex field included."""
        visible_fields = ["id", "desc", "content", "regex"]
        return self.to_string(fields=visible_fields)

`infill(response)`

Fills tags in this query (self) with content from the provided response.

Source code in src/gimkit/contexts.py

def infill(self, response: Response | ContextInput) -> Result:
    """Fills tags in this query (self) with content from the provided response.

    Args:
        response: The response, or raw input for one, to take content from.

    Returns:
        The result of the module-level ``infill`` called with this query and
        ``response``.
    """
    return infill(self, response)

`Response`

Bases: Context

Source code in src/gimkit/contexts.py

class Response(Context):
    """A context wrapped with the response prefix and suffix."""

    def __init__(self, *args: ContextInput) -> None:
        """Build the response from ``args`` using ``RESPONSE_PREFIX`` and ``RESPONSE_SUFFIX``."""
        super().__init__(RESPONSE_PREFIX, RESPONSE_SUFFIX, *args)

    def infill(self, query: Query | ContextInput) -> Result:
        """Fills the tags in the provided query with content from this response (self)."""
        return infill(query, self)

    def __str__(self) -> str:
        """Serialize with the id and content fields only."""
        return self.to_string(fields=["id", "content"])

`infill(query)`

Fills the tags in the provided query with content from this response (self).

Source code in src/gimkit/contexts.py

def infill(self, query: Query | ContextInput) -> Result:
    """Fills the tags in the provided query with content from this response (self).

    Args:
        query: The query, or raw input for one, whose tags are filled.

    Returns:
        The result of the module-level ``infill`` called with ``query`` and
        this response.
    """
    return infill(query, self)

`infill(query, response, strict=False)`

Combines query and response by infilling missing content.

Parameters:

Name	Type	Description	Default
`query`	`Query \| ContextInput`	The query containing masked tags to be filled	required
`response`	`Response \| ContextInput`	The response containing content to fill the tags	required
`strict`	`bool`	If True, raises errors on format mismatches. If False, attempts to repair missing ending tags in a best-effort manner.	`False`

Returns:

Type	Description
`Result`	A Result object with tags filled from the response

Raises:

Type	Description
`InvalidFormatError`	If strict=True and there are format mismatches

Source code in src/gimkit/contexts.py

def infill(
    query: Query | ContextInput, response: Response | ContextInput, strict: bool = False
) -> Result:
    """Merge a query and a response into a result with filled tags.

    Args:
        query: The query containing masked tags to be filled. Anything that is
            not already a ``Query`` is wrapped in one.
        response: The response providing the content. Anything that is not
            already a ``Response`` is wrapped in one.
        strict: If True, a tag-count mismatch raises. If False, a string
            response that fails to parse is passed through
            ``_repair_missing_endings`` and retried, and a tag-count mismatch
            only produces a warning.

    Returns:
        A Result built from the query parts, where each paired tag takes the
        response content unless that content is None.

    Raises:
        InvalidFormatError: If strict=True and the tag counts differ, or if a
            string response cannot be parsed and the repair changes nothing.
    """
    if not isinstance(query, Query):
        query = Query(query)

    if isinstance(response, str) and not strict:
        # Lenient mode: parse as-is first, and retry once on a repaired copy.
        raw_response = response
        try:
            response = Response(raw_response)
        except InvalidFormatError:
            fixed_response = _repair_missing_endings(raw_response)
            if fixed_response == raw_response:
                # Nothing was repaired, so surface the original error.
                raise
            warnings.warn(
                "Response has missing ending tags. Attempting automatic repair.",
                stacklevel=2,
            )
            response = Response(fixed_response)
    elif not isinstance(response, Response):
        response = Response(response)

    query_tags = list(query.tags)
    response_tags = list(response.tags)
    n_query, n_response = len(query_tags), len(response_tags)
    if n_query != n_response:
        msg = (
            "Mismatch in number of tags between query and response. "
            f"Query has {n_query} tag(s), response has {n_response} tag(s)."
        )
        if strict:
            raise InvalidFormatError(msg)
        warnings.warn(msg + " Will merge as many as possible.", stacklevel=2)

    # Pair tags positionally; pairing stops when either side runs out.
    tag_pairs = zip(query_tags, response_tags)
    result_parts: list[ContextPart] = []
    for part in query.parts[1:-1]:  # Exclude prefix and suffix
        if isinstance(part, MaskedTag):
            pair = next(tag_pairs, None)
            if pair is not None:
                q_tag, r_tag = pair
                filled = r_tag.content if r_tag.content is not None else q_tag.content
                part = MaskedTag(
                    id=q_tag.id,
                    name=q_tag.name,
                    desc=q_tag.desc,
                    regex=q_tag.regex,
                    content=filled,
                )
        result_parts.append(part)

    return Result(result_parts)

`gimkit.dsls`

Define DSL builders for various output types.

- `build_cfg` constructs a context-free grammar (CFG) using LLGuidance syntax.
- `build_json_schema` constructs a JSON schema representing the response structure.

`build_cfg(query)`

Build an LLGuidance context-free grammar (CFG) string based on the query object.

Constructs a flattened grammar structure compatible with LLGuidance's suffix/capture logic.

Ref:
- https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md: Incomplete documentation of llguidance grammar syntax
- https://github.com/guidance-ai/guidance/blob/main/guidance/_ast.py: LarkSerializer implementation
- https://github.com/guidance-ai/llguidance: Source code

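Real-World Example:
```python
query = '<|GIM_QUERY|>The capital of <|MASKED desc="single word" regex="中国|法国"|><|/MASKED|> is Beijing<|MASKED desc="punctuation mark" regex="\\."|><|/MASKED|><|/GIM_QUERY|>'
print(repr(build_cfg(Query(query))))
>>> '%llguidance {}\nstart: "<|GIM_RESPONSE|>" REGEX "<|MASKED id=\\"m_0\\"|>" m_0 REGEX "<|MASKED id=\\"m_1\\"|>" m_1 REGEX "<|/GIM_RESPONSE|>"\nREGEX: /\\s*/\nm_0[capture, suffix="<|/MASKED|>"]: T_0\nm_1[capture, suffix="<|/MASKED|>"]: T_1\nT_0: /中国|法国/\nT_1: /\\./\n'
```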

Source code in src/gimkit/dsls.py

def build_cfg(query: Query) -> str:
    r"""Build an LLGuidance context-free grammar (CFG) string based on the query object.

    Constructs a flattened grammar structure compatible with LLGuidance's suffix/capture logic.
    Tags that share the same regex reuse a single terminal rule.

    Ref:
    - https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md: Incomplete documentation of llguidance grammar syntax
    - https://github.com/guidance-ai/guidance/blob/main/guidance/_ast.py: LarkSerializer implementation
    - https://github.com/guidance-ai/llguidance: Source code

    Args:
        query: The query whose masked tags define the grammar rules.

    Returns:
        The grammar text, one rule per line, ending with a newline.

    Raises:
        ValueError: If ``validate_grammar_spec`` reports the grammar as invalid.

    Real-World Example:
    ```python
    query = '<|GIM_QUERY|>The capital of <|MASKED desc="single word" regex="中国|法国"|><|/MASKED|> is Beijing<|MASKED desc="punctuation mark" regex="\\."|><|/MASKED|><|/GIM_QUERY|>'
    print(repr(build_cfg(Query(query))))
    >>> '%llguidance {}\nstart: "<|GIM_RESPONSE|>" REGEX "<|MASKED id=\\"m_0\\"|>" m_0 REGEX "<|MASKED id=\\"m_1\\"|>" m_1 REGEX "<|/GIM_RESPONSE|>"\nREGEX: /\\s*/\nm_0[capture, suffix="<|/MASKED|>"]: T_0\nm_1[capture, suffix="<|/MASKED|>"]: T_1\nT_0: /中国|法国/\nT_1: /\\./\n'
    ```
    """
    # NOTE: the docstring is a raw string on purpose. Its example contains
    # backslash sequences (such as the two characters backslash + n) that must
    # reach the rendered documentation verbatim instead of being interpreted.
    num_tags = len(query.tags)

    # 1. Header declaration
    lines = ["%llguidance {}"]

    # 2. Build start rule
    # Target format: start: "PREFIX" REGEX "OPEN_TAG_0" m_0 REGEX "OPEN_TAG_1" m_1 ... REGEX "SUFFIX"
    start_parts = [f'"{RESPONSE_PREFIX}"']

    for i in range(num_tags):
        # Add whitespace rule reference
        start_parts.append("REGEX")

        # Add opening tag literal, e.g.: "<|MASKED id=\"m_0\"|>"
        # Note escaping: id=\"m_{i}\"
        open_tag_str = f'"{TAG_OPEN_LEFT} id=\\"m_{i}\\"{TAG_OPEN_RIGHT}"'
        start_parts.append(open_tag_str)

        # Add content rule reference (lowercase m_i)
        start_parts.append(f"m_{i}")

    # Add trailing whitespace and suffix
    start_parts.append("REGEX")
    start_parts.append(f'"{RESPONSE_SUFFIX}"')

    lines.append(f"start: {' '.join(start_parts)}")

    # 3. Define whitespace rule (named REGEX to match examples, usually can also be called WS)
    lines.append(r"REGEX: /\s*/")

    # 4. Collect unique patterns and create a mapping for terminal reuse
    # This optimization avoids creating duplicate terminal rules for tags with the same regex
    unique_pattern_terminals: dict[str, str] = {}
    terminal_definitions: list[str] = []

    for i, tag in enumerate(query.tags):
        # Note: When used with suffix, using greedy match /(?s:.*)/ instead of /(?s:.)*?/ is correct and legal.
        pattern = f"/{tag.regex}/" if tag.regex else "/(?s:.*)/"

        # Get or create a shared terminal for this pattern
        if pattern not in unique_pattern_terminals:
            # Create a new terminal name for this unique pattern
            terminal_name = f"T_{len(unique_pattern_terminals)}"
            unique_pattern_terminals[pattern] = terminal_name
            terminal_definitions.append(f"{terminal_name}: {pattern}")

        terminal_name = unique_pattern_terminals[pattern]

        # Rule m_i (logical layer):
        # - capture: tells the engine to capture this part.
        # - suffix: specifies the ending tag, the engine stops and consumes it when encountered.
        # Note: Here we reference the TAG_END constant (i.e., "<|/MASKED|>")
        lines.append(f'm_{i}[capture, suffix="{TAG_END}"]: {terminal_name}')

    # 5. Add all unique terminal definitions
    lines.extend(terminal_definitions)

    # 6. Assemble final string
    grammar = "\n".join(lines) + "\n"

    # Validate before returning so that a bad tag regex fails here.
    is_error, msgs = validate_grammar_spec(get_grammar_spec(grammar))
    if is_error:
        raise ValueError(
            "Invalid CFG grammar constructed from the query object:\n"
            + "\n".join(msgs)
            + "\nWe recommend checking the syntax documentation at https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md"
        )
    return grammar

`build_json_schema(query)`

Build a JSON schema dictionary based on the query object.

The JSON schema represents the response structure where each masked tag becomes a field in the JSON object. The field name is "m_{id}" to match the tag id, and patterns are applied when regex is specified.

Source code in src/gimkit/dsls.py

def build_json_schema(query: Query) -> dict:
    """Build a JSON schema dictionary describing the response for ``query``.

    Each masked tag becomes a required string property named ``m_{id}``. A
    tag's regex, when present, is wrapped as ``^(...)$`` and stored under
    ``pattern``; its description, when present, is stored under
    ``description``. Properties beyond these are disallowed by the schema.
    """
    properties = {}
    required_fields = []

    for tag in query.tags:
        field_schema = {"type": "string"}

        # Optional constraints are added only when the tag defines them.
        pattern = tag.regex
        if pattern is not None:
            field_schema["pattern"] = f"^({pattern})$"
        description = tag.desc
        if description is not None:
            field_schema["description"] = description

        field_name = f"m_{tag.id}"
        properties[field_name] = field_schema
        required_fields.append(field_name)

    return {
        "type": "object",
        "properties": properties,
        "required": required_fields,
        "additionalProperties": False,
    }

`gimkit.prompts`

For models that weren't trained using the Guided Infilling Modeling method, we may be able to achieve functionality similar to GIM by using a system prompt and few-shot prompting.

`gimkit.log`

`gimkit.exceptions`

`GIMError`

Bases: Exception

Base exception class for GIM-related errors.

Source code in src/gimkit/exceptions.py

class GIMError(Exception):
    """Base exception class for GIM-related errors.

    ``InvalidFormatError`` derives from this class, so catching ``GIMError``
    also catches format errors.
    """

`InvalidFormatError`

Bases: GIMError

Exception raised for invalid GIM query/response format.

Source code in src/gimkit/exceptions.py

class InvalidFormatError(GIMError):
    """Exception raised for invalid GIM query/response format.

    Raised, for example, when masked tags are mismatched or nested, when tag
    ids are out of order, or when a required prefix or suffix is missing.
    """

Model Backends

`gimkit.models.base`

`gimkit.models.openai`

`gimkit.models.vllm`

`gimkit.models.vllm_offline`

`gimkit.models.utils`

`get_outlines_model_input(model_input, output_type, use_gim_prompt, include_grammar=False, force_chat_input=False)`

Transform the model input to an Outlines-compatible format.

Source code in src/gimkit/models/utils.py

def get_outlines_model_input(
    model_input: ContextInput | Query,
    output_type: Literal["cfg", "json"] | None,
    use_gim_prompt: bool,
    include_grammar: bool = False,
    force_chat_input: bool = False,
) -> str | Chat:
    """Transform the model input to an Outlines-compatible format.

    Args:
        model_input: A ``Query`` or raw input that is wrapped in one.
        output_type: Selects the JSON-specific prompts when equal to ``"json"``.
        use_gim_prompt: If True, wrap the query text in a ``Chat`` made of the
            system prompt, the demo messages and a final user message.
        include_grammar: If True, serialize the query with
            ``to_string_with_grammar`` instead of ``str``.
        force_chat_input: If True, wrap a plain-string result in a
            single-message ``Chat``.

    Returns:
        The serialized query string, or a ``Chat`` containing it.
    """
    query_obj = model_input if isinstance(model_input, Query) else Query(model_input)
    if include_grammar:
        outlines_model_input: str | Chat = query_obj.to_string_with_grammar()
    else:
        outlines_model_input = str(query_obj)

    if use_gim_prompt:
        # Use JSON-specific prompts when output_type is "json"
        wants_json = output_type == "json"
        system_prompt = SYSTEM_PROMPT_MSG_JSON if wants_json else SYSTEM_PROMPT_MSG
        demo_msgs = DEMO_CONVERSATION_MSGS_JSON if wants_json else DEMO_CONVERSATION_MSGS
        user_msg = {"role": "user", "content": outlines_model_input}
        outlines_model_input = Chat([system_prompt, *demo_msgs, user_msg])

    # Only a result that is still a plain string needs wrapping here.
    if force_chat_input and isinstance(outlines_model_input, str):
        outlines_model_input = Chat([{"role": "user", "content": outlines_model_input}])

    return outlines_model_input

`get_outlines_output_type(model_input, output_type)`

Transform the output type to an Outlines-compatible format.

Source code in src/gimkit/models/utils.py

def get_outlines_output_type(
    model_input: ContextInput | Query, output_type: Literal["cfg", "json"] | None
) -> None | CFG | JsonSchema:
    """Transform the output type to an Outlines-compatible format.

    Args:
        model_input: A ``Query`` or anything ``Query(...)`` accepts; non-``Query``
            values are wrapped in a ``Query`` first.
        output_type: ``None`` for no output constraint, ``"cfg"`` for a grammar
            built with ``build_cfg``, or ``"json"`` for a schema built with
            ``build_json_schema``.

    Returns:
        ``None``, a ``CFG`` or a ``JsonSchema``, depending on ``output_type``.

    Raises:
        ValueError: If ``output_type`` is none of the supported values.
    """
    # The query is coerced before the output type is inspected, so the Query
    # constructor runs on non-Query input even when no output type is requested.
    query = model_input if isinstance(model_input, Query) else Query(model_input)

    if output_type is None:
        return None
    if output_type == "cfg":
        return CFG(build_cfg(query))
    if output_type == "json":
        return JsonSchema(build_json_schema(query))
    raise ValueError(f"Invalid output type: {output_type}")

`infill_responses(query, responses, json_responses=False)`

infill_responses(query: ContextInput | Query, responses: str, json_responses: bool = False) -> Result

infill_responses(query: ContextInput | Query, responses: list[str], json_responses: bool = False) -> list[Result]

Infill the provided query with content from the GIM responses or JSON responses.

Source code in src/gimkit/models/utils.py

def infill_responses(
    query: ContextInput | Query, responses: str | list[str], json_responses: bool = False
) -> Result | list[Result]:
    """Infill the provided query with content from the GIM responses or JSON responses.

    Args:
        query: The query to infill.
        responses: A single response string, or a non-empty list of response
            strings.
        json_responses: When true, each response string is first converted with
            ``json_responses_to_gim_response`` before being infilled.

    Returns:
        One result for a single string, or a list with one result per item when
        a list is given.

    Raises:
        TypeError: If ``responses`` is neither a string nor a list, or if the
            list contains a non-string item.
        ValueError: If ``responses`` is an empty list.
    """
    # Single response: convert from JSON if requested, then infill directly.
    if isinstance(responses, str):
        gim_response = json_responses_to_gim_response(responses) if json_responses else responses
        return infill(query, gim_response)

    # Anything else has to be a non-empty list made up solely of strings.
    if not isinstance(responses, list):
        raise TypeError(f"Expected responses to be str or list of str, got {type(responses)}")
    if not responses:
        raise ValueError("Response list is empty.")
    if any(not isinstance(item, str) for item in responses):
        raise TypeError(f"All items in the response list must be strings, got: {responses}")

    # Each item goes back through the single-string path above.
    return [infill_responses(query, item, json_responses=json_responses) for item in responses]

`json_responses_to_gim_response(json_response)`

Convert a JSON response string to a GIM response string.

Parameters:

Name	Type	Description	Default
`json_response`	`str`	A JSON string representing the response.	required

Returns:

Type	Description
`str`	A properly formatted GIM response string.

Raises:

Type	Description
`ValueError`	If the parsed JSON is not a dictionary, or if any key does not follow the "m_X" format where X is an integer.

Source code in src/gimkit/models/utils.py

def json_responses_to_gim_response(json_response: str) -> str:
    """Convert a JSON response string to a GIM response string.

    The input is parsed with ``json_repair``; if the parser reports any repair
    actions, a warning with the original text and those actions is logged. Each
    ``m_X`` key becomes a masked tag with id ``X``, and tags are emitted in
    ascending id order.

    Args:
        json_response: A JSON string representing the response.

    Returns:
        A properly formatted GIM response string.

    Raises:
        ValueError: If the parsed JSON is not a dictionary, or if any key does not
            follow the "m_X" format where X is an integer.
    """
    import re

    import json_repair

    from gimkit.log import get_logger

    logger = get_logger(__name__)
    field_pattern = re.compile(r"m_(\d+)")

    parsed = json_repair.loads(json_response, logging=True)
    # With logging=True, json_repair.loads returns a tuple (json_obj, repair_log).
    if isinstance(parsed, tuple):
        json_obj, repair_log = parsed
        if repair_log:
            logger.warning(
                "JSON response required repair. Original: %s, Repair actions: %s",
                json_response,
                repair_log,
            )
    else:  # pragma: no cover
        # Not expected when logging=True; fall back to using the value as-is.
        json_obj = parsed  # type: ignore[assignment]

    if not isinstance(json_obj, dict):
        raise ValueError(f"Expected JSON response to be a dictionary, got {type(json_obj)}")

    # Validate every key up front, collecting (numeric id, content) pairs.
    id_content_pairs = []
    for field_name, content in json_obj.items():
        matched = field_pattern.fullmatch(field_name)
        if matched is None:
            raise ValueError(
                f"Invalid field name in JSON response: {field_name}. Expected format 'm_X' where X is an integer."
            )
        id_content_pairs.append((int(matched.group(1)), content))

    # Order by numeric id only; the sort is stable, so equal ids keep input order.
    ordered_pairs = sorted(id_content_pairs, key=lambda pair: pair[0])
    tags = [MaskedTag(id=tag_id, content=content) for tag_id, content in ordered_pairs]
    return str(Response(tags))

API Reference

Package

gimkit

Core Modules

gimkit.guides

FormMixin

datetime(name=None, require_date=True, require_time=True)

select(name=None, choices=None)

single_word(name=None)

PersonalInfoMixin

e_mail(name=None)

person_name(name=None)

phone_number(name=None)

gimkit.schemas

MaskedTag dataclass

parse_parts(s)

parse_tags(s, prefix=None, suffix=None)

validate(query, response)

gimkit.contexts

Query

infill(response)

Response

infill(query)

infill(query, response, strict=False)

gimkit.dsls

build_cfg(query)

build_json_schema(query)

gimkit.prompts

gimkit.log

gimkit.exceptions

GIMError

InvalidFormatError

Model Backends

gimkit.models.base

gimkit.models.openai

gimkit.models.vllm

gimkit.models.vllm_offline

gimkit.models.utils

get_outlines_model_input(model_input, output_type, use_gim_prompt, include_grammar=False, force_chat_input=False)

get_outlines_output_type(model_input, output_type)

infill_responses(query, responses, json_responses=False)

json_responses_to_gim_response(json_response)

`gimkit`

`gimkit.guides`

`FormMixin`

`datetime(name=None, require_date=True, require_time=True)`

`select(name=None, choices=None)`

`single_word(name=None)`

`PersonalInfoMixin`

`e_mail(name=None)`

`person_name(name=None)`

`phone_number(name=None)`

`gimkit.schemas`

`MaskedTag` `dataclass`

`parse_parts(s)`

`parse_tags(s, prefix=None, suffix=None)`

`validate(query, response)`

`gimkit.contexts`

`Query`

`infill(response)`

`Response`

`infill(query)`

`infill(query, response, strict=False)`

`gimkit.dsls`

`build_cfg(query)`

`build_json_schema(query)`

`gimkit.prompts`

`gimkit.log`

`gimkit.exceptions`

`GIMError`

`InvalidFormatError`

`gimkit.models.base`

`gimkit.models.openai`

`gimkit.models.vllm`

`gimkit.models.vllm_offline`

`gimkit.models.utils`

`get_outlines_model_input(model_input, output_type, use_gim_prompt, include_grammar=False, force_chat_input=False)`

`get_outlines_output_type(model_input, output_type)`

`infill_responses(query, responses, json_responses=False)`

`json_responses_to_gim_response(json_response)`