Skip to content

to_protobuf does not work for Stacked DocumentArray in some cases #1069

@Jackmin801

Description

@Jackmin801

script.py (the same file appears as tiger.py in the traceback below)

import torch
from typing import Optional
from docarray import BaseDocument, DocumentArray
from docarray.typing import TorchTensor
from docarray.documents import Text as BaseText, Image


class Tokens(BaseDocument):
    """Document with two tensor fields, ``input_ids`` and ``attention_mask``.

    In this reproduction both fields are filled with ``torch.randn(48)``.
    """
    # Parametrized with 48 — presumably a shape constraint on the tensor;
    # confirm against the docarray typing documentation.
    input_ids: TorchTensor[48]
    # Unparametrized tensor field.
    attention_mask: TorchTensor


class Text(BaseText):
    """Subclass of docarray's ``Text`` document (imported here as ``BaseText``)
    that adds an optional nested ``Tokens`` document.
    """
    # Optional nested document; the reproduction always supplies a value.
    tokens: Optional[Tokens]


class PairTextImage(BaseDocument):
    """Document pairing a ``Text`` document with an ``Image`` document.

    Both fields are themselves documents, so this type is nested two levels
    deep through ``text.tokens``.
    """
    # The locally defined Text subclass (carries the optional ``tokens`` field).
    text: Text
    # docarray's predefined Image document; built from a ``url`` in get_da.
    image: Image


def get_da(N: Optional[int] = None):
    """Build a stacked ``DocumentArray[PairTextImage]`` containing ``N`` documents.

    Document ``i`` (for ``i`` in ``range(N)``) is populated with:

    * ``text.text`` set to ``str(i)``,
    * ``text.tokens`` holding two fresh ``torch.randn(48)`` tensors
      (``input_ids`` and ``attention_mask``),
    * ``image.url`` set to ``"Images/{i}.png"``.

    Args:
        N: Number of documents to generate. The parameter keeps its ``None``
            default for backward compatibility, but a concrete integer is
            required.

    Returns:
        The result of calling ``.stack()`` on the freshly built array.

    Raises:
        TypeError: If ``N`` is ``None`` (including when it is omitted).
    """
    if N is None:
        # range(None) would fail anyway, but with the opaque message
        # "'NoneType' object cannot be interpreted as an integer"; raise the
        # same exception type with a message that names the real problem.
        raise TypeError("get_da() requires an integer N, got None")
    da = DocumentArray[PairTextImage](
        PairTextImage(
            text=Text(
                text=str(i),
                tokens=Tokens(
                    input_ids=torch.randn(48), attention_mask=torch.randn(48)
                ),
            ),
            image=Image(url=f"Images/{i}.png"),
        )
        for i in range(N)
    )
    return da.stack()

# Build a stacked array of 10 nested PairTextImage documents.
da = get_da(10)
# Serialize to protobuf. This is the call that fails in the traceback below
# with: ValueError: Protocol message DocumentArrayProto has no "stack" field.
da.to_protobuf()

Output

Traceback (most recent call last):
  File "tiger.py", line 38, in <module>
    da.to_protobuf()
  File "/home/jackmin/Documents/da-testing/docarray/docarray/array/array_stacked.py", line 263, in to_protobuf
    document_array=DocumentArrayProto(stack=column.to_protobuf())
  File "/home/jackmin/Documents/da-testing/docarray/docarray/array/array_stacked.py", line 263, in to_protobuf
    document_array=DocumentArrayProto(stack=column.to_protobuf())
ValueError: Protocol message DocumentArrayProto has no "stack" field.

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

Status

Done

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions