/**
 * One candidate text for a transcript item.
 *
 * NOTE(review): the field names resemble speech-to-text service output —
 * confirm the actual producer of this data.
 */
interface ItemAlternative {
  /** Confidence for this alternative, carried as a string (presumably a numeric score — TODO confirm format). */
  confidence: string;
  /** Text of this alternative; `transform` copies it into the transcript when this is an item's first alternative. */
  content: string;
}

/**
 * A single element of a segment, tagged by `type` as either `pronunciation`
 * or `punctuation`.
 */
interface Item {
  /** Optional start time as a string (presumably absent on punctuation items; units not shown here — TODO confirm). */
  start_time?: string;
  /** Optional end time as a string (same caveats as `start_time`). */
  end_time?: string;
  /** Discriminator; `transform` puts a space before `pronunciation` items that are not first in their segment. */
  type: 'pronunciation' | 'punctuation';
  /** Candidate texts for this item; `transform` reads only the first entry. */
  alternatives: ItemAlternative[];
}

/**
 * A portion of the input carrying timing strings, a speaker label, and the
 * items that `transform` concatenates in array order.
 */
interface Segment {
  /** Segment start time as a string (units not shown here — TODO confirm). */
  start_time: string;
  /** Segment end time as a string (units not shown here — TODO confirm). */
  end_time: string;
  /** Label identifying the speaker of this segment. */
  speaker_label: string;
  /** Items belonging to this segment, in the order they are written to the transcript. */
  items: Item[];
}

/** Argument accepted by `transform`. */
interface Input {
  /** Segments to process; output order follows this array. */
  segments: Segment[];
}

/** A `Segment` extended with the transcript text that `transform` assembles from its items. */
interface OutputSegment extends Segment {
  /** Concatenated text of the segment's items. */
  transcript: string;
}

/** Value returned by `transform`. */
interface Output {
  /** One entry per input segment, each with its `transcript` added. */
  segments: OutputSegment[];
}

/**
 * Builds a plain-text transcript for every segment of `input`.
 *
 * For each segment, the `content` of the first alternative of every item is
 * concatenated in array order. A single space is inserted before a
 * `pronunciation` item unless it is the first item written to the transcript;
 * `punctuation` items are appended with no preceding space.
 *
 * Items whose `alternatives` array is empty are skipped rather than raising a
 * `TypeError`, so one malformed item no longer aborts the whole transform.
 *
 * The input is not mutated: each returned segment is a shallow copy of the
 * source segment with an added `transcript` property.
 *
 * @param input - Object holding the segments to process.
 * @returns A new object whose `segments` follow the input order, each extended
 *   with its assembled `transcript`.
 */
export const transform = (input: Input): Output => {
  const segments = input.segments.map((segment): OutputSegment => {
    let transcript = '';
    // Tracks whether any item has been appended yet, so skipped items do not
    // cause a leading space before the first word that is actually written.
    let hasWritten = false;

    for (const item of segment.items) {
      const firstAlternative = item.alternatives[0];
      if (firstAlternative === undefined) {
        // Nothing to append for an item without alternatives.
        continue;
      }
      if (hasWritten && item.type === 'pronunciation') {
        transcript += ' ';
      }
      transcript += firstAlternative.content;
      hasWritten = true;
    }

    return { ...segment, transcript };
  });

  return { segments };
};
