This is harder than it looks because fields are defined both by syntax and position. Here is a ksh script that works with your sample data. But any surprises in your real data could break it.
Code:
#! /usr/bin/ksh
#
# Flatten an ortholog-group report read from stdin into one semicolon-delimited
# line per protein line, written to stdout:
#
#   Group_number;Best_Score;S_one;P_one;S_two;P_two;
#
# Input layout this script relies on, per record:
#   - optional lines that are empty or consist only of underscores (skipped)
#   - a line starting with "Group of orthologs"; the group number is the text
#     between "Group of orthologs #" and the first ".", the best score is the
#     text between "Best score " and the last space
#   - a line starting with "Score difference with " (stepped over)
#   - protein lines of whitespace-separated fields, up to a line that is empty
#     once leading underscores are removed, a whitespace-only line, or end of
#     input
#
# Exit status: 0 on success, 1 (with a diagnostic on stderr) when either of
# the two header lines is not where it is expected.

# Characters that separate fields on a protein line: space and tab.
FIELD_SEP=" $(printf '\t')"

# Read the report on stdin and print the flattened records on stdout.
# Takes no arguments. Calls "exit 1" on a malformed record header.
function parse_orthologs {
  typeset line Group_number Best_Score S_one P_one S_two P_two saved_ifs

  # "IFS= read -r" keeps leading blanks (they tell the columns apart) and
  # backslashes; "|| [[ -n $line ]]" keeps a final line with no newline.
  while IFS= read -r line || [[ -n $line ]] ; do
    # Remove the leading run of underscores so separator lines become empty.
    line=${line#"${line%%[!_]*}"}
    [[ -n $line ]] || continue

    if [[ $line != "Group of orthologs"* ]] ; then
      echo error looking for start of record 1>&2
      printf '%s\n' "$line" 1>&2
      exit 1
    fi
    line=${line#"Group of orthologs #"}
    Group_number=${line%%\.*}
    line=${line#*"Best score "}
    Best_Score=${line%" "*}

    # Second header line. The read status is deliberately ignored: at end of
    # input $line is empty and the check below reports the problem.
    IFS= read -r line
    if [[ $line != "Score difference with "* ]] ; then
      echo "error stepping over 2nd line of group $Group_number" 1>&2
      printf '%s\n' "$line" 1>&2
      exit 1
    fi

    while IFS= read -r line || [[ -n $line ]] ; do
      line=${line#"${line%%[!_]*}"}

      # Split into fields by plain word splitting with globbing disabled, so
      # the data is never evaluated as shell code; "--" protects fields that
      # begin with "-".
      saved_ifs=$IFS
      IFS=$FIELD_SEP
      set -f
      set -- $line
      set +f
      IFS=$saved_ifs

      # No fields: empty, separator, or whitespace-only line ends the record.
      [[ $# -gt 0 ]] || break

      if [[ $# -eq 4 ]] ; then
        S_one=$1
        P_one=$2
        S_two=$3
        P_two=$4
      elif [[ $line = [a-zA-Z0-9]* ]] ; then
        # Line starts in the first column: the pair belongs to the left side.
        S_one=$1
        P_one=$2
        S_two=""
        P_two=""
      else
        # Line starts with something else (e.g. blanks): right-hand pair.
        S_one=""
        P_one=""
        S_two=$1
        P_two=$2
      fi
      printf '%s;%s;%s;%s;%s;%s;\n' \
        "$Group_number" "$Best_Score" "$S_one" "$P_one" "$S_two" "$P_two"
    done
  done
  return 0
}

parse_orthologs