Andlabs/Sega of Japan Dreamcast Master List

From Sega Retro

because I've had it now
ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes the JP name over the US name, so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing HuneX's website list the JP and US versions of games separately), or it just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct

Sometimes there will be a translated name on the left spine that differs from the Japanese name; that will be listed second, after a slash. I personally consider this to be a secondary name :/

Everything (all fields in table + page categories) verified
Everything verified, but box/master list genre mismatch
Problem with verification; the cell(s) left white show what the problem is (everything else is fine)
Unverified (either I didn't get to it yet or the page doesn't exist)

1998 1999 2000 2001 2002 2003 2004 2005-2007


Romanized Name Japanese Name Publisher Genre on Master List Genre on Box Release Date Price Catalogue Page Homepage Serial on Master List Serial on Box CERO Overseas Names
July/? July 45XLV ADV ADV 1998/11/27 5,800円 [1] T-35401M No
Pen Pen TriIcelon/(check) ペンペン トライアイスロン GE RCG RCG 1998/11/27 5,800円 [2] T-17001M T-17001M Same
バーチャファイター3tb リピート版 Sega ACT 1998/11/27 5,800円 [3]
Virtua Fighter 3tb バーチャファイター3tb Sega ACT 1998/11/27 5,800円 [4]
Godzilla Generations GODZILLA GENERATIONS Sega ACT ACT 1998/11/27 5,800円 [5] HDR-0004 HDR-0004 No
?/Incoming Humanity Last Battle インカミング 人類最終決戦 Imagineer STG STG 1998/12/17 5,800円 [6] T-15001M T-15001M Incoming
Sonic Adventure SONIC ADVENTURE Sega ACT ACT 1998/12/23 5,800円 [7] HDR-000 HDR-0001 Same
Tetris 4D TETRIS 4D(テトリス フォーディー) BPS PZL PZL 1998/12/23 4,800円 [8] T-20801M T-20801M No
Seventh Cross SEVENTH CROSS NEC SRPG SRPG 1998/12/23 5,800円 [9] T-38802M US: Seventh Cross Evolution
EU: none


# 27-28 oct 2011

import sys
import urllib.request
import io
from lxml import etree

def pageURLGen():
	"""Yield one query string per results page, last page first.

	The start-record value begins at 640 and drops by the page size (20)
	on every step, stopping once it would go below zero, so 33 strings
	are produced.  The page size is sent as the first field, but the
	countdown is done here because, per the original author's note, the
	server seems to ignore that parameter.

	NOTE(review): the template begins directly with the page-size value
	and carries no scheme, host or path; confirm whether a URL prefix is
	missing before handing these strings to a fetcher.
	"""
	hardware = 10        # Dreamcast
	lastPageStart = 640  # start record of the last results page (found by clicking through to it)
	pageSize = 20        # records per page
	template = "{}&sr={}&tt=&sy=&gr=&hw={}&fw=&sort=2"
	for startRecord in range(lastPageStart, -1, -pageSize):
		yield template.format(pageSize, startRecord, hardware)

# constants

# Maps the src path of a genre icon to the genre abbreviation written to
# the genrelist field.
genreIcons = {
	"/shared/images/icons/genre_act.gif": "ACT",
	"/shared/images/icons/genre_ftg.gif": "FTG",
	"/shared/images/icons/genre_stg.gif": "STG",
	"/shared/images/icons/genre_tbl.gif": "TBL",
	"/shared/images/icons/genre_rcg.gif": "RCG",
	"/shared/images/icons/genre_slg.gif": "SLG",
	"/shared/images/icons/genre_spg.gif": "SPG",
	"/shared/images/icons/genre_etc.gif": "ETC",
	"/shared/images/icons/genre_rpg.gif": "RPG",
	"/shared/images/icons/genre_adv.gif": "ADV",
	"/shared/images/icons/genre_pzl.gif": "PZL",
	"/shared/images/icons/genre_arpg.gif": "ARPG",
	"/shared/images/icons/genre_srpg.gif": "SRPG",
	"/shared/images/icons/genre_aadv.gif": "AADV",
}

# Maps the src path of a rating icon to the value written to the cero field.
# NOTE(review): the "?? ..." keys look like placeholders for icon paths that
# were not known when this was written -- confirm the real paths.
ceroIcons = {
	"/shared/images/icons/icon_cero-a_xsmall.gif": "a",
	"/shared/images/icons/icon_cero-b_xsmall.gif": "b",
	"/shared/images/icons/icon_cero-c_xsmall.gif": "c",
	"/shared/images/icons/icon_cero-d_xsmall.gif": "d",
	"/shared/images/icons/icon_cero-z_xsmall.gif": "z",
	"?? free": "free",
	"?? 12": "12",
	"?? 15": "15",
	"/shared/images/icons/icon_cero-h_xsmall.gif": "18",
}

# Rating icon that produces no cero field at all.
noCEROIcon = "/shared/images/icons/icon_cero-_xsmall.gif"

# Icons recognised inside a <div class="icon">.
partnerIcon = "/shared/images/icons/partners.gif"
dreamcastIcon = "/shared/images/icons/hard_DC.gif"
networkIcon = "/shared/images/icons/function_network.gif"
# TODO other icons that may or may not be skipped

# Button images that identify what kind of link an <a> tag is.
homepageLinkIcon = "/shared/images/icons/btn_official.gif"
cataloguePageLinkIcon = "/shared/images/icons/btn_product.gif"

def isComment(e):
	"""Return True when the tag of node e compares equal to etree.Comment."""
	nodeTag = e.tag
	return nodeTag == etree.Comment

def getImgTag(element):
	"""Return the first direct child of element whose tag is "img".

	The tag comparison is case-insensitive.  Only direct children are
	examined, not deeper descendants.

	Raises:
		Exception: if no direct child is an img element.
	"""
	for e in element:
		if isComment(e):
			# a comment's tag is etree.Comment rather than a string, so the
			# .lower() call below must not be reached for it
			continue
		if e.tag.lower() == "img":
			return e
	raise Exception("expected img element; found none")

def handleDivClassIcon(element):
	"""Translate the icon inside a <div class="icon"> into template text.

	The src attribute of the div's img child selects the output:
	  * the Dreamcast hardware icon yields "" (skipped);
	  * the partner icon yields a publisher=THIRD PARTY field;
	  * the network icon yields a network={{yes}} field;
	  * a genre icon yields a filled genrelist field plus an empty
	    genrebox field.

	Raises:
		Exception: if the src matches none of the known icons, or (from
			getImgTag) if the div has no img child.
	"""
	imgTag = getImgTag(element)
	src = imgTag.attrib["src"]
	if src == dreamcastIcon: # skip this icon
		return ""
	if src == partnerIcon: # note that this is third party
		return "\n| publisher=THIRD PARTY"
	if src == networkIcon:
		return "\n| network={{yes}}"
	if src in genreIcons:
		return "\n| genrelist={}".format(genreIcons[src]) + \
			"\n| genrebox="
	# Anything else is an icon this script has never seen; fail loudly
	# instead of returning None, which the caller would try to concatenate.
	raise Exception("unknown icon {}".format(src))

def handleDivClassTitleText(element):
	"""Build the three title fields for a <div class="titleText">.

	Only jptitle is filled, using element.text exactly as found; title
	and overseas are emitted empty.
	"""
	fields = (
		"\n| title=",
		"\n| jptitle={}".format(element.text),
		"\n| overseas=",
	)
	return "".join(fields)

def handleATag(element):
	"""Translate an <a> tag wrapping a button image into template text.

	The src of the link's img child decides the field: the official-site
	button yields a homepage field and the product button yields a
	catalogue field, each holding the link's href.

	Raises:
		Exception: if the image is neither known button, or (from
			getImgTag) if the link has no img child.
	"""
	url = element.attrib["href"]
	img = getImgTag(element)
	imgsrc = img.attrib["src"]
	if imgsrc == homepageLinkIcon:
		return "\n| homepage={}".format(url)
	if imgsrc == cataloguePageLinkIcon:
		return "\n| catalogue={}".format(url)
	# Report the offending image; this must use imgsrc, as no variable
	# called src exists in this function.
	raise Exception("unknown image link {} -> {}".format(imgsrc, url))

def handleDivRightColumn(element):
	"""Collect template fields from everything under a rightColumn div.

	Walks element and all of its descendants in document order.  Every
	<a> goes through handleATag, every <div class="icon"> through
	handleDivClassIcon and every <div class="titleText"> through
	handleDivClassTitleText; all other nodes are ignored.

	Returns:
		The concatenated field text, "" when nothing matched.
	"""
	parts = []
	# iter() replaces getiterator(), which lxml documents as deprecated
	for e in element.iter():
		if isComment(e):
			# a comment's tag is not a string, so it cannot be lower()ed
			continue
		tag = e.tag.lower()
		if tag == "a":
			parts.append(handleATag(e))
		elif tag == "div":
			divclass = e.attrib.get("class")
			if divclass == "icon":
				parts.append(handleDivClassIcon(e))
			elif divclass == "titleText":
				parts.append(handleDivClassTitleText(e))
	return "".join(parts)

def handleFirstColumn(element):
	"""Return the fields from the first <div class="rightColumn"> found.

	Searches element and all of its descendants in document order and
	hands the first matching div to handleDivRightColumn.

	Returns:
		That function's result, or "" when no such div exists.
	"""
	# iter() replaces getiterator(), which lxml documents as deprecated
	for e in element.iter():
		if isComment(e):
			# a comment's tag is not a string, so it cannot be lower()ed
			continue
		if e.tag.lower() == "div" and e.attrib.get("class") == "rightColumn":
			return handleDivRightColumn(e)
	return ""

def handleCEROCell(element):
	"""Return the cero field for a rating cell.

	The first <img> found under element (searching it and all of its
	descendants in document order) decides the result: a known rating
	icon yields a cero field, and the "no rating" icon yields "".

	Returns:
		The field text, or "" when the cell holds the no-rating icon or no
		img at all.

	Raises:
		Exception: if the first img is not a recognised rating icon.
	"""
	# iter() replaces getiterator(), which lxml documents as deprecated
	for e in element.iter():
		if isComment(e):
			# a comment's tag is not a string, so it cannot be lower()ed
			continue
		if e.tag.lower() != "img":
			continue
		# .get() so that an img without src reaches the error below
		# instead of failing with a KeyError while building the message
		src = e.attrib.get("src")
		if src in ceroIcons:
			return "\n| cero={}".format(ceroIcons[src])
		if src == noCEROIcon:
			return ""
		raise Exception("unknown CERO icon {}".format(src))
	return ""

def handleRow(element):
	"""Convert one table row into a {{wipdclist}} template string.

	A row whose first cell has a class starting with "th1" is the table
	header and produces "".  Otherwise the first cell supplies the title,
	icon and link fields, the second and third cells' text become the
	release and price fields, the fourth cell supplies the cero field,
	and empty seriallist/serialbox fields are appended.

	Returns:
		The template text, or "" for a header row.
	"""
	# A first cell without a class attribute is not a header; fall back to
	# "" so that startswith() is never called on None.
	firstCellClass = element[0].attrib.get("class") or ""
	if firstCellClass.startswith("th1"): # skip table header
		return ""
	parts = [
		"{{wipdclist",
		handleFirstColumn(element[0]),
		"\n| release={}".format(element[1].text),
		"\n| price={}".format(element[2].text),
		handleCEROCell(element[3]),
		"\n| seriallist=",
		"\n| serialbox=",
		"\n}}",
	]
	return "".join(parts)

from lxml import html

# Driver: for each page yielded by pageURLGen(), parse it, find the
# <div id="resultTable">, turn every row of its first child into a template
# string with handleRow(), and then walk those strings in reversed order.
# NOTE(review): this block looks incomplete as seen here.  The
# `if isComment(e):` line has no body (a `continue` is presumably missing),
# and the final `for game in glist:` loop has no visible body either.
# Confirm both against the original script before running.
for no in [1]: # single-pass loop; `no` is never read in the visible code
	for page in pageURLGen():
		tree = html.parse(page)
		glist = [] # template strings for this page, in page order
		for e in tree.getiterator(): # search through all tags for the one we want
			if isComment(e):
			# NOTE(review): body of the `if` above is not visible here
			if e.tag.lower() == "div" and e.attrib.get("id") == "resultTable":
				for tr in e[0]: # we assume this is a <table>
					game = handleRow(tr)
					if game != "": # handleRow returns "" for the header row
						glist += [game]
		glist.reverse() # pages store in reverse chronological order
		for game in glist:
# TODO figure out the cleanest way to get a traceback