'From Squeakland.396-Nihongo7.29 of 18 March 2005 [latest update: #80] on 22 May 2005 at 12:16:33 am'!
"Change Set: NihongoScamper
Date: 22 May 2005
Author: ICHIKAWA, Yuji & Takashi Yamamiya
Version: 0.5
ChangeLog:
#0.5 adapted Nihongo7.
#0.4 improved readability.
#0.3 added Automatic recognition among JIS, SJIS, EUC, and others.
#0.2 let the default coding be EUC.
#0.1 initial release.
References:
-http://www.net.is.uec.ac.jp/~ueno/material/kanji/auto.html
-http://tohoho.wakusei.ne.jp/wwwkanji.htm
"
Scamper StartUrl: 'http://languagegame.org:8080/propella/91'.
!
Model subclass: #Scamper
instanceVariableNames: 'status currentUrl pageSource document formattedPage downloadingProcess documentQueue recentDocuments currentAnchorLocation currentUrlIndex backgroundColor bookmark bookDir bindings '
classVariableNames: 'StartUrl '
poolDictionaries: ''
category: 'Network-Web Browser'!
!EUCJPTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:47'!
checkCode: upperCharacter followed: lowerCharacter
| u l |
u _ upperCharacter asInteger.
u < 16r80 ifTrue: [^ true].
l _ lowerCharacter asInteger.
(u = 16r8E) & (l between: 16rA0 and: 16rDF) ifTrue: [^ true]. "hankaku katakana"
(u between: 16rA1 and: 16rFE) & (l between: 16rA1 and: 16rFE) ifTrue: [^ true].
^ false.
! !
!EUCJPTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 18:07'!
isFirstByte: aCharacter
^ (aCharacter asInteger = 16r8E) | (aCharacter asInteger between: 16rA1 and: 16rFE).
! !
!FileUrl methodsFor: 'downloading' stamp: 'tak 4/26/2005 15:39'!
retrieveContents
| file pathString s type entries |
pathString _ self pathForFile.
file _ [StandardFileStream readOnlyFileNamed: pathString]
on: FileDoesNotExistException do:[:ex| ex return: nil].
file ifNotNil: [
type _ file mimeTypes.
type ifNotNil:[type _ type first].
type ifNil:[MIMEDocument guessTypeFromName: self path last].
^MIMELocalFileDocument
contentType: type
contentStream: file].
"see if it's a directory..."
entries _ [(FileDirectory on: pathString) entries]
on: InvalidDirectoryError do:[:ex| ex return: nil].
entries ifNil:[^nil].
s _ WriteStream on: String new.
(pathString endsWith: '/') ifFalse: [ pathString _ pathString, '/' ].
s nextPutAll: '
Directory Listing for ', pathString, ''.
s nextPutAll: 'Directory Listing for ', pathString, '
'.
s nextPutAll: ''.
^MIMEDocument contentType: 'text/html' content: s contents url: ('file://', pathString)! !
!FreeTranslation class methodsFor: 'scamper' stamp: 'tak 7/7/2004 09:44'!
openGooDictionaryScamperOn: currentSelection
"Submit the string to the translation server at dictionary.goo.ne.jp."
| inputs scamperWindow |
inputs _ Dictionary new.
inputs at: 'MT' put: {currentSelection}.
inputs at: 'kind' put: #('ej' ).
scamperWindow _ (WebBrowser default
ifNil: [^ self])
newOrExistingOn: 'http://dictionary.goo.ne.jp/search.php'.
scamperWindow model
submitFormWithInputs: inputs
url: 'http://dictionary.goo.ne.jp/search.php' asUrl
method: 'GET'.
scamperWindow activate! !
!FreeTranslation class methodsFor: 'scamper' stamp: 'tak 7/7/2004 09:41'!
openScamperOn: currentSelection
"Submit the string to the translation server at www.freetranslation.com. Ask it to translate from (Preferences parameterAt: #languageTranslateFrom) to (Preferences parameterAt: #languageTranslateTo). Display the results in a Scamper window, reusing the previous one if possible."
| inputs scamperWindow from to |
currentSelection size >= 10000 ifTrue: [^ self inform: 'Text selection is too long.'].
from _ Preferences parameterAt: #languageTranslateFrom ifAbsentPut: ['English'].
to _ Preferences parameterAt: #languageTranslateTo ifAbsentPut: ['German'].
from = to ifTrue:
[^ self inform: 'You asked to translate from ', from, ' to ', to, '.\' withCRs,
'Use "choose language" to set these.'].
to = 'Japanese' ifTrue: [ ^self openGooDictionaryScamperOn: currentSelection ].
inputs _ Dictionary new.
inputs at: 'SrcText' put: (Array with: currentSelection).
inputs at: 'Sequence' put: #('core').
inputs at: 'Mode' put: #('html').
inputs at: 'template' put: #('TextResult2.htm').
inputs at: 'Language' put: (Array with: from, '/', to).
scamperWindow _ (WebBrowser default ifNil: [^self]) newOrExistingOn: 'http://ets.freetranslation.com'.
scamperWindow model submitFormWithInputs: inputs
url: 'http://ets.freetranslation.com:5081' asUrl
method: 'post'.
scamperWindow activate.
! !
!HtmlDocument methodsFor: 'as yet unclassified' stamp: 'ich. 4/17/2004 23:03'!
charset
| result |
result _ self head charset.
^ result
ifNotNil: [result]
ifNil: [self asHtml guessJapaneseEncoding].
! !
!HtmlDocument methodsFor: 'as yet unclassified' stamp: 'ich. 4/11/2004 00:51'!
replaceAllTextsToMultiString
| charset converter |
charset _ self charset.
charset ifNotNil:
[TextConverter allSubclassesDo:
[:each |
(each encodingNames includes: charset asLowercase)
ifTrue: [converter _ each new]]].
self allSubentitiesDo:
[:each |
(each isMemberOf: HtmlTextEntity)
ifTrue: [each replaceToMultiString: converter]].
! !
!HtmlEntity class methodsFor: 'character entities' stamp: 'tak 6/20/2004 18:34'!
valueOfHtmlEntity: specialEntity
"Return the character equivalent to the HTML entity."
| value |
(specialEntity beginsWith: '#') "Handle numeric entities"
ifTrue: [
"NB: We can display only simple numeric special entities in the"
"range [9..255] (HTML 3.2). HTML 4.01 allows the specification of 16 bit"
"characters, so we do a little fiddling to handle a few special cases"
value _ ('Xx' includes: specialEntity second)
ifTrue: [Number readFrom: '16r', (specialEntity allButFirst: 2) asUppercase]
ifFalse: [specialEntity allButFirst asNumber].
"Replace rounded left & right double quotes (HTML 4.01) with simple double quote"
(value = 8220 or: [value = 8221]) ifTrue: [ value _ $" asInteger ].
"Replace rounded left & right single quotes (HTML 4.01) with simple single quote"
(value = 8216 or: [value = 8217]) ifTrue: [ value _ $' asInteger ].
"Replace with a space if outside the normal range (arbitrary choice)"
(value < 9 or: [value > 255]) ifTrue: [ value _ 32 ].
]
ifFalse: [
"Otherwise this is most likely a named character entity"
value _ ReverseCharacterEntities at: specialEntity ifAbsent: [^nil].
].
^Character value: value.! !
!HtmlHead methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:49'!
charset
self allSubentitiesDo:
[:each |
(each isMemberOf: HtmlMeta) ifTrue:
[| retval |
retval _ each charset.
retval ifNotNil: [^ retval]]].
^ nil.
! !
!HtmlMeta methodsFor: 'formatting' stamp: 'tak 7/5/2004 17:11'!
charset
(self getAttribute: #'http-equiv')
ifNil: [^ nil].
(self getAttribute: #'http-equiv') asLowercase = 'content-type'
ifFalse: [^ nil].
((self getAttribute: #content)
findTokens: '; ')
do: [:each | (each beginsWith: 'charset')
ifTrue: [^ (each findTokens: '= ')
at: 2]].
^ nil! !
!HtmlTextEntity methodsFor: 'access' stamp: 'ich. 4/11/2004 00:50'!
replaceToMultiString: aTextConverter
aTextConverter
ifNil: [self text: self text isoToSqueak] "Original Scamper transforms using isoToSqueak."
ifNotNil: [self text: (self text convertFromWithConverter: aTextConverter)].
! !
!HtmlTokenizer methodsFor: 'private-tokenizing' stamp: 'tak 6/20/2004 18:23'!
nextTag
"we've seen a < and peek-ed something other than a !!. Parse and return a tag"
| source negated name attribs attribName attribValue sourceStart sourceEnd c |
sourceStart _ pos-1.
attribs _ Dictionary new.
"determine if its negated"
self peekChar = $/
ifTrue: [ negated _ true. self nextChar. ]
ifFalse: [ negated _ false ].
"read in the name"
self skipSpaces.
name _ self nextName.
name _ name asLowercase.
"read in any attributes"
[ self skipSpaces.
c _ self peekChar.
c = nil or: [(c isLetter or: ['?/' includes: c]) not ]
] whileFalse: [
attribName _ self nextName.
attribName _ attribName asLowercase.
self skipSpaces.
self peekChar = $=
ifTrue: [
self nextChar.
self skipSpaces.
attribValue _ self nextAttributeValue withoutQuoting ]
ifFalse: [ attribValue _ '' ].
attribs at: attribName put: attribValue ].
self peekChar = $> ifTrue: [ self nextChar ].
sourceEnd _ pos-1.
source _ text copyFrom: sourceStart to: sourceEnd.
^HtmlTag source: source name: name asLowercase negated: negated attribs: attribs! !
!MultiString methodsFor: 'converting' stamp: 'tak 7/5/2004 15:47'!
encodeForHTTP
self flag: #toBeImplemented.
^ (self convertToWithConverter: EUCJPTextConverter new) encodeForHTTP! !
!Scamper methodsFor: 'menus' stamp: 'tak 7/7/2004 09:48'!
menu: menu shifted: shifted
menu addList:
#(
('back' back)
('forward' forward)
-
('do it (d)' doIt)
('print it (p)' printIt)
('inspect it (i)' inspectIt)
('file it in (G)' fileItIn)
('debug it' debugIt)
-
('new URL' jumpToNewUrl)
('history' displayHistory)
-
('view source' viewSource)
('inspect parse tree' inspectParseTree)
-
('go to start page' visitStartPage)
('edit start page' editStartPage)
('bookmark' bookmark)
('translate' translateIt)
).
downloadingProcess
ifNotNil:
[menu addList: #(
-
('stop downloading' stopEverything)
)].
^ menu! !
!Scamper methodsFor: 'changing page' stamp: 'tak 6/23/2004 22:05'!
jumpToAbsoluteUrl: urlText
"start downloading a new page. The page source is downloaded in a background thread"
| newUrl newSource morph |
self stopEverything.
"get the new url"
newUrl _ urlText asUrl.
"if it fundamentally doesn't fit the pages-and-contents model used internally, spawn off an external viewer for it"
newUrl hasContents ifFalse: [ newUrl activate. ^true ].
"fork a Process to do the actual downloading, parsing, and formatting. It's results will be picked up in #step"
self status: ('downloading {1}...' translated format:{newUrl toText}).
downloadingProcess _ [
newSource _ [ newUrl retrieveContentsForBrowser: self ] ifError: [ :msg :ctx |
MIMEDocument contentType: 'text/plain' content: msg ].
newSource
ifNil: [ newSource _ MIMEDocument contentType: 'text/plain' content: 'Error retrieving this URL' translated].
newSource url ifNil: [
newSource _ MIMEDocument contentType: newSource contentType content: newSource content url: newUrl ].
documentQueue nextPut: newSource.
downloadingProcess _ nil.
] newProcess.
downloadingProcess resume.
[recentDocuments size > currentUrlIndex] whileTrue: [
"delete all elements in recentDocuments after currentUrlIndex"
recentDocuments removeLast.
].
currentUrlIndex _ currentUrlIndex + 1.
morph _ self containingWindow.
morph ifNotNil: [morph start].
^true! !
!Scamper methodsFor: 'document handling' stamp: 'tak 4/26/2005 15:25'!
displayTextHtmlPage: newSource
"HTML page--format it"
| formatter bgimageUrl bgimageDoc bgimage |
currentUrl _ newSource url.
pageSource _ newSource content.
self status: 'parsing...' translated.
document _ HtmlParser parse: (ReadStream on: pageSource).
document replaceAllTextsToMultiString.
self status: 'laying out...' translated.
formatter _ HtmlFormatter preferredFormatterClass new.
formatter browser: self.
formatter baseUrl: currentUrl.
document addToFormatter: formatter.
formattedPage _ formatter text.
(bgimageUrl _ document body background)
ifNotNil:
[bgimageDoc _ (bgimageUrl asUrlRelativeTo: currentUrl) retrieveContents.
[bgimage _ ImageReadWriter formFromStream: bgimageDoc contentStream binary]
ifError: [:err :rcvr | "ignore" bgimage _ nil]].
bgimage
ifNotNil: [backgroundColor _ bgimage]
ifNil: [backgroundColor _ Color fromString: document body bgcolor].
currentUrl fragment
ifNil: [ currentAnchorLocation _ nil ]
ifNotNil: [ currentAnchorLocation _
formatter anchorLocations
at: currentUrl fragment asLowercase
ifAbsent: [ nil ] ].
self startDownloadingMorphState: (formatter incompleteMorphs).
self changeAll: #(currentUrl relabel hasLint lint backgroundColor formattedPage formattedPageSelection).
self status: 'done.' translated.
"pardon this horrible hack...(tk)"
(currentUrl authority beginsWith: 'ets.freetranslation.com') ifTrue: [
self status: 'done.
**** Please Scroll Down To See Your Results ****' translated].
^true! !
!Scamper methodsFor: 'not yet categorized' stamp: 'tak 5/12/2004 16:17'!
doItReceiver
^self! !
!Scamper methodsFor: 'bindings' stamp: 'tak 5/9/2004 16:08'!
bindingOf: aString
bindings isNil
ifTrue: [bindings _ Dictionary new].
(bindings includesKey: aString)
ifFalse: [bindings at: aString put: nil].
^bindings associationAt: aString! !
!ShiftJISTextConverter class methodsFor: 'utilities' stamp: 'ich. 4/11/2004 00:33'!
encodingNames
^ #('shift-jis' 'shift_jis' 'sjis', 'x-sjis') copy
! !
!ShiftJISTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:52'!
checkCode: upperCharacter followed: lowerCharacter
| u l |
u _ upperCharacter asInteger.
u < 16r80 ifTrue: [^ true].
l _ lowerCharacter asInteger.
((u between: 16r81 and: 16r9F) | (u between: 16rE0 and: 16rFF))
& ((l between: 16r40 and: 16rFC) & (l ~= 16r7F)) ifTrue: [^ true].
^ false.
! !
!ShiftJISTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:30'!
isFirstByte: aCharacter
^ (aCharacter asInteger between: 16r81 and: 16r9F) | (aCharacter asInteger between: 16rE0 and: 16rEF).
! !
!String methodsFor: 'testing' stamp: 'ich. 5/8/2004 16:22'!
mayBeJIS
| esc |
esc _ Character escape asString.
^ (self includesSubString: esc , '$@') or:
[(self includesSubString: esc , '$B') or:
[self includesSubString: esc , '$(D']].
! !
!String methodsFor: 'encoding' stamp: 'ich. 5/1/2004 18:16'!
guessJapaneseEncoding
| stream isEuc isSjis eucFirstByte sjisFirstByte |
self mayBeJIS ifTrue: [^ 'iso-2022-jp'].
stream _ ReadStream on: self.
isEuc _ nil.
isSjis _ nil.
eucFirstByte _ nil.
sjisFirstByte _ nil.
[stream atEnd] whileFalse:
[| char |
char _ stream next.
eucFirstByte
ifNil:
[(EUCJPTextConverter isFirstByte: char) ifTrue:
[eucFirstByte _ char.
isEuc ifNil: [isEuc _ true]]]
ifNotNil:
[(EUCJPTextConverter checkCode: eucFirstByte followed: char) ifFalse:
[isEuc _ false].
eucFirstByte _ nil].
sjisFirstByte
ifNil:
[(ShiftJISTextConverter isFirstByte: char) ifTrue:
[sjisFirstByte _ char.
isSjis ifNil: [isSjis _ true]]]
ifNotNil:
[(ShiftJISTextConverter checkCode: sjisFirstByte followed: char) ifFalse:
[isSjis _ false].
sjisFirstByte _ nil]].
(isEuc isNil & isSjis isNil) ifTrue: [^ nil]. "It means ASCII."
(isEuc notNil and: [isEuc]) ifTrue: [^ 'eucjp']. "EUC has high priority."
(isSjis notNil and: [isSjis]) ifTrue: [^ 'sjis'].
^ nil. "unrecognized code"
! !
Model subclass: #Scamper
instanceVariableNames: 'status currentUrl pageSource document formattedPage downloadingProcess documentQueue recentDocuments currentAnchorLocation currentUrlIndex backgroundColor bookmark bookDir bindings'
classVariableNames: 'StartUrl'
poolDictionaries: ''
category: 'Network-Web Browser'!