'From Squeakland.396-Nihongo7.29 of 18 March 2005 [latest update: #80] on 22 May 2005 at 12:16:33 am'! "Change Set: NihongoScamper Date: 22 May 2005 Author: ICHIKAWA, Yuji & Takashi Yamamiya Version: 0.5 ChangeLog: #0.5 adapted Nihongo7. #0.4 improved readability. #0.3 added Automatic recognition among JIS, SJIS, EUC, and others. #0.2 let the default coding be EUC. #0.1 initial release. References: -http://www.net.is.uec.ac.jp/~ueno/material/kanji/auto.html -http://tohoho.wakusei.ne.jp/wwwkanji.htm " Scamper StartUrl: 'http://languagegame.org:8080/propella/91'. ! Model subclass: #Scamper instanceVariableNames: 'status currentUrl pageSource document formattedPage downloadingProcess documentQueue recentDocuments currentAnchorLocation currentUrlIndex backgroundColor bookmark bookDir bindings ' classVariableNames: 'StartUrl ' poolDictionaries: '' category: 'Network-Web Browser'! !EUCJPTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:47'! checkCode: upperCharacter followed: lowerCharacter | u l | u _ upperCharacter asInteger. u < 16r80 ifTrue: [^ true]. l _ lowerCharacter asInteger. (u = 16r8E) & (l between: 16rA0 and: 16rDF) ifTrue: [^ true]. "hankaku katakana" (u between: 16rA1 and: 16rFE) & (l between: 16rA1 and: 16rFE) ifTrue: [^ true]. ^ false. ! ! !EUCJPTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 18:07'! isFirstByte: aCharacter ^ (aCharacter asInteger = 16r8E) | (aCharacter asInteger between: 16rA1 and: 16rFE). ! ! !FileUrl methodsFor: 'downloading' stamp: 'tak 4/26/2005 15:39'! retrieveContents | file pathString s type entries | pathString _ self pathForFile. file _ [StandardFileStream readOnlyFileNamed: pathString] on: FileDoesNotExistException do:[:ex| ex return: nil]. file ifNotNil: [ type _ file mimeTypes. type ifNotNil:[type _ type first]. type ifNil:[MIMEDocument guessTypeFromName: self path last]. ^MIMELocalFileDocument contentType: type contentStream: file]. "see if it's a directory..." entries _ [(FileDirectory on: pathString) entries] on: InvalidDirectoryError do:[:ex| ex return: nil]. entries ifNil:[^nil]. s _ WriteStream on: String new. (pathString endsWith: '/') ifFalse: [ pathString _ pathString, '/' ]. s nextPutAll: 'Directory Listing for ', pathString, ''. s nextPutAll: '

Directory Listing for ', pathString, '

'. s nextPutAll: ''. ^MIMEDocument contentType: 'text/html' content: s contents url: ('file://', pathString)! ! !FreeTranslation class methodsFor: 'scamper' stamp: 'tak 7/7/2004 09:44'! openGooDictionaryScamperOn: currentSelection "Submit the string to the translation server at dictionary.goo.ne.jp." | inputs scamperWindow | inputs _ Dictionary new. inputs at: 'MT' put: {currentSelection}. inputs at: 'kind' put: #('ej' ). scamperWindow _ (WebBrowser default ifNil: [^ self]) newOrExistingOn: 'http://dictionary.goo.ne.jp/search.php'. scamperWindow model submitFormWithInputs: inputs url: 'http://dictionary.goo.ne.jp/search.php' asUrl method: 'GET'. scamperWindow activate! ! !FreeTranslation class methodsFor: 'scamper' stamp: 'tak 7/7/2004 09:41'! openScamperOn: currentSelection "Submit the string to the translation server at www.freetranslation.com. Ask it to translate from (Preferences parameterAt: #languageTranslateFrom) to (Preferences parameterAt: #languageTranslateTo). Display the results in a Scamper window, reusing the previous one if possible." | inputs scamperWindow from to | currentSelection size >= 10000 ifTrue: [^ self inform: 'Text selection is too long.']. from _ Preferences parameterAt: #languageTranslateFrom ifAbsentPut: ['English']. to _ Preferences parameterAt: #languageTranslateTo ifAbsentPut: ['German']. from = to ifTrue: [^ self inform: 'You asked to translate from ', from, ' to ', to, '.\' withCRs, 'Use "choose language" to set these.']. to = 'Japanese' ifTrue: [ ^self openGooDictionaryScamperOn: currentSelection ]. inputs _ Dictionary new. inputs at: 'SrcText' put: (Array with: currentSelection). inputs at: 'Sequence' put: #('core'). inputs at: 'Mode' put: #('html'). inputs at: 'template' put: #('TextResult2.htm'). inputs at: 'Language' put: (Array with: from, '/', to). scamperWindow _ (WebBrowser default ifNil: [^self]) newOrExistingOn: 'http://ets.freetranslation.com'. scamperWindow model submitFormWithInputs: inputs url: 'http://ets.freetranslation.com:5081' asUrl method: 'post'. scamperWindow activate. ! ! !HtmlDocument methodsFor: 'as yet unclassified' stamp: 'ich. 4/17/2004 23:03'! charset | result | result _ self head charset. ^ result ifNotNil: [result] ifNil: [self asHtml guessJapaneseEncoding]. ! ! !HtmlDocument methodsFor: 'as yet unclassified' stamp: 'ich. 4/11/2004 00:51'! replaceAllTextsToMultiString | charset converter | charset _ self charset. charset ifNotNil: [TextConverter allSubclassesDo: [:each | (each encodingNames includes: charset asLowercase) ifTrue: [converter _ each new]]]. self allSubentitiesDo: [:each | (each isMemberOf: HtmlTextEntity) ifTrue: [each replaceToMultiString: converter]]. ! ! !HtmlEntity class methodsFor: 'character entities' stamp: 'tak 6/20/2004 18:34'! valueOfHtmlEntity: specialEntity "Return the character equivalent to the HTML entity." | value | (specialEntity beginsWith: '#') "Handle numeric entities" ifTrue: [ "NB: We can display only simple numeric special entities in the" "range [9..255] (HTML 3.2). HTML 4.01 allows the specification of 16 bit" "characters, so we do a little fiddling to handle a few special cases" value _ ('Xx' includes: specialEntity second) ifTrue: [Number readFrom: '16r', (specialEntity allButFirst: 2) asUppercase] ifFalse: [specialEntity allButFirst asNumber]. "Replace rounded left & right double quotes (HTML 4.01) with simple double quote" (value = 8220 or: [value = 8221]) ifTrue: [ value _ $" asInteger ]. "Replace rounded left & right single quotes (HTML 4.01) with simple single quote" (value = 8216 or: [value = 8217]) ifTrue: [ value _ $' asInteger ]. "Replace with a space if outside the normal range (arbitrary choice)" (value < 9 or: [value > 255]) ifTrue: [ value _ 32 ]. ] ifFalse: [ "Otherwise this is most likely a named character entity" value _ ReverseCharacterEntities at: specialEntity ifAbsent: [^nil]. ]. ^Character value: value.! ! !HtmlHead methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:49'! charset self allSubentitiesDo: [:each | (each isMemberOf: HtmlMeta) ifTrue: [| retval | retval _ each charset. retval ifNotNil: [^ retval]]]. ^ nil. ! ! !HtmlMeta methodsFor: 'formatting' stamp: 'tak 7/5/2004 17:11'! charset (self getAttribute: #'http-equiv') ifNil: [^ nil]. (self getAttribute: #'http-equiv') asLowercase = 'content-type' ifFalse: [^ nil]. ((self getAttribute: #content) findTokens: '; ') do: [:each | (each beginsWith: 'charset') ifTrue: [^ (each findTokens: '= ') at: 2]]. ^ nil! ! !HtmlTextEntity methodsFor: 'access' stamp: 'ich. 4/11/2004 00:50'! replaceToMultiString: aTextConverter aTextConverter ifNil: [self text: self text isoToSqueak] "Original Scamper transforms using isoToSqueak." ifNotNil: [self text: (self text convertFromWithConverter: aTextConverter)]. ! ! !HtmlTokenizer methodsFor: 'private-tokenizing' stamp: 'tak 6/20/2004 18:23'! nextTag "we've seen a < and peek-ed something other than a !!. Parse and return a tag" | source negated name attribs attribName attribValue sourceStart sourceEnd c | sourceStart _ pos-1. attribs _ Dictionary new. "determine if its negated" self peekChar = $/ ifTrue: [ negated _ true. self nextChar. ] ifFalse: [ negated _ false ]. "read in the name" self skipSpaces. name _ self nextName. name _ name asLowercase. "read in any attributes" [ self skipSpaces. c _ self peekChar. c = nil or: [(c isLetter or: ['?/' includes: c]) not ] ] whileFalse: [ attribName _ self nextName. attribName _ attribName asLowercase. self skipSpaces. self peekChar = $= ifTrue: [ self nextChar. self skipSpaces. attribValue _ self nextAttributeValue withoutQuoting ] ifFalse: [ attribValue _ '' ]. attribs at: attribName put: attribValue ]. self peekChar = $> ifTrue: [ self nextChar ]. sourceEnd _ pos-1. source _ text copyFrom: sourceStart to: sourceEnd. ^HtmlTag source: source name: name asLowercase negated: negated attribs: attribs! ! !MultiString methodsFor: 'converting' stamp: 'tak 7/5/2004 15:47'! encodeForHTTP self flag: #toBeImplemented. ^ (self convertToWithConverter: EUCJPTextConverter new) encodeForHTTP! ! !Scamper methodsFor: 'menus' stamp: 'tak 7/7/2004 09:48'! menu: menu shifted: shifted menu addList: #( ('back' back) ('forward' forward) - ('do it (d)' doIt) ('print it (p)' printIt) ('inspect it (i)' inspectIt) ('file it in (G)' fileItIn) ('debug it' debugIt) - ('new URL' jumpToNewUrl) ('history' displayHistory) - ('view source' viewSource) ('inspect parse tree' inspectParseTree) - ('go to start page' visitStartPage) ('edit start page' editStartPage) ('bookmark' bookmark) ('translate' translateIt) ). downloadingProcess ifNotNil: [menu addList: #( - ('stop downloading' stopEverything) )]. ^ menu! ! !Scamper methodsFor: 'changing page' stamp: 'tak 6/23/2004 22:05'! jumpToAbsoluteUrl: urlText "start downloading a new page. The page source is downloaded in a background thread" | newUrl newSource morph | self stopEverything. "get the new url" newUrl _ urlText asUrl. "if it fundamentally doesn't fit the pages-and-contents model used internally, spawn off an external viewer for it" newUrl hasContents ifFalse: [ newUrl activate. ^true ]. "fork a Process to do the actual downloading, parsing, and formatting. It's results will be picked up in #step" self status: ('downloading {1}...' translated format:{newUrl toText}). downloadingProcess _ [ newSource _ [ newUrl retrieveContentsForBrowser: self ] ifError: [ :msg :ctx | MIMEDocument contentType: 'text/plain' content: msg ]. newSource ifNil: [ newSource _ MIMEDocument contentType: 'text/plain' content: 'Error retrieving this URL' translated]. newSource url ifNil: [ newSource _ MIMEDocument contentType: newSource contentType content: newSource content url: newUrl ]. documentQueue nextPut: newSource. downloadingProcess _ nil. ] newProcess. downloadingProcess resume. [recentDocuments size > currentUrlIndex] whileTrue: [ "delete all elements in recentDocuments after currentUrlIndex" recentDocuments removeLast. ]. currentUrlIndex _ currentUrlIndex + 1. morph _ self containingWindow. morph ifNotNil: [morph start]. ^true! ! !Scamper methodsFor: 'document handling' stamp: 'tak 4/26/2005 15:25'! displayTextHtmlPage: newSource "HTML page--format it" | formatter bgimageUrl bgimageDoc bgimage | currentUrl _ newSource url. pageSource _ newSource content. self status: 'parsing...' translated. document _ HtmlParser parse: (ReadStream on: pageSource). document replaceAllTextsToMultiString. self status: 'laying out...' translated. formatter _ HtmlFormatter preferredFormatterClass new. formatter browser: self. formatter baseUrl: currentUrl. document addToFormatter: formatter. formattedPage _ formatter text. (bgimageUrl _ document body background) ifNotNil: [bgimageDoc _ (bgimageUrl asUrlRelativeTo: currentUrl) retrieveContents. [bgimage _ ImageReadWriter formFromStream: bgimageDoc contentStream binary] ifError: [:err :rcvr | "ignore" bgimage _ nil]]. bgimage ifNotNil: [backgroundColor _ bgimage] ifNil: [backgroundColor _ Color fromString: document body bgcolor]. currentUrl fragment ifNil: [ currentAnchorLocation _ nil ] ifNotNil: [ currentAnchorLocation _ formatter anchorLocations at: currentUrl fragment asLowercase ifAbsent: [ nil ] ]. self startDownloadingMorphState: (formatter incompleteMorphs). self changeAll: #(currentUrl relabel hasLint lint backgroundColor formattedPage formattedPageSelection). self status: 'done.' translated. "pardon this horrible hack...(tk)" (currentUrl authority beginsWith: 'ets.freetranslation.com') ifTrue: [ self status: 'done. **** Please Scroll Down To See Your Results ****' translated]. ^true! ! !Scamper methodsFor: 'not yet categorized' stamp: 'tak 5/12/2004 16:17'! doItReceiver ^self! ! !Scamper methodsFor: 'bindings' stamp: 'tak 5/9/2004 16:08'! bindingOf: aString bindings isNil ifTrue: [bindings _ Dictionary new]. (bindings includesKey: aString) ifFalse: [bindings at: aString put: nil]. ^bindings associationAt: aString! ! !ShiftJISTextConverter class methodsFor: 'utilities' stamp: 'ich. 4/11/2004 00:33'! encodingNames ^ #('shift-jis' 'shift_jis' 'sjis', 'x-sjis') copy ! ! !ShiftJISTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:52'! checkCode: upperCharacter followed: lowerCharacter | u l | u _ upperCharacter asInteger. u < 16r80 ifTrue: [^ true]. l _ lowerCharacter asInteger. ((u between: 16r81 and: 16r9F) | (u between: 16rE0 and: 16rFF)) & ((l between: 16r40 and: 16rFC) & (l ~= 16r7F)) ifTrue: [^ true]. ^ false. ! ! !ShiftJISTextConverter class methodsFor: 'as yet unclassified' stamp: 'ich. 5/1/2004 16:30'! isFirstByte: aCharacter ^ (aCharacter asInteger between: 16r81 and: 16r9F) | (aCharacter asInteger between: 16rE0 and: 16rEF). ! ! !String methodsFor: 'testing' stamp: 'ich. 5/8/2004 16:22'! mayBeJIS | esc | esc _ Character escape asString. ^ (self includesSubString: esc , '$@') or: [(self includesSubString: esc , '$B') or: [self includesSubString: esc , '$(D']]. ! ! !String methodsFor: 'encoding' stamp: 'ich. 5/1/2004 18:16'! guessJapaneseEncoding | stream isEuc isSjis eucFirstByte sjisFirstByte | self mayBeJIS ifTrue: [^ 'iso-2022-jp']. stream _ ReadStream on: self. isEuc _ nil. isSjis _ nil. eucFirstByte _ nil. sjisFirstByte _ nil. [stream atEnd] whileFalse: [| char | char _ stream next. eucFirstByte ifNil: [(EUCJPTextConverter isFirstByte: char) ifTrue: [eucFirstByte _ char. isEuc ifNil: [isEuc _ true]]] ifNotNil: [(EUCJPTextConverter checkCode: eucFirstByte followed: char) ifFalse: [isEuc _ false]. eucFirstByte _ nil]. sjisFirstByte ifNil: [(ShiftJISTextConverter isFirstByte: char) ifTrue: [sjisFirstByte _ char. isSjis ifNil: [isSjis _ true]]] ifNotNil: [(ShiftJISTextConverter checkCode: sjisFirstByte followed: char) ifFalse: [isSjis _ false]. sjisFirstByte _ nil]]. (isEuc isNil & isSjis isNil) ifTrue: [^ nil]. "It means ASCII." (isEuc notNil and: [isEuc]) ifTrue: [^ 'eucjp']. "EUC has high priority." (isSjis notNil and: [isSjis]) ifTrue: [^ 'sjis']. ^ nil. "unrecognized code" ! ! Model subclass: #Scamper instanceVariableNames: 'status currentUrl pageSource document formattedPage downloadingProcess documentQueue recentDocuments currentAnchorLocation currentUrlIndex backgroundColor bookmark bookDir bindings' classVariableNames: 'StartUrl' poolDictionaries: '' category: 'Network-Web Browser'!