#lang racket
(require html-parsing
sxml
csv-writing)
;; パス設定
(define *base*
(current-directory
(string->path "D:\\gooBlog引越しテスト")))
;; デフォルトファイル名
;; 以下は変更してO.K.
"log.csv")
;; ヘッダ
(define *header* '(記事タイトル 投稿日時 枚数 写真アドレス))
;; ログ生成器
(define (data-maker fname)
(with-input-from-file fname
(lambda ()
(let loop ((c (read-line)) (title '()) (date '()) (html '()) (acc '()))
(with-handlers ((exn:fail:contract?
(lambda (e) (loop (read-line) title date html acc))))
;; ファイル末尾に到達したらデータテーブルを出力
(cond ((eof-object? c)
(unless (file-exists? *log*)
(system (format "type null > ~a" *log*)))
(with-output-to-file *log*
(lambda ()
(let ((table (cons *header* (reverse acc))))
(display-table table)))
#:exists 'update))
;; 基本的な構造はphoto_downloaderと同じ
(else
(let ((lst (string-split c ":")))
(let ((head (string-trim (car lst)))
(tail (string-trim (string-join (cdr lst)))))
(case head
(("AUTHOR" "PRIMARY CATEGORY" "STATUS" "ALLOW COMMENTS" "CONVERT BREAKS" "COMMENT" "-----")
(loop (read-line) title date html acc))
(("TITLE")
(loop (read-line) (cons tail title) date html acc))
(("DATE")
(loop (read-line) title (cons tail date) html acc))
(("BODY")
(loop (read-line) title date (cons tail html) acc))
(("--------") (let ((urls
(map cadr
((sxpath '(// img @ src))
(html->xexp (string-join (reverse html)))))))
(loop (read-line) '() '() '() (cons `(,(car title) ,(car date) ,(length urls) ,@urls) acc))))
(else (loop (read-line) title date (cons c html) acc))))))))))))
(module+ main
(data-maker (vector-ref (current-command-line-arguments) 0)))