Fanbox downloader
Revisión | 03deff2667d2360ed3e4d333ea2f8c801370e41a (tree) |
---|---|
Tiempo | 2023-11-24 20:11:35 |
Autor | supercell <stigma@disr...> |
Committer | supercell |
start work on HTML version of contents
A little late-night work. Going to create an additional content.html file,
which will mimic the content.txt file but contain HTML formatting and
make use of images and links.
@@ -342,6 +342,50 @@ def shared_download(url, output_filename, headers = dict()): | ||
342 | 342 | os.rename(partial_filename, output_filename) |
343 | 343 | return True |
344 | 344 | |
def print_embedded_preamble(post_info, embed_file):
    """
    Print the opening HTML and basic styling for a post to *embed_file*.

    Writes the doctype, the ``<head>`` (charset, title and a minimal
    stylesheet), the opening ``<body>`` tag, an ``<h1>`` holding the post
    title and one line with the publication date and the visibility.
    The ``<body>`` and ``<html>`` elements are left open by this function.

    *post_info* must provide the keys 'title', 'publishedDatetime' and
    'feeRequired'.  *embed_file* is a writable text file object.
    """
    import calendar
    import time
    try:
        from html import escape as html_escape
    except ImportError:
        # Python 2 has no html module; cgi.escape handles &, < and >.
        from cgi import escape as html_escape

    post_title = post_info['title']
    if sys.version_info[0] == 2:
        post_title = post_title.encode('utf-8')
    # The title is taken from the post data and is placed inside <title>
    # and <h1>, so markup characters must be escaped.
    post_title = html_escape(post_title)

    JST_OFFSET_SECONDS = (9 * 60 * 60)  # seconds in 9 hours
    # NOTE(review): the 9 hour subtraction assumes the parsed value is a
    # naive datetime expressed in JST -- confirm against
    # py2compat_datetime_strptime.
    post_date_obj = py2compat_datetime_strptime(post_info['publishedDatetime'])
    post_date_obj_utc = post_date_obj - timedelta(seconds=JST_OFFSET_SECONDS)
    # time.timezone is the non-DST offset only.  Going through the epoch
    # and time.localtime() applies whatever offset (DST or not) was in
    # effect locally at the post's date.
    post_epoch = calendar.timegm(post_date_obj_utc.timetuple())
    post_date = html_escape(time.strftime('%c', time.localtime(post_epoch)))

    post_fee = post_info['feeRequired']
    visibility_message = 'All users'
    if post_fee != 0:
        visibility_message = '{} JPY'.format(post_fee)
    visibility_message = html_escape(visibility_message)

    print('<!DOCTYPE html>', file=embed_file)
    print('<html>', file=embed_file)
    print('<head>', file=embed_file)
    print('<meta charset="utf-8">', file=embed_file)
    print('<title>{}</title>'.format(post_title), file=embed_file)
    print('<style type="text/css">', file=embed_file)
    print('body { font-family: sans-serif; }', file=embed_file)
    print('</style>', file=embed_file)
    print('</head>', file=embed_file)
    print('<body>', file=embed_file)
    print('<h1>{}</h1>'.format(post_title), file=embed_file)
    print('<p><small>{} · {}</small></p>'.format(post_date,
        visibility_message), file=embed_file)
379 | + | |
380 | + | |
def handle_embedded_blocks(post_info, block_info, embed_file):
    """
    Format one embedded-content block of a post for the HTML output.

    Intended to print the formatting of *block_info* to *embed_file*.
    Currently a placeholder: no argument is read and nothing is written.
    """
    return None
388 | + | |
345 | 389 | def download_article(config, info): |
346 | 390 | # Create directory |
347 | 391 | print('[article]: Creating directory') |
@@ -359,6 +403,11 @@ def download_article(config, info): | ||
359 | 403 | url_embeds_file = open(url_embeds_file_path, 'w') |
360 | 404 | content_file_path = os.path.join(directory, 'content.txt') |
361 | 405 | content_file = open(content_file_path, 'w') |
406 | + | |
407 | + # Have we already printed <head>, <title>, etc. for embedded | |
408 | + # posts? | |
409 | + printed_embedded_preamble = False | |
410 | + | |
362 | 411 | for block in info['body']['blocks']: |
363 | 412 | if block['type'] == 'p': |
364 | 413 | if sys.version_info[0] == 2: |
@@ -383,6 +432,10 @@ def download_article(config, info): | ||
383 | 432 | elif block['type'] == 'image': |
384 | 433 | print('[image:{}]'.format(block['imageId']), file=content_file) |
385 | 434 | elif block['type'] == 'url_embed': |
435 | + if printed_embedded_preamble is not True: | |
436 | + print_embedded_preamble(info, url_embeds_file) | |
437 | + printed_embedded_preamble = True | |
438 | + handle_embedded_blocks(info, block, url_embeds_file) | |
386 | 439 | embedId = block['urlEmbedId'] |
387 | 440 | # Print embed to HTML file |
388 | 441 | print('<h2>' + embedId + '</h2>', file=url_embeds_file) |
@@ -418,6 +471,7 @@ def download_article(config, info): | ||
418 | 471 | print("[article]: %s" % msg) |
419 | 472 | log(msg, "article") |
420 | 473 | |
474 | + print('</body>', file=url_embeds_file) | |
421 | 475 | url_embeds_file.close() |
422 | 476 | if os.stat(url_embeds_file_path).st_size == 0: |
423 | 477 | os.remove(url_embeds_file_path) |