Appending a column in xlsx file using Python


 
Thread Tools Search this Thread
Top Forums Programming Appending a column in xlsx file using Python
# 8  
Old 06-23-2017
Hmm... the indentation seems a bit awry.

Quote:
Originally Posted by nans
...
...
Code:
import os
from openpyxl import load_workbook
from datetime import datetime
import csv
  
# Variables
sheet_directory = r'/home/test'
text_directory = r'/home/test'
  
# Subroutines
def get_text_data(txt_filename):
    """Intended to build a lookup dictionary from a tab-separated text file.

    As posted, the function always opens the hard-coded file 'scores.txt';
    the ``txt_filename`` argument is rebound to that file handle by the
    ``with`` statement, so the value passed by the caller is never used.

    NOTE(review): because of the indentation below, every statement after
    ``continue`` sits inside the ``if first_line`` branch and is therefore
    unreachable. ``dict_pos`` is never filled and the function falls off
    the end, implicitly returning None.
    """
    dict_pos = {}
    first_line = True
    with open('scores.txt') as txt_filename:
        tab_reader = csv.reader(txt_filename, delimiter='\t')
        for line in tab_reader:
            if first_line:
                first_line = False
                continue
                # Reviewer: move this line and the next two one level out,
                # so they line up with "if first_line" inside the "for" loop.
                line = line.rstrip('\n')
                x = line.split('\t')
                dict_pos[x[0]] = x[3]
                # Reviewer: move the two lines below three levels out, so the
                # "return" is at subroutine level (one level inside "def").
                #print dict_pos
                return dict_pos


def process_xl_sheets():
    """Annotate column V of the 'raw_data' sheet in each .xlsx file found.

    Walks sheet_directory; for every file name ending in '.xlsx' it calls
    get_text_data() with the corresponding '.txt' name, then reads column F
    from row 4 downwards while the cell value is truthy, writing either the
    looked-up value or an 'Unknown_<Month><Year>' marker into column V.

    NOTE(review): as indented here, the row advance and wb.save() sit
    inside the "else" branch. A row whose value IS found in dpos never
    advances, so the "while" loop repeats on the same cell, and the
    workbook is saved once per unmatched row rather than once per file.
    """
    for sheet_root, sheet_dirs, sheet_files in os.walk(sheet_directory):
        for sheet_file in sheet_files:
            if sheet_file.endswith('.xlsx'):
                # Lookup table comes from the text file with the same base name
                dpos = get_text_data(sheet_file.replace('.xlsx', '.txt'))
                sheet_xl_file = os.path.join(sheet_root, sheet_file)
                wb = load_workbook(sheet_xl_file, data_only=True)
                ws = wb.get_sheet_by_name('raw_data')
                pos_col_no = 'F'      # column holding the lookup key
                score_col_no = 'V'    # column that receives the result
                row_no = 4            # first row that is read
                cell = ws[pos_col_no + str(row_no)]
                while cell.value:
                    if str(cell.value) in dpos:
                        ws[score_col_no + str(row_no)] = dpos[str(cell.value)]
                    else:
                        # No match: write a marker such as 'Unknown_June2017'
                        ws[score_col_no + str(row_no)] = 'Unknown_' + datetime.now().strftime("%B") + datetime.now().strftime("%Y")
                        # NOTE(review): the next three lines run only in the "else" branch
                        row_no += 1
                        cell = ws[pos_col_no + str(row_no)]
                        wb.save(sheet_xl_file)

                # Main section
process_xl_sheets()

Here's my code for reference. Check the indentation level and the comments:

Code:
# Subroutines
def get_text_data(txt_filename):
    """Build a lookup dictionary from every text file named txt_filename.

    Walks text_directory recursively. For each file whose name equals
    txt_filename, the header line is skipped and the first tab-separated
    field of every remaining line is mapped to the fourth field.

    Args:
        txt_filename: Base name (no directory part) of the text file(s).

    Returns:
        A dict mapping x[0] to x[3] for every data line read; an empty dict
        if no matching file was found.

    Raises:
        IndexError: If a data line has fewer than four tab-separated fields.
    """
    dict_pos = {}
    for text_root, text_dirs, text_files in os.walk(text_directory):
        for text_file in text_files:
            if text_file != txt_filename:
                continue
            # A matching text file was found.
            # Reset the flag for each file so that every file's header is
            # skipped, not only the header of the first file found.
            first_line = True
            # "with" closes the file handle even if a malformed line raises.
            with open(os.path.join(text_root, text_file)) as fh:
                for line in fh:
                    # Skip the header; read the data into the dictionary
                    if first_line:                 # One level inside "for line" loop
                        first_line = False         # One level inside "if first_line" branch
                        continue                   # One level inside "if first_line" branch
                    line = line.rstrip('\n')       # One level inside "for line" loop
                    x = line.split('\t')           # One level inside "for line" loop
                    dict_pos[x[0]] = x[3]          # One level inside "for line" loop
    return dict_pos                                # One level inside "def"; this is at subroutine level

In your code, "first_line" is True when the subroutine starts, so the "if first_line" branch is entered for the first line of "tab_reader".
Inside that branch you set it to False and "continue", but all the remaining statements are also indented inside that "if" branch, after the "continue", so they never run.
From the second line of "tab_reader" onwards, control never enters the "if" branch at all, because "first_line" was already set to False on the first line.

---------- Post updated at 12:13 PM ---------- Previous update was at 12:08 PM ----------

Also check the level of "wb.save()" in your code. It should be at the same level as "while cell.value".
This User Gave Thanks to durden_tyler For This Post:
# 9  
Old 06-24-2017
It still doesn't work for me. The code runs but doesn't give any output at all, and it doesn't print 'dict_pos' either. I have attached the code I am using now.


Code:
#!/usr/bin/python

import sys
sys.path.append('/usr/local/lib/python2.7/site-packages')

import os
from openpyxl import load_workbook
from datetime import datetime
import csv

# Variables
sheet_directory = r'/home/test'
text_directory = r'/home/test'

# Subroutines
def get_text_data(txt_filename):
    """Intended to build a lookup dictionary from a tab-separated text file.

    Opens the hard-coded file 'scores.txt'; the ``txt_filename`` argument is
    rebound to that file handle by the ``with`` statement, so the value
    passed by the caller is never used.

    NOTE(review): csv.reader yields each row as a list of fields, not a
    string, so ``line.rstrip('\\n')`` raises
    "AttributeError: 'list' object has no attribute 'rstrip'" on the first
    data row. The row is already split; it plays the role that ``x`` is
    meant to play below.
    """
    dict_pos = {}
    first_line = True
    with open('scores.txt') as txt_filename:
        tab_reader = csv.reader(txt_filename, delimiter='\t')
        for line in tab_reader:
            # Skip the header row
            if first_line:
                first_line = False
                continue
            line = line.rstrip('\n')    # fails: "line" is a list here
            x = line.split('\t')
            dict_pos[x[0]] = x[3]       # first field -> fourth field
        print dict_pos          # Python 2 print statement (debug output)
        return dict_pos


def process_xl_sheets():
    """Fill column V of the 'raw_data' sheet in every .xlsx file found.

    Walks sheet_directory recursively. For each file name ending in '.xlsx':
    the lookup dictionary is obtained from get_text_data() using the same
    file name with '.xlsx' replaced by '.txt'; column F is then read from
    row 4 downwards while the cell value is truthy, and column V of the same
    row receives either the looked-up value or an 'Unknown_<Month><Year>'
    marker. The workbook is saved back to the same path once per file.

    Raises:
        KeyError: If a workbook has no sheet named 'raw_data'.
    """
    pos_col_no = 'F'      # column holding the lookup key
    score_col_no = 'V'    # column that receives the result
    # Built once per call so every unmatched row gets the same label.
    unknown_label = 'Unknown_' + datetime.now().strftime("%B%Y")

    for sheet_root, sheet_dirs, sheet_files in os.walk(sheet_directory):
        for sheet_file in sheet_files:
            if not sheet_file.endswith('.xlsx'):
                continue
            dpos = get_text_data(sheet_file.replace('.xlsx', '.txt'))
            sheet_xl_file = os.path.join(sheet_root, sheet_file)
            # NOTE(review): data_only=True loads cached values instead of
            # formulas; saving afterwards presumably drops any formulas in
            # the workbook -- confirm this is acceptable.
            wb = load_workbook(sheet_xl_file, data_only=True)
            # Index access replaces the deprecated get_sheet_by_name().
            ws = wb['raw_data']
            row_no = 4
            cell = ws[pos_col_no + str(row_no)]
            while cell.value:
                ws[score_col_no + str(row_no)] = dpos.get(str(cell.value), unknown_label)
                # Advance for EVERY row, matched or not; doing this only in
                # the "else" branch loops forever on the first matched row.
                row_no += 1
                cell = ws[pos_col_no + str(row_no)]
            wb.save(sheet_xl_file)

# Main section
process_xl_sheets()

# 10  
Old 06-25-2017
Quote:
Originally Posted by nans
It still doesn't work for me. The code runs but doesn't give any output at all, and it doesn't print 'dict_pos' either. I have attached the code I am using now.
...
...
Does it throw any error messages?
# 11  
Old 06-25-2017
Quote:
Originally Posted by durden_tyler
Does it throw any error messages?
No error msg.
# 12  
Old 06-25-2017
My hunch is that:
(a) either there are no ".xlsx" files in "/home/test" or
(b) there are ".xlsx" files in "/home/test" but the Python script does not have the privilege to write to them

Print the value of "sheet_file" right after the "if sheet_file.endswith()" condition, run the Python program from the command line and post the result here (select, copy, paste from your terminal window.)
# 13  
Old 06-26-2017
Following your suggestion, I now get the errors below.
All the files and scripts are in the same directory. I've also made sure the xlsx file has the right permissions.
Code:
Traceback (most recent call last):
  File "/home/test/annotate.py", line 53, in <module>
    process_xl_sheets()
  File "/home/test/annotate.py", line 35, in process_xl_sheets
    dpos = get_text_data(sheet_file.replace('.xlsx', '.txt'))
  File "/home/test/annotate.py", line 23, in get_text_data
    line = line.rstrip('\n')
AttributeError: 'list' object has no attribute 'rstrip'

# 14  
Old 06-26-2017
Quote:
Originally Posted by nans
Following your suggestion, I now get the errors below.
All the files and scripts are in the same directory. I've also made sure the xlsx file has the right permissions.
Code:
Traceback (most recent call last):
  File "/home/test/annotate.py", line 53, in <module>
    process_xl_sheets()
  File "/home/test/annotate.py", line 35, in process_xl_sheets
    dpos = get_text_data(sheet_file.replace('.xlsx', '.txt'))
  File "/home/test/annotate.py", line 23, in get_text_data
    line = line.rstrip('\n')
AttributeError: 'list' object has no attribute 'rstrip'

You are getting the "list has no attribute rstrip" error because you are trying to use the "rstrip()" function on the list (or array) called "line".

The "rstrip('\n')" function removes the trailing newline ('\n') characters from a string. It cannot work on an array. (What are the trailing characters of an array?)
It is documented here: 7.1. string — Common string operations — Python 2.7.13 documentation

Notice that "line" in your code is a list (or array). But "line" in my code is a string. That's because you are reading your file using csv.reader which returns a reader object. And when you iterate through that reader object, each iterator variable is a list (or array).
It is documented here: 13.1. csv — CSV File Reading and Writing — Python 2.7.13 documentation

To give a concrete example, if a tab-separated file looks like this:

Code:
$
$ cat -n countries.txt
    1  Continent       Country Capital
    2  Europe  Germany Berlin
    3  North America   Canada  Ottawa
    4  Africa  Namibia Windhoek
    5  Asia    Japan   Tokyo
$
$

then my code does something like this (check the comments):

Code:
>>>
>>>
>>> fh = open('countries.txt')
>>> for line in fh:
...     print 'line is a string: ==>|', line, '|<=='   # 'line' is a string with a newline character at the end
...     line = line.rstrip('\n')                       # Strip the newline character at the end of the string 'line'
...     x = line.split('\t')                           # Now split the string 'line' on the Tab character ('\t') to obtain the list (or array) 'x'
...     print 'x is an array:    ==>|', x, '|<==\n\n'  # Print the list (or array) 'x'
...
line is a string: ==>| Continent        Country Capital
|<==
x is an array:    ==>| ['Continent', 'Country', 'Capital'] |<==
 
line is a string: ==>| Europe   Germany Berlin
|<==
x is an array:    ==>| ['Europe', 'Germany', 'Berlin'] |<==
 
line is a string: ==>| North America    Canada  Ottawa
|<==
x is an array:    ==>| ['North America', 'Canada', 'Ottawa'] |<==
 
line is a string: ==>| Africa   Namibia Windhoek
|<==
x is an array:    ==>| ['Africa', 'Namibia', 'Windhoek'] |<==
 
line is a string: ==>| Asia     Japan   Tokyo
|<==
x is an array:    ==>| ['Asia', 'Japan', 'Tokyo'] |<==
 
>>>
>>>
>>>

And your code does something like this (check the comments):

Code:
>>>
>>> import csv
>>> with open('countries.txt') as txt_filename:
...     tab_reader = csv.reader(txt_filename, delimiter='\t')    # tab_reader is a reader object
...     print 'tab_reader is:        ==>|', tab_reader, '|<=='
...     for line in tab_reader:
...         print 'line is an array: ==>|', line, '|<=='         # 'line' is a list (or array)
...
tab_reader is:        ==>| <_csv.reader object at 0x000000000250D8E8> |<==
line is an array: ==>| ['Continent', 'Country', 'Capital'] |<==
line is an array: ==>| ['Europe', 'Germany', 'Berlin'] |<==
line is an array: ==>| ['North America', 'Canada', 'Ottawa'] |<==
line is an array: ==>| ['Africa', 'Namibia', 'Windhoek'] |<==
line is an array: ==>| ['Asia', 'Japan', 'Tokyo'] |<==
>>>
>>>

So, the inference is that "x" in my code is the same as "line" in your code.
I hope you know how to proceed from here.
Try to build the dictionary "dict_pos" yourself, and post your attempt if you cannot make it work.
This User Gave Thanks to durden_tyler For This Post:
Login or Register to Ask a Question

Previous Thread | Next Thread

10 More Discussions You Might Find Interesting

1. UNIX for Beginners Questions & Answers

How to insert data into black column( Secound Column ) in excel (.XLSX) file using shell script?

Source Code of the original script is down below please run the script and try to solve this problem this is my data and I want it column wise 2019-03-20 13:00:00:000 2019-03-20 15:00:00:000 1 Operating System LAB 0 1 1 1 1 1 1 1 1 1 0 1 (5 Replies)
Discussion started by: Shubham1182
5 Replies

2. Shell Programming and Scripting

Python soap and string to .xlsx conversion

Hi experts - I'm relatively new to python, but I have an requirement to automate getting a file from a WebLib server using an API. The file I'm requesting from this sever is an excel spreadsheet (.xlsx). I get a valid response back via an xml doc from the server. In this xml file I get... (8 Replies)
Discussion started by: timj123
8 Replies

3. Shell Programming and Scripting

Appending = in particular column in csv file

Hi, I have a requirement to append = in particular row in csv file. Data in csv is as follow: row1,a,a,a row2,b,b,b row3,c,c,c row4,d,d,d csv should be modified at row3 and no. of columns are not fixed but rows are. output should be as: row1,a,a,a row2,b,b,b row3,=c,=c,=c... (2 Replies)
Discussion started by: Divya1987
2 Replies

4. Shell Programming and Scripting

Appending column to rows

Hi All, Input.txt KGO Id "003" .......... .......... Par "CPara" BIN RECGET Name "DIR_PATH" Prompt "DIR_PATH" END RECGET KGO ............ .......... ............... KGO Id "077" .......... .......... (7 Replies)
Discussion started by: unme
7 Replies

5. UNIX for Dummies Questions & Answers

Appending a column of numbers in ascending order to a text file

I have a text file where I want to append a column of numbers in ascending orders. Input: 57 abc 25 def 32 ghi 54 jkl Output:57 abc 57 abc 1 25 def 2 32 ghi 3 54 jkl 4 How do I go about doing that? Thanks! (11 Replies)
Discussion started by: evelibertine
11 Replies

6. Shell Programming and Scripting

Appending new column to existing files

Hi, i want to add another column to existing files containing strings and need to have the final output as a csv file. i have quite a number of files, each with varying number of rows and i need to append the string "test" for all the valid rows for each file. my sample raw files looks like this... (8 Replies)
Discussion started by: ida1215
8 Replies

7. UNIX for Dummies Questions & Answers

Appending date value mmdd to first column in file

Hi , I have a file with a running sequence number. I need to append a date value mmdd format on to the first column. for e.g.: The file contains records as 001 abc 002 cde 003 edf 004 fgh 005 hik The output should be 1111001 abc 1111002 cde 1111003 edf 1111004 ... (1 Reply)
Discussion started by: kalyansid
1 Replies

8. Shell Programming and Scripting

appending column file

Hi all, I have two files with the same number of lines the first file is a.dat and looks like 0.000 1.000 1.000 2.000 ... the fields are tab separated the second file is b.dat and looks like 1.2347 0.546 2.3564 0.321 ... the fields are tab separated I would like to have a file c.dat... (4 Replies)
Discussion started by: f_o_555
4 Replies

9. Shell Programming and Scripting

Appending 'string' to file as first column.

Hi , I have the below file with 6 columns.I want to append 'File1' as the 1 column to the file. i have the sample code .It is not working . can u please correct this or make new one ..... awk 'print {'File1',$1,$2,$3,$4,$5,$6}' Source_File> Result_File Source_File:... (6 Replies)
Discussion started by: satyam_sat
6 Replies

10. Shell Programming and Scripting

Appending a column in one file to the corresponding line in a second

It appears that this has been asked and answered in similar fashions previously, but I am still unsure how to approach this. I have two files containing user information: fileA ttim:/home/ttim:Tiny Tim:632 ppinto:/home/ppinto:Pam Pinto:633 fileB ttim:xkfgjkd*&#^jhdfh... (3 Replies)
Discussion started by: suzannef
3 Replies
Login or Register to Ask a Question